{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 11184,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 8.94134477825465e-05,
      "grad_norm": 1.3380895730403135,
      "learning_rate": 5.952380952380952e-07,
      "loss": 1.0126,
      "step": 1
    },
    {
      "epoch": 0.000178826895565093,
      "grad_norm": 1.0250040044300388,
      "learning_rate": 1.1904761904761904e-06,
      "loss": 1.0498,
      "step": 2
    },
    {
      "epoch": 0.0002682403433476395,
      "grad_norm": 1.1475173384550095,
      "learning_rate": 1.7857142857142857e-06,
      "loss": 1.0589,
      "step": 3
    },
    {
      "epoch": 0.000357653791130186,
      "grad_norm": 1.02621803088714,
      "learning_rate": 2.3809523809523808e-06,
      "loss": 1.0841,
      "step": 4
    },
    {
      "epoch": 0.0004470672389127325,
      "grad_norm": 1.0093526906134724,
      "learning_rate": 2.9761904761904763e-06,
      "loss": 1.0293,
      "step": 5
    },
    {
      "epoch": 0.000536480686695279,
      "grad_norm": 1.142872173883972,
      "learning_rate": 3.5714285714285714e-06,
      "loss": 1.0226,
      "step": 6
    },
    {
      "epoch": 0.0006258941344778255,
      "grad_norm": 1.2009455475427864,
      "learning_rate": 4.166666666666667e-06,
      "loss": 1.0476,
      "step": 7
    },
    {
      "epoch": 0.000715307582260372,
      "grad_norm": 0.8414059825360085,
      "learning_rate": 4.7619047619047615e-06,
      "loss": 1.0036,
      "step": 8
    },
    {
      "epoch": 0.0008047210300429185,
      "grad_norm": 0.9070756114619443,
      "learning_rate": 5.357142857142857e-06,
      "loss": 1.0229,
      "step": 9
    },
    {
      "epoch": 0.000894134477825465,
      "grad_norm": 0.6058385638431223,
      "learning_rate": 5.9523809523809525e-06,
      "loss": 0.9767,
      "step": 10
    },
    {
      "epoch": 0.0009835479256080114,
      "grad_norm": 0.6879357467984415,
      "learning_rate": 6.547619047619048e-06,
      "loss": 1.017,
      "step": 11
    },
    {
      "epoch": 0.001072961373390558,
      "grad_norm": 0.4278074250669813,
      "learning_rate": 7.142857142857143e-06,
      "loss": 1.0618,
      "step": 12
    },
    {
      "epoch": 0.0011623748211731044,
      "grad_norm": 0.30749598296408615,
      "learning_rate": 7.738095238095238e-06,
      "loss": 0.9449,
      "step": 13
    },
    {
      "epoch": 0.001251788268955651,
      "grad_norm": 0.32291505466984455,
      "learning_rate": 8.333333333333334e-06,
      "loss": 0.9529,
      "step": 14
    },
    {
      "epoch": 0.0013412017167381974,
      "grad_norm": 0.3030106421942405,
      "learning_rate": 8.92857142857143e-06,
      "loss": 0.9761,
      "step": 15
    },
    {
      "epoch": 0.001430615164520744,
      "grad_norm": 0.2847457564518435,
      "learning_rate": 9.523809523809523e-06,
      "loss": 0.9705,
      "step": 16
    },
    {
      "epoch": 0.0015200286123032904,
      "grad_norm": 0.27461646470802675,
      "learning_rate": 1.011904761904762e-05,
      "loss": 0.9993,
      "step": 17
    },
    {
      "epoch": 0.001609442060085837,
      "grad_norm": 0.22459664444338656,
      "learning_rate": 1.0714285714285714e-05,
      "loss": 0.9317,
      "step": 18
    },
    {
      "epoch": 0.0016988555078683834,
      "grad_norm": 0.24373822011391716,
      "learning_rate": 1.130952380952381e-05,
      "loss": 1.0097,
      "step": 19
    },
    {
      "epoch": 0.00178826895565093,
      "grad_norm": 0.19738296333260807,
      "learning_rate": 1.1904761904761905e-05,
      "loss": 0.8926,
      "step": 20
    },
    {
      "epoch": 0.0018776824034334764,
      "grad_norm": 0.19974536824274355,
      "learning_rate": 1.25e-05,
      "loss": 0.9021,
      "step": 21
    },
    {
      "epoch": 0.001967095851216023,
      "grad_norm": 0.19261677141681324,
      "learning_rate": 1.3095238095238096e-05,
      "loss": 0.955,
      "step": 22
    },
    {
      "epoch": 0.0020565092989985696,
      "grad_norm": 0.17554440665104032,
      "learning_rate": 1.3690476190476192e-05,
      "loss": 0.8687,
      "step": 23
    },
    {
      "epoch": 0.002145922746781116,
      "grad_norm": 0.18562404188966203,
      "learning_rate": 1.4285714285714285e-05,
      "loss": 0.9262,
      "step": 24
    },
    {
      "epoch": 0.0022353361945636626,
      "grad_norm": 0.17844785265242513,
      "learning_rate": 1.4880952380952381e-05,
      "loss": 0.9053,
      "step": 25
    },
    {
      "epoch": 0.002324749642346209,
      "grad_norm": 0.17964001363202986,
      "learning_rate": 1.5476190476190476e-05,
      "loss": 0.9349,
      "step": 26
    },
    {
      "epoch": 0.0024141630901287556,
      "grad_norm": 0.18209586946787387,
      "learning_rate": 1.6071428571428572e-05,
      "loss": 0.9346,
      "step": 27
    },
    {
      "epoch": 0.002503576537911302,
      "grad_norm": 0.17632424859436938,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.9194,
      "step": 28
    },
    {
      "epoch": 0.0025929899856938486,
      "grad_norm": 0.190207207146781,
      "learning_rate": 1.7261904761904763e-05,
      "loss": 0.9051,
      "step": 29
    },
    {
      "epoch": 0.002682403433476395,
      "grad_norm": 0.17095586870693202,
      "learning_rate": 1.785714285714286e-05,
      "loss": 0.9095,
      "step": 30
    },
    {
      "epoch": 0.0027718168812589416,
      "grad_norm": 0.16685920972774165,
      "learning_rate": 1.8452380952380954e-05,
      "loss": 0.8813,
      "step": 31
    },
    {
      "epoch": 0.002861230329041488,
      "grad_norm": 0.23261692525171823,
      "learning_rate": 1.9047619047619046e-05,
      "loss": 0.8891,
      "step": 32
    },
    {
      "epoch": 0.0029506437768240345,
      "grad_norm": 0.192311276472303,
      "learning_rate": 1.9642857142857145e-05,
      "loss": 0.9323,
      "step": 33
    },
    {
      "epoch": 0.003040057224606581,
      "grad_norm": 0.18603680489535224,
      "learning_rate": 2.023809523809524e-05,
      "loss": 0.9077,
      "step": 34
    },
    {
      "epoch": 0.0031294706723891275,
      "grad_norm": 0.19693906618139081,
      "learning_rate": 2.0833333333333336e-05,
      "loss": 0.9034,
      "step": 35
    },
    {
      "epoch": 0.003218884120171674,
      "grad_norm": 0.24330083287198428,
      "learning_rate": 2.1428571428571428e-05,
      "loss": 0.9006,
      "step": 36
    },
    {
      "epoch": 0.0033082975679542205,
      "grad_norm": 0.17677039903201588,
      "learning_rate": 2.2023809523809524e-05,
      "loss": 0.8683,
      "step": 37
    },
    {
      "epoch": 0.003397711015736767,
      "grad_norm": 0.1785508814732255,
      "learning_rate": 2.261904761904762e-05,
      "loss": 0.8841,
      "step": 38
    },
    {
      "epoch": 0.0034871244635193135,
      "grad_norm": 0.17130963265562274,
      "learning_rate": 2.3214285714285715e-05,
      "loss": 0.8721,
      "step": 39
    },
    {
      "epoch": 0.00357653791130186,
      "grad_norm": 0.13507794513026344,
      "learning_rate": 2.380952380952381e-05,
      "loss": 0.8351,
      "step": 40
    },
    {
      "epoch": 0.0036659513590844065,
      "grad_norm": 0.1955691209531565,
      "learning_rate": 2.4404761904761906e-05,
      "loss": 0.9041,
      "step": 41
    },
    {
      "epoch": 0.0037553648068669528,
      "grad_norm": 0.17938946376175904,
      "learning_rate": 2.5e-05,
      "loss": 0.8805,
      "step": 42
    },
    {
      "epoch": 0.0038447782546494995,
      "grad_norm": 0.14827766876662968,
      "learning_rate": 2.5595238095238093e-05,
      "loss": 0.8451,
      "step": 43
    },
    {
      "epoch": 0.003934191702432046,
      "grad_norm": 0.16258968815425687,
      "learning_rate": 2.6190476190476192e-05,
      "loss": 0.8745,
      "step": 44
    },
    {
      "epoch": 0.004023605150214592,
      "grad_norm": 0.19231003175621056,
      "learning_rate": 2.6785714285714288e-05,
      "loss": 0.9225,
      "step": 45
    },
    {
      "epoch": 0.004113018597997139,
      "grad_norm": 0.15834807020901728,
      "learning_rate": 2.7380952380952383e-05,
      "loss": 0.825,
      "step": 46
    },
    {
      "epoch": 0.0042024320457796855,
      "grad_norm": 0.1590700906209439,
      "learning_rate": 2.797619047619048e-05,
      "loss": 0.8418,
      "step": 47
    },
    {
      "epoch": 0.004291845493562232,
      "grad_norm": 0.16312067620504794,
      "learning_rate": 2.857142857142857e-05,
      "loss": 0.8531,
      "step": 48
    },
    {
      "epoch": 0.004381258941344778,
      "grad_norm": 0.18965418907486656,
      "learning_rate": 2.916666666666667e-05,
      "loss": 0.8423,
      "step": 49
    },
    {
      "epoch": 0.004470672389127325,
      "grad_norm": 0.19968579895838984,
      "learning_rate": 2.9761904761904762e-05,
      "loss": 0.8999,
      "step": 50
    },
    {
      "epoch": 0.0045600858369098714,
      "grad_norm": 0.20633417057159836,
      "learning_rate": 3.0357142857142857e-05,
      "loss": 0.8574,
      "step": 51
    },
    {
      "epoch": 0.004649499284692418,
      "grad_norm": 0.17586225413880222,
      "learning_rate": 3.095238095238095e-05,
      "loss": 0.8152,
      "step": 52
    },
    {
      "epoch": 0.004738912732474964,
      "grad_norm": 0.21896675419190548,
      "learning_rate": 3.154761904761905e-05,
      "loss": 0.8161,
      "step": 53
    },
    {
      "epoch": 0.004828326180257511,
      "grad_norm": 0.23134055852378677,
      "learning_rate": 3.2142857142857144e-05,
      "loss": 0.8657,
      "step": 54
    },
    {
      "epoch": 0.004917739628040057,
      "grad_norm": 0.21184750154675902,
      "learning_rate": 3.273809523809524e-05,
      "loss": 0.8815,
      "step": 55
    },
    {
      "epoch": 0.005007153075822604,
      "grad_norm": 0.1700900462324013,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 0.8257,
      "step": 56
    },
    {
      "epoch": 0.00509656652360515,
      "grad_norm": 0.21923127649758478,
      "learning_rate": 3.392857142857143e-05,
      "loss": 0.8348,
      "step": 57
    },
    {
      "epoch": 0.005185979971387697,
      "grad_norm": 0.22779402747914002,
      "learning_rate": 3.4523809523809526e-05,
      "loss": 0.8982,
      "step": 58
    },
    {
      "epoch": 0.005275393419170243,
      "grad_norm": 0.1877730770513562,
      "learning_rate": 3.511904761904762e-05,
      "loss": 0.8363,
      "step": 59
    },
    {
      "epoch": 0.00536480686695279,
      "grad_norm": 0.21099120787063008,
      "learning_rate": 3.571428571428572e-05,
      "loss": 0.8587,
      "step": 60
    },
    {
      "epoch": 0.005454220314735336,
      "grad_norm": 0.1904227409076542,
      "learning_rate": 3.630952380952381e-05,
      "loss": 0.8328,
      "step": 61
    },
    {
      "epoch": 0.005543633762517883,
      "grad_norm": 0.1867859596659735,
      "learning_rate": 3.690476190476191e-05,
      "loss": 0.8539,
      "step": 62
    },
    {
      "epoch": 0.005633047210300429,
      "grad_norm": 0.18336749443828393,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 0.832,
      "step": 63
    },
    {
      "epoch": 0.005722460658082976,
      "grad_norm": 0.3126398010876427,
      "learning_rate": 3.809523809523809e-05,
      "loss": 0.3161,
      "step": 64
    },
    {
      "epoch": 0.005811874105865522,
      "grad_norm": 0.19114734212424317,
      "learning_rate": 3.8690476190476195e-05,
      "loss": 0.8784,
      "step": 65
    },
    {
      "epoch": 0.005901287553648069,
      "grad_norm": 0.21525538004296862,
      "learning_rate": 3.928571428571429e-05,
      "loss": 0.8514,
      "step": 66
    },
    {
      "epoch": 0.005990701001430615,
      "grad_norm": 0.21747961202171764,
      "learning_rate": 3.9880952380952386e-05,
      "loss": 0.7936,
      "step": 67
    },
    {
      "epoch": 0.006080114449213162,
      "grad_norm": 0.17528038592132564,
      "learning_rate": 4.047619047619048e-05,
      "loss": 0.8242,
      "step": 68
    },
    {
      "epoch": 0.006169527896995708,
      "grad_norm": 0.18346939214362257,
      "learning_rate": 4.107142857142857e-05,
      "loss": 0.8461,
      "step": 69
    },
    {
      "epoch": 0.006258941344778255,
      "grad_norm": 0.19572477626636944,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.8194,
      "step": 70
    },
    {
      "epoch": 0.006348354792560801,
      "grad_norm": 0.17648632561808922,
      "learning_rate": 4.226190476190476e-05,
      "loss": 0.7878,
      "step": 71
    },
    {
      "epoch": 0.006437768240343348,
      "grad_norm": 0.19124125127997355,
      "learning_rate": 4.2857142857142856e-05,
      "loss": 0.8148,
      "step": 72
    },
    {
      "epoch": 0.006527181688125894,
      "grad_norm": 0.21383335116877675,
      "learning_rate": 4.345238095238096e-05,
      "loss": 0.8348,
      "step": 73
    },
    {
      "epoch": 0.006616595135908441,
      "grad_norm": 0.19923398456458122,
      "learning_rate": 4.404761904761905e-05,
      "loss": 0.8174,
      "step": 74
    },
    {
      "epoch": 0.006706008583690987,
      "grad_norm": 0.17462845165247823,
      "learning_rate": 4.464285714285715e-05,
      "loss": 0.8029,
      "step": 75
    },
    {
      "epoch": 0.006795422031473534,
      "grad_norm": 0.17771710805134347,
      "learning_rate": 4.523809523809524e-05,
      "loss": 0.7936,
      "step": 76
    },
    {
      "epoch": 0.00688483547925608,
      "grad_norm": 0.20005916438648685,
      "learning_rate": 4.5833333333333334e-05,
      "loss": 0.7859,
      "step": 77
    },
    {
      "epoch": 0.006974248927038627,
      "grad_norm": 0.21078124792369776,
      "learning_rate": 4.642857142857143e-05,
      "loss": 0.7715,
      "step": 78
    },
    {
      "epoch": 0.007063662374821173,
      "grad_norm": 0.20987604543659175,
      "learning_rate": 4.7023809523809525e-05,
      "loss": 0.8445,
      "step": 79
    },
    {
      "epoch": 0.00715307582260372,
      "grad_norm": 0.21855162792007266,
      "learning_rate": 4.761904761904762e-05,
      "loss": 0.8628,
      "step": 80
    },
    {
      "epoch": 0.007242489270386266,
      "grad_norm": 0.19561684477064892,
      "learning_rate": 4.8214285714285716e-05,
      "loss": 0.8005,
      "step": 81
    },
    {
      "epoch": 0.007331902718168813,
      "grad_norm": 0.21439361367915738,
      "learning_rate": 4.880952380952381e-05,
      "loss": 0.7939,
      "step": 82
    },
    {
      "epoch": 0.007421316165951359,
      "grad_norm": 0.2339340400498359,
      "learning_rate": 4.940476190476191e-05,
      "loss": 0.8197,
      "step": 83
    },
    {
      "epoch": 0.0075107296137339056,
      "grad_norm": 0.19568000642114702,
      "learning_rate": 5e-05,
      "loss": 0.7964,
      "step": 84
    },
    {
      "epoch": 0.007600143061516452,
      "grad_norm": 0.21550339961867232,
      "learning_rate": 5.05952380952381e-05,
      "loss": 0.7832,
      "step": 85
    },
    {
      "epoch": 0.007689556509298999,
      "grad_norm": 0.21018317172063372,
      "learning_rate": 5.119047619047619e-05,
      "loss": 0.8008,
      "step": 86
    },
    {
      "epoch": 0.007778969957081545,
      "grad_norm": 0.1365728757486357,
      "learning_rate": 5.1785714285714296e-05,
      "loss": 0.2743,
      "step": 87
    },
    {
      "epoch": 0.007868383404864092,
      "grad_norm": 0.23075996053921385,
      "learning_rate": 5.2380952380952384e-05,
      "loss": 0.8075,
      "step": 88
    },
    {
      "epoch": 0.007957796852646639,
      "grad_norm": 0.1989426647212182,
      "learning_rate": 5.297619047619048e-05,
      "loss": 0.7806,
      "step": 89
    },
    {
      "epoch": 0.008047210300429184,
      "grad_norm": 0.24228514199325257,
      "learning_rate": 5.3571428571428575e-05,
      "loss": 0.8185,
      "step": 90
    },
    {
      "epoch": 0.008136623748211731,
      "grad_norm": 0.21221745626170824,
      "learning_rate": 5.4166666666666664e-05,
      "loss": 0.843,
      "step": 91
    },
    {
      "epoch": 0.008226037195994278,
      "grad_norm": 0.19506863321536086,
      "learning_rate": 5.4761904761904766e-05,
      "loss": 0.8012,
      "step": 92
    },
    {
      "epoch": 0.008315450643776824,
      "grad_norm": 0.1868034825180146,
      "learning_rate": 5.535714285714286e-05,
      "loss": 0.7888,
      "step": 93
    },
    {
      "epoch": 0.008404864091559371,
      "grad_norm": 0.18377500117348866,
      "learning_rate": 5.595238095238096e-05,
      "loss": 0.7682,
      "step": 94
    },
    {
      "epoch": 0.008494277539341916,
      "grad_norm": 0.18413588059282268,
      "learning_rate": 5.6547619047619046e-05,
      "loss": 0.8228,
      "step": 95
    },
    {
      "epoch": 0.008583690987124463,
      "grad_norm": 0.23194615422533807,
      "learning_rate": 5.714285714285714e-05,
      "loss": 0.8013,
      "step": 96
    },
    {
      "epoch": 0.00867310443490701,
      "grad_norm": 0.201030546047415,
      "learning_rate": 5.773809523809524e-05,
      "loss": 0.8121,
      "step": 97
    },
    {
      "epoch": 0.008762517882689556,
      "grad_norm": 0.20033852699245752,
      "learning_rate": 5.833333333333334e-05,
      "loss": 0.7959,
      "step": 98
    },
    {
      "epoch": 0.008851931330472103,
      "grad_norm": 0.24246270065788733,
      "learning_rate": 5.8928571428571435e-05,
      "loss": 0.7716,
      "step": 99
    },
    {
      "epoch": 0.00894134477825465,
      "grad_norm": 0.20575709950493704,
      "learning_rate": 5.9523809523809524e-05,
      "loss": 0.8232,
      "step": 100
    },
    {
      "epoch": 0.009030758226037196,
      "grad_norm": 0.25278371007059053,
      "learning_rate": 6.011904761904762e-05,
      "loss": 0.7775,
      "step": 101
    },
    {
      "epoch": 0.009120171673819743,
      "grad_norm": 0.16264393556164627,
      "learning_rate": 6.0714285714285715e-05,
      "loss": 0.758,
      "step": 102
    },
    {
      "epoch": 0.009209585121602288,
      "grad_norm": 0.2114631368454318,
      "learning_rate": 6.130952380952381e-05,
      "loss": 0.8187,
      "step": 103
    },
    {
      "epoch": 0.009298998569384835,
      "grad_norm": 0.18016457688807772,
      "learning_rate": 6.19047619047619e-05,
      "loss": 0.7977,
      "step": 104
    },
    {
      "epoch": 0.009388412017167383,
      "grad_norm": 0.17721663753843828,
      "learning_rate": 6.25e-05,
      "loss": 0.7991,
      "step": 105
    },
    {
      "epoch": 0.009477825464949928,
      "grad_norm": 0.23583697011304433,
      "learning_rate": 6.30952380952381e-05,
      "loss": 0.8332,
      "step": 106
    },
    {
      "epoch": 0.009567238912732475,
      "grad_norm": 0.16872036309402033,
      "learning_rate": 6.369047619047619e-05,
      "loss": 0.7718,
      "step": 107
    },
    {
      "epoch": 0.009656652360515022,
      "grad_norm": 0.22514809297442848,
      "learning_rate": 6.428571428571429e-05,
      "loss": 0.7937,
      "step": 108
    },
    {
      "epoch": 0.009746065808297568,
      "grad_norm": 0.18568939101658655,
      "learning_rate": 6.488095238095238e-05,
      "loss": 0.7747,
      "step": 109
    },
    {
      "epoch": 0.009835479256080115,
      "grad_norm": 0.20955347921601017,
      "learning_rate": 6.547619047619048e-05,
      "loss": 0.78,
      "step": 110
    },
    {
      "epoch": 0.00992489270386266,
      "grad_norm": 0.1877972858138859,
      "learning_rate": 6.607142857142857e-05,
      "loss": 0.7793,
      "step": 111
    },
    {
      "epoch": 0.010014306151645207,
      "grad_norm": 0.1813256119012357,
      "learning_rate": 6.666666666666667e-05,
      "loss": 0.7723,
      "step": 112
    },
    {
      "epoch": 0.010103719599427755,
      "grad_norm": 0.18051211303123374,
      "learning_rate": 6.726190476190477e-05,
      "loss": 0.7903,
      "step": 113
    },
    {
      "epoch": 0.0101931330472103,
      "grad_norm": 0.18304460332409944,
      "learning_rate": 6.785714285714286e-05,
      "loss": 0.8504,
      "step": 114
    },
    {
      "epoch": 0.010282546494992847,
      "grad_norm": 0.1930595716710517,
      "learning_rate": 6.845238095238096e-05,
      "loss": 0.7492,
      "step": 115
    },
    {
      "epoch": 0.010371959942775394,
      "grad_norm": 0.167169432690334,
      "learning_rate": 6.904761904761905e-05,
      "loss": 0.7579,
      "step": 116
    },
    {
      "epoch": 0.01046137339055794,
      "grad_norm": 0.20475087679388965,
      "learning_rate": 6.964285714285715e-05,
      "loss": 0.7709,
      "step": 117
    },
    {
      "epoch": 0.010550786838340487,
      "grad_norm": 0.19333408384291892,
      "learning_rate": 7.023809523809524e-05,
      "loss": 0.8009,
      "step": 118
    },
    {
      "epoch": 0.010640200286123032,
      "grad_norm": 0.23428551332595565,
      "learning_rate": 7.083333333333334e-05,
      "loss": 0.7952,
      "step": 119
    },
    {
      "epoch": 0.01072961373390558,
      "grad_norm": 0.26457091750544476,
      "learning_rate": 7.142857142857143e-05,
      "loss": 0.7654,
      "step": 120
    },
    {
      "epoch": 0.010819027181688127,
      "grad_norm": 0.24474157108558503,
      "learning_rate": 7.202380952380953e-05,
      "loss": 0.7846,
      "step": 121
    },
    {
      "epoch": 0.010908440629470672,
      "grad_norm": 0.23336956811602638,
      "learning_rate": 7.261904761904762e-05,
      "loss": 0.7926,
      "step": 122
    },
    {
      "epoch": 0.010997854077253219,
      "grad_norm": 0.209740973612503,
      "learning_rate": 7.321428571428571e-05,
      "loss": 0.7694,
      "step": 123
    },
    {
      "epoch": 0.011087267525035766,
      "grad_norm": 0.22610732537282807,
      "learning_rate": 7.380952380952382e-05,
      "loss": 0.7986,
      "step": 124
    },
    {
      "epoch": 0.011176680972818312,
      "grad_norm": 0.2752256150146888,
      "learning_rate": 7.440476190476191e-05,
      "loss": 0.8121,
      "step": 125
    },
    {
      "epoch": 0.011266094420600859,
      "grad_norm": 0.26007432018608007,
      "learning_rate": 7.500000000000001e-05,
      "loss": 0.8054,
      "step": 126
    },
    {
      "epoch": 0.011355507868383404,
      "grad_norm": 0.17727304207143485,
      "learning_rate": 7.55952380952381e-05,
      "loss": 0.7536,
      "step": 127
    },
    {
      "epoch": 0.011444921316165951,
      "grad_norm": 0.21281859350591364,
      "learning_rate": 7.619047619047618e-05,
      "loss": 0.7892,
      "step": 128
    },
    {
      "epoch": 0.011534334763948498,
      "grad_norm": 0.19589223681103632,
      "learning_rate": 7.67857142857143e-05,
      "loss": 0.7765,
      "step": 129
    },
    {
      "epoch": 0.011623748211731044,
      "grad_norm": 0.18395133248417353,
      "learning_rate": 7.738095238095239e-05,
      "loss": 0.7434,
      "step": 130
    },
    {
      "epoch": 0.011713161659513591,
      "grad_norm": 0.169475821126296,
      "learning_rate": 7.797619047619048e-05,
      "loss": 0.7639,
      "step": 131
    },
    {
      "epoch": 0.011802575107296138,
      "grad_norm": 0.18632397624914637,
      "learning_rate": 7.857142857142858e-05,
      "loss": 0.7395,
      "step": 132
    },
    {
      "epoch": 0.011891988555078684,
      "grad_norm": 0.19119108463247617,
      "learning_rate": 7.916666666666666e-05,
      "loss": 0.7895,
      "step": 133
    },
    {
      "epoch": 0.01198140200286123,
      "grad_norm": 0.21100708949579258,
      "learning_rate": 7.976190476190477e-05,
      "loss": 0.7425,
      "step": 134
    },
    {
      "epoch": 0.012070815450643776,
      "grad_norm": 0.19621615706567247,
      "learning_rate": 8.035714285714287e-05,
      "loss": 0.8044,
      "step": 135
    },
    {
      "epoch": 0.012160228898426323,
      "grad_norm": 0.19049057193483668,
      "learning_rate": 8.095238095238096e-05,
      "loss": 0.8121,
      "step": 136
    },
    {
      "epoch": 0.01224964234620887,
      "grad_norm": 0.15883862608544763,
      "learning_rate": 8.154761904761904e-05,
      "loss": 0.7137,
      "step": 137
    },
    {
      "epoch": 0.012339055793991416,
      "grad_norm": 0.15753205303205278,
      "learning_rate": 8.214285714285714e-05,
      "loss": 0.3043,
      "step": 138
    },
    {
      "epoch": 0.012428469241773963,
      "grad_norm": 0.23873310653369575,
      "learning_rate": 8.273809523809524e-05,
      "loss": 0.7794,
      "step": 139
    },
    {
      "epoch": 0.01251788268955651,
      "grad_norm": 0.16398351808987474,
      "learning_rate": 8.333333333333334e-05,
      "loss": 0.7401,
      "step": 140
    },
    {
      "epoch": 0.012607296137339056,
      "grad_norm": 0.2211719658857812,
      "learning_rate": 8.392857142857144e-05,
      "loss": 0.7998,
      "step": 141
    },
    {
      "epoch": 0.012696709585121603,
      "grad_norm": 0.17844178385505208,
      "learning_rate": 8.452380952380952e-05,
      "loss": 0.7632,
      "step": 142
    },
    {
      "epoch": 0.012786123032904148,
      "grad_norm": 0.17854105034643422,
      "learning_rate": 8.511904761904762e-05,
      "loss": 0.759,
      "step": 143
    },
    {
      "epoch": 0.012875536480686695,
      "grad_norm": 0.22062566117190807,
      "learning_rate": 8.571428571428571e-05,
      "loss": 0.8101,
      "step": 144
    },
    {
      "epoch": 0.012964949928469242,
      "grad_norm": 0.1776277076319471,
      "learning_rate": 8.630952380952382e-05,
      "loss": 0.7755,
      "step": 145
    },
    {
      "epoch": 0.013054363376251788,
      "grad_norm": 0.18504873783951659,
      "learning_rate": 8.690476190476192e-05,
      "loss": 0.8164,
      "step": 146
    },
    {
      "epoch": 0.013143776824034335,
      "grad_norm": 0.18214825387275027,
      "learning_rate": 8.75e-05,
      "loss": 0.806,
      "step": 147
    },
    {
      "epoch": 0.013233190271816882,
      "grad_norm": 0.1849269722452864,
      "learning_rate": 8.80952380952381e-05,
      "loss": 0.7373,
      "step": 148
    },
    {
      "epoch": 0.013322603719599427,
      "grad_norm": 0.1609889636209293,
      "learning_rate": 8.869047619047619e-05,
      "loss": 0.7883,
      "step": 149
    },
    {
      "epoch": 0.013412017167381975,
      "grad_norm": 0.22840425213377977,
      "learning_rate": 8.92857142857143e-05,
      "loss": 0.7495,
      "step": 150
    },
    {
      "epoch": 0.01350143061516452,
      "grad_norm": 0.18443627155097447,
      "learning_rate": 8.988095238095238e-05,
      "loss": 0.7513,
      "step": 151
    },
    {
      "epoch": 0.013590844062947067,
      "grad_norm": 0.18035695600618815,
      "learning_rate": 9.047619047619048e-05,
      "loss": 0.8028,
      "step": 152
    },
    {
      "epoch": 0.013680257510729614,
      "grad_norm": 0.19343374169385816,
      "learning_rate": 9.107142857142857e-05,
      "loss": 0.8074,
      "step": 153
    },
    {
      "epoch": 0.01376967095851216,
      "grad_norm": 0.1482331004080805,
      "learning_rate": 9.166666666666667e-05,
      "loss": 0.7776,
      "step": 154
    },
    {
      "epoch": 0.013859084406294707,
      "grad_norm": 0.17272702704683493,
      "learning_rate": 9.226190476190478e-05,
      "loss": 0.7529,
      "step": 155
    },
    {
      "epoch": 0.013948497854077254,
      "grad_norm": 0.15597718001602467,
      "learning_rate": 9.285714285714286e-05,
      "loss": 0.777,
      "step": 156
    },
    {
      "epoch": 0.0140379113018598,
      "grad_norm": 0.16048430631470142,
      "learning_rate": 9.345238095238095e-05,
      "loss": 0.7971,
      "step": 157
    },
    {
      "epoch": 0.014127324749642347,
      "grad_norm": 0.14819246743040107,
      "learning_rate": 9.404761904761905e-05,
      "loss": 0.7748,
      "step": 158
    },
    {
      "epoch": 0.014216738197424892,
      "grad_norm": 0.16115440029047925,
      "learning_rate": 9.464285714285715e-05,
      "loss": 0.731,
      "step": 159
    },
    {
      "epoch": 0.01430615164520744,
      "grad_norm": 0.1692225815742582,
      "learning_rate": 9.523809523809524e-05,
      "loss": 0.7238,
      "step": 160
    },
    {
      "epoch": 0.014395565092989986,
      "grad_norm": 0.181369068092425,
      "learning_rate": 9.583333333333334e-05,
      "loss": 0.7511,
      "step": 161
    },
    {
      "epoch": 0.014484978540772532,
      "grad_norm": 0.14955507796232748,
      "learning_rate": 9.642857142857143e-05,
      "loss": 0.7772,
      "step": 162
    },
    {
      "epoch": 0.014574391988555079,
      "grad_norm": 0.18414106617268328,
      "learning_rate": 9.702380952380953e-05,
      "loss": 0.7258,
      "step": 163
    },
    {
      "epoch": 0.014663805436337626,
      "grad_norm": 0.22503585625853867,
      "learning_rate": 9.761904761904762e-05,
      "loss": 0.7741,
      "step": 164
    },
    {
      "epoch": 0.014753218884120171,
      "grad_norm": 0.14208078396778487,
      "learning_rate": 9.821428571428572e-05,
      "loss": 0.291,
      "step": 165
    },
    {
      "epoch": 0.014842632331902719,
      "grad_norm": 0.16832480350803908,
      "learning_rate": 9.880952380952381e-05,
      "loss": 0.7767,
      "step": 166
    },
    {
      "epoch": 0.014932045779685264,
      "grad_norm": 0.1649468633692074,
      "learning_rate": 9.940476190476191e-05,
      "loss": 0.7155,
      "step": 167
    },
    {
      "epoch": 0.015021459227467811,
      "grad_norm": 0.17700806635359626,
      "learning_rate": 0.0001,
      "loss": 0.7922,
      "step": 168
    },
    {
      "epoch": 0.015110872675250358,
      "grad_norm": 0.169842041426953,
      "learning_rate": 0.0001005952380952381,
      "loss": 0.7678,
      "step": 169
    },
    {
      "epoch": 0.015200286123032904,
      "grad_norm": 0.17775415733576103,
      "learning_rate": 0.0001011904761904762,
      "loss": 0.7911,
      "step": 170
    },
    {
      "epoch": 0.01528969957081545,
      "grad_norm": 0.1543374291049899,
      "learning_rate": 0.00010178571428571428,
      "loss": 0.7991,
      "step": 171
    },
    {
      "epoch": 0.015379113018597998,
      "grad_norm": 0.1540153244159531,
      "learning_rate": 0.00010238095238095237,
      "loss": 0.7478,
      "step": 172
    },
    {
      "epoch": 0.015468526466380543,
      "grad_norm": 0.19284912851784256,
      "learning_rate": 0.00010297619047619047,
      "loss": 0.7671,
      "step": 173
    },
    {
      "epoch": 0.01555793991416309,
      "grad_norm": 0.17472542321016252,
      "learning_rate": 0.00010357142857142859,
      "loss": 0.7945,
      "step": 174
    },
    {
      "epoch": 0.015647353361945636,
      "grad_norm": 0.15774942565149414,
      "learning_rate": 0.00010416666666666667,
      "loss": 0.7553,
      "step": 175
    },
    {
      "epoch": 0.015736766809728183,
      "grad_norm": 0.16763670048850773,
      "learning_rate": 0.00010476190476190477,
      "loss": 0.7927,
      "step": 176
    },
    {
      "epoch": 0.01582618025751073,
      "grad_norm": 0.16806571404807855,
      "learning_rate": 0.00010535714285714286,
      "loss": 0.741,
      "step": 177
    },
    {
      "epoch": 0.015915593705293277,
      "grad_norm": 0.18261248636869,
      "learning_rate": 0.00010595238095238096,
      "loss": 0.7753,
      "step": 178
    },
    {
      "epoch": 0.01600500715307582,
      "grad_norm": 0.16216641379793262,
      "learning_rate": 0.00010654761904761906,
      "loss": 0.8371,
      "step": 179
    },
    {
      "epoch": 0.016094420600858368,
      "grad_norm": 0.20098862688736602,
      "learning_rate": 0.00010714285714285715,
      "loss": 0.772,
      "step": 180
    },
    {
      "epoch": 0.016183834048640915,
      "grad_norm": 0.1856346317519328,
      "learning_rate": 0.00010773809523809523,
      "loss": 0.7174,
      "step": 181
    },
    {
      "epoch": 0.016273247496423462,
      "grad_norm": 0.16022454562548588,
      "learning_rate": 0.00010833333333333333,
      "loss": 0.7373,
      "step": 182
    },
    {
      "epoch": 0.01636266094420601,
      "grad_norm": 0.20012101686520015,
      "learning_rate": 0.00010892857142857142,
      "loss": 0.7619,
      "step": 183
    },
    {
      "epoch": 0.016452074391988557,
      "grad_norm": 0.1603357006777846,
      "learning_rate": 0.00010952380952380953,
      "loss": 0.7578,
      "step": 184
    },
    {
      "epoch": 0.0165414878397711,
      "grad_norm": 0.1571898135574668,
      "learning_rate": 0.00011011904761904763,
      "loss": 0.7962,
      "step": 185
    },
    {
      "epoch": 0.016630901287553648,
      "grad_norm": 0.1362193061452631,
      "learning_rate": 0.00011071428571428572,
      "loss": 0.7446,
      "step": 186
    },
    {
      "epoch": 0.016720314735336195,
      "grad_norm": 0.17015486666381233,
      "learning_rate": 0.00011130952380952382,
      "loss": 0.7694,
      "step": 187
    },
    {
      "epoch": 0.016809728183118742,
      "grad_norm": 0.14450858601695504,
      "learning_rate": 0.00011190476190476191,
      "loss": 0.7801,
      "step": 188
    },
    {
      "epoch": 0.01689914163090129,
      "grad_norm": 0.20388384613782404,
      "learning_rate": 0.00011250000000000001,
      "loss": 0.7682,
      "step": 189
    },
    {
      "epoch": 0.016988555078683833,
      "grad_norm": 0.1954934553113732,
      "learning_rate": 0.00011309523809523809,
      "loss": 0.7635,
      "step": 190
    },
    {
      "epoch": 0.01707796852646638,
      "grad_norm": 0.15570445216124765,
      "learning_rate": 0.00011369047619047619,
      "loss": 0.7419,
      "step": 191
    },
    {
      "epoch": 0.017167381974248927,
      "grad_norm": 0.15249001759701364,
      "learning_rate": 0.00011428571428571428,
      "loss": 0.7497,
      "step": 192
    },
    {
      "epoch": 0.017256795422031474,
      "grad_norm": 0.2161829586676176,
      "learning_rate": 0.00011488095238095238,
      "loss": 0.7616,
      "step": 193
    },
    {
      "epoch": 0.01734620886981402,
      "grad_norm": 0.154909137853976,
      "learning_rate": 0.00011547619047619047,
      "loss": 0.7712,
      "step": 194
    },
    {
      "epoch": 0.017435622317596565,
      "grad_norm": 0.16742944430815157,
      "learning_rate": 0.00011607142857142858,
      "loss": 0.8058,
      "step": 195
    },
    {
      "epoch": 0.017525035765379112,
      "grad_norm": 0.15387142719720104,
      "learning_rate": 0.00011666666666666668,
      "loss": 0.768,
      "step": 196
    },
    {
      "epoch": 0.01761444921316166,
      "grad_norm": 0.15860116814525732,
      "learning_rate": 0.00011726190476190477,
      "loss": 0.7778,
      "step": 197
    },
    {
      "epoch": 0.017703862660944206,
      "grad_norm": 0.2103186270649761,
      "learning_rate": 0.00011785714285714287,
      "loss": 0.7845,
      "step": 198
    },
    {
      "epoch": 0.017793276108726754,
      "grad_norm": 0.15298748983932656,
      "learning_rate": 0.00011845238095238097,
      "loss": 0.7548,
      "step": 199
    },
    {
      "epoch": 0.0178826895565093,
      "grad_norm": 0.1996218846312032,
      "learning_rate": 0.00011904761904761905,
      "loss": 0.8067,
      "step": 200
    },
    {
      "epoch": 0.017972103004291844,
      "grad_norm": 0.1521782611061927,
      "learning_rate": 0.00011964285714285714,
      "loss": 0.8079,
      "step": 201
    },
    {
      "epoch": 0.01806151645207439,
      "grad_norm": 0.1628269767745426,
      "learning_rate": 0.00012023809523809524,
      "loss": 0.7715,
      "step": 202
    },
    {
      "epoch": 0.01815092989985694,
      "grad_norm": 0.12843212300511622,
      "learning_rate": 0.00012083333333333333,
      "loss": 0.7484,
      "step": 203
    },
    {
      "epoch": 0.018240343347639486,
      "grad_norm": 0.1699087203006858,
      "learning_rate": 0.00012142857142857143,
      "loss": 0.7672,
      "step": 204
    },
    {
      "epoch": 0.018329756795422033,
      "grad_norm": 0.14986480537329253,
      "learning_rate": 0.00012202380952380954,
      "loss": 0.7368,
      "step": 205
    },
    {
      "epoch": 0.018419170243204577,
      "grad_norm": 0.1648275137666922,
      "learning_rate": 0.00012261904761904762,
      "loss": 0.7475,
      "step": 206
    },
    {
      "epoch": 0.018508583690987124,
      "grad_norm": 0.14217907647329173,
      "learning_rate": 0.00012321428571428572,
      "loss": 0.7529,
      "step": 207
    },
    {
      "epoch": 0.01859799713876967,
      "grad_norm": 0.16846943138947773,
      "learning_rate": 0.0001238095238095238,
      "loss": 0.7799,
      "step": 208
    },
    {
      "epoch": 0.018687410586552218,
      "grad_norm": 0.16162759318287642,
      "learning_rate": 0.0001244047619047619,
      "loss": 0.7743,
      "step": 209
    },
    {
      "epoch": 0.018776824034334765,
      "grad_norm": 0.1527217825286166,
      "learning_rate": 0.000125,
      "loss": 0.7354,
      "step": 210
    },
    {
      "epoch": 0.01886623748211731,
      "grad_norm": 0.14126889467623557,
      "learning_rate": 0.0001255952380952381,
      "loss": 0.7853,
      "step": 211
    },
    {
      "epoch": 0.018955650929899856,
      "grad_norm": 0.20062786390161158,
      "learning_rate": 0.0001261904761904762,
      "loss": 0.7511,
      "step": 212
    },
    {
      "epoch": 0.019045064377682403,
      "grad_norm": 0.16112502811109383,
      "learning_rate": 0.0001267857142857143,
      "loss": 0.7599,
      "step": 213
    },
    {
      "epoch": 0.01913447782546495,
      "grad_norm": 0.1754852592065329,
      "learning_rate": 0.00012738095238095238,
      "loss": 0.8028,
      "step": 214
    },
    {
      "epoch": 0.019223891273247497,
      "grad_norm": 0.184384663117507,
      "learning_rate": 0.00012797619047619048,
      "loss": 0.7306,
      "step": 215
    },
    {
      "epoch": 0.019313304721030045,
      "grad_norm": 0.15779458033871235,
      "learning_rate": 0.00012857142857142858,
      "loss": 0.7418,
      "step": 216
    },
    {
      "epoch": 0.019402718168812588,
      "grad_norm": 0.16545477261907984,
      "learning_rate": 0.00012916666666666667,
      "loss": 0.7914,
      "step": 217
    },
    {
      "epoch": 0.019492131616595135,
      "grad_norm": 0.14756337763715346,
      "learning_rate": 0.00012976190476190477,
      "loss": 0.7577,
      "step": 218
    },
    {
      "epoch": 0.019581545064377683,
      "grad_norm": 0.14402648754461134,
      "learning_rate": 0.00013035714285714286,
      "loss": 0.7406,
      "step": 219
    },
    {
      "epoch": 0.01967095851216023,
      "grad_norm": 0.15381224163413876,
      "learning_rate": 0.00013095238095238096,
      "loss": 0.7577,
      "step": 220
    },
    {
      "epoch": 0.019760371959942777,
      "grad_norm": 0.1528570815597786,
      "learning_rate": 0.00013154761904761905,
      "loss": 0.7722,
      "step": 221
    },
    {
      "epoch": 0.01984978540772532,
      "grad_norm": 0.15590960015296443,
      "learning_rate": 0.00013214285714285715,
      "loss": 0.7723,
      "step": 222
    },
    {
      "epoch": 0.019939198855507868,
      "grad_norm": 0.14250808800982576,
      "learning_rate": 0.00013273809523809524,
      "loss": 0.7506,
      "step": 223
    },
    {
      "epoch": 0.020028612303290415,
      "grad_norm": 0.15105184387679987,
      "learning_rate": 0.00013333333333333334,
      "loss": 0.7169,
      "step": 224
    },
    {
      "epoch": 0.020118025751072962,
      "grad_norm": 0.13957028759718382,
      "learning_rate": 0.00013392857142857144,
      "loss": 0.7313,
      "step": 225
    },
    {
      "epoch": 0.02020743919885551,
      "grad_norm": 0.15604694717638745,
      "learning_rate": 0.00013452380952380953,
      "loss": 0.7329,
      "step": 226
    },
    {
      "epoch": 0.020296852646638053,
      "grad_norm": 0.14470788843203825,
      "learning_rate": 0.00013511904761904763,
      "loss": 0.7653,
      "step": 227
    },
    {
      "epoch": 0.0203862660944206,
      "grad_norm": 0.20329529344736672,
      "learning_rate": 0.00013571428571428572,
      "loss": 0.7328,
      "step": 228
    },
    {
      "epoch": 0.020475679542203147,
      "grad_norm": 0.2178493228085506,
      "learning_rate": 0.00013630952380952382,
      "loss": 0.7278,
      "step": 229
    },
    {
      "epoch": 0.020565092989985694,
      "grad_norm": 0.1379972468631876,
      "learning_rate": 0.0001369047619047619,
      "loss": 0.7671,
      "step": 230
    },
    {
      "epoch": 0.02065450643776824,
      "grad_norm": 0.265045631653501,
      "learning_rate": 0.0001375,
      "loss": 0.6898,
      "step": 231
    },
    {
      "epoch": 0.02074391988555079,
      "grad_norm": 0.1773434048366614,
      "learning_rate": 0.0001380952380952381,
      "loss": 0.7862,
      "step": 232
    },
    {
      "epoch": 0.020833333333333332,
      "grad_norm": 0.1682356385864587,
      "learning_rate": 0.0001386904761904762,
      "loss": 0.757,
      "step": 233
    },
    {
      "epoch": 0.02092274678111588,
      "grad_norm": 0.14193413309422026,
      "learning_rate": 0.0001392857142857143,
      "loss": 0.7561,
      "step": 234
    },
    {
      "epoch": 0.021012160228898426,
      "grad_norm": 0.1879870961601807,
      "learning_rate": 0.0001398809523809524,
      "loss": 0.7546,
      "step": 235
    },
    {
      "epoch": 0.021101573676680974,
      "grad_norm": 0.13954402045139436,
      "learning_rate": 0.00014047619047619049,
      "loss": 0.7582,
      "step": 236
    },
    {
      "epoch": 0.02119098712446352,
      "grad_norm": 0.15615402496553557,
      "learning_rate": 0.00014107142857142858,
      "loss": 0.7934,
      "step": 237
    },
    {
      "epoch": 0.021280400572246064,
      "grad_norm": 0.14171131446521257,
      "learning_rate": 0.00014166666666666668,
      "loss": 0.7778,
      "step": 238
    },
    {
      "epoch": 0.02136981402002861,
      "grad_norm": 0.14132009951159968,
      "learning_rate": 0.00014226190476190477,
      "loss": 0.7597,
      "step": 239
    },
    {
      "epoch": 0.02145922746781116,
      "grad_norm": 0.13881115702132846,
      "learning_rate": 0.00014285714285714287,
      "loss": 0.7794,
      "step": 240
    },
    {
      "epoch": 0.021548640915593706,
      "grad_norm": 0.13598100403437224,
      "learning_rate": 0.00014345238095238096,
      "loss": 0.7281,
      "step": 241
    },
    {
      "epoch": 0.021638054363376253,
      "grad_norm": 0.13398120464879323,
      "learning_rate": 0.00014404761904761906,
      "loss": 0.7421,
      "step": 242
    },
    {
      "epoch": 0.021727467811158797,
      "grad_norm": 0.13606591550067845,
      "learning_rate": 0.00014464285714285715,
      "loss": 0.7418,
      "step": 243
    },
    {
      "epoch": 0.021816881258941344,
      "grad_norm": 0.14687640966463378,
      "learning_rate": 0.00014523809523809525,
      "loss": 0.7611,
      "step": 244
    },
    {
      "epoch": 0.02190629470672389,
      "grad_norm": 0.13327525775489227,
      "learning_rate": 0.00014583333333333335,
      "loss": 0.746,
      "step": 245
    },
    {
      "epoch": 0.021995708154506438,
      "grad_norm": 0.15130850149770994,
      "learning_rate": 0.00014642857142857141,
      "loss": 0.7572,
      "step": 246
    },
    {
      "epoch": 0.022085121602288985,
      "grad_norm": 0.13784154291485387,
      "learning_rate": 0.00014702380952380954,
      "loss": 0.7668,
      "step": 247
    },
    {
      "epoch": 0.022174535050071532,
      "grad_norm": 0.15192453563390934,
      "learning_rate": 0.00014761904761904763,
      "loss": 0.7582,
      "step": 248
    },
    {
      "epoch": 0.022263948497854076,
      "grad_norm": 0.157078385688589,
      "learning_rate": 0.00014821428571428573,
      "loss": 0.7632,
      "step": 249
    },
    {
      "epoch": 0.022353361945636623,
      "grad_norm": 0.15925684316656374,
      "learning_rate": 0.00014880952380952382,
      "loss": 0.7913,
      "step": 250
    },
    {
      "epoch": 0.02244277539341917,
      "grad_norm": 0.15740211219983832,
      "learning_rate": 0.00014940476190476192,
      "loss": 0.766,
      "step": 251
    },
    {
      "epoch": 0.022532188841201718,
      "grad_norm": 0.14184272813124119,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.722,
      "step": 252
    },
    {
      "epoch": 0.022621602288984265,
      "grad_norm": 0.14087415942696266,
      "learning_rate": 0.0001505952380952381,
      "loss": 0.7284,
      "step": 253
    },
    {
      "epoch": 0.02271101573676681,
      "grad_norm": 0.19015097550353324,
      "learning_rate": 0.0001511904761904762,
      "loss": 0.7366,
      "step": 254
    },
    {
      "epoch": 0.022800429184549355,
      "grad_norm": 0.149672214629484,
      "learning_rate": 0.00015178571428571427,
      "loss": 0.7202,
      "step": 255
    },
    {
      "epoch": 0.022889842632331903,
      "grad_norm": 0.15948032864508332,
      "learning_rate": 0.00015238095238095237,
      "loss": 0.7305,
      "step": 256
    },
    {
      "epoch": 0.02297925608011445,
      "grad_norm": 0.17681498150508876,
      "learning_rate": 0.00015297619047619046,
      "loss": 0.745,
      "step": 257
    },
    {
      "epoch": 0.023068669527896997,
      "grad_norm": 0.14828733439343147,
      "learning_rate": 0.0001535714285714286,
      "loss": 0.794,
      "step": 258
    },
    {
      "epoch": 0.02315808297567954,
      "grad_norm": 0.16013222555138773,
      "learning_rate": 0.00015416666666666668,
      "loss": 0.778,
      "step": 259
    },
    {
      "epoch": 0.023247496423462088,
      "grad_norm": 0.1467575673108967,
      "learning_rate": 0.00015476190476190478,
      "loss": 0.7851,
      "step": 260
    },
    {
      "epoch": 0.023336909871244635,
      "grad_norm": 0.1400581139706627,
      "learning_rate": 0.00015535714285714287,
      "loss": 0.683,
      "step": 261
    },
    {
      "epoch": 0.023426323319027182,
      "grad_norm": 0.16568114437863737,
      "learning_rate": 0.00015595238095238097,
      "loss": 0.2925,
      "step": 262
    },
    {
      "epoch": 0.02351573676680973,
      "grad_norm": 0.18246942231526656,
      "learning_rate": 0.00015654761904761906,
      "loss": 0.7692,
      "step": 263
    },
    {
      "epoch": 0.023605150214592276,
      "grad_norm": 0.144320444235417,
      "learning_rate": 0.00015714285714285716,
      "loss": 0.8019,
      "step": 264
    },
    {
      "epoch": 0.02369456366237482,
      "grad_norm": 0.2148553873367696,
      "learning_rate": 0.00015773809523809523,
      "loss": 0.7469,
      "step": 265
    },
    {
      "epoch": 0.023783977110157367,
      "grad_norm": 0.15470450425215285,
      "learning_rate": 0.00015833333333333332,
      "loss": 0.7628,
      "step": 266
    },
    {
      "epoch": 0.023873390557939914,
      "grad_norm": 0.15103023591112238,
      "learning_rate": 0.00015892857142857142,
      "loss": 0.7387,
      "step": 267
    },
    {
      "epoch": 0.02396280400572246,
      "grad_norm": 0.14568378864756792,
      "learning_rate": 0.00015952380952380954,
      "loss": 0.7473,
      "step": 268
    },
    {
      "epoch": 0.02405221745350501,
      "grad_norm": 0.14192293811181306,
      "learning_rate": 0.00016011904761904764,
      "loss": 0.7967,
      "step": 269
    },
    {
      "epoch": 0.024141630901287552,
      "grad_norm": 0.13360984969630366,
      "learning_rate": 0.00016071428571428573,
      "loss": 0.7348,
      "step": 270
    },
    {
      "epoch": 0.0242310443490701,
      "grad_norm": 0.13294602189047458,
      "learning_rate": 0.00016130952380952383,
      "loss": 0.7353,
      "step": 271
    },
    {
      "epoch": 0.024320457796852647,
      "grad_norm": 0.13170608038488787,
      "learning_rate": 0.00016190476190476192,
      "loss": 0.7472,
      "step": 272
    },
    {
      "epoch": 0.024409871244635194,
      "grad_norm": 0.1276368091559164,
      "learning_rate": 0.00016250000000000002,
      "loss": 0.735,
      "step": 273
    },
    {
      "epoch": 0.02449928469241774,
      "grad_norm": 0.14762569440311626,
      "learning_rate": 0.0001630952380952381,
      "loss": 0.7457,
      "step": 274
    },
    {
      "epoch": 0.024588698140200285,
      "grad_norm": 0.15843970609858132,
      "learning_rate": 0.00016369047619047618,
      "loss": 0.7924,
      "step": 275
    },
    {
      "epoch": 0.02467811158798283,
      "grad_norm": 0.1295011271489963,
      "learning_rate": 0.00016428571428571428,
      "loss": 0.7515,
      "step": 276
    },
    {
      "epoch": 0.02476752503576538,
      "grad_norm": 0.26525329586730867,
      "learning_rate": 0.00016488095238095237,
      "loss": 0.3221,
      "step": 277
    },
    {
      "epoch": 0.024856938483547926,
      "grad_norm": 0.14823157551707325,
      "learning_rate": 0.00016547619047619047,
      "loss": 0.7363,
      "step": 278
    },
    {
      "epoch": 0.024946351931330473,
      "grad_norm": 0.1321300108105683,
      "learning_rate": 0.0001660714285714286,
      "loss": 0.7269,
      "step": 279
    },
    {
      "epoch": 0.02503576537911302,
      "grad_norm": 0.1385123082596031,
      "learning_rate": 0.0001666666666666667,
      "loss": 0.7331,
      "step": 280
    },
    {
      "epoch": 0.025125178826895564,
      "grad_norm": 0.1302241489937184,
      "learning_rate": 0.00016726190476190478,
      "loss": 0.7277,
      "step": 281
    },
    {
      "epoch": 0.02521459227467811,
      "grad_norm": 0.1622883682908419,
      "learning_rate": 0.00016785714285714288,
      "loss": 0.7934,
      "step": 282
    },
    {
      "epoch": 0.025304005722460658,
      "grad_norm": 0.1811490344679723,
      "learning_rate": 0.00016845238095238097,
      "loss": 0.3292,
      "step": 283
    },
    {
      "epoch": 0.025393419170243205,
      "grad_norm": 0.18137384214028907,
      "learning_rate": 0.00016904761904761904,
      "loss": 0.7549,
      "step": 284
    },
    {
      "epoch": 0.025482832618025753,
      "grad_norm": 0.13396439221213113,
      "learning_rate": 0.00016964285714285714,
      "loss": 0.7396,
      "step": 285
    },
    {
      "epoch": 0.025572246065808296,
      "grad_norm": 0.15655761468133292,
      "learning_rate": 0.00017023809523809523,
      "loss": 0.7427,
      "step": 286
    },
    {
      "epoch": 0.025661659513590843,
      "grad_norm": 0.1484434624095845,
      "learning_rate": 0.00017083333333333333,
      "loss": 0.7294,
      "step": 287
    },
    {
      "epoch": 0.02575107296137339,
      "grad_norm": 0.14994028877380372,
      "learning_rate": 0.00017142857142857143,
      "loss": 0.7091,
      "step": 288
    },
    {
      "epoch": 0.025840486409155938,
      "grad_norm": 0.155189849495682,
      "learning_rate": 0.00017202380952380955,
      "loss": 0.7771,
      "step": 289
    },
    {
      "epoch": 0.025929899856938485,
      "grad_norm": 0.19963439402654595,
      "learning_rate": 0.00017261904761904764,
      "loss": 0.7383,
      "step": 290
    },
    {
      "epoch": 0.02601931330472103,
      "grad_norm": 0.17671745840197123,
      "learning_rate": 0.00017321428571428574,
      "loss": 0.7806,
      "step": 291
    },
    {
      "epoch": 0.026108726752503576,
      "grad_norm": 0.14083183900267485,
      "learning_rate": 0.00017380952380952383,
      "loss": 0.757,
      "step": 292
    },
    {
      "epoch": 0.026198140200286123,
      "grad_norm": 0.1495987998477662,
      "learning_rate": 0.0001744047619047619,
      "loss": 0.7734,
      "step": 293
    },
    {
      "epoch": 0.02628755364806867,
      "grad_norm": 0.1393204916410398,
      "learning_rate": 0.000175,
      "loss": 0.7624,
      "step": 294
    },
    {
      "epoch": 0.026376967095851217,
      "grad_norm": 0.13364928345854019,
      "learning_rate": 0.0001755952380952381,
      "loss": 0.7365,
      "step": 295
    },
    {
      "epoch": 0.026466380543633764,
      "grad_norm": 0.14662484940267617,
      "learning_rate": 0.0001761904761904762,
      "loss": 0.7751,
      "step": 296
    },
    {
      "epoch": 0.026555793991416308,
      "grad_norm": 0.1392166577357731,
      "learning_rate": 0.00017678571428571428,
      "loss": 0.7745,
      "step": 297
    },
    {
      "epoch": 0.026645207439198855,
      "grad_norm": 0.15571900637164734,
      "learning_rate": 0.00017738095238095238,
      "loss": 0.7623,
      "step": 298
    },
    {
      "epoch": 0.026734620886981402,
      "grad_norm": 0.16361552972248866,
      "learning_rate": 0.00017797619047619048,
      "loss": 0.7535,
      "step": 299
    },
    {
      "epoch": 0.02682403433476395,
      "grad_norm": 0.13876079524328355,
      "learning_rate": 0.0001785714285714286,
      "loss": 0.7423,
      "step": 300
    },
    {
      "epoch": 0.026913447782546496,
      "grad_norm": 0.14680238933099424,
      "learning_rate": 0.0001791666666666667,
      "loss": 0.7673,
      "step": 301
    },
    {
      "epoch": 0.02700286123032904,
      "grad_norm": 0.12769823252989168,
      "learning_rate": 0.00017976190476190476,
      "loss": 0.735,
      "step": 302
    },
    {
      "epoch": 0.027092274678111587,
      "grad_norm": 0.12776732406146257,
      "learning_rate": 0.00018035714285714286,
      "loss": 0.7433,
      "step": 303
    },
    {
      "epoch": 0.027181688125894134,
      "grad_norm": 0.13429151580169205,
      "learning_rate": 0.00018095238095238095,
      "loss": 0.7226,
      "step": 304
    },
    {
      "epoch": 0.02727110157367668,
      "grad_norm": 0.1517122244270556,
      "learning_rate": 0.00018154761904761905,
      "loss": 0.8077,
      "step": 305
    },
    {
      "epoch": 0.02736051502145923,
      "grad_norm": 0.12811289007711446,
      "learning_rate": 0.00018214285714285714,
      "loss": 0.7346,
      "step": 306
    },
    {
      "epoch": 0.027449928469241772,
      "grad_norm": 0.16965605176463935,
      "learning_rate": 0.00018273809523809524,
      "loss": 0.7506,
      "step": 307
    },
    {
      "epoch": 0.02753934191702432,
      "grad_norm": 0.09483952597559206,
      "learning_rate": 0.00018333333333333334,
      "loss": 0.3024,
      "step": 308
    },
    {
      "epoch": 0.027628755364806867,
      "grad_norm": 0.17042358424806248,
      "learning_rate": 0.00018392857142857143,
      "loss": 0.7171,
      "step": 309
    },
    {
      "epoch": 0.027718168812589414,
      "grad_norm": 0.17436676230183384,
      "learning_rate": 0.00018452380952380955,
      "loss": 0.7417,
      "step": 310
    },
    {
      "epoch": 0.02780758226037196,
      "grad_norm": 0.16979754720746074,
      "learning_rate": 0.00018511904761904765,
      "loss": 0.7629,
      "step": 311
    },
    {
      "epoch": 0.027896995708154508,
      "grad_norm": 0.1392354846362843,
      "learning_rate": 0.00018571428571428572,
      "loss": 0.7121,
      "step": 312
    },
    {
      "epoch": 0.027986409155937052,
      "grad_norm": 0.1326948555933914,
      "learning_rate": 0.0001863095238095238,
      "loss": 0.7518,
      "step": 313
    },
    {
      "epoch": 0.0280758226037196,
      "grad_norm": 0.13135966227665768,
      "learning_rate": 0.0001869047619047619,
      "loss": 0.7264,
      "step": 314
    },
    {
      "epoch": 0.028165236051502146,
      "grad_norm": 0.1691855639885468,
      "learning_rate": 0.0001875,
      "loss": 0.7577,
      "step": 315
    },
    {
      "epoch": 0.028254649499284693,
      "grad_norm": 0.13598542239197153,
      "learning_rate": 0.0001880952380952381,
      "loss": 0.7277,
      "step": 316
    },
    {
      "epoch": 0.02834406294706724,
      "grad_norm": 0.1407053254721104,
      "learning_rate": 0.0001886904761904762,
      "loss": 0.7088,
      "step": 317
    },
    {
      "epoch": 0.028433476394849784,
      "grad_norm": 0.14546831169569324,
      "learning_rate": 0.0001892857142857143,
      "loss": 0.705,
      "step": 318
    },
    {
      "epoch": 0.02852288984263233,
      "grad_norm": 0.14977006433485368,
      "learning_rate": 0.00018988095238095239,
      "loss": 0.7202,
      "step": 319
    },
    {
      "epoch": 0.02861230329041488,
      "grad_norm": 0.14110571503068084,
      "learning_rate": 0.00019047619047619048,
      "loss": 0.7436,
      "step": 320
    },
    {
      "epoch": 0.028701716738197425,
      "grad_norm": 0.14547993881157198,
      "learning_rate": 0.00019107142857142858,
      "loss": 0.7434,
      "step": 321
    },
    {
      "epoch": 0.028791130185979973,
      "grad_norm": 0.13802037738421924,
      "learning_rate": 0.00019166666666666667,
      "loss": 0.7659,
      "step": 322
    },
    {
      "epoch": 0.028880543633762516,
      "grad_norm": 0.1618876192119953,
      "learning_rate": 0.00019226190476190477,
      "loss": 0.7311,
      "step": 323
    },
    {
      "epoch": 0.028969957081545063,
      "grad_norm": 0.12743600306026073,
      "learning_rate": 0.00019285714285714286,
      "loss": 0.7231,
      "step": 324
    },
    {
      "epoch": 0.02905937052932761,
      "grad_norm": 0.1423391877078894,
      "learning_rate": 0.00019345238095238096,
      "loss": 0.7321,
      "step": 325
    },
    {
      "epoch": 0.029148783977110158,
      "grad_norm": 0.1386911143123634,
      "learning_rate": 0.00019404761904761905,
      "loss": 0.7469,
      "step": 326
    },
    {
      "epoch": 0.029238197424892705,
      "grad_norm": 0.1584745006794999,
      "learning_rate": 0.00019464285714285715,
      "loss": 0.7572,
      "step": 327
    },
    {
      "epoch": 0.029327610872675252,
      "grad_norm": 0.14140834650144818,
      "learning_rate": 0.00019523809523809525,
      "loss": 0.7482,
      "step": 328
    },
    {
      "epoch": 0.029417024320457796,
      "grad_norm": 0.13718148872856087,
      "learning_rate": 0.00019583333333333334,
      "loss": 0.7407,
      "step": 329
    },
    {
      "epoch": 0.029506437768240343,
      "grad_norm": 0.1343431499042781,
      "learning_rate": 0.00019642857142857144,
      "loss": 0.7565,
      "step": 330
    },
    {
      "epoch": 0.02959585121602289,
      "grad_norm": 0.15146174728855188,
      "learning_rate": 0.00019702380952380953,
      "loss": 0.754,
      "step": 331
    },
    {
      "epoch": 0.029685264663805437,
      "grad_norm": 0.12610549064589097,
      "learning_rate": 0.00019761904761904763,
      "loss": 0.7416,
      "step": 332
    },
    {
      "epoch": 0.029774678111587984,
      "grad_norm": 0.12717344417578666,
      "learning_rate": 0.00019821428571428572,
      "loss": 0.7501,
      "step": 333
    },
    {
      "epoch": 0.029864091559370528,
      "grad_norm": 0.14911997102301403,
      "learning_rate": 0.00019880952380952382,
      "loss": 0.7455,
      "step": 334
    },
    {
      "epoch": 0.029953505007153075,
      "grad_norm": 0.15847049973119595,
      "learning_rate": 0.00019940476190476191,
      "loss": 0.7578,
      "step": 335
    },
    {
      "epoch": 0.030042918454935622,
      "grad_norm": 0.1536560652722581,
      "learning_rate": 0.0002,
      "loss": 0.7386,
      "step": 336
    },
    {
      "epoch": 0.03013233190271817,
      "grad_norm": 0.13559855115356378,
      "learning_rate": 0.0001999999958065604,
      "loss": 0.7314,
      "step": 337
    },
    {
      "epoch": 0.030221745350500717,
      "grad_norm": 0.17132809732030788,
      "learning_rate": 0.0001999999832262419,
      "loss": 0.7074,
      "step": 338
    },
    {
      "epoch": 0.03031115879828326,
      "grad_norm": 0.14098873439606674,
      "learning_rate": 0.00019999996225904558,
      "loss": 0.7747,
      "step": 339
    },
    {
      "epoch": 0.030400572246065807,
      "grad_norm": 0.15457663967264648,
      "learning_rate": 0.00019999993290497318,
      "loss": 0.7704,
      "step": 340
    },
    {
      "epoch": 0.030489985693848354,
      "grad_norm": 0.12937929494801842,
      "learning_rate": 0.0001999998951640272,
      "loss": 0.7158,
      "step": 341
    },
    {
      "epoch": 0.0305793991416309,
      "grad_norm": 0.15036671324180942,
      "learning_rate": 0.0001999998490362108,
      "loss": 0.7403,
      "step": 342
    },
    {
      "epoch": 0.03066881258941345,
      "grad_norm": 0.1390034482620388,
      "learning_rate": 0.0001999997945215278,
      "loss": 0.7543,
      "step": 343
    },
    {
      "epoch": 0.030758226037195996,
      "grad_norm": 0.1292558002052378,
      "learning_rate": 0.0001999997316199828,
      "loss": 0.7571,
      "step": 344
    },
    {
      "epoch": 0.03084763948497854,
      "grad_norm": 0.140454535224009,
      "learning_rate": 0.00019999966033158108,
      "loss": 0.7303,
      "step": 345
    },
    {
      "epoch": 0.030937052932761087,
      "grad_norm": 0.19117243496077058,
      "learning_rate": 0.00019999958065632862,
      "loss": 0.7812,
      "step": 346
    },
    {
      "epoch": 0.031026466380543634,
      "grad_norm": 0.14297849927557701,
      "learning_rate": 0.0001999994925942321,
      "loss": 0.7699,
      "step": 347
    },
    {
      "epoch": 0.03111587982832618,
      "grad_norm": 0.16701482098937412,
      "learning_rate": 0.00019999939614529893,
      "loss": 0.7668,
      "step": 348
    },
    {
      "epoch": 0.031205293276108728,
      "grad_norm": 0.13159684951316747,
      "learning_rate": 0.00019999929130953714,
      "loss": 0.7468,
      "step": 349
    },
    {
      "epoch": 0.03129470672389127,
      "grad_norm": 0.12523450281904966,
      "learning_rate": 0.00019999917808695558,
      "loss": 0.7235,
      "step": 350
    },
    {
      "epoch": 0.03138412017167382,
      "grad_norm": 0.16330935781595576,
      "learning_rate": 0.0001999990564775637,
      "loss": 0.7635,
      "step": 351
    },
    {
      "epoch": 0.031473533619456366,
      "grad_norm": 0.14414056597033678,
      "learning_rate": 0.00019999892648137174,
      "loss": 0.7666,
      "step": 352
    },
    {
      "epoch": 0.03156294706723891,
      "grad_norm": 0.1839725000220017,
      "learning_rate": 0.00019999878809839056,
      "loss": 0.7218,
      "step": 353
    },
    {
      "epoch": 0.03165236051502146,
      "grad_norm": 0.15479628691306346,
      "learning_rate": 0.0001999986413286318,
      "loss": 0.7256,
      "step": 354
    },
    {
      "epoch": 0.031741773962804004,
      "grad_norm": 0.16666579460251305,
      "learning_rate": 0.00019999848617210776,
      "loss": 0.787,
      "step": 355
    },
    {
      "epoch": 0.031831187410586555,
      "grad_norm": 0.13622469342784319,
      "learning_rate": 0.00019999832262883148,
      "loss": 0.7395,
      "step": 356
    },
    {
      "epoch": 0.0319206008583691,
      "grad_norm": 0.14062891251495316,
      "learning_rate": 0.00019999815069881663,
      "loss": 0.7435,
      "step": 357
    },
    {
      "epoch": 0.03201001430615164,
      "grad_norm": 0.12357945227140434,
      "learning_rate": 0.00019999797038207763,
      "loss": 0.7732,
      "step": 358
    },
    {
      "epoch": 0.03209942775393419,
      "grad_norm": 0.14816649540718455,
      "learning_rate": 0.00019999778167862964,
      "loss": 0.7775,
      "step": 359
    },
    {
      "epoch": 0.032188841201716736,
      "grad_norm": 0.14378971992442013,
      "learning_rate": 0.00019999758458848847,
      "loss": 0.7315,
      "step": 360
    },
    {
      "epoch": 0.03227825464949929,
      "grad_norm": 0.15126015111935937,
      "learning_rate": 0.00019999737911167065,
      "loss": 0.7855,
      "step": 361
    },
    {
      "epoch": 0.03236766809728183,
      "grad_norm": 0.1396553966307511,
      "learning_rate": 0.00019999716524819337,
      "loss": 0.7264,
      "step": 362
    },
    {
      "epoch": 0.032457081545064374,
      "grad_norm": 0.14119124665535493,
      "learning_rate": 0.00019999694299807465,
      "loss": 0.7558,
      "step": 363
    },
    {
      "epoch": 0.032546494992846925,
      "grad_norm": 0.1566744574907628,
      "learning_rate": 0.0001999967123613331,
      "loss": 0.754,
      "step": 364
    },
    {
      "epoch": 0.03263590844062947,
      "grad_norm": 0.14377569919935407,
      "learning_rate": 0.000199996473337988,
      "loss": 0.7441,
      "step": 365
    },
    {
      "epoch": 0.03272532188841202,
      "grad_norm": 0.1296742867343294,
      "learning_rate": 0.0001999962259280595,
      "loss": 0.7566,
      "step": 366
    },
    {
      "epoch": 0.03281473533619456,
      "grad_norm": 0.14274011861689762,
      "learning_rate": 0.00019999597013156824,
      "loss": 0.7217,
      "step": 367
    },
    {
      "epoch": 0.032904148783977114,
      "grad_norm": 0.13224061019046401,
      "learning_rate": 0.00019999570594853575,
      "loss": 0.7117,
      "step": 368
    },
    {
      "epoch": 0.03299356223175966,
      "grad_norm": 0.14721624348170878,
      "learning_rate": 0.0001999954333789842,
      "loss": 0.7247,
      "step": 369
    },
    {
      "epoch": 0.0330829756795422,
      "grad_norm": 0.12340699736422292,
      "learning_rate": 0.00019999515242293637,
      "loss": 0.7629,
      "step": 370
    },
    {
      "epoch": 0.03317238912732475,
      "grad_norm": 0.13954278062603687,
      "learning_rate": 0.0001999948630804159,
      "loss": 0.7821,
      "step": 371
    },
    {
      "epoch": 0.033261802575107295,
      "grad_norm": 0.1317220614145399,
      "learning_rate": 0.000199994565351447,
      "loss": 0.7604,
      "step": 372
    },
    {
      "epoch": 0.033351216022889846,
      "grad_norm": 0.14174069531643138,
      "learning_rate": 0.00019999425923605468,
      "loss": 0.7341,
      "step": 373
    },
    {
      "epoch": 0.03344062947067239,
      "grad_norm": 0.14495157031486028,
      "learning_rate": 0.0001999939447342646,
      "loss": 0.7367,
      "step": 374
    },
    {
      "epoch": 0.03353004291845493,
      "grad_norm": 0.12785506585095144,
      "learning_rate": 0.00019999362184610316,
      "loss": 0.7048,
      "step": 375
    },
    {
      "epoch": 0.033619456366237484,
      "grad_norm": 0.12977399525174782,
      "learning_rate": 0.00019999329057159736,
      "loss": 0.7544,
      "step": 376
    },
    {
      "epoch": 0.03370886981402003,
      "grad_norm": 0.16054292056947247,
      "learning_rate": 0.0001999929509107751,
      "loss": 0.7701,
      "step": 377
    },
    {
      "epoch": 0.03379828326180258,
      "grad_norm": 0.12457211215798229,
      "learning_rate": 0.00019999260286366477,
      "loss": 0.7192,
      "step": 378
    },
    {
      "epoch": 0.03388769670958512,
      "grad_norm": 0.13876556800512704,
      "learning_rate": 0.00019999224643029565,
      "loss": 0.7434,
      "step": 379
    },
    {
      "epoch": 0.033977110157367665,
      "grad_norm": 0.13039845099598635,
      "learning_rate": 0.0001999918816106975,
      "loss": 0.7228,
      "step": 380
    },
    {
      "epoch": 0.034066523605150216,
      "grad_norm": 0.16000899077844974,
      "learning_rate": 0.00019999150840490105,
      "loss": 0.7316,
      "step": 381
    },
    {
      "epoch": 0.03415593705293276,
      "grad_norm": 0.1344832799291948,
      "learning_rate": 0.00019999112681293757,
      "loss": 0.7275,
      "step": 382
    },
    {
      "epoch": 0.03424535050071531,
      "grad_norm": 0.14207025330167944,
      "learning_rate": 0.000199990736834839,
      "loss": 0.7567,
      "step": 383
    },
    {
      "epoch": 0.034334763948497854,
      "grad_norm": 0.1331174996435373,
      "learning_rate": 0.00019999033847063811,
      "loss": 0.7551,
      "step": 384
    },
    {
      "epoch": 0.0344241773962804,
      "grad_norm": 0.14001402674454344,
      "learning_rate": 0.00019998993172036828,
      "loss": 0.7142,
      "step": 385
    },
    {
      "epoch": 0.03451359084406295,
      "grad_norm": 0.12871467666768255,
      "learning_rate": 0.00019998951658406364,
      "loss": 0.7557,
      "step": 386
    },
    {
      "epoch": 0.03460300429184549,
      "grad_norm": 0.1405167428397779,
      "learning_rate": 0.000199989093061759,
      "loss": 0.799,
      "step": 387
    },
    {
      "epoch": 0.03469241773962804,
      "grad_norm": 0.13900726845994613,
      "learning_rate": 0.00019998866115348988,
      "loss": 0.7514,
      "step": 388
    },
    {
      "epoch": 0.034781831187410586,
      "grad_norm": 0.14640010697123274,
      "learning_rate": 0.0001999882208592925,
      "loss": 0.3077,
      "step": 389
    },
    {
      "epoch": 0.03487124463519313,
      "grad_norm": 0.17266593860484372,
      "learning_rate": 0.00019998777217920385,
      "loss": 0.7801,
      "step": 390
    },
    {
      "epoch": 0.03496065808297568,
      "grad_norm": 0.13595121915975025,
      "learning_rate": 0.0001999873151132614,
      "loss": 0.719,
      "step": 391
    },
    {
      "epoch": 0.035050071530758224,
      "grad_norm": 0.1449348676121308,
      "learning_rate": 0.00019998684966150365,
      "loss": 0.7353,
      "step": 392
    },
    {
      "epoch": 0.035139484978540775,
      "grad_norm": 0.14435306810591267,
      "learning_rate": 0.00019998637582396958,
      "loss": 0.7404,
      "step": 393
    },
    {
      "epoch": 0.03522889842632332,
      "grad_norm": 0.1675272867605722,
      "learning_rate": 0.0001999858936006989,
      "loss": 0.7288,
      "step": 394
    },
    {
      "epoch": 0.03531831187410586,
      "grad_norm": 0.15272697419335127,
      "learning_rate": 0.00019998540299173207,
      "loss": 0.7546,
      "step": 395
    },
    {
      "epoch": 0.03540772532188841,
      "grad_norm": 0.34984549451961233,
      "learning_rate": 0.00019998490399711024,
      "loss": 0.3319,
      "step": 396
    },
    {
      "epoch": 0.035497138769670956,
      "grad_norm": 0.14292861052594438,
      "learning_rate": 0.0001999843966168753,
      "loss": 0.7535,
      "step": 397
    },
    {
      "epoch": 0.03558655221745351,
      "grad_norm": 0.13184258131302412,
      "learning_rate": 0.00019998388085106972,
      "loss": 0.7556,
      "step": 398
    },
    {
      "epoch": 0.03567596566523605,
      "grad_norm": 0.15309106869857309,
      "learning_rate": 0.00019998335669973682,
      "loss": 0.7316,
      "step": 399
    },
    {
      "epoch": 0.0357653791130186,
      "grad_norm": 0.1223298800982776,
      "learning_rate": 0.00019998282416292055,
      "loss": 0.7405,
      "step": 400
    },
    {
      "epoch": 0.035854792560801145,
      "grad_norm": 0.1439447535601414,
      "learning_rate": 0.00019998228324066557,
      "loss": 0.7092,
      "step": 401
    },
    {
      "epoch": 0.03594420600858369,
      "grad_norm": 0.1359888488531804,
      "learning_rate": 0.00019998173393301723,
      "loss": 0.7407,
      "step": 402
    },
    {
      "epoch": 0.03603361945636624,
      "grad_norm": 0.13746765985683188,
      "learning_rate": 0.0001999811762400216,
      "loss": 0.7483,
      "step": 403
    },
    {
      "epoch": 0.03612303290414878,
      "grad_norm": 0.14680182661670443,
      "learning_rate": 0.0001999806101617255,
      "loss": 0.7764,
      "step": 404
    },
    {
      "epoch": 0.036212446351931334,
      "grad_norm": 0.12848279050448838,
      "learning_rate": 0.00019998003569817637,
      "loss": 0.7354,
      "step": 405
    },
    {
      "epoch": 0.03630185979971388,
      "grad_norm": 0.16225186680918438,
      "learning_rate": 0.00019997945284942235,
      "loss": 0.7581,
      "step": 406
    },
    {
      "epoch": 0.03639127324749642,
      "grad_norm": 0.1483462714179653,
      "learning_rate": 0.0001999788616155124,
      "loss": 0.7864,
      "step": 407
    },
    {
      "epoch": 0.03648068669527897,
      "grad_norm": 0.13762150610100365,
      "learning_rate": 0.00019997826199649605,
      "loss": 0.7245,
      "step": 408
    },
    {
      "epoch": 0.036570100143061515,
      "grad_norm": 0.1232530240099414,
      "learning_rate": 0.00019997765399242364,
      "loss": 0.6972,
      "step": 409
    },
    {
      "epoch": 0.036659513590844066,
      "grad_norm": 0.12877987580945913,
      "learning_rate": 0.0001999770376033461,
      "loss": 0.7479,
      "step": 410
    },
    {
      "epoch": 0.03674892703862661,
      "grad_norm": 0.12407561528068684,
      "learning_rate": 0.00019997641282931515,
      "loss": 0.7342,
      "step": 411
    },
    {
      "epoch": 0.03683834048640915,
      "grad_norm": 0.1565197751111714,
      "learning_rate": 0.00019997577967038324,
      "loss": 0.7169,
      "step": 412
    },
    {
      "epoch": 0.036927753934191704,
      "grad_norm": 0.12979647983660655,
      "learning_rate": 0.0001999751381266034,
      "loss": 0.7852,
      "step": 413
    },
    {
      "epoch": 0.03701716738197425,
      "grad_norm": 0.18088193999703972,
      "learning_rate": 0.00019997448819802948,
      "loss": 0.7627,
      "step": 414
    },
    {
      "epoch": 0.0371065808297568,
      "grad_norm": 0.1217395141731323,
      "learning_rate": 0.00019997382988471595,
      "loss": 0.7519,
      "step": 415
    },
    {
      "epoch": 0.03719599427753934,
      "grad_norm": 0.1826654759095402,
      "learning_rate": 0.00019997316318671806,
      "loss": 0.7285,
      "step": 416
    },
    {
      "epoch": 0.037285407725321885,
      "grad_norm": 0.14380155007838794,
      "learning_rate": 0.00019997248810409173,
      "loss": 0.7254,
      "step": 417
    },
    {
      "epoch": 0.037374821173104436,
      "grad_norm": 0.16205687070830221,
      "learning_rate": 0.0001999718046368935,
      "loss": 0.8118,
      "step": 418
    },
    {
      "epoch": 0.03746423462088698,
      "grad_norm": 0.12967873478261668,
      "learning_rate": 0.0001999711127851808,
      "loss": 0.7126,
      "step": 419
    },
    {
      "epoch": 0.03755364806866953,
      "grad_norm": 0.13998257443512957,
      "learning_rate": 0.0001999704125490116,
      "loss": 0.7545,
      "step": 420
    },
    {
      "epoch": 0.037643061516452074,
      "grad_norm": 0.12708372909584448,
      "learning_rate": 0.00019996970392844462,
      "loss": 0.7316,
      "step": 421
    },
    {
      "epoch": 0.03773247496423462,
      "grad_norm": 0.12496122996066965,
      "learning_rate": 0.0001999689869235393,
      "loss": 0.7479,
      "step": 422
    },
    {
      "epoch": 0.03782188841201717,
      "grad_norm": 0.12347624326863685,
      "learning_rate": 0.00019996826153435582,
      "loss": 0.7104,
      "step": 423
    },
    {
      "epoch": 0.03791130185979971,
      "grad_norm": 0.1278097669895413,
      "learning_rate": 0.00019996752776095495,
      "loss": 0.7135,
      "step": 424
    },
    {
      "epoch": 0.03800071530758226,
      "grad_norm": 0.13058050579688038,
      "learning_rate": 0.00019996678560339824,
      "loss": 0.7636,
      "step": 425
    },
    {
      "epoch": 0.038090128755364806,
      "grad_norm": 0.12986982266626307,
      "learning_rate": 0.00019996603506174795,
      "loss": 0.6955,
      "step": 426
    },
    {
      "epoch": 0.03817954220314735,
      "grad_norm": 0.13193096346395072,
      "learning_rate": 0.00019996527613606708,
      "loss": 0.7115,
      "step": 427
    },
    {
      "epoch": 0.0382689556509299,
      "grad_norm": 0.12617109676235766,
      "learning_rate": 0.00019996450882641916,
      "loss": 0.7325,
      "step": 428
    },
    {
      "epoch": 0.038358369098712444,
      "grad_norm": 0.1412721983606963,
      "learning_rate": 0.00019996373313286867,
      "loss": 0.7488,
      "step": 429
    },
    {
      "epoch": 0.038447782546494995,
      "grad_norm": 0.13455424776094418,
      "learning_rate": 0.00019996294905548056,
      "loss": 0.7627,
      "step": 430
    },
    {
      "epoch": 0.03853719599427754,
      "grad_norm": 0.1269976269746249,
      "learning_rate": 0.00019996215659432066,
      "loss": 0.76,
      "step": 431
    },
    {
      "epoch": 0.03862660944206009,
      "grad_norm": 0.13871506255157878,
      "learning_rate": 0.00019996135574945544,
      "loss": 0.7304,
      "step": 432
    },
    {
      "epoch": 0.03871602288984263,
      "grad_norm": 0.130357461553849,
      "learning_rate": 0.00019996054652095198,
      "loss": 0.7171,
      "step": 433
    },
    {
      "epoch": 0.038805436337625177,
      "grad_norm": 0.13687212109375416,
      "learning_rate": 0.00019995972890887823,
      "loss": 0.7254,
      "step": 434
    },
    {
      "epoch": 0.03889484978540773,
      "grad_norm": 0.1404944816348864,
      "learning_rate": 0.00019995890291330272,
      "loss": 0.7465,
      "step": 435
    },
    {
      "epoch": 0.03898426323319027,
      "grad_norm": 0.15686178927571615,
      "learning_rate": 0.00019995806853429477,
      "loss": 0.7084,
      "step": 436
    },
    {
      "epoch": 0.03907367668097282,
      "grad_norm": 0.13962362203313544,
      "learning_rate": 0.0001999572257719243,
      "loss": 0.7384,
      "step": 437
    },
    {
      "epoch": 0.039163090128755365,
      "grad_norm": 0.13493271908931032,
      "learning_rate": 0.00019995637462626205,
      "loss": 0.7349,
      "step": 438
    },
    {
      "epoch": 0.03925250357653791,
      "grad_norm": 0.14406410470516143,
      "learning_rate": 0.00019995551509737936,
      "loss": 0.7383,
      "step": 439
    },
    {
      "epoch": 0.03934191702432046,
      "grad_norm": 0.12717400933672626,
      "learning_rate": 0.00019995464718534835,
      "loss": 0.7277,
      "step": 440
    },
    {
      "epoch": 0.039431330472103,
      "grad_norm": 0.14138841078685857,
      "learning_rate": 0.00019995377089024178,
      "loss": 0.7634,
      "step": 441
    },
    {
      "epoch": 0.039520743919885554,
      "grad_norm": 0.16405756572438962,
      "learning_rate": 0.00019995288621213318,
      "loss": 0.7028,
      "step": 442
    },
    {
      "epoch": 0.0396101573676681,
      "grad_norm": 0.14661552692095364,
      "learning_rate": 0.0001999519931510967,
      "loss": 0.791,
      "step": 443
    },
    {
      "epoch": 0.03969957081545064,
      "grad_norm": 0.14316447218259512,
      "learning_rate": 0.00019995109170720728,
      "loss": 0.7594,
      "step": 444
    },
    {
      "epoch": 0.03978898426323319,
      "grad_norm": 0.14539535794330663,
      "learning_rate": 0.0001999501818805405,
      "loss": 0.7843,
      "step": 445
    },
    {
      "epoch": 0.039878397711015735,
      "grad_norm": 0.14266050489556537,
      "learning_rate": 0.0001999492636711727,
      "loss": 0.7742,
      "step": 446
    },
    {
      "epoch": 0.039967811158798286,
      "grad_norm": 0.15212065705372274,
      "learning_rate": 0.00019994833707918084,
      "loss": 0.6934,
      "step": 447
    },
    {
      "epoch": 0.04005722460658083,
      "grad_norm": 0.1394871762282056,
      "learning_rate": 0.00019994740210464268,
      "loss": 0.7585,
      "step": 448
    },
    {
      "epoch": 0.04014663805436337,
      "grad_norm": 0.12716691846274178,
      "learning_rate": 0.00019994645874763658,
      "loss": 0.7301,
      "step": 449
    },
    {
      "epoch": 0.040236051502145924,
      "grad_norm": 0.13230804874174026,
      "learning_rate": 0.00019994550700824172,
      "loss": 0.7373,
      "step": 450
    },
    {
      "epoch": 0.04032546494992847,
      "grad_norm": 0.13547089891761752,
      "learning_rate": 0.00019994454688653784,
      "loss": 0.7667,
      "step": 451
    },
    {
      "epoch": 0.04041487839771102,
      "grad_norm": 0.12781385436039755,
      "learning_rate": 0.00019994357838260557,
      "loss": 0.7506,
      "step": 452
    },
    {
      "epoch": 0.04050429184549356,
      "grad_norm": 0.1533321750359391,
      "learning_rate": 0.00019994260149652603,
      "loss": 0.7886,
      "step": 453
    },
    {
      "epoch": 0.040593705293276106,
      "grad_norm": 0.1349775736463149,
      "learning_rate": 0.00019994161622838126,
      "loss": 0.7287,
      "step": 454
    },
    {
      "epoch": 0.040683118741058656,
      "grad_norm": 0.12915562043051892,
      "learning_rate": 0.0001999406225782538,
      "loss": 0.752,
      "step": 455
    },
    {
      "epoch": 0.0407725321888412,
      "grad_norm": 0.14312369105496303,
      "learning_rate": 0.00019993962054622703,
      "loss": 0.7422,
      "step": 456
    },
    {
      "epoch": 0.04086194563662375,
      "grad_norm": 0.12514303576899752,
      "learning_rate": 0.00019993861013238497,
      "loss": 0.7233,
      "step": 457
    },
    {
      "epoch": 0.040951359084406294,
      "grad_norm": 0.1255686264353843,
      "learning_rate": 0.0001999375913368124,
      "loss": 0.7413,
      "step": 458
    },
    {
      "epoch": 0.04104077253218884,
      "grad_norm": 0.1567444208351315,
      "learning_rate": 0.00019993656415959472,
      "loss": 0.7616,
      "step": 459
    },
    {
      "epoch": 0.04113018597997139,
      "grad_norm": 0.16818036097997416,
      "learning_rate": 0.00019993552860081814,
      "loss": 0.716,
      "step": 460
    },
    {
      "epoch": 0.04121959942775393,
      "grad_norm": 0.18038055018963128,
      "learning_rate": 0.00019993448466056938,
      "loss": 0.7562,
      "step": 461
    },
    {
      "epoch": 0.04130901287553648,
      "grad_norm": 0.14633106547821362,
      "learning_rate": 0.00019993343233893615,
      "loss": 0.7451,
      "step": 462
    },
    {
      "epoch": 0.041398426323319026,
      "grad_norm": 0.21539318789025705,
      "learning_rate": 0.00019993237163600663,
      "loss": 0.8164,
      "step": 463
    },
    {
      "epoch": 0.04148783977110158,
      "grad_norm": 0.1346085291549028,
      "learning_rate": 0.00019993130255186977,
      "loss": 0.7531,
      "step": 464
    },
    {
      "epoch": 0.04157725321888412,
      "grad_norm": 0.13694584414845506,
      "learning_rate": 0.00019993022508661525,
      "loss": 0.7441,
      "step": 465
    },
    {
      "epoch": 0.041666666666666664,
      "grad_norm": 0.13648233295338924,
      "learning_rate": 0.00019992913924033349,
      "loss": 0.7461,
      "step": 466
    },
    {
      "epoch": 0.041756080114449215,
      "grad_norm": 0.14624102438109368,
      "learning_rate": 0.00019992804501311543,
      "loss": 0.678,
      "step": 467
    },
    {
      "epoch": 0.04184549356223176,
      "grad_norm": 0.13504798112478011,
      "learning_rate": 0.00019992694240505293,
      "loss": 0.7284,
      "step": 468
    },
    {
      "epoch": 0.04193490701001431,
      "grad_norm": 0.1330603187860877,
      "learning_rate": 0.00019992583141623848,
      "loss": 0.7359,
      "step": 469
    },
    {
      "epoch": 0.04202432045779685,
      "grad_norm": 0.11767009682632566,
      "learning_rate": 0.00019992471204676525,
      "loss": 0.7001,
      "step": 470
    },
    {
      "epoch": 0.0421137339055794,
      "grad_norm": 0.1955643123733572,
      "learning_rate": 0.00019992358429672704,
      "loss": 0.3548,
      "step": 471
    },
    {
      "epoch": 0.04220314735336195,
      "grad_norm": 0.16138670601696042,
      "learning_rate": 0.00019992244816621852,
      "loss": 0.7237,
      "step": 472
    },
    {
      "epoch": 0.04229256080114449,
      "grad_norm": 0.16010825238190926,
      "learning_rate": 0.00019992130365533497,
      "loss": 0.7437,
      "step": 473
    },
    {
      "epoch": 0.04238197424892704,
      "grad_norm": 0.14722007707416931,
      "learning_rate": 0.00019992015076417233,
      "loss": 0.7045,
      "step": 474
    },
    {
      "epoch": 0.042471387696709585,
      "grad_norm": 0.1396129954022203,
      "learning_rate": 0.00019991898949282732,
      "loss": 0.7129,
      "step": 475
    },
    {
      "epoch": 0.04256080114449213,
      "grad_norm": 0.16415265075747673,
      "learning_rate": 0.00019991781984139736,
      "loss": 0.8185,
      "step": 476
    },
    {
      "epoch": 0.04265021459227468,
      "grad_norm": 0.12036974942726832,
      "learning_rate": 0.00019991664180998048,
      "loss": 0.7386,
      "step": 477
    },
    {
      "epoch": 0.04273962804005722,
      "grad_norm": 0.1197792088065156,
      "learning_rate": 0.00019991545539867556,
      "loss": 0.7044,
      "step": 478
    },
    {
      "epoch": 0.042829041487839774,
      "grad_norm": 0.13050488615473554,
      "learning_rate": 0.00019991426060758202,
      "loss": 0.7227,
      "step": 479
    },
    {
      "epoch": 0.04291845493562232,
      "grad_norm": 0.1367563707718615,
      "learning_rate": 0.00019991305743680013,
      "loss": 0.7545,
      "step": 480
    },
    {
      "epoch": 0.04300786838340486,
      "grad_norm": 0.14168146105499407,
      "learning_rate": 0.00019991184588643077,
      "loss": 0.7737,
      "step": 481
    },
    {
      "epoch": 0.04309728183118741,
      "grad_norm": 0.14235425582135225,
      "learning_rate": 0.00019991062595657558,
      "loss": 0.7296,
      "step": 482
    },
    {
      "epoch": 0.043186695278969955,
      "grad_norm": 0.14573686801721128,
      "learning_rate": 0.00019990939764733684,
      "loss": 0.7287,
      "step": 483
    },
    {
      "epoch": 0.043276108726752506,
      "grad_norm": 0.1363488524471403,
      "learning_rate": 0.0001999081609588176,
      "loss": 0.7237,
      "step": 484
    },
    {
      "epoch": 0.04336552217453505,
      "grad_norm": 0.1812502195987964,
      "learning_rate": 0.0001999069158911215,
      "loss": 0.7238,
      "step": 485
    },
    {
      "epoch": 0.04345493562231759,
      "grad_norm": 0.13725619094992805,
      "learning_rate": 0.00019990566244435307,
      "loss": 0.7498,
      "step": 486
    },
    {
      "epoch": 0.043544349070100144,
      "grad_norm": 0.17731206342598232,
      "learning_rate": 0.0001999044006186174,
      "loss": 0.7374,
      "step": 487
    },
    {
      "epoch": 0.04363376251788269,
      "grad_norm": 0.14842406057876842,
      "learning_rate": 0.00019990313041402024,
      "loss": 0.8024,
      "step": 488
    },
    {
      "epoch": 0.04372317596566524,
      "grad_norm": 0.16097208721460907,
      "learning_rate": 0.00019990185183066825,
      "loss": 0.7382,
      "step": 489
    },
    {
      "epoch": 0.04381258941344778,
      "grad_norm": 0.1298476742889539,
      "learning_rate": 0.00019990056486866858,
      "loss": 0.736,
      "step": 490
    },
    {
      "epoch": 0.043902002861230326,
      "grad_norm": 0.1701000633231983,
      "learning_rate": 0.00019989926952812916,
      "loss": 0.7226,
      "step": 491
    },
    {
      "epoch": 0.043991416309012876,
      "grad_norm": 0.15586925281193775,
      "learning_rate": 0.00019989796580915866,
      "loss": 0.2977,
      "step": 492
    },
    {
      "epoch": 0.04408082975679542,
      "grad_norm": 0.11908001475399667,
      "learning_rate": 0.0001998966537118664,
      "loss": 0.687,
      "step": 493
    },
    {
      "epoch": 0.04417024320457797,
      "grad_norm": 0.12871179089336243,
      "learning_rate": 0.0001998953332363625,
      "loss": 0.759,
      "step": 494
    },
    {
      "epoch": 0.044259656652360514,
      "grad_norm": 0.1279961933252126,
      "learning_rate": 0.00019989400438275758,
      "loss": 0.7132,
      "step": 495
    },
    {
      "epoch": 0.044349070100143065,
      "grad_norm": 0.13161446883768638,
      "learning_rate": 0.00019989266715116316,
      "loss": 0.7371,
      "step": 496
    },
    {
      "epoch": 0.04443848354792561,
      "grad_norm": 0.13302686667934255,
      "learning_rate": 0.0001998913215416914,
      "loss": 0.7318,
      "step": 497
    },
    {
      "epoch": 0.04452789699570815,
      "grad_norm": 0.1412160863958369,
      "learning_rate": 0.00019988996755445517,
      "loss": 0.7289,
      "step": 498
    },
    {
      "epoch": 0.0446173104434907,
      "grad_norm": 0.1367286799260126,
      "learning_rate": 0.00019988860518956796,
      "loss": 0.7385,
      "step": 499
    },
    {
      "epoch": 0.044706723891273246,
      "grad_norm": 0.13479356875414145,
      "learning_rate": 0.0001998872344471441,
      "loss": 0.7536,
      "step": 500
    },
    {
      "epoch": 0.0447961373390558,
      "grad_norm": 0.12788874674508466,
      "learning_rate": 0.00019988585532729848,
      "loss": 0.768,
      "step": 501
    },
    {
      "epoch": 0.04488555078683834,
      "grad_norm": 0.13182056489954994,
      "learning_rate": 0.00019988446783014683,
      "loss": 0.6955,
      "step": 502
    },
    {
      "epoch": 0.044974964234620884,
      "grad_norm": 0.1214131829015826,
      "learning_rate": 0.0001998830719558055,
      "loss": 0.7603,
      "step": 503
    },
    {
      "epoch": 0.045064377682403435,
      "grad_norm": 0.13302966097479335,
      "learning_rate": 0.00019988166770439154,
      "loss": 0.7533,
      "step": 504
    },
    {
      "epoch": 0.04515379113018598,
      "grad_norm": 0.1514979416701935,
      "learning_rate": 0.00019988025507602274,
      "loss": 0.7508,
      "step": 505
    },
    {
      "epoch": 0.04524320457796853,
      "grad_norm": 0.1378939675509189,
      "learning_rate": 0.00019987883407081753,
      "loss": 0.7634,
      "step": 506
    },
    {
      "epoch": 0.04533261802575107,
      "grad_norm": 0.1389635359859422,
      "learning_rate": 0.00019987740468889519,
      "loss": 0.7173,
      "step": 507
    },
    {
      "epoch": 0.04542203147353362,
      "grad_norm": 0.14990532283555422,
      "learning_rate": 0.00019987596693037552,
      "loss": 0.7899,
      "step": 508
    },
    {
      "epoch": 0.04551144492131617,
      "grad_norm": 0.13229042093302593,
      "learning_rate": 0.00019987452079537913,
      "loss": 0.7589,
      "step": 509
    },
    {
      "epoch": 0.04560085836909871,
      "grad_norm": 0.14503156559309544,
      "learning_rate": 0.00019987306628402727,
      "loss": 0.7414,
      "step": 510
    },
    {
      "epoch": 0.04569027181688126,
      "grad_norm": 0.1268676243187444,
      "learning_rate": 0.00019987160339644198,
      "loss": 0.6991,
      "step": 511
    },
    {
      "epoch": 0.045779685264663805,
      "grad_norm": 0.13357958042643836,
      "learning_rate": 0.00019987013213274593,
      "loss": 0.7572,
      "step": 512
    },
    {
      "epoch": 0.04586909871244635,
      "grad_norm": 0.12245715833468018,
      "learning_rate": 0.0001998686524930625,
      "loss": 0.7313,
      "step": 513
    },
    {
      "epoch": 0.0459585121602289,
      "grad_norm": 0.1338349428588623,
      "learning_rate": 0.0001998671644775158,
      "loss": 0.7329,
      "step": 514
    },
    {
      "epoch": 0.04604792560801144,
      "grad_norm": 0.12067844185024046,
      "learning_rate": 0.00019986566808623062,
      "loss": 0.743,
      "step": 515
    },
    {
      "epoch": 0.046137339055793994,
      "grad_norm": 0.13258660603900213,
      "learning_rate": 0.00019986416331933246,
      "loss": 0.7784,
      "step": 516
    },
    {
      "epoch": 0.04622675250357654,
      "grad_norm": 0.13524882655588646,
      "learning_rate": 0.00019986265017694755,
      "loss": 0.7556,
      "step": 517
    },
    {
      "epoch": 0.04631616595135908,
      "grad_norm": 0.13005021377905418,
      "learning_rate": 0.00019986112865920277,
      "loss": 0.7497,
      "step": 518
    },
    {
      "epoch": 0.04640557939914163,
      "grad_norm": 0.14231620023034186,
      "learning_rate": 0.00019985959876622574,
      "loss": 0.7739,
      "step": 519
    },
    {
      "epoch": 0.046494992846924176,
      "grad_norm": 0.13305080744240494,
      "learning_rate": 0.00019985806049814474,
      "loss": 0.7293,
      "step": 520
    },
    {
      "epoch": 0.046584406294706726,
      "grad_norm": 0.1491106949270077,
      "learning_rate": 0.0001998565138550888,
      "loss": 0.77,
      "step": 521
    },
    {
      "epoch": 0.04667381974248927,
      "grad_norm": 0.13909402409924376,
      "learning_rate": 0.00019985495883718764,
      "loss": 0.7593,
      "step": 522
    },
    {
      "epoch": 0.04676323319027181,
      "grad_norm": 0.13861632866932164,
      "learning_rate": 0.0001998533954445717,
      "loss": 0.7141,
      "step": 523
    },
    {
      "epoch": 0.046852646638054364,
      "grad_norm": 0.1322459186576319,
      "learning_rate": 0.00019985182367737202,
      "loss": 0.753,
      "step": 524
    },
    {
      "epoch": 0.04694206008583691,
      "grad_norm": 0.12977621388255808,
      "learning_rate": 0.00019985024353572054,
      "loss": 0.7367,
      "step": 525
    },
    {
      "epoch": 0.04703147353361946,
      "grad_norm": 0.14610835686661,
      "learning_rate": 0.0001998486550197497,
      "loss": 0.7512,
      "step": 526
    },
    {
      "epoch": 0.047120886981402,
      "grad_norm": 0.2095680950416219,
      "learning_rate": 0.00019984705812959276,
      "loss": 0.7551,
      "step": 527
    },
    {
      "epoch": 0.04721030042918455,
      "grad_norm": 0.14056680824855491,
      "learning_rate": 0.0001998454528653836,
      "loss": 0.7663,
      "step": 528
    },
    {
      "epoch": 0.047299713876967096,
      "grad_norm": 0.14204924585641182,
      "learning_rate": 0.00019984383922725695,
      "loss": 0.7363,
      "step": 529
    },
    {
      "epoch": 0.04738912732474964,
      "grad_norm": 0.13990251943096177,
      "learning_rate": 0.00019984221721534805,
      "loss": 0.7523,
      "step": 530
    },
    {
      "epoch": 0.04747854077253219,
      "grad_norm": 0.1264910419396674,
      "learning_rate": 0.00019984058682979297,
      "loss": 0.718,
      "step": 531
    },
    {
      "epoch": 0.047567954220314734,
      "grad_norm": 0.13720598303897188,
      "learning_rate": 0.00019983894807072848,
      "loss": 0.777,
      "step": 532
    },
    {
      "epoch": 0.047657367668097285,
      "grad_norm": 0.12513322385596146,
      "learning_rate": 0.00019983730093829194,
      "loss": 0.7037,
      "step": 533
    },
    {
      "epoch": 0.04774678111587983,
      "grad_norm": 0.12264229924187742,
      "learning_rate": 0.00019983564543262156,
      "loss": 0.7308,
      "step": 534
    },
    {
      "epoch": 0.04783619456366237,
      "grad_norm": 0.12370890878027817,
      "learning_rate": 0.0001998339815538562,
      "loss": 0.7344,
      "step": 535
    },
    {
      "epoch": 0.04792560801144492,
      "grad_norm": 0.1337941081369745,
      "learning_rate": 0.00019983230930213536,
      "loss": 0.7398,
      "step": 536
    },
    {
      "epoch": 0.04801502145922747,
      "grad_norm": 0.14208305454078313,
      "learning_rate": 0.00019983062867759928,
      "loss": 0.7808,
      "step": 537
    },
    {
      "epoch": 0.04810443490701002,
      "grad_norm": 0.13229985978534478,
      "learning_rate": 0.00019982893968038896,
      "loss": 0.7582,
      "step": 538
    },
    {
      "epoch": 0.04819384835479256,
      "grad_norm": 0.13643126138815548,
      "learning_rate": 0.00019982724231064602,
      "loss": 0.7327,
      "step": 539
    },
    {
      "epoch": 0.048283261802575105,
      "grad_norm": 0.1507976490024356,
      "learning_rate": 0.00019982553656851284,
      "loss": 0.7869,
      "step": 540
    },
    {
      "epoch": 0.048372675250357655,
      "grad_norm": 0.14617401712184466,
      "learning_rate": 0.00019982382245413248,
      "loss": 0.7511,
      "step": 541
    },
    {
      "epoch": 0.0484620886981402,
      "grad_norm": 0.1252093349814613,
      "learning_rate": 0.00019982209996764866,
      "loss": 0.7328,
      "step": 542
    },
    {
      "epoch": 0.04855150214592275,
      "grad_norm": 0.14280388990088408,
      "learning_rate": 0.0001998203691092059,
      "loss": 0.7338,
      "step": 543
    },
    {
      "epoch": 0.04864091559370529,
      "grad_norm": 0.12292385237076198,
      "learning_rate": 0.00019981862987894934,
      "loss": 0.7484,
      "step": 544
    },
    {
      "epoch": 0.04873032904148784,
      "grad_norm": 0.13728135561036778,
      "learning_rate": 0.0001998168822770248,
      "loss": 0.7382,
      "step": 545
    },
    {
      "epoch": 0.04881974248927039,
      "grad_norm": 0.12702235529303826,
      "learning_rate": 0.0001998151263035789,
      "loss": 0.7131,
      "step": 546
    },
    {
      "epoch": 0.04890915593705293,
      "grad_norm": 0.13034378613116507,
      "learning_rate": 0.00019981336195875894,
      "loss": 0.7265,
      "step": 547
    },
    {
      "epoch": 0.04899856938483548,
      "grad_norm": 0.1326625357496389,
      "learning_rate": 0.00019981158924271283,
      "loss": 0.7111,
      "step": 548
    },
    {
      "epoch": 0.049087982832618025,
      "grad_norm": 0.1286039243295954,
      "learning_rate": 0.00019980980815558925,
      "loss": 0.7454,
      "step": 549
    },
    {
      "epoch": 0.04917739628040057,
      "grad_norm": 0.1324497423226744,
      "learning_rate": 0.00019980801869753765,
      "loss": 0.7543,
      "step": 550
    },
    {
      "epoch": 0.04926680972818312,
      "grad_norm": 0.12349212058857022,
      "learning_rate": 0.00019980622086870803,
      "loss": 0.7303,
      "step": 551
    },
    {
      "epoch": 0.04935622317596566,
      "grad_norm": 0.1266269239210141,
      "learning_rate": 0.00019980441466925118,
      "loss": 0.7328,
      "step": 552
    },
    {
      "epoch": 0.049445636623748214,
      "grad_norm": 0.12720083664966644,
      "learning_rate": 0.00019980260009931864,
      "loss": 0.7522,
      "step": 553
    },
    {
      "epoch": 0.04953505007153076,
      "grad_norm": 0.15010845876599566,
      "learning_rate": 0.00019980077715906256,
      "loss": 0.7769,
      "step": 554
    },
    {
      "epoch": 0.0496244635193133,
      "grad_norm": 0.13004650317828836,
      "learning_rate": 0.0001997989458486358,
      "loss": 0.7278,
      "step": 555
    },
    {
      "epoch": 0.04971387696709585,
      "grad_norm": 0.14679214675776733,
      "learning_rate": 0.000199797106168192,
      "loss": 0.7561,
      "step": 556
    },
    {
      "epoch": 0.049803290414878396,
      "grad_norm": 0.12834982001590367,
      "learning_rate": 0.00019979525811788542,
      "loss": 0.6904,
      "step": 557
    },
    {
      "epoch": 0.049892703862660946,
      "grad_norm": 0.14174151614205158,
      "learning_rate": 0.0001997934016978711,
      "loss": 0.7415,
      "step": 558
    },
    {
      "epoch": 0.04998211731044349,
      "grad_norm": 0.1370348257225783,
      "learning_rate": 0.00019979153690830463,
      "loss": 0.747,
      "step": 559
    },
    {
      "epoch": 0.05007153075822604,
      "grad_norm": 0.1625054448512878,
      "learning_rate": 0.00019978966374934254,
      "loss": 0.7889,
      "step": 560
    },
    {
      "epoch": 0.050160944206008584,
      "grad_norm": 0.12119977663754473,
      "learning_rate": 0.00019978778222114185,
      "loss": 0.6713,
      "step": 561
    },
    {
      "epoch": 0.05025035765379113,
      "grad_norm": 0.13865521201977038,
      "learning_rate": 0.00019978589232386035,
      "loss": 0.7203,
      "step": 562
    },
    {
      "epoch": 0.05033977110157368,
      "grad_norm": 0.11805184165355188,
      "learning_rate": 0.0001997839940576566,
      "loss": 0.7274,
      "step": 563
    },
    {
      "epoch": 0.05042918454935622,
      "grad_norm": 0.12172425965834646,
      "learning_rate": 0.00019978208742268977,
      "loss": 0.6969,
      "step": 564
    },
    {
      "epoch": 0.05051859799713877,
      "grad_norm": 0.1262653834163526,
      "learning_rate": 0.00019978017241911977,
      "loss": 0.7336,
      "step": 565
    },
    {
      "epoch": 0.050608011444921316,
      "grad_norm": 0.13263582491901013,
      "learning_rate": 0.00019977824904710722,
      "loss": 0.7306,
      "step": 566
    },
    {
      "epoch": 0.05069742489270386,
      "grad_norm": 0.12093400525171141,
      "learning_rate": 0.00019977631730681343,
      "loss": 0.704,
      "step": 567
    },
    {
      "epoch": 0.05078683834048641,
      "grad_norm": 0.1449369306175799,
      "learning_rate": 0.0001997743771984004,
      "loss": 0.7514,
      "step": 568
    },
    {
      "epoch": 0.050876251788268954,
      "grad_norm": 0.12878579415616617,
      "learning_rate": 0.00019977242872203083,
      "loss": 0.7041,
      "step": 569
    },
    {
      "epoch": 0.050965665236051505,
      "grad_norm": 0.12766517184850087,
      "learning_rate": 0.00019977047187786818,
      "loss": 0.7709,
      "step": 570
    },
    {
      "epoch": 0.05105507868383405,
      "grad_norm": 0.1335567220755077,
      "learning_rate": 0.00019976850666607657,
      "loss": 0.7495,
      "step": 571
    },
    {
      "epoch": 0.05114449213161659,
      "grad_norm": 0.13584078186523735,
      "learning_rate": 0.00019976653308682076,
      "loss": 0.7019,
      "step": 572
    },
    {
      "epoch": 0.05123390557939914,
      "grad_norm": 0.11595010187873284,
      "learning_rate": 0.0001997645511402663,
      "loss": 0.7289,
      "step": 573
    },
    {
      "epoch": 0.05132331902718169,
      "grad_norm": 0.14090713014495607,
      "learning_rate": 0.00019976256082657946,
      "loss": 0.779,
      "step": 574
    },
    {
      "epoch": 0.05141273247496424,
      "grad_norm": 0.12857060727409336,
      "learning_rate": 0.00019976056214592708,
      "loss": 0.7636,
      "step": 575
    },
    {
      "epoch": 0.05150214592274678,
      "grad_norm": 0.13965058475957037,
      "learning_rate": 0.00019975855509847686,
      "loss": 0.7198,
      "step": 576
    },
    {
      "epoch": 0.051591559370529325,
      "grad_norm": 0.13387263217347228,
      "learning_rate": 0.00019975653968439712,
      "loss": 0.718,
      "step": 577
    },
    {
      "epoch": 0.051680972818311875,
      "grad_norm": 0.12190874501214896,
      "learning_rate": 0.00019975451590385684,
      "loss": 0.7253,
      "step": 578
    },
    {
      "epoch": 0.05177038626609442,
      "grad_norm": 0.13229439292879222,
      "learning_rate": 0.0001997524837570258,
      "loss": 0.6902,
      "step": 579
    },
    {
      "epoch": 0.05185979971387697,
      "grad_norm": 0.14943583019308207,
      "learning_rate": 0.0001997504432440744,
      "loss": 0.7371,
      "step": 580
    },
    {
      "epoch": 0.05194921316165951,
      "grad_norm": 0.13883857655986023,
      "learning_rate": 0.00019974839436517382,
      "loss": 0.7252,
      "step": 581
    },
    {
      "epoch": 0.05203862660944206,
      "grad_norm": 0.13347821504529098,
      "learning_rate": 0.00019974633712049587,
      "loss": 0.7214,
      "step": 582
    },
    {
      "epoch": 0.05212804005722461,
      "grad_norm": 0.1288777528899218,
      "learning_rate": 0.00019974427151021304,
      "loss": 0.7619,
      "step": 583
    },
    {
      "epoch": 0.05221745350500715,
      "grad_norm": 0.14233461734250322,
      "learning_rate": 0.00019974219753449867,
      "loss": 0.7637,
      "step": 584
    },
    {
      "epoch": 0.0523068669527897,
      "grad_norm": 0.12752292705025,
      "learning_rate": 0.00019974011519352663,
      "loss": 0.7268,
      "step": 585
    },
    {
      "epoch": 0.052396280400572245,
      "grad_norm": 0.14169079037176419,
      "learning_rate": 0.0001997380244874716,
      "loss": 0.7581,
      "step": 586
    },
    {
      "epoch": 0.05248569384835479,
      "grad_norm": 0.11798857044514804,
      "learning_rate": 0.0001997359254165089,
      "loss": 0.7084,
      "step": 587
    },
    {
      "epoch": 0.05257510729613734,
      "grad_norm": 0.13066590523214677,
      "learning_rate": 0.00019973381798081457,
      "loss": 0.7248,
      "step": 588
    },
    {
      "epoch": 0.05266452074391988,
      "grad_norm": 0.12671514066627398,
      "learning_rate": 0.0001997317021805654,
      "loss": 0.7372,
      "step": 589
    },
    {
      "epoch": 0.052753934191702434,
      "grad_norm": 0.13205575316789767,
      "learning_rate": 0.0001997295780159388,
      "loss": 0.7269,
      "step": 590
    },
    {
      "epoch": 0.05284334763948498,
      "grad_norm": 0.13045806000820226,
      "learning_rate": 0.00019972744548711293,
      "loss": 0.7918,
      "step": 591
    },
    {
      "epoch": 0.05293276108726753,
      "grad_norm": 0.13774374396774353,
      "learning_rate": 0.00019972530459426663,
      "loss": 0.7398,
      "step": 592
    },
    {
      "epoch": 0.05302217453505007,
      "grad_norm": 0.13294145515592617,
      "learning_rate": 0.00019972315533757954,
      "loss": 0.7113,
      "step": 593
    },
    {
      "epoch": 0.053111587982832616,
      "grad_norm": 0.13254240454723873,
      "learning_rate": 0.00019972099771723177,
      "loss": 0.7122,
      "step": 594
    },
    {
      "epoch": 0.053201001430615166,
      "grad_norm": 0.14420986226532692,
      "learning_rate": 0.00019971883173340439,
      "loss": 0.7444,
      "step": 595
    },
    {
      "epoch": 0.05329041487839771,
      "grad_norm": 0.1402034095201888,
      "learning_rate": 0.00019971665738627902,
      "loss": 0.7563,
      "step": 596
    },
    {
      "epoch": 0.05337982832618026,
      "grad_norm": 0.13389862097997776,
      "learning_rate": 0.00019971447467603804,
      "loss": 0.7256,
      "step": 597
    },
    {
      "epoch": 0.053469241773962804,
      "grad_norm": 0.15107016419407063,
      "learning_rate": 0.00019971228360286445,
      "loss": 0.7873,
      "step": 598
    },
    {
      "epoch": 0.05355865522174535,
      "grad_norm": 0.147403448211813,
      "learning_rate": 0.00019971008416694208,
      "loss": 0.7109,
      "step": 599
    },
    {
      "epoch": 0.0536480686695279,
      "grad_norm": 0.12266530950886413,
      "learning_rate": 0.00019970787636845535,
      "loss": 0.7295,
      "step": 600
    },
    {
      "epoch": 0.05373748211731044,
      "grad_norm": 0.12294337529023634,
      "learning_rate": 0.00019970566020758947,
      "loss": 0.7168,
      "step": 601
    },
    {
      "epoch": 0.05382689556509299,
      "grad_norm": 0.1546757060833254,
      "learning_rate": 0.0001997034356845303,
      "loss": 0.757,
      "step": 602
    },
    {
      "epoch": 0.053916309012875537,
      "grad_norm": 0.13109798667485897,
      "learning_rate": 0.00019970120279946436,
      "loss": 0.6886,
      "step": 603
    },
    {
      "epoch": 0.05400572246065808,
      "grad_norm": 0.14347905946333134,
      "learning_rate": 0.00019969896155257896,
      "loss": 0.7189,
      "step": 604
    },
    {
      "epoch": 0.05409513590844063,
      "grad_norm": 0.13193741615583168,
      "learning_rate": 0.00019969671194406205,
      "loss": 0.7441,
      "step": 605
    },
    {
      "epoch": 0.054184549356223174,
      "grad_norm": 0.13163664213047013,
      "learning_rate": 0.0001996944539741023,
      "loss": 0.6953,
      "step": 606
    },
    {
      "epoch": 0.054273962804005725,
      "grad_norm": 0.14574269664780987,
      "learning_rate": 0.00019969218764288914,
      "loss": 0.774,
      "step": 607
    },
    {
      "epoch": 0.05436337625178827,
      "grad_norm": 0.12094695682796094,
      "learning_rate": 0.0001996899129506126,
      "loss": 0.7312,
      "step": 608
    },
    {
      "epoch": 0.05445278969957081,
      "grad_norm": 0.12705283817155769,
      "learning_rate": 0.0001996876298974634,
      "loss": 0.7204,
      "step": 609
    },
    {
      "epoch": 0.05454220314735336,
      "grad_norm": 0.12965865617031258,
      "learning_rate": 0.00019968533848363311,
      "loss": 0.7385,
      "step": 610
    },
    {
      "epoch": 0.05463161659513591,
      "grad_norm": 0.18215473142402916,
      "learning_rate": 0.00019968303870931386,
      "loss": 0.7011,
      "step": 611
    },
    {
      "epoch": 0.05472103004291846,
      "grad_norm": 0.1187821710172724,
      "learning_rate": 0.00019968073057469857,
      "loss": 0.6945,
      "step": 612
    },
    {
      "epoch": 0.054810443490701,
      "grad_norm": 0.12890101244709432,
      "learning_rate": 0.00019967841407998076,
      "loss": 0.7195,
      "step": 613
    },
    {
      "epoch": 0.054899856938483545,
      "grad_norm": 0.13402576360416021,
      "learning_rate": 0.00019967608922535476,
      "loss": 0.7524,
      "step": 614
    },
    {
      "epoch": 0.054989270386266095,
      "grad_norm": 0.1538022246650352,
      "learning_rate": 0.00019967375601101552,
      "loss": 0.7499,
      "step": 615
    },
    {
      "epoch": 0.05507868383404864,
      "grad_norm": 0.12900030356331874,
      "learning_rate": 0.00019967141443715872,
      "loss": 0.7015,
      "step": 616
    },
    {
      "epoch": 0.05516809728183119,
      "grad_norm": 0.145057556847201,
      "learning_rate": 0.0001996690645039808,
      "loss": 0.7121,
      "step": 617
    },
    {
      "epoch": 0.05525751072961373,
      "grad_norm": 0.1501556724839918,
      "learning_rate": 0.00019966670621167877,
      "loss": 0.7739,
      "step": 618
    },
    {
      "epoch": 0.05534692417739628,
      "grad_norm": 0.157504271385096,
      "learning_rate": 0.0001996643395604505,
      "loss": 0.7701,
      "step": 619
    },
    {
      "epoch": 0.05543633762517883,
      "grad_norm": 0.15586538333184485,
      "learning_rate": 0.00019966196455049442,
      "loss": 0.7355,
      "step": 620
    },
    {
      "epoch": 0.05552575107296137,
      "grad_norm": 0.15098655799011249,
      "learning_rate": 0.00019965958118200972,
      "loss": 0.7714,
      "step": 621
    },
    {
      "epoch": 0.05561516452074392,
      "grad_norm": 0.13333352985872068,
      "learning_rate": 0.00019965718945519633,
      "loss": 0.6937,
      "step": 622
    },
    {
      "epoch": 0.055704577968526466,
      "grad_norm": 0.14374635180100673,
      "learning_rate": 0.00019965478937025483,
      "loss": 0.7093,
      "step": 623
    },
    {
      "epoch": 0.055793991416309016,
      "grad_norm": 0.1315673077538165,
      "learning_rate": 0.00019965238092738643,
      "loss": 0.7282,
      "step": 624
    },
    {
      "epoch": 0.05588340486409156,
      "grad_norm": 0.10974716496575011,
      "learning_rate": 0.00019964996412679325,
      "loss": 0.6878,
      "step": 625
    },
    {
      "epoch": 0.055972818311874104,
      "grad_norm": 0.12028082810121175,
      "learning_rate": 0.00019964753896867788,
      "loss": 0.7226,
      "step": 626
    },
    {
      "epoch": 0.056062231759656654,
      "grad_norm": 0.11386849967112696,
      "learning_rate": 0.00019964510545324382,
      "loss": 0.7105,
      "step": 627
    },
    {
      "epoch": 0.0561516452074392,
      "grad_norm": 0.11898842544657438,
      "learning_rate": 0.00019964266358069504,
      "loss": 0.6746,
      "step": 628
    },
    {
      "epoch": 0.05624105865522175,
      "grad_norm": 0.13172522324051036,
      "learning_rate": 0.00019964021335123645,
      "loss": 0.7493,
      "step": 629
    },
    {
      "epoch": 0.05633047210300429,
      "grad_norm": 0.13922504133553618,
      "learning_rate": 0.00019963775476507348,
      "loss": 0.7402,
      "step": 630
    },
    {
      "epoch": 0.056419885550786836,
      "grad_norm": 0.1415168248685213,
      "learning_rate": 0.00019963528782241237,
      "loss": 0.7236,
      "step": 631
    },
    {
      "epoch": 0.056509298998569386,
      "grad_norm": 0.1365342139583884,
      "learning_rate": 0.00019963281252346,
      "loss": 0.7069,
      "step": 632
    },
    {
      "epoch": 0.05659871244635193,
      "grad_norm": 0.13574664379242202,
      "learning_rate": 0.00019963032886842393,
      "loss": 0.7415,
      "step": 633
    },
    {
      "epoch": 0.05668812589413448,
      "grad_norm": 0.11877222814345362,
      "learning_rate": 0.00019962783685751253,
      "loss": 0.7517,
      "step": 634
    },
    {
      "epoch": 0.056777539341917024,
      "grad_norm": 0.1323311258097883,
      "learning_rate": 0.0001996253364909348,
      "loss": 0.6991,
      "step": 635
    },
    {
      "epoch": 0.05686695278969957,
      "grad_norm": 0.12480435749309707,
      "learning_rate": 0.00019962282776890037,
      "loss": 0.7043,
      "step": 636
    },
    {
      "epoch": 0.05695636623748212,
      "grad_norm": 0.12244451715884502,
      "learning_rate": 0.0001996203106916197,
      "loss": 0.6996,
      "step": 637
    },
    {
      "epoch": 0.05704577968526466,
      "grad_norm": 0.12368815835903929,
      "learning_rate": 0.00019961778525930387,
      "loss": 0.7614,
      "step": 638
    },
    {
      "epoch": 0.05713519313304721,
      "grad_norm": 0.18347774607086803,
      "learning_rate": 0.00019961525147216475,
      "loss": 0.3412,
      "step": 639
    },
    {
      "epoch": 0.05722460658082976,
      "grad_norm": 0.13580414358913936,
      "learning_rate": 0.00019961270933041477,
      "loss": 0.6931,
      "step": 640
    },
    {
      "epoch": 0.0573140200286123,
      "grad_norm": 0.1295696178434557,
      "learning_rate": 0.00019961015883426716,
      "loss": 0.7108,
      "step": 641
    },
    {
      "epoch": 0.05740343347639485,
      "grad_norm": 0.1574714754776261,
      "learning_rate": 0.0001996075999839358,
      "loss": 0.752,
      "step": 642
    },
    {
      "epoch": 0.057492846924177395,
      "grad_norm": 0.13277014463308878,
      "learning_rate": 0.0001996050327796353,
      "loss": 0.6704,
      "step": 643
    },
    {
      "epoch": 0.057582260371959945,
      "grad_norm": 0.13511263198980467,
      "learning_rate": 0.00019960245722158108,
      "loss": 0.7222,
      "step": 644
    },
    {
      "epoch": 0.05767167381974249,
      "grad_norm": 0.1242105247975873,
      "learning_rate": 0.000199599873309989,
      "loss": 0.7016,
      "step": 645
    },
    {
      "epoch": 0.05776108726752503,
      "grad_norm": 0.13621135755546204,
      "learning_rate": 0.00019959728104507586,
      "loss": 0.6939,
      "step": 646
    },
    {
      "epoch": 0.05785050071530758,
      "grad_norm": 0.13544801243363822,
      "learning_rate": 0.00019959468042705903,
      "loss": 0.76,
      "step": 647
    },
    {
      "epoch": 0.05793991416309013,
      "grad_norm": 0.142137929547377,
      "learning_rate": 0.00019959207145615665,
      "loss": 0.753,
      "step": 648
    },
    {
      "epoch": 0.05802932761087268,
      "grad_norm": 0.13367295295733583,
      "learning_rate": 0.00019958945413258748,
      "loss": 0.7002,
      "step": 649
    },
    {
      "epoch": 0.05811874105865522,
      "grad_norm": 0.16410717612180348,
      "learning_rate": 0.00019958682845657108,
      "loss": 0.7795,
      "step": 650
    },
    {
      "epoch": 0.058208154506437765,
      "grad_norm": 0.12320148111859387,
      "learning_rate": 0.00019958419442832765,
      "loss": 0.7203,
      "step": 651
    },
    {
      "epoch": 0.058297567954220315,
      "grad_norm": 0.13388095247795773,
      "learning_rate": 0.00019958155204807812,
      "loss": 0.7594,
      "step": 652
    },
    {
      "epoch": 0.05838698140200286,
      "grad_norm": 0.15047917429515767,
      "learning_rate": 0.00019957890131604405,
      "loss": 0.7195,
      "step": 653
    },
    {
      "epoch": 0.05847639484978541,
      "grad_norm": 0.14549176487396576,
      "learning_rate": 0.0001995762422324478,
      "loss": 0.7631,
      "step": 654
    },
    {
      "epoch": 0.05856580829756795,
      "grad_norm": 0.15556486111015133,
      "learning_rate": 0.00019957357479751236,
      "loss": 0.741,
      "step": 655
    },
    {
      "epoch": 0.058655221745350504,
      "grad_norm": 0.1334036588579464,
      "learning_rate": 0.00019957089901146148,
      "loss": 0.7362,
      "step": 656
    },
    {
      "epoch": 0.05874463519313305,
      "grad_norm": 0.2020036306263462,
      "learning_rate": 0.00019956821487451953,
      "loss": 0.73,
      "step": 657
    },
    {
      "epoch": 0.05883404864091559,
      "grad_norm": 0.13926672720266356,
      "learning_rate": 0.00019956552238691166,
      "loss": 0.7604,
      "step": 658
    },
    {
      "epoch": 0.05892346208869814,
      "grad_norm": 0.1266286632614101,
      "learning_rate": 0.00019956282154886369,
      "loss": 0.7457,
      "step": 659
    },
    {
      "epoch": 0.059012875536480686,
      "grad_norm": 0.12881550472880895,
      "learning_rate": 0.00019956011236060207,
      "loss": 0.7225,
      "step": 660
    },
    {
      "epoch": 0.059102288984263236,
      "grad_norm": 0.11994688497363218,
      "learning_rate": 0.0001995573948223541,
      "loss": 0.6956,
      "step": 661
    },
    {
      "epoch": 0.05919170243204578,
      "grad_norm": 0.15179163333946044,
      "learning_rate": 0.00019955466893434767,
      "loss": 0.773,
      "step": 662
    },
    {
      "epoch": 0.059281115879828324,
      "grad_norm": 0.12246562550245513,
      "learning_rate": 0.00019955193469681137,
      "loss": 0.7168,
      "step": 663
    },
    {
      "epoch": 0.059370529327610874,
      "grad_norm": 0.12685258388997409,
      "learning_rate": 0.00019954919210997453,
      "loss": 0.7349,
      "step": 664
    },
    {
      "epoch": 0.05945994277539342,
      "grad_norm": 0.13702504186176473,
      "learning_rate": 0.00019954644117406718,
      "loss": 0.7431,
      "step": 665
    },
    {
      "epoch": 0.05954935622317597,
      "grad_norm": 0.1307938025483592,
      "learning_rate": 0.00019954368188932002,
      "loss": 0.696,
      "step": 666
    },
    {
      "epoch": 0.05963876967095851,
      "grad_norm": 0.131047357693196,
      "learning_rate": 0.0001995409142559645,
      "loss": 0.7391,
      "step": 667
    },
    {
      "epoch": 0.059728183118741056,
      "grad_norm": 0.13122665205059073,
      "learning_rate": 0.0001995381382742327,
      "loss": 0.7216,
      "step": 668
    },
    {
      "epoch": 0.059817596566523606,
      "grad_norm": 0.12355804486320335,
      "learning_rate": 0.00019953535394435744,
      "loss": 0.707,
      "step": 669
    },
    {
      "epoch": 0.05990701001430615,
      "grad_norm": 0.13378227892671962,
      "learning_rate": 0.0001995325612665723,
      "loss": 0.7298,
      "step": 670
    },
    {
      "epoch": 0.0599964234620887,
      "grad_norm": 0.1299822664912523,
      "learning_rate": 0.00019952976024111143,
      "loss": 0.7218,
      "step": 671
    },
    {
      "epoch": 0.060085836909871244,
      "grad_norm": 0.12860495174532452,
      "learning_rate": 0.00019952695086820975,
      "loss": 0.764,
      "step": 672
    },
    {
      "epoch": 0.06017525035765379,
      "grad_norm": 0.13210633624599283,
      "learning_rate": 0.0001995241331481029,
      "loss": 0.7102,
      "step": 673
    },
    {
      "epoch": 0.06026466380543634,
      "grad_norm": 0.13786010291082929,
      "learning_rate": 0.00019952130708102722,
      "loss": 0.7544,
      "step": 674
    },
    {
      "epoch": 0.06035407725321888,
      "grad_norm": 0.13408779565847181,
      "learning_rate": 0.0001995184726672197,
      "loss": 0.7131,
      "step": 675
    },
    {
      "epoch": 0.06044349070100143,
      "grad_norm": 0.1269960290106908,
      "learning_rate": 0.00019951562990691807,
      "loss": 0.7502,
      "step": 676
    },
    {
      "epoch": 0.06053290414878398,
      "grad_norm": 0.13474113335984336,
      "learning_rate": 0.00019951277880036073,
      "loss": 0.7421,
      "step": 677
    },
    {
      "epoch": 0.06062231759656652,
      "grad_norm": 0.14104378137715237,
      "learning_rate": 0.0001995099193477868,
      "loss": 0.7634,
      "step": 678
    },
    {
      "epoch": 0.06071173104434907,
      "grad_norm": 0.13347074062030925,
      "learning_rate": 0.00019950705154943613,
      "loss": 0.2934,
      "step": 679
    },
    {
      "epoch": 0.060801144492131615,
      "grad_norm": 0.1428903440439262,
      "learning_rate": 0.00019950417540554925,
      "loss": 0.7101,
      "step": 680
    },
    {
      "epoch": 0.060890557939914165,
      "grad_norm": 0.11893139166803876,
      "learning_rate": 0.00019950129091636732,
      "loss": 0.3246,
      "step": 681
    },
    {
      "epoch": 0.06097997138769671,
      "grad_norm": 0.16740047123772012,
      "learning_rate": 0.00019949839808213227,
      "loss": 0.7873,
      "step": 682
    },
    {
      "epoch": 0.06106938483547925,
      "grad_norm": 0.13679425139917187,
      "learning_rate": 0.00019949549690308677,
      "loss": 0.7761,
      "step": 683
    },
    {
      "epoch": 0.0611587982832618,
      "grad_norm": 0.15435900816455325,
      "learning_rate": 0.0001994925873794741,
      "loss": 0.7108,
      "step": 684
    },
    {
      "epoch": 0.06124821173104435,
      "grad_norm": 0.14122952288356008,
      "learning_rate": 0.00019948966951153824,
      "loss": 0.7278,
      "step": 685
    },
    {
      "epoch": 0.0613376251788269,
      "grad_norm": 0.13135572812198973,
      "learning_rate": 0.000199486743299524,
      "loss": 0.7607,
      "step": 686
    },
    {
      "epoch": 0.06142703862660944,
      "grad_norm": 0.14542071825112737,
      "learning_rate": 0.00019948380874367674,
      "loss": 0.8032,
      "step": 687
    },
    {
      "epoch": 0.06151645207439199,
      "grad_norm": 0.1268143111887935,
      "learning_rate": 0.00019948086584424256,
      "loss": 0.7077,
      "step": 688
    },
    {
      "epoch": 0.061605865522174535,
      "grad_norm": 0.13520241437224984,
      "learning_rate": 0.00019947791460146833,
      "loss": 0.3268,
      "step": 689
    },
    {
      "epoch": 0.06169527896995708,
      "grad_norm": 0.1973492484632994,
      "learning_rate": 0.00019947495501560153,
      "loss": 0.7465,
      "step": 690
    },
    {
      "epoch": 0.06178469241773963,
      "grad_norm": 0.14127339882516346,
      "learning_rate": 0.00019947198708689042,
      "loss": 0.7629,
      "step": 691
    },
    {
      "epoch": 0.06187410586552217,
      "grad_norm": 0.14451719710473557,
      "learning_rate": 0.00019946901081558386,
      "loss": 0.7456,
      "step": 692
    },
    {
      "epoch": 0.061963519313304724,
      "grad_norm": 0.15476918272029885,
      "learning_rate": 0.0001994660262019315,
      "loss": 0.7976,
      "step": 693
    },
    {
      "epoch": 0.06205293276108727,
      "grad_norm": 0.13702184814438495,
      "learning_rate": 0.0001994630332461836,
      "loss": 0.7301,
      "step": 694
    },
    {
      "epoch": 0.06214234620886981,
      "grad_norm": 0.14467529136328888,
      "learning_rate": 0.00019946003194859125,
      "loss": 0.7363,
      "step": 695
    },
    {
      "epoch": 0.06223175965665236,
      "grad_norm": 0.12620240985467007,
      "learning_rate": 0.00019945702230940614,
      "loss": 0.7304,
      "step": 696
    },
    {
      "epoch": 0.062321173104434906,
      "grad_norm": 0.15135017706505402,
      "learning_rate": 0.0001994540043288807,
      "loss": 0.7498,
      "step": 697
    },
    {
      "epoch": 0.062410586552217456,
      "grad_norm": 0.1393266531270071,
      "learning_rate": 0.00019945097800726802,
      "loss": 0.7424,
      "step": 698
    },
    {
      "epoch": 0.0625,
      "grad_norm": 0.16761894677284356,
      "learning_rate": 0.00019944794334482194,
      "loss": 0.3346,
      "step": 699
    },
    {
      "epoch": 0.06258941344778254,
      "grad_norm": 0.12198084412542842,
      "learning_rate": 0.0001994449003417969,
      "loss": 0.7102,
      "step": 700
    },
    {
      "epoch": 0.06267882689556509,
      "grad_norm": 0.11879494024195074,
      "learning_rate": 0.00019944184899844822,
      "loss": 0.6876,
      "step": 701
    },
    {
      "epoch": 0.06276824034334764,
      "grad_norm": 0.1358449742602433,
      "learning_rate": 0.00019943878931503176,
      "loss": 0.7931,
      "step": 702
    },
    {
      "epoch": 0.06285765379113019,
      "grad_norm": 0.12322954366355535,
      "learning_rate": 0.0001994357212918041,
      "loss": 0.7538,
      "step": 703
    },
    {
      "epoch": 0.06294706723891273,
      "grad_norm": 0.1269615173549813,
      "learning_rate": 0.00019943264492902258,
      "loss": 0.7406,
      "step": 704
    },
    {
      "epoch": 0.06303648068669528,
      "grad_norm": 0.1334350259185143,
      "learning_rate": 0.00019942956022694523,
      "loss": 0.7038,
      "step": 705
    },
    {
      "epoch": 0.06312589413447782,
      "grad_norm": 0.14397894688758292,
      "learning_rate": 0.00019942646718583076,
      "loss": 0.717,
      "step": 706
    },
    {
      "epoch": 0.06321530758226038,
      "grad_norm": 0.13182436026603594,
      "learning_rate": 0.00019942336580593852,
      "loss": 0.7244,
      "step": 707
    },
    {
      "epoch": 0.06330472103004292,
      "grad_norm": 0.13391890489814579,
      "learning_rate": 0.0001994202560875287,
      "loss": 0.7558,
      "step": 708
    },
    {
      "epoch": 0.06339413447782546,
      "grad_norm": 0.13819730048976256,
      "learning_rate": 0.00019941713803086204,
      "loss": 0.7175,
      "step": 709
    },
    {
      "epoch": 0.06348354792560801,
      "grad_norm": 0.11913718568955474,
      "learning_rate": 0.0001994140116362001,
      "loss": 0.722,
      "step": 710
    },
    {
      "epoch": 0.06357296137339055,
      "grad_norm": 0.13267993038359324,
      "learning_rate": 0.0001994108769038051,
      "loss": 0.7493,
      "step": 711
    },
    {
      "epoch": 0.06366237482117311,
      "grad_norm": 0.1454745645326714,
      "learning_rate": 0.00019940773383393987,
      "loss": 0.7753,
      "step": 712
    },
    {
      "epoch": 0.06375178826895565,
      "grad_norm": 0.12861958721902303,
      "learning_rate": 0.00019940458242686802,
      "loss": 0.7659,
      "step": 713
    },
    {
      "epoch": 0.0638412017167382,
      "grad_norm": 0.12501484752846959,
      "learning_rate": 0.00019940142268285395,
      "loss": 0.7376,
      "step": 714
    },
    {
      "epoch": 0.06393061516452074,
      "grad_norm": 0.13344486234378466,
      "learning_rate": 0.0001993982546021626,
      "loss": 0.7184,
      "step": 715
    },
    {
      "epoch": 0.06402002861230328,
      "grad_norm": 0.1451515926378223,
      "learning_rate": 0.00019939507818505966,
      "loss": 0.7637,
      "step": 716
    },
    {
      "epoch": 0.06410944206008584,
      "grad_norm": 0.13451009877007825,
      "learning_rate": 0.00019939189343181157,
      "loss": 0.7123,
      "step": 717
    },
    {
      "epoch": 0.06419885550786839,
      "grad_norm": 0.1287880637039097,
      "learning_rate": 0.00019938870034268542,
      "loss": 0.6954,
      "step": 718
    },
    {
      "epoch": 0.06428826895565093,
      "grad_norm": 0.1536471114021178,
      "learning_rate": 0.00019938549891794898,
      "loss": 0.7604,
      "step": 719
    },
    {
      "epoch": 0.06437768240343347,
      "grad_norm": 0.12428676947728994,
      "learning_rate": 0.0001993822891578708,
      "loss": 0.7481,
      "step": 720
    },
    {
      "epoch": 0.06446709585121602,
      "grad_norm": 0.1372108863668176,
      "learning_rate": 0.00019937907106272002,
      "loss": 0.7278,
      "step": 721
    },
    {
      "epoch": 0.06455650929899857,
      "grad_norm": 0.12752827210062628,
      "learning_rate": 0.00019937584463276657,
      "loss": 0.7232,
      "step": 722
    },
    {
      "epoch": 0.06464592274678112,
      "grad_norm": 0.15595432118534602,
      "learning_rate": 0.00019937260986828108,
      "loss": 0.7481,
      "step": 723
    },
    {
      "epoch": 0.06473533619456366,
      "grad_norm": 0.14932726859085693,
      "learning_rate": 0.0001993693667695348,
      "loss": 0.7059,
      "step": 724
    },
    {
      "epoch": 0.0648247496423462,
      "grad_norm": 0.15931477567686117,
      "learning_rate": 0.0001993661153367997,
      "loss": 0.7436,
      "step": 725
    },
    {
      "epoch": 0.06491416309012875,
      "grad_norm": 0.18283679958266144,
      "learning_rate": 0.00019936285557034858,
      "loss": 0.7804,
      "step": 726
    },
    {
      "epoch": 0.0650035765379113,
      "grad_norm": 0.13026601743434343,
      "learning_rate": 0.00019935958747045472,
      "loss": 0.7123,
      "step": 727
    },
    {
      "epoch": 0.06509298998569385,
      "grad_norm": 0.1533927283395776,
      "learning_rate": 0.00019935631103739225,
      "loss": 0.7174,
      "step": 728
    },
    {
      "epoch": 0.0651824034334764,
      "grad_norm": 0.12134025341168345,
      "learning_rate": 0.00019935302627143594,
      "loss": 0.7091,
      "step": 729
    },
    {
      "epoch": 0.06527181688125894,
      "grad_norm": 0.1320864748794469,
      "learning_rate": 0.00019934973317286138,
      "loss": 0.3196,
      "step": 730
    },
    {
      "epoch": 0.0653612303290415,
      "grad_norm": 0.1674497824842328,
      "learning_rate": 0.00019934643174194462,
      "loss": 0.7263,
      "step": 731
    },
    {
      "epoch": 0.06545064377682404,
      "grad_norm": 0.13867535146822008,
      "learning_rate": 0.00019934312197896262,
      "loss": 0.7148,
      "step": 732
    },
    {
      "epoch": 0.06554005722460658,
      "grad_norm": 0.14483659285237593,
      "learning_rate": 0.00019933980388419297,
      "loss": 0.7331,
      "step": 733
    },
    {
      "epoch": 0.06562947067238913,
      "grad_norm": 0.14020817841556327,
      "learning_rate": 0.00019933647745791393,
      "loss": 0.7443,
      "step": 734
    },
    {
      "epoch": 0.06571888412017167,
      "grad_norm": 0.1466308641062983,
      "learning_rate": 0.0001993331427004045,
      "loss": 0.7472,
      "step": 735
    },
    {
      "epoch": 0.06580829756795423,
      "grad_norm": 0.13524104251286412,
      "learning_rate": 0.00019932979961194435,
      "loss": 0.713,
      "step": 736
    },
    {
      "epoch": 0.06589771101573677,
      "grad_norm": 0.13016011739840436,
      "learning_rate": 0.00019932644819281389,
      "loss": 0.7298,
      "step": 737
    },
    {
      "epoch": 0.06598712446351931,
      "grad_norm": 0.13265303497813968,
      "learning_rate": 0.00019932308844329417,
      "loss": 0.7266,
      "step": 738
    },
    {
      "epoch": 0.06607653791130186,
      "grad_norm": 0.11994526030765332,
      "learning_rate": 0.00019931972036366696,
      "loss": 0.6797,
      "step": 739
    },
    {
      "epoch": 0.0661659513590844,
      "grad_norm": 0.1328747384314752,
      "learning_rate": 0.00019931634395421475,
      "loss": 0.7283,
      "step": 740
    },
    {
      "epoch": 0.06625536480686696,
      "grad_norm": 0.14255875674730317,
      "learning_rate": 0.0001993129592152207,
      "loss": 0.7309,
      "step": 741
    },
    {
      "epoch": 0.0663447782546495,
      "grad_norm": 0.14822085875630706,
      "learning_rate": 0.00019930956614696874,
      "loss": 0.7468,
      "step": 742
    },
    {
      "epoch": 0.06643419170243205,
      "grad_norm": 0.14794172752364937,
      "learning_rate": 0.0001993061647497434,
      "loss": 0.7675,
      "step": 743
    },
    {
      "epoch": 0.06652360515021459,
      "grad_norm": 0.12612022600490042,
      "learning_rate": 0.0001993027550238299,
      "loss": 0.7101,
      "step": 744
    },
    {
      "epoch": 0.06661301859799713,
      "grad_norm": 0.15853788661721505,
      "learning_rate": 0.00019929933696951433,
      "loss": 0.7614,
      "step": 745
    },
    {
      "epoch": 0.06670243204577969,
      "grad_norm": 0.13979017770453464,
      "learning_rate": 0.00019929591058708324,
      "loss": 0.7564,
      "step": 746
    },
    {
      "epoch": 0.06679184549356224,
      "grad_norm": 0.14262770872438515,
      "learning_rate": 0.00019929247587682406,
      "loss": 0.7285,
      "step": 747
    },
    {
      "epoch": 0.06688125894134478,
      "grad_norm": 0.12292739846815788,
      "learning_rate": 0.00019928903283902486,
      "loss": 0.6606,
      "step": 748
    },
    {
      "epoch": 0.06697067238912732,
      "grad_norm": 0.1407942951941337,
      "learning_rate": 0.00019928558147397439,
      "loss": 0.7124,
      "step": 749
    },
    {
      "epoch": 0.06706008583690987,
      "grad_norm": 0.14682077408915412,
      "learning_rate": 0.0001992821217819621,
      "loss": 0.7306,
      "step": 750
    },
    {
      "epoch": 0.06714949928469242,
      "grad_norm": 0.14605763757321372,
      "learning_rate": 0.00019927865376327816,
      "loss": 0.3398,
      "step": 751
    },
    {
      "epoch": 0.06723891273247497,
      "grad_norm": 0.13734616261520627,
      "learning_rate": 0.00019927517741821343,
      "loss": 0.7047,
      "step": 752
    },
    {
      "epoch": 0.06732832618025751,
      "grad_norm": 0.14544509159895994,
      "learning_rate": 0.00019927169274705945,
      "loss": 0.7085,
      "step": 753
    },
    {
      "epoch": 0.06741773962804005,
      "grad_norm": 0.12608884776296933,
      "learning_rate": 0.00019926819975010852,
      "loss": 0.6867,
      "step": 754
    },
    {
      "epoch": 0.0675071530758226,
      "grad_norm": 0.13225488577513275,
      "learning_rate": 0.00019926469842765352,
      "loss": 0.7393,
      "step": 755
    },
    {
      "epoch": 0.06759656652360516,
      "grad_norm": 0.1485271586065787,
      "learning_rate": 0.00019926118877998817,
      "loss": 0.7665,
      "step": 756
    },
    {
      "epoch": 0.0676859799713877,
      "grad_norm": 0.12619438958575963,
      "learning_rate": 0.0001992576708074068,
      "loss": 0.7106,
      "step": 757
    },
    {
      "epoch": 0.06777539341917024,
      "grad_norm": 0.1296189026874222,
      "learning_rate": 0.00019925414451020442,
      "loss": 0.6857,
      "step": 758
    },
    {
      "epoch": 0.06786480686695279,
      "grad_norm": 0.13705186912381048,
      "learning_rate": 0.00019925060988867682,
      "loss": 0.7271,
      "step": 759
    },
    {
      "epoch": 0.06795422031473533,
      "grad_norm": 0.12570839954878482,
      "learning_rate": 0.00019924706694312045,
      "loss": 0.6879,
      "step": 760
    },
    {
      "epoch": 0.06804363376251789,
      "grad_norm": 0.14181471057820974,
      "learning_rate": 0.00019924351567383243,
      "loss": 0.7024,
      "step": 761
    },
    {
      "epoch": 0.06813304721030043,
      "grad_norm": 0.1540754510159454,
      "learning_rate": 0.00019923995608111058,
      "loss": 0.7551,
      "step": 762
    },
    {
      "epoch": 0.06822246065808298,
      "grad_norm": 0.153402723718828,
      "learning_rate": 0.0001992363881652535,
      "loss": 0.7742,
      "step": 763
    },
    {
      "epoch": 0.06831187410586552,
      "grad_norm": 0.1390077646204715,
      "learning_rate": 0.0001992328119265604,
      "loss": 0.7197,
      "step": 764
    },
    {
      "epoch": 0.06840128755364806,
      "grad_norm": 0.11872102776816297,
      "learning_rate": 0.0001992292273653312,
      "loss": 0.6812,
      "step": 765
    },
    {
      "epoch": 0.06849070100143062,
      "grad_norm": 0.12927406228810645,
      "learning_rate": 0.00019922563448186652,
      "loss": 0.7305,
      "step": 766
    },
    {
      "epoch": 0.06858011444921316,
      "grad_norm": 0.13901506651122675,
      "learning_rate": 0.00019922203327646772,
      "loss": 0.7566,
      "step": 767
    },
    {
      "epoch": 0.06866952789699571,
      "grad_norm": 0.13073479181220618,
      "learning_rate": 0.0001992184237494368,
      "loss": 0.733,
      "step": 768
    },
    {
      "epoch": 0.06875894134477825,
      "grad_norm": 0.13399128571442562,
      "learning_rate": 0.00019921480590107653,
      "loss": 0.7432,
      "step": 769
    },
    {
      "epoch": 0.0688483547925608,
      "grad_norm": 0.12834385875333462,
      "learning_rate": 0.0001992111797316903,
      "loss": 0.7336,
      "step": 770
    },
    {
      "epoch": 0.06893776824034335,
      "grad_norm": 0.1342069794363181,
      "learning_rate": 0.00019920754524158226,
      "loss": 0.7145,
      "step": 771
    },
    {
      "epoch": 0.0690271816881259,
      "grad_norm": 0.13366689844555477,
      "learning_rate": 0.00019920390243105716,
      "loss": 0.6924,
      "step": 772
    },
    {
      "epoch": 0.06911659513590844,
      "grad_norm": 0.19057865252522288,
      "learning_rate": 0.00019920025130042062,
      "loss": 0.3388,
      "step": 773
    },
    {
      "epoch": 0.06920600858369098,
      "grad_norm": 0.11503610848185107,
      "learning_rate": 0.0001991965918499788,
      "loss": 0.6859,
      "step": 774
    },
    {
      "epoch": 0.06929542203147353,
      "grad_norm": 0.13384026890139095,
      "learning_rate": 0.00019919292408003862,
      "loss": 0.7091,
      "step": 775
    },
    {
      "epoch": 0.06938483547925609,
      "grad_norm": 0.1408517078729437,
      "learning_rate": 0.0001991892479909077,
      "loss": 0.7514,
      "step": 776
    },
    {
      "epoch": 0.06947424892703863,
      "grad_norm": 0.15300557083714222,
      "learning_rate": 0.0001991855635828943,
      "loss": 0.7416,
      "step": 777
    },
    {
      "epoch": 0.06956366237482117,
      "grad_norm": 0.12761284992219335,
      "learning_rate": 0.00019918187085630752,
      "loss": 0.7217,
      "step": 778
    },
    {
      "epoch": 0.06965307582260372,
      "grad_norm": 0.1478834860412927,
      "learning_rate": 0.000199178169811457,
      "loss": 0.7752,
      "step": 779
    },
    {
      "epoch": 0.06974248927038626,
      "grad_norm": 0.13757459428070193,
      "learning_rate": 0.00019917446044865312,
      "loss": 0.7291,
      "step": 780
    },
    {
      "epoch": 0.06983190271816882,
      "grad_norm": 0.11870969126552057,
      "learning_rate": 0.00019917074276820705,
      "loss": 0.3009,
      "step": 781
    },
    {
      "epoch": 0.06992131616595136,
      "grad_norm": 0.21355346837159006,
      "learning_rate": 0.00019916701677043054,
      "loss": 0.7188,
      "step": 782
    },
    {
      "epoch": 0.0700107296137339,
      "grad_norm": 0.13472215598538312,
      "learning_rate": 0.00019916328245563611,
      "loss": 0.7282,
      "step": 783
    },
    {
      "epoch": 0.07010014306151645,
      "grad_norm": 0.15673597901288638,
      "learning_rate": 0.0001991595398241369,
      "loss": 0.7935,
      "step": 784
    },
    {
      "epoch": 0.07018955650929899,
      "grad_norm": 0.1281829047800385,
      "learning_rate": 0.0001991557888762469,
      "loss": 0.7185,
      "step": 785
    },
    {
      "epoch": 0.07027896995708155,
      "grad_norm": 0.1415168637785083,
      "learning_rate": 0.00019915202961228058,
      "loss": 0.7009,
      "step": 786
    },
    {
      "epoch": 0.0703683834048641,
      "grad_norm": 0.16211965037273696,
      "learning_rate": 0.00019914826203255333,
      "loss": 0.7001,
      "step": 787
    },
    {
      "epoch": 0.07045779685264664,
      "grad_norm": 0.13402512946184827,
      "learning_rate": 0.00019914448613738106,
      "loss": 0.7109,
      "step": 788
    },
    {
      "epoch": 0.07054721030042918,
      "grad_norm": 0.13064567293802273,
      "learning_rate": 0.00019914070192708047,
      "loss": 0.7329,
      "step": 789
    },
    {
      "epoch": 0.07063662374821172,
      "grad_norm": 0.13938888084357481,
      "learning_rate": 0.00019913690940196894,
      "loss": 0.7354,
      "step": 790
    },
    {
      "epoch": 0.07072603719599428,
      "grad_norm": 0.1557911712547833,
      "learning_rate": 0.00019913310856236452,
      "loss": 0.7593,
      "step": 791
    },
    {
      "epoch": 0.07081545064377683,
      "grad_norm": 0.1473461304157112,
      "learning_rate": 0.00019912929940858607,
      "loss": 0.7557,
      "step": 792
    },
    {
      "epoch": 0.07090486409155937,
      "grad_norm": 0.14393628193314942,
      "learning_rate": 0.00019912548194095297,
      "loss": 0.6769,
      "step": 793
    },
    {
      "epoch": 0.07099427753934191,
      "grad_norm": 0.10947439635579316,
      "learning_rate": 0.0001991216561597854,
      "loss": 0.7563,
      "step": 794
    },
    {
      "epoch": 0.07108369098712447,
      "grad_norm": 0.1378185206071284,
      "learning_rate": 0.00019911782206540423,
      "loss": 0.7659,
      "step": 795
    },
    {
      "epoch": 0.07117310443490701,
      "grad_norm": 0.12766950870519833,
      "learning_rate": 0.00019911397965813107,
      "loss": 0.7509,
      "step": 796
    },
    {
      "epoch": 0.07126251788268956,
      "grad_norm": 0.12375922683778162,
      "learning_rate": 0.0001991101289382881,
      "loss": 0.7358,
      "step": 797
    },
    {
      "epoch": 0.0713519313304721,
      "grad_norm": 0.14417421474559636,
      "learning_rate": 0.0001991062699061983,
      "loss": 0.7173,
      "step": 798
    },
    {
      "epoch": 0.07144134477825465,
      "grad_norm": 0.1380558566757712,
      "learning_rate": 0.00019910240256218535,
      "loss": 0.6972,
      "step": 799
    },
    {
      "epoch": 0.0715307582260372,
      "grad_norm": 0.15248975044778065,
      "learning_rate": 0.00019909852690657359,
      "loss": 0.7221,
      "step": 800
    },
    {
      "epoch": 0.07162017167381975,
      "grad_norm": 0.1210873838562799,
      "learning_rate": 0.00019909464293968804,
      "loss": 0.7136,
      "step": 801
    },
    {
      "epoch": 0.07170958512160229,
      "grad_norm": 0.13155116403297817,
      "learning_rate": 0.0001990907506618545,
      "loss": 0.7311,
      "step": 802
    },
    {
      "epoch": 0.07179899856938483,
      "grad_norm": 0.14033245715178264,
      "learning_rate": 0.00019908685007339932,
      "loss": 0.7682,
      "step": 803
    },
    {
      "epoch": 0.07188841201716738,
      "grad_norm": 0.12024793966350707,
      "learning_rate": 0.00019908294117464975,
      "loss": 0.7033,
      "step": 804
    },
    {
      "epoch": 0.07197782546494993,
      "grad_norm": 0.1182698198261899,
      "learning_rate": 0.00019907902396593352,
      "loss": 0.6756,
      "step": 805
    },
    {
      "epoch": 0.07206723891273248,
      "grad_norm": 0.13659372222522156,
      "learning_rate": 0.00019907509844757925,
      "loss": 0.7398,
      "step": 806
    },
    {
      "epoch": 0.07215665236051502,
      "grad_norm": 0.12165033573959809,
      "learning_rate": 0.00019907116461991605,
      "loss": 0.7149,
      "step": 807
    },
    {
      "epoch": 0.07224606580829757,
      "grad_norm": 0.12710461561950012,
      "learning_rate": 0.00019906722248327397,
      "loss": 0.7527,
      "step": 808
    },
    {
      "epoch": 0.07233547925608011,
      "grad_norm": 0.1401990273658364,
      "learning_rate": 0.0001990632720379836,
      "loss": 0.746,
      "step": 809
    },
    {
      "epoch": 0.07242489270386267,
      "grad_norm": 0.13619817602165812,
      "learning_rate": 0.00019905931328437624,
      "loss": 0.7191,
      "step": 810
    },
    {
      "epoch": 0.07251430615164521,
      "grad_norm": 0.12981501007659388,
      "learning_rate": 0.00019905534622278388,
      "loss": 0.7546,
      "step": 811
    },
    {
      "epoch": 0.07260371959942775,
      "grad_norm": 0.14245972254104955,
      "learning_rate": 0.00019905137085353926,
      "loss": 0.7188,
      "step": 812
    },
    {
      "epoch": 0.0726931330472103,
      "grad_norm": 0.12010470418526961,
      "learning_rate": 0.0001990473871769758,
      "loss": 0.6823,
      "step": 813
    },
    {
      "epoch": 0.07278254649499284,
      "grad_norm": 0.13979226304873252,
      "learning_rate": 0.00019904339519342764,
      "loss": 0.7262,
      "step": 814
    },
    {
      "epoch": 0.0728719599427754,
      "grad_norm": 0.13522102137880404,
      "learning_rate": 0.00019903939490322948,
      "loss": 0.7347,
      "step": 815
    },
    {
      "epoch": 0.07296137339055794,
      "grad_norm": 0.1440730493601707,
      "learning_rate": 0.0001990353863067169,
      "loss": 0.7363,
      "step": 816
    },
    {
      "epoch": 0.07305078683834049,
      "grad_norm": 0.127889517006606,
      "learning_rate": 0.00019903136940422605,
      "loss": 0.7189,
      "step": 817
    },
    {
      "epoch": 0.07314020028612303,
      "grad_norm": 0.128197733728683,
      "learning_rate": 0.00019902734419609389,
      "loss": 0.7251,
      "step": 818
    },
    {
      "epoch": 0.07322961373390557,
      "grad_norm": 0.1328438449218587,
      "learning_rate": 0.00019902331068265793,
      "loss": 0.7532,
      "step": 819
    },
    {
      "epoch": 0.07331902718168813,
      "grad_norm": 0.13255573355509184,
      "learning_rate": 0.00019901926886425653,
      "loss": 0.7372,
      "step": 820
    },
    {
      "epoch": 0.07340844062947068,
      "grad_norm": 0.14828866714928932,
      "learning_rate": 0.00019901521874122859,
      "loss": 0.7059,
      "step": 821
    },
    {
      "epoch": 0.07349785407725322,
      "grad_norm": 0.12919371162789373,
      "learning_rate": 0.00019901116031391386,
      "loss": 0.7126,
      "step": 822
    },
    {
      "epoch": 0.07358726752503576,
      "grad_norm": 0.11805843259786765,
      "learning_rate": 0.0001990070935826527,
      "loss": 0.6999,
      "step": 823
    },
    {
      "epoch": 0.0736766809728183,
      "grad_norm": 0.13085435954858557,
      "learning_rate": 0.00019900301854778617,
      "loss": 0.6835,
      "step": 824
    },
    {
      "epoch": 0.07376609442060086,
      "grad_norm": 0.1395596290819228,
      "learning_rate": 0.00019899893520965604,
      "loss": 0.7505,
      "step": 825
    },
    {
      "epoch": 0.07385550786838341,
      "grad_norm": 0.1314067125567278,
      "learning_rate": 0.00019899484356860473,
      "loss": 0.7538,
      "step": 826
    },
    {
      "epoch": 0.07394492131616595,
      "grad_norm": 0.1375830906718704,
      "learning_rate": 0.00019899074362497552,
      "loss": 0.764,
      "step": 827
    },
    {
      "epoch": 0.0740343347639485,
      "grad_norm": 0.12773602899117348,
      "learning_rate": 0.00019898663537911213,
      "loss": 0.7192,
      "step": 828
    },
    {
      "epoch": 0.07412374821173104,
      "grad_norm": 0.11598752993340472,
      "learning_rate": 0.00019898251883135922,
      "loss": 0.6825,
      "step": 829
    },
    {
      "epoch": 0.0742131616595136,
      "grad_norm": 0.1371366699983264,
      "learning_rate": 0.00019897839398206197,
      "loss": 0.7316,
      "step": 830
    },
    {
      "epoch": 0.07430257510729614,
      "grad_norm": 0.13011660216284188,
      "learning_rate": 0.00019897426083156634,
      "loss": 0.721,
      "step": 831
    },
    {
      "epoch": 0.07439198855507868,
      "grad_norm": 0.13601194130818292,
      "learning_rate": 0.000198970119380219,
      "loss": 0.7401,
      "step": 832
    },
    {
      "epoch": 0.07448140200286123,
      "grad_norm": 0.13207986371180486,
      "learning_rate": 0.0001989659696283673,
      "loss": 0.7663,
      "step": 833
    },
    {
      "epoch": 0.07457081545064377,
      "grad_norm": 0.13044793056989884,
      "learning_rate": 0.00019896181157635923,
      "loss": 0.7118,
      "step": 834
    },
    {
      "epoch": 0.07466022889842633,
      "grad_norm": 0.13371305406154724,
      "learning_rate": 0.0001989576452245435,
      "loss": 0.7223,
      "step": 835
    },
    {
      "epoch": 0.07474964234620887,
      "grad_norm": 0.13697790565717585,
      "learning_rate": 0.00019895347057326962,
      "loss": 0.6904,
      "step": 836
    },
    {
      "epoch": 0.07483905579399142,
      "grad_norm": 0.13579474376692977,
      "learning_rate": 0.00019894928762288766,
      "loss": 0.7377,
      "step": 837
    },
    {
      "epoch": 0.07492846924177396,
      "grad_norm": 0.11320185991712665,
      "learning_rate": 0.00019894509637374843,
      "loss": 0.6949,
      "step": 838
    },
    {
      "epoch": 0.0750178826895565,
      "grad_norm": 0.12463472526758353,
      "learning_rate": 0.00019894089682620349,
      "loss": 0.7285,
      "step": 839
    },
    {
      "epoch": 0.07510729613733906,
      "grad_norm": 0.11924251503148078,
      "learning_rate": 0.00019893668898060502,
      "loss": 0.6994,
      "step": 840
    },
    {
      "epoch": 0.0751967095851216,
      "grad_norm": 0.14873070972422267,
      "learning_rate": 0.00019893247283730593,
      "loss": 0.7471,
      "step": 841
    },
    {
      "epoch": 0.07528612303290415,
      "grad_norm": 0.12832354942016017,
      "learning_rate": 0.0001989282483966598,
      "loss": 0.722,
      "step": 842
    },
    {
      "epoch": 0.07537553648068669,
      "grad_norm": 0.13324039472891172,
      "learning_rate": 0.00019892401565902096,
      "loss": 0.7367,
      "step": 843
    },
    {
      "epoch": 0.07546494992846924,
      "grad_norm": 0.13768672899359502,
      "learning_rate": 0.0001989197746247444,
      "loss": 0.7125,
      "step": 844
    },
    {
      "epoch": 0.07555436337625179,
      "grad_norm": 0.11211532125754128,
      "learning_rate": 0.0001989155252941858,
      "loss": 0.3302,
      "step": 845
    },
    {
      "epoch": 0.07564377682403434,
      "grad_norm": 0.1521103339924491,
      "learning_rate": 0.00019891126766770158,
      "loss": 0.7233,
      "step": 846
    },
    {
      "epoch": 0.07573319027181688,
      "grad_norm": 0.1265212806449188,
      "learning_rate": 0.00019890700174564878,
      "loss": 0.6894,
      "step": 847
    },
    {
      "epoch": 0.07582260371959942,
      "grad_norm": 0.13842798392061884,
      "learning_rate": 0.00019890272752838518,
      "loss": 0.6955,
      "step": 848
    },
    {
      "epoch": 0.07591201716738197,
      "grad_norm": 0.1520411745869074,
      "learning_rate": 0.00019889844501626928,
      "loss": 0.7036,
      "step": 849
    },
    {
      "epoch": 0.07600143061516453,
      "grad_norm": 0.16277800260810385,
      "learning_rate": 0.00019889415420966026,
      "loss": 0.7298,
      "step": 850
    },
    {
      "epoch": 0.07609084406294707,
      "grad_norm": 0.13399569172860393,
      "learning_rate": 0.00019888985510891792,
      "loss": 0.724,
      "step": 851
    },
    {
      "epoch": 0.07618025751072961,
      "grad_norm": 0.12507333613733326,
      "learning_rate": 0.00019888554771440288,
      "loss": 0.7351,
      "step": 852
    },
    {
      "epoch": 0.07626967095851216,
      "grad_norm": 0.13100697963778818,
      "learning_rate": 0.00019888123202647636,
      "loss": 0.691,
      "step": 853
    },
    {
      "epoch": 0.0763590844062947,
      "grad_norm": 0.14769096454711725,
      "learning_rate": 0.00019887690804550035,
      "loss": 0.7397,
      "step": 854
    },
    {
      "epoch": 0.07644849785407726,
      "grad_norm": 0.12720270961551167,
      "learning_rate": 0.00019887257577183744,
      "loss": 0.7079,
      "step": 855
    },
    {
      "epoch": 0.0765379113018598,
      "grad_norm": 0.12384439737029193,
      "learning_rate": 0.00019886823520585105,
      "loss": 0.6901,
      "step": 856
    },
    {
      "epoch": 0.07662732474964234,
      "grad_norm": 0.12224583141730085,
      "learning_rate": 0.00019886388634790517,
      "loss": 0.7399,
      "step": 857
    },
    {
      "epoch": 0.07671673819742489,
      "grad_norm": 0.1304203390727742,
      "learning_rate": 0.0001988595291983645,
      "loss": 0.7136,
      "step": 858
    },
    {
      "epoch": 0.07680615164520745,
      "grad_norm": 0.12220498204761479,
      "learning_rate": 0.00019885516375759457,
      "loss": 0.7289,
      "step": 859
    },
    {
      "epoch": 0.07689556509298999,
      "grad_norm": 0.13068714774526002,
      "learning_rate": 0.00019885079002596138,
      "loss": 0.7231,
      "step": 860
    },
    {
      "epoch": 0.07698497854077253,
      "grad_norm": 0.1313233209329883,
      "learning_rate": 0.00019884640800383186,
      "loss": 0.7305,
      "step": 861
    },
    {
      "epoch": 0.07707439198855508,
      "grad_norm": 0.13045040549553696,
      "learning_rate": 0.00019884201769157346,
      "loss": 0.7118,
      "step": 862
    },
    {
      "epoch": 0.07716380543633762,
      "grad_norm": 0.11012693727176211,
      "learning_rate": 0.0001988376190895544,
      "loss": 0.7193,
      "step": 863
    },
    {
      "epoch": 0.07725321888412018,
      "grad_norm": 0.11872173720761758,
      "learning_rate": 0.0001988332121981436,
      "loss": 0.6811,
      "step": 864
    },
    {
      "epoch": 0.07734263233190272,
      "grad_norm": 0.12086787253443365,
      "learning_rate": 0.00019882879701771063,
      "loss": 0.732,
      "step": 865
    },
    {
      "epoch": 0.07743204577968527,
      "grad_norm": 0.13203216225503944,
      "learning_rate": 0.00019882437354862585,
      "loss": 0.7309,
      "step": 866
    },
    {
      "epoch": 0.07752145922746781,
      "grad_norm": 0.1089985964057159,
      "learning_rate": 0.00019881994179126017,
      "loss": 0.6863,
      "step": 867
    },
    {
      "epoch": 0.07761087267525035,
      "grad_norm": 0.15903761338233674,
      "learning_rate": 0.00019881550174598536,
      "loss": 0.7583,
      "step": 868
    },
    {
      "epoch": 0.07770028612303291,
      "grad_norm": 0.1377651174956009,
      "learning_rate": 0.00019881105341317372,
      "loss": 0.7359,
      "step": 869
    },
    {
      "epoch": 0.07778969957081545,
      "grad_norm": 0.15046094840718854,
      "learning_rate": 0.00019880659679319838,
      "loss": 0.7736,
      "step": 870
    },
    {
      "epoch": 0.077879113018598,
      "grad_norm": 0.14614635974073711,
      "learning_rate": 0.00019880213188643307,
      "loss": 0.7288,
      "step": 871
    },
    {
      "epoch": 0.07796852646638054,
      "grad_norm": 0.1622261387147982,
      "learning_rate": 0.00019879765869325233,
      "loss": 0.7143,
      "step": 872
    },
    {
      "epoch": 0.07805793991416309,
      "grad_norm": 0.14206477629797892,
      "learning_rate": 0.00019879317721403124,
      "loss": 0.7078,
      "step": 873
    },
    {
      "epoch": 0.07814735336194564,
      "grad_norm": 0.12546250198336376,
      "learning_rate": 0.00019878868744914569,
      "loss": 0.7186,
      "step": 874
    },
    {
      "epoch": 0.07823676680972819,
      "grad_norm": 0.14450679909691652,
      "learning_rate": 0.00019878418939897223,
      "loss": 0.7392,
      "step": 875
    },
    {
      "epoch": 0.07832618025751073,
      "grad_norm": 0.13069069582615828,
      "learning_rate": 0.00019877968306388811,
      "loss": 0.7002,
      "step": 876
    },
    {
      "epoch": 0.07841559370529327,
      "grad_norm": 0.12155837448992156,
      "learning_rate": 0.00019877516844427127,
      "loss": 0.735,
      "step": 877
    },
    {
      "epoch": 0.07850500715307582,
      "grad_norm": 0.15801953230095275,
      "learning_rate": 0.00019877064554050036,
      "loss": 0.7093,
      "step": 878
    },
    {
      "epoch": 0.07859442060085838,
      "grad_norm": 0.13865010028541766,
      "learning_rate": 0.00019876611435295466,
      "loss": 0.7363,
      "step": 879
    },
    {
      "epoch": 0.07868383404864092,
      "grad_norm": 0.14899433898362904,
      "learning_rate": 0.00019876157488201424,
      "loss": 0.7294,
      "step": 880
    },
    {
      "epoch": 0.07877324749642346,
      "grad_norm": 0.12584574688195496,
      "learning_rate": 0.00019875702712805984,
      "loss": 0.7278,
      "step": 881
    },
    {
      "epoch": 0.078862660944206,
      "grad_norm": 0.15099684817599615,
      "learning_rate": 0.00019875247109147278,
      "loss": 0.7604,
      "step": 882
    },
    {
      "epoch": 0.07895207439198855,
      "grad_norm": 0.14355290106012397,
      "learning_rate": 0.0001987479067726353,
      "loss": 0.7594,
      "step": 883
    },
    {
      "epoch": 0.07904148783977111,
      "grad_norm": 0.14393846393161652,
      "learning_rate": 0.00019874333417193007,
      "loss": 0.7284,
      "step": 884
    },
    {
      "epoch": 0.07913090128755365,
      "grad_norm": 0.12215972649029178,
      "learning_rate": 0.00019873875328974073,
      "loss": 0.6989,
      "step": 885
    },
    {
      "epoch": 0.0792203147353362,
      "grad_norm": 0.1260870590481384,
      "learning_rate": 0.00019873416412645133,
      "loss": 0.7231,
      "step": 886
    },
    {
      "epoch": 0.07930972818311874,
      "grad_norm": 0.12740224470015193,
      "learning_rate": 0.00019872956668244687,
      "loss": 0.7064,
      "step": 887
    },
    {
      "epoch": 0.07939914163090128,
      "grad_norm": 0.1230371049933604,
      "learning_rate": 0.00019872496095811286,
      "loss": 0.7143,
      "step": 888
    },
    {
      "epoch": 0.07948855507868384,
      "grad_norm": 0.1330150868693921,
      "learning_rate": 0.00019872034695383558,
      "loss": 0.712,
      "step": 889
    },
    {
      "epoch": 0.07957796852646638,
      "grad_norm": 0.11789841651998877,
      "learning_rate": 0.0001987157246700021,
      "loss": 0.6927,
      "step": 890
    },
    {
      "epoch": 0.07966738197424893,
      "grad_norm": 0.1447422118862811,
      "learning_rate": 0.00019871109410699996,
      "loss": 0.7094,
      "step": 891
    },
    {
      "epoch": 0.07975679542203147,
      "grad_norm": 0.12456074557737405,
      "learning_rate": 0.00019870645526521758,
      "loss": 0.6917,
      "step": 892
    },
    {
      "epoch": 0.07984620886981401,
      "grad_norm": 0.13419486823547372,
      "learning_rate": 0.000198701808145044,
      "loss": 0.7272,
      "step": 893
    },
    {
      "epoch": 0.07993562231759657,
      "grad_norm": 0.13905821174477112,
      "learning_rate": 0.00019869715274686898,
      "loss": 0.7292,
      "step": 894
    },
    {
      "epoch": 0.08002503576537912,
      "grad_norm": 0.14687088642165885,
      "learning_rate": 0.00019869248907108294,
      "loss": 0.7903,
      "step": 895
    },
    {
      "epoch": 0.08011444921316166,
      "grad_norm": 0.13423394133066635,
      "learning_rate": 0.00019868781711807705,
      "loss": 0.7404,
      "step": 896
    },
    {
      "epoch": 0.0802038626609442,
      "grad_norm": 0.141295786093553,
      "learning_rate": 0.0001986831368882431,
      "loss": 0.7741,
      "step": 897
    },
    {
      "epoch": 0.08029327610872675,
      "grad_norm": 0.13126920854052415,
      "learning_rate": 0.00019867844838197365,
      "loss": 0.7484,
      "step": 898
    },
    {
      "epoch": 0.0803826895565093,
      "grad_norm": 0.14610500949290944,
      "learning_rate": 0.00019867375159966192,
      "loss": 0.7241,
      "step": 899
    },
    {
      "epoch": 0.08047210300429185,
      "grad_norm": 0.12182523742857611,
      "learning_rate": 0.00019866904654170179,
      "loss": 0.6852,
      "step": 900
    },
    {
      "epoch": 0.08056151645207439,
      "grad_norm": 0.13189627251153774,
      "learning_rate": 0.0001986643332084879,
      "loss": 0.7409,
      "step": 901
    },
    {
      "epoch": 0.08065092989985694,
      "grad_norm": 0.1394759505524753,
      "learning_rate": 0.00019865961160041556,
      "loss": 0.7348,
      "step": 902
    },
    {
      "epoch": 0.08074034334763948,
      "grad_norm": 0.14633760811722626,
      "learning_rate": 0.0001986548817178807,
      "loss": 0.7008,
      "step": 903
    },
    {
      "epoch": 0.08082975679542204,
      "grad_norm": 0.14661070429120138,
      "learning_rate": 0.0001986501435612801,
      "loss": 0.7602,
      "step": 904
    },
    {
      "epoch": 0.08091917024320458,
      "grad_norm": 0.14415961722018295,
      "learning_rate": 0.00019864539713101108,
      "loss": 0.7546,
      "step": 905
    },
    {
      "epoch": 0.08100858369098712,
      "grad_norm": 0.1405393077062686,
      "learning_rate": 0.00019864064242747174,
      "loss": 0.7088,
      "step": 906
    },
    {
      "epoch": 0.08109799713876967,
      "grad_norm": 0.13352286271586972,
      "learning_rate": 0.00019863587945106084,
      "loss": 0.7382,
      "step": 907
    },
    {
      "epoch": 0.08118741058655221,
      "grad_norm": 0.159755867758851,
      "learning_rate": 0.00019863110820217785,
      "loss": 0.7092,
      "step": 908
    },
    {
      "epoch": 0.08127682403433477,
      "grad_norm": 0.15922070193976154,
      "learning_rate": 0.00019862632868122292,
      "loss": 0.7113,
      "step": 909
    },
    {
      "epoch": 0.08136623748211731,
      "grad_norm": 0.12082363314778365,
      "learning_rate": 0.00019862154088859697,
      "loss": 0.7078,
      "step": 910
    },
    {
      "epoch": 0.08145565092989986,
      "grad_norm": 0.13361826442901373,
      "learning_rate": 0.00019861674482470144,
      "loss": 0.7267,
      "step": 911
    },
    {
      "epoch": 0.0815450643776824,
      "grad_norm": 0.1458946232822115,
      "learning_rate": 0.00019861194048993863,
      "loss": 0.7035,
      "step": 912
    },
    {
      "epoch": 0.08163447782546494,
      "grad_norm": 0.12336117165755861,
      "learning_rate": 0.00019860712788471148,
      "loss": 0.6958,
      "step": 913
    },
    {
      "epoch": 0.0817238912732475,
      "grad_norm": 0.1324710081084547,
      "learning_rate": 0.00019860230700942356,
      "loss": 0.7788,
      "step": 914
    },
    {
      "epoch": 0.08181330472103004,
      "grad_norm": 0.14374534494186036,
      "learning_rate": 0.00019859747786447928,
      "loss": 0.7489,
      "step": 915
    },
    {
      "epoch": 0.08190271816881259,
      "grad_norm": 0.14183202021810631,
      "learning_rate": 0.00019859264045028358,
      "loss": 0.775,
      "step": 916
    },
    {
      "epoch": 0.08199213161659513,
      "grad_norm": 0.13323805755491963,
      "learning_rate": 0.00019858779476724219,
      "loss": 0.7174,
      "step": 917
    },
    {
      "epoch": 0.08208154506437768,
      "grad_norm": 0.11253631149408139,
      "learning_rate": 0.00019858294081576155,
      "loss": 0.7333,
      "step": 918
    },
    {
      "epoch": 0.08217095851216023,
      "grad_norm": 0.11997230503701856,
      "learning_rate": 0.00019857807859624869,
      "loss": 0.7105,
      "step": 919
    },
    {
      "epoch": 0.08226037195994278,
      "grad_norm": 0.13015987063569714,
      "learning_rate": 0.00019857320810911144,
      "loss": 0.6962,
      "step": 920
    },
    {
      "epoch": 0.08234978540772532,
      "grad_norm": 0.13273049843037854,
      "learning_rate": 0.00019856832935475827,
      "loss": 0.7045,
      "step": 921
    },
    {
      "epoch": 0.08243919885550786,
      "grad_norm": 0.1490010238458284,
      "learning_rate": 0.00019856344233359837,
      "loss": 0.7408,
      "step": 922
    },
    {
      "epoch": 0.08252861230329042,
      "grad_norm": 0.14158125707496966,
      "learning_rate": 0.0001985585470460416,
      "loss": 0.7202,
      "step": 923
    },
    {
      "epoch": 0.08261802575107297,
      "grad_norm": 0.13640761846569927,
      "learning_rate": 0.00019855364349249848,
      "loss": 0.7622,
      "step": 924
    },
    {
      "epoch": 0.08270743919885551,
      "grad_norm": 0.135010955774684,
      "learning_rate": 0.00019854873167338033,
      "loss": 0.7552,
      "step": 925
    },
    {
      "epoch": 0.08279685264663805,
      "grad_norm": 0.13635518744219496,
      "learning_rate": 0.0001985438115890991,
      "loss": 0.7447,
      "step": 926
    },
    {
      "epoch": 0.0828862660944206,
      "grad_norm": 0.135270073739332,
      "learning_rate": 0.00019853888324006735,
      "loss": 0.7527,
      "step": 927
    },
    {
      "epoch": 0.08297567954220315,
      "grad_norm": 0.1446397767999504,
      "learning_rate": 0.00019853394662669847,
      "loss": 0.7333,
      "step": 928
    },
    {
      "epoch": 0.0830650929899857,
      "grad_norm": 0.13903174343426436,
      "learning_rate": 0.00019852900174940655,
      "loss": 0.7494,
      "step": 929
    },
    {
      "epoch": 0.08315450643776824,
      "grad_norm": 0.12773168134094923,
      "learning_rate": 0.00019852404860860618,
      "loss": 0.7413,
      "step": 930
    },
    {
      "epoch": 0.08324391988555079,
      "grad_norm": 0.11592828944644026,
      "learning_rate": 0.00019851908720471285,
      "loss": 0.656,
      "step": 931
    },
    {
      "epoch": 0.08333333333333333,
      "grad_norm": 0.14391487272997341,
      "learning_rate": 0.0001985141175381427,
      "loss": 0.6772,
      "step": 932
    },
    {
      "epoch": 0.08342274678111589,
      "grad_norm": 0.14884432618683935,
      "learning_rate": 0.00019850913960931243,
      "loss": 0.7309,
      "step": 933
    },
    {
      "epoch": 0.08351216022889843,
      "grad_norm": 0.1212954146744199,
      "learning_rate": 0.00019850415341863962,
      "loss": 0.6918,
      "step": 934
    },
    {
      "epoch": 0.08360157367668097,
      "grad_norm": 0.12653225048350672,
      "learning_rate": 0.00019849915896654242,
      "loss": 0.6874,
      "step": 935
    },
    {
      "epoch": 0.08369098712446352,
      "grad_norm": 0.14313500201077073,
      "learning_rate": 0.0001984941562534397,
      "loss": 0.7164,
      "step": 936
    },
    {
      "epoch": 0.08378040057224606,
      "grad_norm": 0.16159899193661095,
      "learning_rate": 0.00019848914527975108,
      "loss": 0.7065,
      "step": 937
    },
    {
      "epoch": 0.08386981402002862,
      "grad_norm": 0.15199182715277648,
      "learning_rate": 0.00019848412604589678,
      "loss": 0.7231,
      "step": 938
    },
    {
      "epoch": 0.08395922746781116,
      "grad_norm": 0.14336219390894356,
      "learning_rate": 0.00019847909855229775,
      "loss": 0.7757,
      "step": 939
    },
    {
      "epoch": 0.0840486409155937,
      "grad_norm": 0.13089588753331413,
      "learning_rate": 0.00019847406279937567,
      "loss": 0.3448,
      "step": 940
    },
    {
      "epoch": 0.08413805436337625,
      "grad_norm": 0.14961393500165382,
      "learning_rate": 0.00019846901878755287,
      "loss": 0.7046,
      "step": 941
    },
    {
      "epoch": 0.0842274678111588,
      "grad_norm": 0.14615561607208963,
      "learning_rate": 0.00019846396651725237,
      "loss": 0.7358,
      "step": 942
    },
    {
      "epoch": 0.08431688125894135,
      "grad_norm": 0.12831801862522335,
      "learning_rate": 0.0001984589059888979,
      "loss": 0.7018,
      "step": 943
    },
    {
      "epoch": 0.0844062947067239,
      "grad_norm": 0.14895640525021997,
      "learning_rate": 0.00019845383720291392,
      "loss": 0.7719,
      "step": 944
    },
    {
      "epoch": 0.08449570815450644,
      "grad_norm": 0.12653185392556546,
      "learning_rate": 0.00019844876015972552,
      "loss": 0.7243,
      "step": 945
    },
    {
      "epoch": 0.08458512160228898,
      "grad_norm": 0.1158270277206204,
      "learning_rate": 0.0001984436748597585,
      "loss": 0.7189,
      "step": 946
    },
    {
      "epoch": 0.08467453505007153,
      "grad_norm": 0.14069182509630174,
      "learning_rate": 0.00019843858130343933,
      "loss": 0.7019,
      "step": 947
    },
    {
      "epoch": 0.08476394849785408,
      "grad_norm": 0.13357914656377345,
      "learning_rate": 0.00019843347949119526,
      "loss": 0.7349,
      "step": 948
    },
    {
      "epoch": 0.08485336194563663,
      "grad_norm": 0.13890894760875613,
      "learning_rate": 0.00019842836942345415,
      "loss": 0.7158,
      "step": 949
    },
    {
      "epoch": 0.08494277539341917,
      "grad_norm": 0.1420143920311145,
      "learning_rate": 0.00019842325110064454,
      "loss": 0.7269,
      "step": 950
    },
    {
      "epoch": 0.08503218884120171,
      "grad_norm": 0.13059217523119404,
      "learning_rate": 0.00019841812452319575,
      "loss": 0.6874,
      "step": 951
    },
    {
      "epoch": 0.08512160228898426,
      "grad_norm": 0.14045157049707546,
      "learning_rate": 0.0001984129896915377,
      "loss": 0.7861,
      "step": 952
    },
    {
      "epoch": 0.08521101573676682,
      "grad_norm": 0.1322465092167631,
      "learning_rate": 0.00019840784660610106,
      "loss": 0.6898,
      "step": 953
    },
    {
      "epoch": 0.08530042918454936,
      "grad_norm": 0.10722553712237927,
      "learning_rate": 0.00019840269526731716,
      "loss": 0.6948,
      "step": 954
    },
    {
      "epoch": 0.0853898426323319,
      "grad_norm": 0.13384952239845385,
      "learning_rate": 0.00019839753567561807,
      "loss": 0.7103,
      "step": 955
    },
    {
      "epoch": 0.08547925608011445,
      "grad_norm": 0.1382873093363074,
      "learning_rate": 0.0001983923678314365,
      "loss": 0.7155,
      "step": 956
    },
    {
      "epoch": 0.08556866952789699,
      "grad_norm": 0.13350079542160453,
      "learning_rate": 0.00019838719173520585,
      "loss": 0.7436,
      "step": 957
    },
    {
      "epoch": 0.08565808297567955,
      "grad_norm": 0.14424729694757984,
      "learning_rate": 0.00019838200738736027,
      "loss": 0.7585,
      "step": 958
    },
    {
      "epoch": 0.08574749642346209,
      "grad_norm": 0.13568745979570088,
      "learning_rate": 0.0001983768147883345,
      "loss": 0.72,
      "step": 959
    },
    {
      "epoch": 0.08583690987124463,
      "grad_norm": 0.13512525108624326,
      "learning_rate": 0.0001983716139385641,
      "loss": 0.7337,
      "step": 960
    },
    {
      "epoch": 0.08592632331902718,
      "grad_norm": 0.13711559393580403,
      "learning_rate": 0.00019836640483848528,
      "loss": 0.7275,
      "step": 961
    },
    {
      "epoch": 0.08601573676680972,
      "grad_norm": 0.13938715908089228,
      "learning_rate": 0.00019836118748853485,
      "loss": 0.6799,
      "step": 962
    },
    {
      "epoch": 0.08610515021459228,
      "grad_norm": 0.1643096547845956,
      "learning_rate": 0.00019835596188915044,
      "loss": 0.7507,
      "step": 963
    },
    {
      "epoch": 0.08619456366237482,
      "grad_norm": 0.12861505266024487,
      "learning_rate": 0.00019835072804077027,
      "loss": 0.6917,
      "step": 964
    },
    {
      "epoch": 0.08628397711015737,
      "grad_norm": 0.11541763651029001,
      "learning_rate": 0.00019834548594383332,
      "loss": 0.696,
      "step": 965
    },
    {
      "epoch": 0.08637339055793991,
      "grad_norm": 0.1243179219029705,
      "learning_rate": 0.00019834023559877923,
      "loss": 0.7119,
      "step": 966
    },
    {
      "epoch": 0.08646280400572245,
      "grad_norm": 0.12186779570093811,
      "learning_rate": 0.00019833497700604835,
      "loss": 0.6896,
      "step": 967
    },
    {
      "epoch": 0.08655221745350501,
      "grad_norm": 0.1110096675324757,
      "learning_rate": 0.0001983297101660817,
      "loss": 0.6946,
      "step": 968
    },
    {
      "epoch": 0.08664163090128756,
      "grad_norm": 0.12449905617140944,
      "learning_rate": 0.00019832443507932103,
      "loss": 0.7037,
      "step": 969
    },
    {
      "epoch": 0.0867310443490701,
      "grad_norm": 0.12980935784816056,
      "learning_rate": 0.00019831915174620872,
      "loss": 0.675,
      "step": 970
    },
    {
      "epoch": 0.08682045779685264,
      "grad_norm": 0.1488904902940773,
      "learning_rate": 0.0001983138601671879,
      "loss": 0.7554,
      "step": 971
    },
    {
      "epoch": 0.08690987124463519,
      "grad_norm": 0.14635996820796862,
      "learning_rate": 0.00019830856034270235,
      "loss": 0.7376,
      "step": 972
    },
    {
      "epoch": 0.08699928469241774,
      "grad_norm": 0.13557424993536196,
      "learning_rate": 0.0001983032522731966,
      "loss": 0.6651,
      "step": 973
    },
    {
      "epoch": 0.08708869814020029,
      "grad_norm": 0.13665941665850434,
      "learning_rate": 0.00019829793595911577,
      "loss": 0.6947,
      "step": 974
    },
    {
      "epoch": 0.08717811158798283,
      "grad_norm": 0.14083217104890378,
      "learning_rate": 0.0001982926114009058,
      "loss": 0.7265,
      "step": 975
    },
    {
      "epoch": 0.08726752503576538,
      "grad_norm": 0.13631460697579267,
      "learning_rate": 0.00019828727859901317,
      "loss": 0.7186,
      "step": 976
    },
    {
      "epoch": 0.08735693848354792,
      "grad_norm": 0.13230632096567466,
      "learning_rate": 0.00019828193755388522,
      "loss": 0.6709,
      "step": 977
    },
    {
      "epoch": 0.08744635193133048,
      "grad_norm": 0.14472371153797628,
      "learning_rate": 0.00019827658826596984,
      "loss": 0.3454,
      "step": 978
    },
    {
      "epoch": 0.08753576537911302,
      "grad_norm": 0.12339802782626388,
      "learning_rate": 0.00019827123073571572,
      "loss": 0.708,
      "step": 979
    },
    {
      "epoch": 0.08762517882689556,
      "grad_norm": 0.12427969882791988,
      "learning_rate": 0.00019826586496357216,
      "loss": 0.7011,
      "step": 980
    },
    {
      "epoch": 0.08771459227467811,
      "grad_norm": 0.12468172517299421,
      "learning_rate": 0.00019826049094998912,
      "loss": 0.6808,
      "step": 981
    },
    {
      "epoch": 0.08780400572246065,
      "grad_norm": 0.14155531502702096,
      "learning_rate": 0.00019825510869541743,
      "loss": 0.7415,
      "step": 982
    },
    {
      "epoch": 0.08789341917024321,
      "grad_norm": 0.12044632328040988,
      "learning_rate": 0.00019824971820030842,
      "loss": 0.7105,
      "step": 983
    },
    {
      "epoch": 0.08798283261802575,
      "grad_norm": 0.13853154018699176,
      "learning_rate": 0.0001982443194651142,
      "loss": 0.7118,
      "step": 984
    },
    {
      "epoch": 0.0880722460658083,
      "grad_norm": 0.13650819658347588,
      "learning_rate": 0.00019823891249028756,
      "loss": 0.7385,
      "step": 985
    },
    {
      "epoch": 0.08816165951359084,
      "grad_norm": 0.13761286582628954,
      "learning_rate": 0.00019823349727628197,
      "loss": 0.6872,
      "step": 986
    },
    {
      "epoch": 0.0882510729613734,
      "grad_norm": 0.1342341677289282,
      "learning_rate": 0.00019822807382355163,
      "loss": 0.6911,
      "step": 987
    },
    {
      "epoch": 0.08834048640915594,
      "grad_norm": 0.12572332248484203,
      "learning_rate": 0.0001982226421325513,
      "loss": 0.7002,
      "step": 988
    },
    {
      "epoch": 0.08842989985693848,
      "grad_norm": 0.17351216233499336,
      "learning_rate": 0.00019821720220373665,
      "loss": 0.3423,
      "step": 989
    },
    {
      "epoch": 0.08851931330472103,
      "grad_norm": 0.12408271891285234,
      "learning_rate": 0.00019821175403756384,
      "loss": 0.7006,
      "step": 990
    },
    {
      "epoch": 0.08860872675250357,
      "grad_norm": 0.12899036331524152,
      "learning_rate": 0.00019820629763448987,
      "loss": 0.7326,
      "step": 991
    },
    {
      "epoch": 0.08869814020028613,
      "grad_norm": 0.11292432468980451,
      "learning_rate": 0.00019820083299497228,
      "loss": 0.7278,
      "step": 992
    },
    {
      "epoch": 0.08878755364806867,
      "grad_norm": 0.1336969534458324,
      "learning_rate": 0.00019819536011946945,
      "loss": 0.7332,
      "step": 993
    },
    {
      "epoch": 0.08887696709585122,
      "grad_norm": 0.1366025142428574,
      "learning_rate": 0.00019818987900844032,
      "loss": 0.7152,
      "step": 994
    },
    {
      "epoch": 0.08896638054363376,
      "grad_norm": 0.1363742130439042,
      "learning_rate": 0.00019818438966234464,
      "loss": 0.7278,
      "step": 995
    },
    {
      "epoch": 0.0890557939914163,
      "grad_norm": 0.14032865517917967,
      "learning_rate": 0.00019817889208164277,
      "loss": 0.708,
      "step": 996
    },
    {
      "epoch": 0.08914520743919886,
      "grad_norm": 0.14469012585727534,
      "learning_rate": 0.0001981733862667958,
      "loss": 0.722,
      "step": 997
    },
    {
      "epoch": 0.0892346208869814,
      "grad_norm": 0.13375760037321005,
      "learning_rate": 0.00019816787221826548,
      "loss": 0.6982,
      "step": 998
    },
    {
      "epoch": 0.08932403433476395,
      "grad_norm": 0.14158557211959663,
      "learning_rate": 0.0001981623499365143,
      "loss": 0.7363,
      "step": 999
    },
    {
      "epoch": 0.08941344778254649,
      "grad_norm": 0.1274832885361676,
      "learning_rate": 0.00019815681942200535,
      "loss": 0.677,
      "step": 1000
    },
    {
      "epoch": 0.08950286123032904,
      "grad_norm": 0.14887243456868146,
      "learning_rate": 0.00019815128067520252,
      "loss": 0.7225,
      "step": 1001
    },
    {
      "epoch": 0.0895922746781116,
      "grad_norm": 0.1487709379687945,
      "learning_rate": 0.0001981457336965703,
      "loss": 0.7453,
      "step": 1002
    },
    {
      "epoch": 0.08968168812589414,
      "grad_norm": 0.14652950088678157,
      "learning_rate": 0.0001981401784865739,
      "loss": 0.7266,
      "step": 1003
    },
    {
      "epoch": 0.08977110157367668,
      "grad_norm": 0.13619402287075344,
      "learning_rate": 0.00019813461504567933,
      "loss": 0.6921,
      "step": 1004
    },
    {
      "epoch": 0.08986051502145923,
      "grad_norm": 0.13568535042681112,
      "learning_rate": 0.00019812904337435306,
      "loss": 0.7611,
      "step": 1005
    },
    {
      "epoch": 0.08994992846924177,
      "grad_norm": 0.12021221923496014,
      "learning_rate": 0.00019812346347306242,
      "loss": 0.6976,
      "step": 1006
    },
    {
      "epoch": 0.09003934191702433,
      "grad_norm": 0.13038079321274762,
      "learning_rate": 0.00019811787534227543,
      "loss": 0.7214,
      "step": 1007
    },
    {
      "epoch": 0.09012875536480687,
      "grad_norm": 0.129150495114048,
      "learning_rate": 0.0001981122789824607,
      "loss": 0.7418,
      "step": 1008
    },
    {
      "epoch": 0.09021816881258941,
      "grad_norm": 0.13319412643274312,
      "learning_rate": 0.00019810667439408767,
      "loss": 0.713,
      "step": 1009
    },
    {
      "epoch": 0.09030758226037196,
      "grad_norm": 0.13533457270681967,
      "learning_rate": 0.0001981010615776263,
      "loss": 0.7187,
      "step": 1010
    },
    {
      "epoch": 0.0903969957081545,
      "grad_norm": 0.1261094277611602,
      "learning_rate": 0.00019809544053354738,
      "loss": 0.715,
      "step": 1011
    },
    {
      "epoch": 0.09048640915593706,
      "grad_norm": 0.1284952925357722,
      "learning_rate": 0.00019808981126232236,
      "loss": 0.7487,
      "step": 1012
    },
    {
      "epoch": 0.0905758226037196,
      "grad_norm": 0.13239499722956438,
      "learning_rate": 0.0001980841737644233,
      "loss": 0.714,
      "step": 1013
    },
    {
      "epoch": 0.09066523605150215,
      "grad_norm": 0.1463803607008311,
      "learning_rate": 0.00019807852804032305,
      "loss": 0.733,
      "step": 1014
    },
    {
      "epoch": 0.09075464949928469,
      "grad_norm": 0.15265097284338827,
      "learning_rate": 0.00019807287409049512,
      "loss": 0.7178,
      "step": 1015
    },
    {
      "epoch": 0.09084406294706723,
      "grad_norm": 0.12076282521075361,
      "learning_rate": 0.00019806721191541367,
      "loss": 0.7044,
      "step": 1016
    },
    {
      "epoch": 0.09093347639484979,
      "grad_norm": 0.1412507791215726,
      "learning_rate": 0.00019806154151555356,
      "loss": 0.7114,
      "step": 1017
    },
    {
      "epoch": 0.09102288984263233,
      "grad_norm": 0.137682225160043,
      "learning_rate": 0.00019805586289139043,
      "loss": 0.7292,
      "step": 1018
    },
    {
      "epoch": 0.09111230329041488,
      "grad_norm": 0.14270972419362793,
      "learning_rate": 0.0001980501760434005,
      "loss": 0.741,
      "step": 1019
    },
    {
      "epoch": 0.09120171673819742,
      "grad_norm": 0.14588593766872834,
      "learning_rate": 0.00019804448097206068,
      "loss": 0.7548,
      "step": 1020
    },
    {
      "epoch": 0.09129113018597997,
      "grad_norm": 0.15939622099492742,
      "learning_rate": 0.0001980387776778487,
      "loss": 0.7424,
      "step": 1021
    },
    {
      "epoch": 0.09138054363376252,
      "grad_norm": 0.1256774069843306,
      "learning_rate": 0.00019803306616124282,
      "loss": 0.7356,
      "step": 1022
    },
    {
      "epoch": 0.09146995708154507,
      "grad_norm": 0.12521774604638242,
      "learning_rate": 0.00019802734642272206,
      "loss": 0.7705,
      "step": 1023
    },
    {
      "epoch": 0.09155937052932761,
      "grad_norm": 0.1392411112726795,
      "learning_rate": 0.00019802161846276615,
      "loss": 0.7416,
      "step": 1024
    },
    {
      "epoch": 0.09164878397711015,
      "grad_norm": 0.1465997832087601,
      "learning_rate": 0.0001980158822818555,
      "loss": 0.7114,
      "step": 1025
    },
    {
      "epoch": 0.0917381974248927,
      "grad_norm": 0.19745154327346234,
      "learning_rate": 0.00019801013788047116,
      "loss": 0.7659,
      "step": 1026
    },
    {
      "epoch": 0.09182761087267526,
      "grad_norm": 0.15943121832589915,
      "learning_rate": 0.00019800438525909491,
      "loss": 0.7632,
      "step": 1027
    },
    {
      "epoch": 0.0919170243204578,
      "grad_norm": 0.1528124081619799,
      "learning_rate": 0.00019799862441820923,
      "loss": 0.7385,
      "step": 1028
    },
    {
      "epoch": 0.09200643776824034,
      "grad_norm": 0.14198989767661194,
      "learning_rate": 0.0001979928553582973,
      "loss": 0.7261,
      "step": 1029
    },
    {
      "epoch": 0.09209585121602289,
      "grad_norm": 0.1543354379965566,
      "learning_rate": 0.0001979870780798429,
      "loss": 0.7271,
      "step": 1030
    },
    {
      "epoch": 0.09218526466380543,
      "grad_norm": 0.1544944458469831,
      "learning_rate": 0.00019798129258333065,
      "loss": 0.7689,
      "step": 1031
    },
    {
      "epoch": 0.09227467811158799,
      "grad_norm": 0.15153210382687232,
      "learning_rate": 0.00019797549886924566,
      "loss": 0.7094,
      "step": 1032
    },
    {
      "epoch": 0.09236409155937053,
      "grad_norm": 0.13315925110540594,
      "learning_rate": 0.00019796969693807394,
      "loss": 0.6985,
      "step": 1033
    },
    {
      "epoch": 0.09245350500715308,
      "grad_norm": 0.11780557165612265,
      "learning_rate": 0.00019796388679030205,
      "loss": 0.6791,
      "step": 1034
    },
    {
      "epoch": 0.09254291845493562,
      "grad_norm": 0.11947012786980503,
      "learning_rate": 0.0001979580684264173,
      "loss": 0.6925,
      "step": 1035
    },
    {
      "epoch": 0.09263233190271816,
      "grad_norm": 0.13561946917653275,
      "learning_rate": 0.00019795224184690764,
      "loss": 0.6959,
      "step": 1036
    },
    {
      "epoch": 0.09272174535050072,
      "grad_norm": 0.13261162072836616,
      "learning_rate": 0.00019794640705226175,
      "loss": 0.7141,
      "step": 1037
    },
    {
      "epoch": 0.09281115879828326,
      "grad_norm": 0.1398839823160667,
      "learning_rate": 0.00019794056404296898,
      "loss": 0.7364,
      "step": 1038
    },
    {
      "epoch": 0.09290057224606581,
      "grad_norm": 0.12616668906266046,
      "learning_rate": 0.0001979347128195194,
      "loss": 0.7279,
      "step": 1039
    },
    {
      "epoch": 0.09298998569384835,
      "grad_norm": 0.1284612185604553,
      "learning_rate": 0.00019792885338240374,
      "loss": 0.7146,
      "step": 1040
    },
    {
      "epoch": 0.0930793991416309,
      "grad_norm": 0.13174958580050478,
      "learning_rate": 0.0001979229857321134,
      "loss": 0.6939,
      "step": 1041
    },
    {
      "epoch": 0.09316881258941345,
      "grad_norm": 0.1148261277020849,
      "learning_rate": 0.00019791710986914051,
      "loss": 0.7112,
      "step": 1042
    },
    {
      "epoch": 0.093258226037196,
      "grad_norm": 0.1371348267989942,
      "learning_rate": 0.00019791122579397789,
      "loss": 0.7246,
      "step": 1043
    },
    {
      "epoch": 0.09334763948497854,
      "grad_norm": 0.13064653704467974,
      "learning_rate": 0.00019790533350711899,
      "loss": 0.7054,
      "step": 1044
    },
    {
      "epoch": 0.09343705293276108,
      "grad_norm": 0.12264143977769917,
      "learning_rate": 0.000197899433009058,
      "loss": 0.6953,
      "step": 1045
    },
    {
      "epoch": 0.09352646638054363,
      "grad_norm": 0.14801673672514482,
      "learning_rate": 0.0001978935243002898,
      "loss": 0.7543,
      "step": 1046
    },
    {
      "epoch": 0.09361587982832618,
      "grad_norm": 0.1408678077874777,
      "learning_rate": 0.00019788760738130993,
      "loss": 0.7507,
      "step": 1047
    },
    {
      "epoch": 0.09370529327610873,
      "grad_norm": 0.12135639293745253,
      "learning_rate": 0.00019788168225261469,
      "loss": 0.7102,
      "step": 1048
    },
    {
      "epoch": 0.09379470672389127,
      "grad_norm": 0.13297424902423835,
      "learning_rate": 0.00019787574891470095,
      "loss": 0.7412,
      "step": 1049
    },
    {
      "epoch": 0.09388412017167382,
      "grad_norm": 0.1316296797585903,
      "learning_rate": 0.00019786980736806635,
      "loss": 0.6987,
      "step": 1050
    },
    {
      "epoch": 0.09397353361945637,
      "grad_norm": 0.1398767038756093,
      "learning_rate": 0.0001978638576132092,
      "loss": 0.7207,
      "step": 1051
    },
    {
      "epoch": 0.09406294706723892,
      "grad_norm": 0.13951375575912228,
      "learning_rate": 0.00019785789965062848,
      "loss": 0.7658,
      "step": 1052
    },
    {
      "epoch": 0.09415236051502146,
      "grad_norm": 0.12992326829155554,
      "learning_rate": 0.00019785193348082394,
      "loss": 0.765,
      "step": 1053
    },
    {
      "epoch": 0.094241773962804,
      "grad_norm": 0.13803108386615887,
      "learning_rate": 0.0001978459591042959,
      "loss": 0.7291,
      "step": 1054
    },
    {
      "epoch": 0.09433118741058655,
      "grad_norm": 0.13374427139410652,
      "learning_rate": 0.00019783997652154543,
      "loss": 0.7004,
      "step": 1055
    },
    {
      "epoch": 0.0944206008583691,
      "grad_norm": 0.12569021709387077,
      "learning_rate": 0.00019783398573307428,
      "loss": 0.7073,
      "step": 1056
    },
    {
      "epoch": 0.09451001430615165,
      "grad_norm": 0.1476253134066524,
      "learning_rate": 0.00019782798673938492,
      "loss": 0.7282,
      "step": 1057
    },
    {
      "epoch": 0.09459942775393419,
      "grad_norm": 0.13335903419746536,
      "learning_rate": 0.00019782197954098046,
      "loss": 0.7466,
      "step": 1058
    },
    {
      "epoch": 0.09468884120171674,
      "grad_norm": 0.1430894407630406,
      "learning_rate": 0.0001978159641383647,
      "loss": 0.758,
      "step": 1059
    },
    {
      "epoch": 0.09477825464949928,
      "grad_norm": 0.14136667558368,
      "learning_rate": 0.00019780994053204216,
      "loss": 0.7383,
      "step": 1060
    },
    {
      "epoch": 0.09486766809728184,
      "grad_norm": 0.1382441670943065,
      "learning_rate": 0.00019780390872251803,
      "loss": 0.7208,
      "step": 1061
    },
    {
      "epoch": 0.09495708154506438,
      "grad_norm": 0.18732257907338057,
      "learning_rate": 0.00019779786871029819,
      "loss": 0.3592,
      "step": 1062
    },
    {
      "epoch": 0.09504649499284692,
      "grad_norm": 0.14292967643566115,
      "learning_rate": 0.00019779182049588925,
      "loss": 0.7325,
      "step": 1063
    },
    {
      "epoch": 0.09513590844062947,
      "grad_norm": 0.14245940367115992,
      "learning_rate": 0.0001977857640797984,
      "loss": 0.6927,
      "step": 1064
    },
    {
      "epoch": 0.09522532188841201,
      "grad_norm": 0.12717372511226646,
      "learning_rate": 0.0001977796994625336,
      "loss": 0.7188,
      "step": 1065
    },
    {
      "epoch": 0.09531473533619457,
      "grad_norm": 0.1409756650270344,
      "learning_rate": 0.0001977736266446035,
      "loss": 0.743,
      "step": 1066
    },
    {
      "epoch": 0.09540414878397711,
      "grad_norm": 0.12685390302469185,
      "learning_rate": 0.00019776754562651742,
      "loss": 0.7195,
      "step": 1067
    },
    {
      "epoch": 0.09549356223175966,
      "grad_norm": 0.1341929519515029,
      "learning_rate": 0.00019776145640878538,
      "loss": 0.7145,
      "step": 1068
    },
    {
      "epoch": 0.0955829756795422,
      "grad_norm": 0.11209463729130213,
      "learning_rate": 0.000197755358991918,
      "loss": 0.7014,
      "step": 1069
    },
    {
      "epoch": 0.09567238912732474,
      "grad_norm": 0.11781086327156153,
      "learning_rate": 0.00019774925337642677,
      "loss": 0.7336,
      "step": 1070
    },
    {
      "epoch": 0.0957618025751073,
      "grad_norm": 0.13351160064595616,
      "learning_rate": 0.0001977431395628237,
      "loss": 0.7518,
      "step": 1071
    },
    {
      "epoch": 0.09585121602288985,
      "grad_norm": 0.135110391204578,
      "learning_rate": 0.00019773701755162158,
      "loss": 0.7021,
      "step": 1072
    },
    {
      "epoch": 0.09594062947067239,
      "grad_norm": 0.13872853852713968,
      "learning_rate": 0.0001977308873433338,
      "loss": 0.7087,
      "step": 1073
    },
    {
      "epoch": 0.09603004291845493,
      "grad_norm": 0.14104790614925056,
      "learning_rate": 0.00019772474893847456,
      "loss": 0.7438,
      "step": 1074
    },
    {
      "epoch": 0.09611945636623748,
      "grad_norm": 0.14022156130958532,
      "learning_rate": 0.00019771860233755862,
      "loss": 0.7235,
      "step": 1075
    },
    {
      "epoch": 0.09620886981402003,
      "grad_norm": 0.13765536050299498,
      "learning_rate": 0.0001977124475411015,
      "loss": 0.6908,
      "step": 1076
    },
    {
      "epoch": 0.09629828326180258,
      "grad_norm": 0.14687903925283427,
      "learning_rate": 0.00019770628454961946,
      "loss": 0.6426,
      "step": 1077
    },
    {
      "epoch": 0.09638769670958512,
      "grad_norm": 0.14388502793752278,
      "learning_rate": 0.00019770011336362928,
      "loss": 0.7158,
      "step": 1078
    },
    {
      "epoch": 0.09647711015736767,
      "grad_norm": 0.13464188774446098,
      "learning_rate": 0.00019769393398364865,
      "loss": 0.7387,
      "step": 1079
    },
    {
      "epoch": 0.09656652360515021,
      "grad_norm": 0.15743978827420393,
      "learning_rate": 0.0001976877464101957,
      "loss": 0.7656,
      "step": 1080
    },
    {
      "epoch": 0.09665593705293277,
      "grad_norm": 0.14062619086022954,
      "learning_rate": 0.00019768155064378947,
      "loss": 0.6978,
      "step": 1081
    },
    {
      "epoch": 0.09674535050071531,
      "grad_norm": 0.14860648553494363,
      "learning_rate": 0.00019767534668494954,
      "loss": 0.7577,
      "step": 1082
    },
    {
      "epoch": 0.09683476394849785,
      "grad_norm": 0.1279605711489036,
      "learning_rate": 0.00019766913453419624,
      "loss": 0.7218,
      "step": 1083
    },
    {
      "epoch": 0.0969241773962804,
      "grad_norm": 0.12031140430073027,
      "learning_rate": 0.0001976629141920506,
      "loss": 0.6954,
      "step": 1084
    },
    {
      "epoch": 0.09701359084406294,
      "grad_norm": 0.12649780924815074,
      "learning_rate": 0.0001976566856590343,
      "loss": 0.7152,
      "step": 1085
    },
    {
      "epoch": 0.0971030042918455,
      "grad_norm": 0.12258377861393306,
      "learning_rate": 0.00019765044893566968,
      "loss": 0.6858,
      "step": 1086
    },
    {
      "epoch": 0.09719241773962804,
      "grad_norm": 0.11727103292649398,
      "learning_rate": 0.00019764420402247987,
      "loss": 0.682,
      "step": 1087
    },
    {
      "epoch": 0.09728183118741059,
      "grad_norm": 0.14307972551808484,
      "learning_rate": 0.00019763795091998858,
      "loss": 0.7339,
      "step": 1088
    },
    {
      "epoch": 0.09737124463519313,
      "grad_norm": 0.17614047967491572,
      "learning_rate": 0.00019763168962872026,
      "loss": 0.3522,
      "step": 1089
    },
    {
      "epoch": 0.09746065808297567,
      "grad_norm": 0.1437330999251322,
      "learning_rate": 0.00019762542014920004,
      "loss": 0.7431,
      "step": 1090
    },
    {
      "epoch": 0.09755007153075823,
      "grad_norm": 0.133134352591035,
      "learning_rate": 0.00019761914248195373,
      "loss": 0.7311,
      "step": 1091
    },
    {
      "epoch": 0.09763948497854077,
      "grad_norm": 0.1209758679474751,
      "learning_rate": 0.00019761285662750787,
      "loss": 0.6891,
      "step": 1092
    },
    {
      "epoch": 0.09772889842632332,
      "grad_norm": 0.12861116265560812,
      "learning_rate": 0.00019760656258638958,
      "loss": 0.7306,
      "step": 1093
    },
    {
      "epoch": 0.09781831187410586,
      "grad_norm": 0.13088946296555717,
      "learning_rate": 0.0001976002603591268,
      "loss": 0.7214,
      "step": 1094
    },
    {
      "epoch": 0.0979077253218884,
      "grad_norm": 0.1514821673573704,
      "learning_rate": 0.000197593949946248,
      "loss": 0.743,
      "step": 1095
    },
    {
      "epoch": 0.09799713876967096,
      "grad_norm": 0.13366792938316327,
      "learning_rate": 0.0001975876313482825,
      "loss": 0.7192,
      "step": 1096
    },
    {
      "epoch": 0.09808655221745351,
      "grad_norm": 0.15223258495817985,
      "learning_rate": 0.00019758130456576023,
      "loss": 0.7406,
      "step": 1097
    },
    {
      "epoch": 0.09817596566523605,
      "grad_norm": 0.14765542445021695,
      "learning_rate": 0.00019757496959921177,
      "loss": 0.7336,
      "step": 1098
    },
    {
      "epoch": 0.0982653791130186,
      "grad_norm": 0.1478212724452826,
      "learning_rate": 0.00019756862644916846,
      "loss": 0.7311,
      "step": 1099
    },
    {
      "epoch": 0.09835479256080114,
      "grad_norm": 0.12793010803823537,
      "learning_rate": 0.00019756227511616233,
      "loss": 0.6799,
      "step": 1100
    },
    {
      "epoch": 0.0984442060085837,
      "grad_norm": 0.12758223130998222,
      "learning_rate": 0.00019755591560072596,
      "loss": 0.7141,
      "step": 1101
    },
    {
      "epoch": 0.09853361945636624,
      "grad_norm": 0.14353535057373235,
      "learning_rate": 0.00019754954790339278,
      "loss": 0.7174,
      "step": 1102
    },
    {
      "epoch": 0.09862303290414878,
      "grad_norm": 0.12224433092703864,
      "learning_rate": 0.00019754317202469682,
      "loss": 0.6739,
      "step": 1103
    },
    {
      "epoch": 0.09871244635193133,
      "grad_norm": 0.13335424232538312,
      "learning_rate": 0.00019753678796517282,
      "loss": 0.7365,
      "step": 1104
    },
    {
      "epoch": 0.09880185979971387,
      "grad_norm": 0.11654788739720678,
      "learning_rate": 0.0001975303957253562,
      "loss": 0.6881,
      "step": 1105
    },
    {
      "epoch": 0.09889127324749643,
      "grad_norm": 0.15608760116079706,
      "learning_rate": 0.00019752399530578312,
      "loss": 0.7376,
      "step": 1106
    },
    {
      "epoch": 0.09898068669527897,
      "grad_norm": 0.14203437608734956,
      "learning_rate": 0.00019751758670699028,
      "loss": 0.7142,
      "step": 1107
    },
    {
      "epoch": 0.09907010014306152,
      "grad_norm": 0.13740152572976472,
      "learning_rate": 0.00019751116992951527,
      "loss": 0.7074,
      "step": 1108
    },
    {
      "epoch": 0.09915951359084406,
      "grad_norm": 0.13201954886249642,
      "learning_rate": 0.00019750474497389614,
      "loss": 0.711,
      "step": 1109
    },
    {
      "epoch": 0.0992489270386266,
      "grad_norm": 0.1400850577145821,
      "learning_rate": 0.00019749831184067185,
      "loss": 0.7099,
      "step": 1110
    },
    {
      "epoch": 0.09933834048640916,
      "grad_norm": 0.15115025417771763,
      "learning_rate": 0.00019749187053038188,
      "loss": 0.7593,
      "step": 1111
    },
    {
      "epoch": 0.0994277539341917,
      "grad_norm": 0.1436186933663997,
      "learning_rate": 0.00019748542104356648,
      "loss": 0.7769,
      "step": 1112
    },
    {
      "epoch": 0.09951716738197425,
      "grad_norm": 0.13603415713080058,
      "learning_rate": 0.00019747896338076655,
      "loss": 0.7003,
      "step": 1113
    },
    {
      "epoch": 0.09960658082975679,
      "grad_norm": 0.1286979075547969,
      "learning_rate": 0.00019747249754252367,
      "loss": 0.7243,
      "step": 1114
    },
    {
      "epoch": 0.09969599427753935,
      "grad_norm": 0.137741926862715,
      "learning_rate": 0.00019746602352938014,
      "loss": 0.7482,
      "step": 1115
    },
    {
      "epoch": 0.09978540772532189,
      "grad_norm": 0.14598954625863197,
      "learning_rate": 0.00019745954134187894,
      "loss": 0.7259,
      "step": 1116
    },
    {
      "epoch": 0.09987482117310444,
      "grad_norm": 0.14448340487210382,
      "learning_rate": 0.00019745305098056368,
      "loss": 0.7197,
      "step": 1117
    },
    {
      "epoch": 0.09996423462088698,
      "grad_norm": 0.1390523409113836,
      "learning_rate": 0.00019744655244597877,
      "loss": 0.7414,
      "step": 1118
    },
    {
      "epoch": 0.10005364806866952,
      "grad_norm": 0.1405309042291534,
      "learning_rate": 0.00019744004573866915,
      "loss": 0.7686,
      "step": 1119
    },
    {
      "epoch": 0.10014306151645208,
      "grad_norm": 0.1287157417619165,
      "learning_rate": 0.0001974335308591806,
      "loss": 0.7235,
      "step": 1120
    },
    {
      "epoch": 0.10023247496423462,
      "grad_norm": 0.1288993921357819,
      "learning_rate": 0.00019742700780805948,
      "loss": 0.7377,
      "step": 1121
    },
    {
      "epoch": 0.10032188841201717,
      "grad_norm": 0.12456836401502372,
      "learning_rate": 0.00019742047658585286,
      "loss": 0.6847,
      "step": 1122
    },
    {
      "epoch": 0.10041130185979971,
      "grad_norm": 0.14507992072414305,
      "learning_rate": 0.00019741393719310853,
      "loss": 0.3185,
      "step": 1123
    },
    {
      "epoch": 0.10050071530758226,
      "grad_norm": 0.14326511505047765,
      "learning_rate": 0.00019740738963037495,
      "loss": 0.6904,
      "step": 1124
    },
    {
      "epoch": 0.10059012875536481,
      "grad_norm": 0.1558849331811317,
      "learning_rate": 0.00019740083389820122,
      "loss": 0.7437,
      "step": 1125
    },
    {
      "epoch": 0.10067954220314736,
      "grad_norm": 0.1478620247586416,
      "learning_rate": 0.0001973942699971372,
      "loss": 0.6994,
      "step": 1126
    },
    {
      "epoch": 0.1007689556509299,
      "grad_norm": 0.15697781650437567,
      "learning_rate": 0.00019738769792773336,
      "loss": 0.7341,
      "step": 1127
    },
    {
      "epoch": 0.10085836909871244,
      "grad_norm": 0.12456943503914274,
      "learning_rate": 0.00019738111769054093,
      "loss": 0.7083,
      "step": 1128
    },
    {
      "epoch": 0.10094778254649499,
      "grad_norm": 0.12190534471990888,
      "learning_rate": 0.00019737452928611176,
      "loss": 0.6703,
      "step": 1129
    },
    {
      "epoch": 0.10103719599427755,
      "grad_norm": 0.12171267195182205,
      "learning_rate": 0.00019736793271499844,
      "loss": 0.6986,
      "step": 1130
    },
    {
      "epoch": 0.10112660944206009,
      "grad_norm": 0.13432765898122637,
      "learning_rate": 0.00019736132797775415,
      "loss": 0.7059,
      "step": 1131
    },
    {
      "epoch": 0.10121602288984263,
      "grad_norm": 0.1401385879055478,
      "learning_rate": 0.00019735471507493287,
      "loss": 0.7418,
      "step": 1132
    },
    {
      "epoch": 0.10130543633762518,
      "grad_norm": 0.1333529532352271,
      "learning_rate": 0.00019734809400708922,
      "loss": 0.7033,
      "step": 1133
    },
    {
      "epoch": 0.10139484978540772,
      "grad_norm": 0.12657965586788478,
      "learning_rate": 0.0001973414647747785,
      "loss": 0.758,
      "step": 1134
    },
    {
      "epoch": 0.10148426323319028,
      "grad_norm": 0.13217230292461812,
      "learning_rate": 0.0001973348273785567,
      "loss": 0.7387,
      "step": 1135
    },
    {
      "epoch": 0.10157367668097282,
      "grad_norm": 0.1224891003877939,
      "learning_rate": 0.00019732818181898045,
      "loss": 0.7136,
      "step": 1136
    },
    {
      "epoch": 0.10166309012875537,
      "grad_norm": 0.1308551698210265,
      "learning_rate": 0.00019732152809660716,
      "loss": 0.7309,
      "step": 1137
    },
    {
      "epoch": 0.10175250357653791,
      "grad_norm": 0.1389304126879816,
      "learning_rate": 0.0001973148662119948,
      "loss": 0.7392,
      "step": 1138
    },
    {
      "epoch": 0.10184191702432045,
      "grad_norm": 0.11614476473920268,
      "learning_rate": 0.0001973081961657022,
      "loss": 0.723,
      "step": 1139
    },
    {
      "epoch": 0.10193133047210301,
      "grad_norm": 0.11233961282934592,
      "learning_rate": 0.00019730151795828866,
      "loss": 0.6725,
      "step": 1140
    },
    {
      "epoch": 0.10202074391988555,
      "grad_norm": 0.13211659850335478,
      "learning_rate": 0.0001972948315903143,
      "loss": 0.7147,
      "step": 1141
    },
    {
      "epoch": 0.1021101573676681,
      "grad_norm": 0.14242145686309757,
      "learning_rate": 0.00019728813706233997,
      "loss": 0.7236,
      "step": 1142
    },
    {
      "epoch": 0.10219957081545064,
      "grad_norm": 0.15308669798979282,
      "learning_rate": 0.00019728143437492706,
      "loss": 0.7228,
      "step": 1143
    },
    {
      "epoch": 0.10228898426323318,
      "grad_norm": 0.12995807897609685,
      "learning_rate": 0.00019727472352863774,
      "loss": 0.7253,
      "step": 1144
    },
    {
      "epoch": 0.10237839771101574,
      "grad_norm": 0.1304773793202584,
      "learning_rate": 0.00019726800452403483,
      "loss": 0.6664,
      "step": 1145
    },
    {
      "epoch": 0.10246781115879829,
      "grad_norm": 0.1290421416592338,
      "learning_rate": 0.00019726127736168186,
      "loss": 0.6819,
      "step": 1146
    },
    {
      "epoch": 0.10255722460658083,
      "grad_norm": 0.14491719140303533,
      "learning_rate": 0.000197254542042143,
      "loss": 0.7297,
      "step": 1147
    },
    {
      "epoch": 0.10264663805436337,
      "grad_norm": 0.14446402231445507,
      "learning_rate": 0.00019724779856598317,
      "loss": 0.7348,
      "step": 1148
    },
    {
      "epoch": 0.10273605150214592,
      "grad_norm": 0.14405619177435652,
      "learning_rate": 0.0001972410469337679,
      "loss": 0.7085,
      "step": 1149
    },
    {
      "epoch": 0.10282546494992847,
      "grad_norm": 0.10882550081835761,
      "learning_rate": 0.00019723428714606348,
      "loss": 0.7171,
      "step": 1150
    },
    {
      "epoch": 0.10291487839771102,
      "grad_norm": 0.1411153840370423,
      "learning_rate": 0.0001972275192034368,
      "loss": 0.6993,
      "step": 1151
    },
    {
      "epoch": 0.10300429184549356,
      "grad_norm": 0.1423583786757254,
      "learning_rate": 0.00019722074310645553,
      "loss": 0.7431,
      "step": 1152
    },
    {
      "epoch": 0.1030937052932761,
      "grad_norm": 0.13780656297639943,
      "learning_rate": 0.00019721395885568795,
      "loss": 0.7274,
      "step": 1153
    },
    {
      "epoch": 0.10318311874105865,
      "grad_norm": 0.1426709587294201,
      "learning_rate": 0.00019720716645170303,
      "loss": 0.7352,
      "step": 1154
    },
    {
      "epoch": 0.1032725321888412,
      "grad_norm": 0.12187720542971602,
      "learning_rate": 0.00019720036589507048,
      "loss": 0.7109,
      "step": 1155
    },
    {
      "epoch": 0.10336194563662375,
      "grad_norm": 0.12739576897881957,
      "learning_rate": 0.0001971935571863606,
      "loss": 0.6676,
      "step": 1156
    },
    {
      "epoch": 0.1034513590844063,
      "grad_norm": 0.1280831594501213,
      "learning_rate": 0.00019718674032614448,
      "loss": 0.7517,
      "step": 1157
    },
    {
      "epoch": 0.10354077253218884,
      "grad_norm": 0.12557470862850326,
      "learning_rate": 0.0001971799153149938,
      "loss": 0.7056,
      "step": 1158
    },
    {
      "epoch": 0.10363018597997138,
      "grad_norm": 0.1492378735303767,
      "learning_rate": 0.00019717308215348102,
      "loss": 0.7403,
      "step": 1159
    },
    {
      "epoch": 0.10371959942775394,
      "grad_norm": 0.13250133703099706,
      "learning_rate": 0.00019716624084217918,
      "loss": 0.6824,
      "step": 1160
    },
    {
      "epoch": 0.10380901287553648,
      "grad_norm": 0.1387576601361292,
      "learning_rate": 0.00019715939138166205,
      "loss": 0.7183,
      "step": 1161
    },
    {
      "epoch": 0.10389842632331903,
      "grad_norm": 0.14258479731521326,
      "learning_rate": 0.00019715253377250411,
      "loss": 0.6889,
      "step": 1162
    },
    {
      "epoch": 0.10398783977110157,
      "grad_norm": 0.13058085417203572,
      "learning_rate": 0.0001971456680152805,
      "loss": 0.7042,
      "step": 1163
    },
    {
      "epoch": 0.10407725321888411,
      "grad_norm": 0.12558084846126602,
      "learning_rate": 0.00019713879411056704,
      "loss": 0.6961,
      "step": 1164
    },
    {
      "epoch": 0.10416666666666667,
      "grad_norm": 0.14007126710034656,
      "learning_rate": 0.00019713191205894025,
      "loss": 0.7467,
      "step": 1165
    },
    {
      "epoch": 0.10425608011444921,
      "grad_norm": 0.1258341537666909,
      "learning_rate": 0.00019712502186097726,
      "loss": 0.7291,
      "step": 1166
    },
    {
      "epoch": 0.10434549356223176,
      "grad_norm": 0.12817889666141827,
      "learning_rate": 0.00019711812351725603,
      "loss": 0.6922,
      "step": 1167
    },
    {
      "epoch": 0.1044349070100143,
      "grad_norm": 0.12452388534690587,
      "learning_rate": 0.00019711121702835504,
      "loss": 0.7116,
      "step": 1168
    },
    {
      "epoch": 0.10452432045779685,
      "grad_norm": 0.14354580665193092,
      "learning_rate": 0.00019710430239485354,
      "loss": 0.7285,
      "step": 1169
    },
    {
      "epoch": 0.1046137339055794,
      "grad_norm": 0.13167873515874198,
      "learning_rate": 0.0001970973796173315,
      "loss": 0.7342,
      "step": 1170
    },
    {
      "epoch": 0.10470314735336195,
      "grad_norm": 0.1210381085880435,
      "learning_rate": 0.00019709044869636947,
      "loss": 0.6943,
      "step": 1171
    },
    {
      "epoch": 0.10479256080114449,
      "grad_norm": 0.13191086956710582,
      "learning_rate": 0.00019708350963254878,
      "loss": 0.74,
      "step": 1172
    },
    {
      "epoch": 0.10488197424892703,
      "grad_norm": 0.13435270865515583,
      "learning_rate": 0.0001970765624264514,
      "loss": 0.7415,
      "step": 1173
    },
    {
      "epoch": 0.10497138769670958,
      "grad_norm": 0.1440182654702005,
      "learning_rate": 0.00019706960707865996,
      "loss": 0.6916,
      "step": 1174
    },
    {
      "epoch": 0.10506080114449214,
      "grad_norm": 0.14600282334082465,
      "learning_rate": 0.00019706264358975779,
      "loss": 0.7371,
      "step": 1175
    },
    {
      "epoch": 0.10515021459227468,
      "grad_norm": 0.12811549220695376,
      "learning_rate": 0.00019705567196032892,
      "loss": 0.6838,
      "step": 1176
    },
    {
      "epoch": 0.10523962804005722,
      "grad_norm": 0.12520096865787037,
      "learning_rate": 0.0001970486921909581,
      "loss": 0.7037,
      "step": 1177
    },
    {
      "epoch": 0.10532904148783977,
      "grad_norm": 0.1314606327772333,
      "learning_rate": 0.0001970417042822306,
      "loss": 0.7585,
      "step": 1178
    },
    {
      "epoch": 0.10541845493562232,
      "grad_norm": 0.129420214273468,
      "learning_rate": 0.00019703470823473262,
      "loss": 0.6983,
      "step": 1179
    },
    {
      "epoch": 0.10550786838340487,
      "grad_norm": 0.13562759315945833,
      "learning_rate": 0.0001970277040490508,
      "loss": 0.7096,
      "step": 1180
    },
    {
      "epoch": 0.10559728183118741,
      "grad_norm": 0.13214496924574365,
      "learning_rate": 0.0001970206917257727,
      "loss": 0.7134,
      "step": 1181
    },
    {
      "epoch": 0.10568669527896996,
      "grad_norm": 0.12914612237126508,
      "learning_rate": 0.0001970136712654863,
      "loss": 0.7107,
      "step": 1182
    },
    {
      "epoch": 0.1057761087267525,
      "grad_norm": 0.15303636940672952,
      "learning_rate": 0.00019700664266878045,
      "loss": 0.7324,
      "step": 1183
    },
    {
      "epoch": 0.10586552217453506,
      "grad_norm": 0.1352265046895876,
      "learning_rate": 0.00019699960593624462,
      "loss": 0.7086,
      "step": 1184
    },
    {
      "epoch": 0.1059549356223176,
      "grad_norm": 0.15662028784864748,
      "learning_rate": 0.00019699256106846903,
      "loss": 0.7216,
      "step": 1185
    },
    {
      "epoch": 0.10604434907010014,
      "grad_norm": 0.1299646083305036,
      "learning_rate": 0.00019698550806604445,
      "loss": 0.732,
      "step": 1186
    },
    {
      "epoch": 0.10613376251788269,
      "grad_norm": 0.1345885915235323,
      "learning_rate": 0.00019697844692956245,
      "loss": 0.7119,
      "step": 1187
    },
    {
      "epoch": 0.10622317596566523,
      "grad_norm": 0.1367700216604452,
      "learning_rate": 0.0001969713776596152,
      "loss": 0.7401,
      "step": 1188
    },
    {
      "epoch": 0.10631258941344779,
      "grad_norm": 0.16020809676883205,
      "learning_rate": 0.00019696430025679566,
      "loss": 0.7542,
      "step": 1189
    },
    {
      "epoch": 0.10640200286123033,
      "grad_norm": 0.12568253998425824,
      "learning_rate": 0.00019695721472169734,
      "loss": 0.6982,
      "step": 1190
    },
    {
      "epoch": 0.10649141630901288,
      "grad_norm": 0.1300751143267491,
      "learning_rate": 0.0001969501210549145,
      "loss": 0.6947,
      "step": 1191
    },
    {
      "epoch": 0.10658082975679542,
      "grad_norm": 0.12214571877438248,
      "learning_rate": 0.0001969430192570421,
      "loss": 0.7349,
      "step": 1192
    },
    {
      "epoch": 0.10667024320457796,
      "grad_norm": 0.15854657300645192,
      "learning_rate": 0.00019693590932867578,
      "loss": 0.7048,
      "step": 1193
    },
    {
      "epoch": 0.10675965665236052,
      "grad_norm": 0.13974302305551597,
      "learning_rate": 0.0001969287912704118,
      "loss": 0.677,
      "step": 1194
    },
    {
      "epoch": 0.10684907010014306,
      "grad_norm": 0.13743970547617654,
      "learning_rate": 0.00019692166508284716,
      "loss": 0.7695,
      "step": 1195
    },
    {
      "epoch": 0.10693848354792561,
      "grad_norm": 0.15339541686953947,
      "learning_rate": 0.0001969145307665795,
      "loss": 0.7767,
      "step": 1196
    },
    {
      "epoch": 0.10702789699570815,
      "grad_norm": 0.1442048083705041,
      "learning_rate": 0.0001969073883222072,
      "loss": 0.7675,
      "step": 1197
    },
    {
      "epoch": 0.1071173104434907,
      "grad_norm": 0.13564145357080748,
      "learning_rate": 0.00019690023775032929,
      "loss": 0.7123,
      "step": 1198
    },
    {
      "epoch": 0.10720672389127325,
      "grad_norm": 0.14246366620428252,
      "learning_rate": 0.00019689307905154543,
      "loss": 0.7229,
      "step": 1199
    },
    {
      "epoch": 0.1072961373390558,
      "grad_norm": 0.14544984843949924,
      "learning_rate": 0.00019688591222645607,
      "loss": 0.7348,
      "step": 1200
    },
    {
      "epoch": 0.10738555078683834,
      "grad_norm": 0.13560973068459958,
      "learning_rate": 0.00019687873727566226,
      "loss": 0.7247,
      "step": 1201
    },
    {
      "epoch": 0.10747496423462088,
      "grad_norm": 0.1645851006413508,
      "learning_rate": 0.00019687155419976574,
      "loss": 0.7501,
      "step": 1202
    },
    {
      "epoch": 0.10756437768240343,
      "grad_norm": 0.1457893610781465,
      "learning_rate": 0.000196864362999369,
      "loss": 0.6952,
      "step": 1203
    },
    {
      "epoch": 0.10765379113018599,
      "grad_norm": 0.14115444077600092,
      "learning_rate": 0.00019685716367507508,
      "loss": 0.7157,
      "step": 1204
    },
    {
      "epoch": 0.10774320457796853,
      "grad_norm": 0.12764865904357722,
      "learning_rate": 0.00019684995622748784,
      "loss": 0.7177,
      "step": 1205
    },
    {
      "epoch": 0.10783261802575107,
      "grad_norm": 0.15089321961004465,
      "learning_rate": 0.00019684274065721172,
      "loss": 0.7408,
      "step": 1206
    },
    {
      "epoch": 0.10792203147353362,
      "grad_norm": 0.12430430169740643,
      "learning_rate": 0.00019683551696485192,
      "loss": 0.6701,
      "step": 1207
    },
    {
      "epoch": 0.10801144492131616,
      "grad_norm": 0.1523521638687299,
      "learning_rate": 0.00019682828515101423,
      "loss": 0.7785,
      "step": 1208
    },
    {
      "epoch": 0.10810085836909872,
      "grad_norm": 0.13012291976330992,
      "learning_rate": 0.0001968210452163052,
      "loss": 0.6885,
      "step": 1209
    },
    {
      "epoch": 0.10819027181688126,
      "grad_norm": 0.13738193718865727,
      "learning_rate": 0.00019681379716133206,
      "loss": 0.7007,
      "step": 1210
    },
    {
      "epoch": 0.1082796852646638,
      "grad_norm": 0.1374494257577089,
      "learning_rate": 0.00019680654098670267,
      "loss": 0.686,
      "step": 1211
    },
    {
      "epoch": 0.10836909871244635,
      "grad_norm": 0.15024294123732676,
      "learning_rate": 0.0001967992766930256,
      "loss": 0.7814,
      "step": 1212
    },
    {
      "epoch": 0.10845851216022889,
      "grad_norm": 0.13545201871531162,
      "learning_rate": 0.0001967920042809101,
      "loss": 0.7459,
      "step": 1213
    },
    {
      "epoch": 0.10854792560801145,
      "grad_norm": 0.12308822797153604,
      "learning_rate": 0.0001967847237509661,
      "loss": 0.743,
      "step": 1214
    },
    {
      "epoch": 0.108637339055794,
      "grad_norm": 0.13485709094815795,
      "learning_rate": 0.00019677743510380417,
      "loss": 0.7349,
      "step": 1215
    },
    {
      "epoch": 0.10872675250357654,
      "grad_norm": 0.13790959169256822,
      "learning_rate": 0.0001967701383400357,
      "loss": 0.6967,
      "step": 1216
    },
    {
      "epoch": 0.10881616595135908,
      "grad_norm": 0.12207713593369407,
      "learning_rate": 0.00019676283346027254,
      "loss": 0.7089,
      "step": 1217
    },
    {
      "epoch": 0.10890557939914162,
      "grad_norm": 0.12823955830252753,
      "learning_rate": 0.0001967555204651274,
      "loss": 0.684,
      "step": 1218
    },
    {
      "epoch": 0.10899499284692418,
      "grad_norm": 0.14666881096146248,
      "learning_rate": 0.00019674819935521366,
      "loss": 0.7435,
      "step": 1219
    },
    {
      "epoch": 0.10908440629470673,
      "grad_norm": 0.15546076295370898,
      "learning_rate": 0.0001967408701311452,
      "loss": 0.7447,
      "step": 1220
    },
    {
      "epoch": 0.10917381974248927,
      "grad_norm": 0.13650633036554782,
      "learning_rate": 0.00019673353279353684,
      "loss": 0.7371,
      "step": 1221
    },
    {
      "epoch": 0.10926323319027181,
      "grad_norm": 0.137945222703032,
      "learning_rate": 0.00019672618734300392,
      "loss": 0.6967,
      "step": 1222
    },
    {
      "epoch": 0.10935264663805436,
      "grad_norm": 0.14260824626905413,
      "learning_rate": 0.0001967188337801625,
      "loss": 0.7188,
      "step": 1223
    },
    {
      "epoch": 0.10944206008583691,
      "grad_norm": 0.12423875056701413,
      "learning_rate": 0.00019671147210562927,
      "loss": 0.6888,
      "step": 1224
    },
    {
      "epoch": 0.10953147353361946,
      "grad_norm": 0.14931692627459317,
      "learning_rate": 0.00019670410232002164,
      "loss": 0.7559,
      "step": 1225
    },
    {
      "epoch": 0.109620886981402,
      "grad_norm": 0.1359024196287891,
      "learning_rate": 0.00019669672442395778,
      "loss": 0.6958,
      "step": 1226
    },
    {
      "epoch": 0.10971030042918455,
      "grad_norm": 0.11345595159030455,
      "learning_rate": 0.00019668933841805644,
      "loss": 0.6849,
      "step": 1227
    },
    {
      "epoch": 0.10979971387696709,
      "grad_norm": 0.1239018742491009,
      "learning_rate": 0.00019668194430293702,
      "loss": 0.3935,
      "step": 1228
    },
    {
      "epoch": 0.10988912732474965,
      "grad_norm": 0.1451210889210327,
      "learning_rate": 0.0001966745420792197,
      "loss": 0.7667,
      "step": 1229
    },
    {
      "epoch": 0.10997854077253219,
      "grad_norm": 0.12605634533749385,
      "learning_rate": 0.0001966671317475253,
      "loss": 0.6719,
      "step": 1230
    },
    {
      "epoch": 0.11006795422031473,
      "grad_norm": 0.13902116199669215,
      "learning_rate": 0.00019665971330847532,
      "loss": 0.7281,
      "step": 1231
    },
    {
      "epoch": 0.11015736766809728,
      "grad_norm": 0.14988014044356548,
      "learning_rate": 0.0001966522867626919,
      "loss": 0.7172,
      "step": 1232
    },
    {
      "epoch": 0.11024678111587982,
      "grad_norm": 0.16226771397079337,
      "learning_rate": 0.00019664485211079793,
      "loss": 0.7877,
      "step": 1233
    },
    {
      "epoch": 0.11033619456366238,
      "grad_norm": 0.13545847814221315,
      "learning_rate": 0.0001966374093534169,
      "loss": 0.7061,
      "step": 1234
    },
    {
      "epoch": 0.11042560801144492,
      "grad_norm": 0.13907929386608064,
      "learning_rate": 0.00019662995849117307,
      "loss": 0.7081,
      "step": 1235
    },
    {
      "epoch": 0.11051502145922747,
      "grad_norm": 0.12640497956505342,
      "learning_rate": 0.00019662249952469133,
      "loss": 0.6903,
      "step": 1236
    },
    {
      "epoch": 0.11060443490701001,
      "grad_norm": 0.13641761608879646,
      "learning_rate": 0.00019661503245459722,
      "loss": 0.7102,
      "step": 1237
    },
    {
      "epoch": 0.11069384835479255,
      "grad_norm": 0.14688726559054302,
      "learning_rate": 0.00019660755728151706,
      "loss": 0.7762,
      "step": 1238
    },
    {
      "epoch": 0.11078326180257511,
      "grad_norm": 0.13705079374135515,
      "learning_rate": 0.00019660007400607772,
      "loss": 0.7415,
      "step": 1239
    },
    {
      "epoch": 0.11087267525035766,
      "grad_norm": 0.11206514957202952,
      "learning_rate": 0.00019659258262890683,
      "loss": 0.7275,
      "step": 1240
    },
    {
      "epoch": 0.1109620886981402,
      "grad_norm": 0.11288079771026283,
      "learning_rate": 0.00019658508315063272,
      "loss": 0.6679,
      "step": 1241
    },
    {
      "epoch": 0.11105150214592274,
      "grad_norm": 0.13193922977556344,
      "learning_rate": 0.0001965775755718843,
      "loss": 0.7481,
      "step": 1242
    },
    {
      "epoch": 0.1111409155937053,
      "grad_norm": 0.12039128011575827,
      "learning_rate": 0.00019657005989329128,
      "loss": 0.7341,
      "step": 1243
    },
    {
      "epoch": 0.11123032904148784,
      "grad_norm": 0.1288950625132571,
      "learning_rate": 0.0001965625361154839,
      "loss": 0.7425,
      "step": 1244
    },
    {
      "epoch": 0.11131974248927039,
      "grad_norm": 0.13234731408598693,
      "learning_rate": 0.0001965550042390933,
      "loss": 0.6979,
      "step": 1245
    },
    {
      "epoch": 0.11140915593705293,
      "grad_norm": 0.12439771085618134,
      "learning_rate": 0.0001965474642647511,
      "loss": 0.7064,
      "step": 1246
    },
    {
      "epoch": 0.11149856938483547,
      "grad_norm": 0.12186874447942557,
      "learning_rate": 0.00019653991619308965,
      "loss": 0.6973,
      "step": 1247
    },
    {
      "epoch": 0.11158798283261803,
      "grad_norm": 0.1348016852469414,
      "learning_rate": 0.000196532360024742,
      "loss": 0.6918,
      "step": 1248
    },
    {
      "epoch": 0.11167739628040058,
      "grad_norm": 0.1324193705406802,
      "learning_rate": 0.00019652479576034196,
      "loss": 0.7157,
      "step": 1249
    },
    {
      "epoch": 0.11176680972818312,
      "grad_norm": 0.1366490391749904,
      "learning_rate": 0.0001965172234005238,
      "loss": 0.7074,
      "step": 1250
    },
    {
      "epoch": 0.11185622317596566,
      "grad_norm": 0.13049098170762713,
      "learning_rate": 0.00019650964294592272,
      "loss": 0.6934,
      "step": 1251
    },
    {
      "epoch": 0.11194563662374821,
      "grad_norm": 0.14817509887522956,
      "learning_rate": 0.0001965020543971744,
      "loss": 0.7472,
      "step": 1252
    },
    {
      "epoch": 0.11203505007153076,
      "grad_norm": 0.14476692645241016,
      "learning_rate": 0.0001964944577549154,
      "loss": 0.7224,
      "step": 1253
    },
    {
      "epoch": 0.11212446351931331,
      "grad_norm": 0.14858044926848787,
      "learning_rate": 0.00019648685301978271,
      "loss": 0.766,
      "step": 1254
    },
    {
      "epoch": 0.11221387696709585,
      "grad_norm": 0.1598760311435442,
      "learning_rate": 0.00019647924019241416,
      "loss": 0.7558,
      "step": 1255
    },
    {
      "epoch": 0.1123032904148784,
      "grad_norm": 0.13653203870202835,
      "learning_rate": 0.00019647161927344831,
      "loss": 0.6892,
      "step": 1256
    },
    {
      "epoch": 0.11239270386266094,
      "grad_norm": 0.12932226242753433,
      "learning_rate": 0.00019646399026352422,
      "loss": 0.6858,
      "step": 1257
    },
    {
      "epoch": 0.1124821173104435,
      "grad_norm": 0.1370688124050417,
      "learning_rate": 0.00019645635316328179,
      "loss": 0.7332,
      "step": 1258
    },
    {
      "epoch": 0.11257153075822604,
      "grad_norm": 0.1365907142151715,
      "learning_rate": 0.0001964487079733615,
      "loss": 0.7191,
      "step": 1259
    },
    {
      "epoch": 0.11266094420600858,
      "grad_norm": 0.1405611579408906,
      "learning_rate": 0.00019644105469440455,
      "loss": 0.7232,
      "step": 1260
    },
    {
      "epoch": 0.11275035765379113,
      "grad_norm": 0.1573557821626866,
      "learning_rate": 0.00019643339332705282,
      "loss": 0.7319,
      "step": 1261
    },
    {
      "epoch": 0.11283977110157367,
      "grad_norm": 0.13126702319728623,
      "learning_rate": 0.00019642572387194884,
      "loss": 0.6892,
      "step": 1262
    },
    {
      "epoch": 0.11292918454935623,
      "grad_norm": 0.15247162661720917,
      "learning_rate": 0.00019641804632973585,
      "loss": 0.7406,
      "step": 1263
    },
    {
      "epoch": 0.11301859799713877,
      "grad_norm": 0.13194261690485082,
      "learning_rate": 0.00019641036070105778,
      "loss": 0.6917,
      "step": 1264
    },
    {
      "epoch": 0.11310801144492132,
      "grad_norm": 0.15694156357007064,
      "learning_rate": 0.00019640266698655917,
      "loss": 0.6714,
      "step": 1265
    },
    {
      "epoch": 0.11319742489270386,
      "grad_norm": 0.14861573800919592,
      "learning_rate": 0.00019639496518688532,
      "loss": 0.7389,
      "step": 1266
    },
    {
      "epoch": 0.1132868383404864,
      "grad_norm": 0.12328446132638177,
      "learning_rate": 0.00019638725530268217,
      "loss": 0.6851,
      "step": 1267
    },
    {
      "epoch": 0.11337625178826896,
      "grad_norm": 0.13970243633887053,
      "learning_rate": 0.00019637953733459628,
      "loss": 0.7252,
      "step": 1268
    },
    {
      "epoch": 0.1134656652360515,
      "grad_norm": 0.11972885304993475,
      "learning_rate": 0.00019637181128327505,
      "loss": 0.6331,
      "step": 1269
    },
    {
      "epoch": 0.11355507868383405,
      "grad_norm": 0.13112918937157506,
      "learning_rate": 0.00019636407714936636,
      "loss": 0.7218,
      "step": 1270
    },
    {
      "epoch": 0.11364449213161659,
      "grad_norm": 0.136136511431768,
      "learning_rate": 0.00019635633493351887,
      "loss": 0.7407,
      "step": 1271
    },
    {
      "epoch": 0.11373390557939914,
      "grad_norm": 0.1411768354744352,
      "learning_rate": 0.000196348584636382,
      "loss": 0.3655,
      "step": 1272
    },
    {
      "epoch": 0.1138233190271817,
      "grad_norm": 0.1647450910570521,
      "learning_rate": 0.00019634082625860562,
      "loss": 0.7118,
      "step": 1273
    },
    {
      "epoch": 0.11391273247496424,
      "grad_norm": 0.14508471999273417,
      "learning_rate": 0.00019633305980084055,
      "loss": 0.6644,
      "step": 1274
    },
    {
      "epoch": 0.11400214592274678,
      "grad_norm": 0.14579523464874666,
      "learning_rate": 0.0001963252852637381,
      "loss": 0.7004,
      "step": 1275
    },
    {
      "epoch": 0.11409155937052932,
      "grad_norm": 0.16122773299476104,
      "learning_rate": 0.00019631750264795028,
      "loss": 0.7486,
      "step": 1276
    },
    {
      "epoch": 0.11418097281831187,
      "grad_norm": 0.1403808254821846,
      "learning_rate": 0.00019630971195412983,
      "loss": 0.7446,
      "step": 1277
    },
    {
      "epoch": 0.11427038626609443,
      "grad_norm": 0.14058265154517102,
      "learning_rate": 0.00019630191318293017,
      "loss": 0.7167,
      "step": 1278
    },
    {
      "epoch": 0.11435979971387697,
      "grad_norm": 0.1396262899606387,
      "learning_rate": 0.00019629410633500535,
      "loss": 0.7326,
      "step": 1279
    },
    {
      "epoch": 0.11444921316165951,
      "grad_norm": 0.11910613209700757,
      "learning_rate": 0.00019628629141101012,
      "loss": 0.6961,
      "step": 1280
    },
    {
      "epoch": 0.11453862660944206,
      "grad_norm": 0.12850310574526963,
      "learning_rate": 0.0001962784684115999,
      "loss": 0.717,
      "step": 1281
    },
    {
      "epoch": 0.1146280400572246,
      "grad_norm": 0.129980882408187,
      "learning_rate": 0.00019627063733743084,
      "loss": 0.699,
      "step": 1282
    },
    {
      "epoch": 0.11471745350500716,
      "grad_norm": 0.1262371803331788,
      "learning_rate": 0.0001962627981891597,
      "loss": 0.6815,
      "step": 1283
    },
    {
      "epoch": 0.1148068669527897,
      "grad_norm": 0.12429304098317642,
      "learning_rate": 0.00019625495096744388,
      "loss": 0.6763,
      "step": 1284
    },
    {
      "epoch": 0.11489628040057225,
      "grad_norm": 0.14013245561490903,
      "learning_rate": 0.00019624709567294158,
      "loss": 0.7589,
      "step": 1285
    },
    {
      "epoch": 0.11498569384835479,
      "grad_norm": 0.13878777621248908,
      "learning_rate": 0.00019623923230631165,
      "loss": 0.7352,
      "step": 1286
    },
    {
      "epoch": 0.11507510729613733,
      "grad_norm": 0.12613326792969937,
      "learning_rate": 0.0001962313608682135,
      "loss": 0.7274,
      "step": 1287
    },
    {
      "epoch": 0.11516452074391989,
      "grad_norm": 0.1427278443452391,
      "learning_rate": 0.00019622348135930735,
      "loss": 0.7376,
      "step": 1288
    },
    {
      "epoch": 0.11525393419170243,
      "grad_norm": 0.13023445478411405,
      "learning_rate": 0.00019621559378025401,
      "loss": 0.3295,
      "step": 1289
    },
    {
      "epoch": 0.11534334763948498,
      "grad_norm": 0.14467640472168725,
      "learning_rate": 0.00019620769813171504,
      "loss": 0.7023,
      "step": 1290
    },
    {
      "epoch": 0.11543276108726752,
      "grad_norm": 0.14302962532082733,
      "learning_rate": 0.0001961997944143526,
      "loss": 0.716,
      "step": 1291
    },
    {
      "epoch": 0.11552217453505007,
      "grad_norm": 0.12273318556624957,
      "learning_rate": 0.00019619188262882958,
      "loss": 0.7058,
      "step": 1292
    },
    {
      "epoch": 0.11561158798283262,
      "grad_norm": 0.1313184758489506,
      "learning_rate": 0.00019618396277580952,
      "loss": 0.7134,
      "step": 1293
    },
    {
      "epoch": 0.11570100143061517,
      "grad_norm": 0.14118919041055994,
      "learning_rate": 0.0001961760348559567,
      "loss": 0.691,
      "step": 1294
    },
    {
      "epoch": 0.11579041487839771,
      "grad_norm": 0.16759511148104683,
      "learning_rate": 0.00019616809886993596,
      "loss": 0.7415,
      "step": 1295
    },
    {
      "epoch": 0.11587982832618025,
      "grad_norm": 0.13398924197609355,
      "learning_rate": 0.0001961601548184129,
      "loss": 0.6965,
      "step": 1296
    },
    {
      "epoch": 0.1159692417739628,
      "grad_norm": 0.13031364969526218,
      "learning_rate": 0.00019615220270205382,
      "loss": 0.6832,
      "step": 1297
    },
    {
      "epoch": 0.11605865522174535,
      "grad_norm": 0.14284031026022126,
      "learning_rate": 0.00019614424252152558,
      "loss": 0.73,
      "step": 1298
    },
    {
      "epoch": 0.1161480686695279,
      "grad_norm": 0.14660027728171912,
      "learning_rate": 0.0001961362742774959,
      "loss": 0.7116,
      "step": 1299
    },
    {
      "epoch": 0.11623748211731044,
      "grad_norm": 0.14518968389115425,
      "learning_rate": 0.00019612829797063295,
      "loss": 0.7218,
      "step": 1300
    },
    {
      "epoch": 0.11632689556509299,
      "grad_norm": 0.1252048391094396,
      "learning_rate": 0.00019612031360160574,
      "loss": 0.7241,
      "step": 1301
    },
    {
      "epoch": 0.11641630901287553,
      "grad_norm": 0.13856552842437692,
      "learning_rate": 0.00019611232117108395,
      "loss": 0.7425,
      "step": 1302
    },
    {
      "epoch": 0.11650572246065809,
      "grad_norm": 0.12574924302263601,
      "learning_rate": 0.00019610432067973781,
      "loss": 0.6837,
      "step": 1303
    },
    {
      "epoch": 0.11659513590844063,
      "grad_norm": 0.1351720381782233,
      "learning_rate": 0.0001960963121282384,
      "loss": 0.6784,
      "step": 1304
    },
    {
      "epoch": 0.11668454935622317,
      "grad_norm": 0.1376580361697446,
      "learning_rate": 0.0001960882955172573,
      "loss": 0.6842,
      "step": 1305
    },
    {
      "epoch": 0.11677396280400572,
      "grad_norm": 0.14197129029293012,
      "learning_rate": 0.00019608027084746694,
      "loss": 0.6835,
      "step": 1306
    },
    {
      "epoch": 0.11686337625178828,
      "grad_norm": 0.13774632115828095,
      "learning_rate": 0.0001960722381195403,
      "loss": 0.7147,
      "step": 1307
    },
    {
      "epoch": 0.11695278969957082,
      "grad_norm": 0.13319779899137124,
      "learning_rate": 0.00019606419733415105,
      "loss": 0.6839,
      "step": 1308
    },
    {
      "epoch": 0.11704220314735336,
      "grad_norm": 0.13303991435795984,
      "learning_rate": 0.00019605614849197358,
      "loss": 0.7176,
      "step": 1309
    },
    {
      "epoch": 0.1171316165951359,
      "grad_norm": 0.13005782530035634,
      "learning_rate": 0.000196048091593683,
      "loss": 0.6738,
      "step": 1310
    },
    {
      "epoch": 0.11722103004291845,
      "grad_norm": 0.12097045592190389,
      "learning_rate": 0.00019604002663995492,
      "loss": 0.6996,
      "step": 1311
    },
    {
      "epoch": 0.11731044349070101,
      "grad_norm": 0.12956169969539183,
      "learning_rate": 0.0001960319536314658,
      "loss": 0.6738,
      "step": 1312
    },
    {
      "epoch": 0.11739985693848355,
      "grad_norm": 0.12160396192213964,
      "learning_rate": 0.0001960238725688927,
      "loss": 0.6959,
      "step": 1313
    },
    {
      "epoch": 0.1174892703862661,
      "grad_norm": 0.15062459750355334,
      "learning_rate": 0.0001960157834529134,
      "loss": 0.7567,
      "step": 1314
    },
    {
      "epoch": 0.11757868383404864,
      "grad_norm": 0.13395610665853283,
      "learning_rate": 0.0001960076862842063,
      "loss": 0.6747,
      "step": 1315
    },
    {
      "epoch": 0.11766809728183118,
      "grad_norm": 0.10881448468821157,
      "learning_rate": 0.00019599958106345045,
      "loss": 0.6874,
      "step": 1316
    },
    {
      "epoch": 0.11775751072961374,
      "grad_norm": 0.14068015647072604,
      "learning_rate": 0.00019599146779132576,
      "loss": 0.7244,
      "step": 1317
    },
    {
      "epoch": 0.11784692417739628,
      "grad_norm": 0.14507595474064608,
      "learning_rate": 0.00019598334646851254,
      "loss": 0.3571,
      "step": 1318
    },
    {
      "epoch": 0.11793633762517883,
      "grad_norm": 0.12884634262347563,
      "learning_rate": 0.000195975217095692,
      "loss": 0.6998,
      "step": 1319
    },
    {
      "epoch": 0.11802575107296137,
      "grad_norm": 0.13830508853520698,
      "learning_rate": 0.00019596707967354585,
      "loss": 0.7223,
      "step": 1320
    },
    {
      "epoch": 0.11811516452074391,
      "grad_norm": 0.14414268263519506,
      "learning_rate": 0.00019595893420275667,
      "loss": 0.73,
      "step": 1321
    },
    {
      "epoch": 0.11820457796852647,
      "grad_norm": 0.1510332361656532,
      "learning_rate": 0.00019595078068400756,
      "loss": 0.7282,
      "step": 1322
    },
    {
      "epoch": 0.11829399141630902,
      "grad_norm": 0.1344876035240956,
      "learning_rate": 0.00019594261911798236,
      "loss": 0.6781,
      "step": 1323
    },
    {
      "epoch": 0.11838340486409156,
      "grad_norm": 0.14289884283101834,
      "learning_rate": 0.00019593444950536556,
      "loss": 0.7155,
      "step": 1324
    },
    {
      "epoch": 0.1184728183118741,
      "grad_norm": 0.1542613054863709,
      "learning_rate": 0.00019592627184684234,
      "loss": 0.7669,
      "step": 1325
    },
    {
      "epoch": 0.11856223175965665,
      "grad_norm": 0.12838526391303912,
      "learning_rate": 0.00019591808614309854,
      "loss": 0.6978,
      "step": 1326
    },
    {
      "epoch": 0.1186516452074392,
      "grad_norm": 0.13336711223258116,
      "learning_rate": 0.0001959098923948207,
      "loss": 0.6446,
      "step": 1327
    },
    {
      "epoch": 0.11874105865522175,
      "grad_norm": 0.13755652158436643,
      "learning_rate": 0.00019590169060269602,
      "loss": 0.7238,
      "step": 1328
    },
    {
      "epoch": 0.11883047210300429,
      "grad_norm": 0.14537184114692514,
      "learning_rate": 0.0001958934807674124,
      "loss": 0.7065,
      "step": 1329
    },
    {
      "epoch": 0.11891988555078684,
      "grad_norm": 0.15024523396126815,
      "learning_rate": 0.00019588526288965828,
      "loss": 0.7465,
      "step": 1330
    },
    {
      "epoch": 0.11900929899856938,
      "grad_norm": 0.12436371032789384,
      "learning_rate": 0.00019587703697012302,
      "loss": 0.6712,
      "step": 1331
    },
    {
      "epoch": 0.11909871244635194,
      "grad_norm": 0.14115379551504956,
      "learning_rate": 0.00019586880300949644,
      "loss": 0.7053,
      "step": 1332
    },
    {
      "epoch": 0.11918812589413448,
      "grad_norm": 0.11926512411880766,
      "learning_rate": 0.00019586056100846916,
      "loss": 0.7422,
      "step": 1333
    },
    {
      "epoch": 0.11927753934191702,
      "grad_norm": 0.15019086415721797,
      "learning_rate": 0.00019585231096773238,
      "loss": 0.714,
      "step": 1334
    },
    {
      "epoch": 0.11936695278969957,
      "grad_norm": 0.14068228893741766,
      "learning_rate": 0.00019584405288797802,
      "loss": 0.7125,
      "step": 1335
    },
    {
      "epoch": 0.11945636623748211,
      "grad_norm": 0.15469903230665108,
      "learning_rate": 0.0001958357867698987,
      "loss": 0.7706,
      "step": 1336
    },
    {
      "epoch": 0.11954577968526467,
      "grad_norm": 0.11255252541836328,
      "learning_rate": 0.0001958275126141877,
      "loss": 0.6685,
      "step": 1337
    },
    {
      "epoch": 0.11963519313304721,
      "grad_norm": 0.12772380406052736,
      "learning_rate": 0.00019581923042153894,
      "loss": 0.7189,
      "step": 1338
    },
    {
      "epoch": 0.11972460658082976,
      "grad_norm": 0.13202859554856577,
      "learning_rate": 0.00019581094019264705,
      "loss": 0.7216,
      "step": 1339
    },
    {
      "epoch": 0.1198140200286123,
      "grad_norm": 0.14387562904816023,
      "learning_rate": 0.00019580264192820733,
      "loss": 0.7448,
      "step": 1340
    },
    {
      "epoch": 0.11990343347639484,
      "grad_norm": 0.14549769270442717,
      "learning_rate": 0.00019579433562891572,
      "loss": 0.6901,
      "step": 1341
    },
    {
      "epoch": 0.1199928469241774,
      "grad_norm": 0.13569542653930905,
      "learning_rate": 0.00019578602129546885,
      "loss": 0.6998,
      "step": 1342
    },
    {
      "epoch": 0.12008226037195995,
      "grad_norm": 0.14823499880892035,
      "learning_rate": 0.00019577769892856407,
      "loss": 0.7477,
      "step": 1343
    },
    {
      "epoch": 0.12017167381974249,
      "grad_norm": 0.14529507528309366,
      "learning_rate": 0.00019576936852889936,
      "loss": 0.7059,
      "step": 1344
    },
    {
      "epoch": 0.12026108726752503,
      "grad_norm": 0.1524638438588869,
      "learning_rate": 0.00019576103009717337,
      "loss": 0.6891,
      "step": 1345
    },
    {
      "epoch": 0.12035050071530758,
      "grad_norm": 0.1590353113995139,
      "learning_rate": 0.00019575268363408542,
      "loss": 0.7721,
      "step": 1346
    },
    {
      "epoch": 0.12043991416309013,
      "grad_norm": 0.14412656956309655,
      "learning_rate": 0.00019574432914033554,
      "loss": 0.7305,
      "step": 1347
    },
    {
      "epoch": 0.12052932761087268,
      "grad_norm": 0.1418556224662271,
      "learning_rate": 0.00019573596661662438,
      "loss": 0.7496,
      "step": 1348
    },
    {
      "epoch": 0.12061874105865522,
      "grad_norm": 0.1389674595120109,
      "learning_rate": 0.00019572759606365336,
      "loss": 0.745,
      "step": 1349
    },
    {
      "epoch": 0.12070815450643776,
      "grad_norm": 0.11064858663512954,
      "learning_rate": 0.00019571921748212447,
      "loss": 0.6756,
      "step": 1350
    },
    {
      "epoch": 0.12079756795422031,
      "grad_norm": 0.15272508123528974,
      "learning_rate": 0.0001957108308727404,
      "loss": 0.7584,
      "step": 1351
    },
    {
      "epoch": 0.12088698140200287,
      "grad_norm": 0.13349569538954786,
      "learning_rate": 0.0001957024362362045,
      "loss": 0.7288,
      "step": 1352
    },
    {
      "epoch": 0.12097639484978541,
      "grad_norm": 0.13760254381814596,
      "learning_rate": 0.0001956940335732209,
      "loss": 0.7267,
      "step": 1353
    },
    {
      "epoch": 0.12106580829756795,
      "grad_norm": 0.13458491247070553,
      "learning_rate": 0.00019568562288449422,
      "loss": 0.7328,
      "step": 1354
    },
    {
      "epoch": 0.1211552217453505,
      "grad_norm": 0.12854735792501526,
      "learning_rate": 0.00019567720417072997,
      "loss": 0.6943,
      "step": 1355
    },
    {
      "epoch": 0.12124463519313304,
      "grad_norm": 0.1551989824820137,
      "learning_rate": 0.00019566877743263414,
      "loss": 0.7197,
      "step": 1356
    },
    {
      "epoch": 0.1213340486409156,
      "grad_norm": 0.13662680424379242,
      "learning_rate": 0.00019566034267091346,
      "loss": 0.7008,
      "step": 1357
    },
    {
      "epoch": 0.12142346208869814,
      "grad_norm": 0.14704513570427935,
      "learning_rate": 0.0001956518998862754,
      "loss": 0.7258,
      "step": 1358
    },
    {
      "epoch": 0.12151287553648069,
      "grad_norm": 0.13872789151787873,
      "learning_rate": 0.00019564344907942798,
      "loss": 0.7261,
      "step": 1359
    },
    {
      "epoch": 0.12160228898426323,
      "grad_norm": 0.12751397935070996,
      "learning_rate": 0.00019563499025107998,
      "loss": 0.751,
      "step": 1360
    },
    {
      "epoch": 0.12169170243204577,
      "grad_norm": 0.15494980905447991,
      "learning_rate": 0.0001956265234019409,
      "loss": 0.7815,
      "step": 1361
    },
    {
      "epoch": 0.12178111587982833,
      "grad_norm": 0.1522426279174695,
      "learning_rate": 0.00019561804853272075,
      "loss": 0.7222,
      "step": 1362
    },
    {
      "epoch": 0.12187052932761087,
      "grad_norm": 0.13888023910567604,
      "learning_rate": 0.00019560956564413035,
      "loss": 0.7064,
      "step": 1363
    },
    {
      "epoch": 0.12195994277539342,
      "grad_norm": 0.13433357702802112,
      "learning_rate": 0.00019560107473688118,
      "loss": 0.7168,
      "step": 1364
    },
    {
      "epoch": 0.12204935622317596,
      "grad_norm": 0.15187518331481498,
      "learning_rate": 0.0001955925758116853,
      "loss": 0.747,
      "step": 1365
    },
    {
      "epoch": 0.1221387696709585,
      "grad_norm": 0.11466905023515991,
      "learning_rate": 0.00019558406886925554,
      "loss": 0.682,
      "step": 1366
    },
    {
      "epoch": 0.12222818311874106,
      "grad_norm": 0.13569264438949152,
      "learning_rate": 0.00019557555391030537,
      "loss": 0.327,
      "step": 1367
    },
    {
      "epoch": 0.1223175965665236,
      "grad_norm": 0.12973773049184398,
      "learning_rate": 0.0001955670309355489,
      "loss": 0.3239,
      "step": 1368
    },
    {
      "epoch": 0.12240701001430615,
      "grad_norm": 0.14598386946367103,
      "learning_rate": 0.00019555849994570097,
      "loss": 0.7197,
      "step": 1369
    },
    {
      "epoch": 0.1224964234620887,
      "grad_norm": 0.13969820965911045,
      "learning_rate": 0.00019554996094147707,
      "loss": 0.6938,
      "step": 1370
    },
    {
      "epoch": 0.12258583690987125,
      "grad_norm": 0.15421224537467282,
      "learning_rate": 0.00019554141392359332,
      "loss": 0.7477,
      "step": 1371
    },
    {
      "epoch": 0.1226752503576538,
      "grad_norm": 0.12452999592647163,
      "learning_rate": 0.00019553285889276656,
      "loss": 0.6748,
      "step": 1372
    },
    {
      "epoch": 0.12276466380543634,
      "grad_norm": 0.13106507780466303,
      "learning_rate": 0.00019552429584971434,
      "loss": 0.7067,
      "step": 1373
    },
    {
      "epoch": 0.12285407725321888,
      "grad_norm": 0.15342200369322034,
      "learning_rate": 0.00019551572479515478,
      "loss": 0.3535,
      "step": 1374
    },
    {
      "epoch": 0.12294349070100143,
      "grad_norm": 0.13647508281805856,
      "learning_rate": 0.00019550714572980668,
      "loss": 0.7132,
      "step": 1375
    },
    {
      "epoch": 0.12303290414878398,
      "grad_norm": 0.14183352253220324,
      "learning_rate": 0.00019549855865438965,
      "loss": 0.7357,
      "step": 1376
    },
    {
      "epoch": 0.12312231759656653,
      "grad_norm": 0.14440591504511174,
      "learning_rate": 0.00019548996356962386,
      "loss": 0.7145,
      "step": 1377
    },
    {
      "epoch": 0.12321173104434907,
      "grad_norm": 0.12954895679039072,
      "learning_rate": 0.00019548136047623015,
      "loss": 0.7291,
      "step": 1378
    },
    {
      "epoch": 0.12330114449213161,
      "grad_norm": 0.12267561839150824,
      "learning_rate": 0.00019547274937492998,
      "loss": 0.7257,
      "step": 1379
    },
    {
      "epoch": 0.12339055793991416,
      "grad_norm": 0.13540533915570618,
      "learning_rate": 0.00019546413026644567,
      "loss": 0.7319,
      "step": 1380
    },
    {
      "epoch": 0.12347997138769672,
      "grad_norm": 0.12071863944631975,
      "learning_rate": 0.00019545550315150004,
      "loss": 0.6849,
      "step": 1381
    },
    {
      "epoch": 0.12356938483547926,
      "grad_norm": 0.11834019753831843,
      "learning_rate": 0.00019544686803081666,
      "loss": 0.6755,
      "step": 1382
    },
    {
      "epoch": 0.1236587982832618,
      "grad_norm": 0.12074113898653606,
      "learning_rate": 0.00019543822490511974,
      "loss": 0.6723,
      "step": 1383
    },
    {
      "epoch": 0.12374821173104435,
      "grad_norm": 0.14149714591427967,
      "learning_rate": 0.00019542957377513412,
      "loss": 0.7257,
      "step": 1384
    },
    {
      "epoch": 0.12383762517882689,
      "grad_norm": 0.13506298076951337,
      "learning_rate": 0.00019542091464158542,
      "loss": 0.667,
      "step": 1385
    },
    {
      "epoch": 0.12392703862660945,
      "grad_norm": 0.14895132056095536,
      "learning_rate": 0.00019541224750519983,
      "loss": 0.7379,
      "step": 1386
    },
    {
      "epoch": 0.12401645207439199,
      "grad_norm": 0.1416541087137203,
      "learning_rate": 0.00019540357236670427,
      "loss": 0.7162,
      "step": 1387
    },
    {
      "epoch": 0.12410586552217454,
      "grad_norm": 0.16028871816851664,
      "learning_rate": 0.00019539488922682633,
      "loss": 0.7231,
      "step": 1388
    },
    {
      "epoch": 0.12419527896995708,
      "grad_norm": 0.1416367432395338,
      "learning_rate": 0.00019538619808629422,
      "loss": 0.7007,
      "step": 1389
    },
    {
      "epoch": 0.12428469241773962,
      "grad_norm": 0.1533336684351532,
      "learning_rate": 0.0001953774989458369,
      "loss": 0.74,
      "step": 1390
    },
    {
      "epoch": 0.12437410586552218,
      "grad_norm": 0.12286475045715384,
      "learning_rate": 0.00019536879180618392,
      "loss": 0.7192,
      "step": 1391
    },
    {
      "epoch": 0.12446351931330472,
      "grad_norm": 0.12750536699020132,
      "learning_rate": 0.00019536007666806556,
      "loss": 0.6684,
      "step": 1392
    },
    {
      "epoch": 0.12455293276108727,
      "grad_norm": 0.13360937003833448,
      "learning_rate": 0.00019535135353221272,
      "loss": 0.7165,
      "step": 1393
    },
    {
      "epoch": 0.12464234620886981,
      "grad_norm": 0.1508566012814802,
      "learning_rate": 0.000195342622399357,
      "loss": 0.7367,
      "step": 1394
    },
    {
      "epoch": 0.12473175965665236,
      "grad_norm": 0.13131081932785987,
      "learning_rate": 0.0001953338832702307,
      "loss": 0.7077,
      "step": 1395
    },
    {
      "epoch": 0.12482117310443491,
      "grad_norm": 0.12294083701637999,
      "learning_rate": 0.00019532513614556673,
      "loss": 0.7107,
      "step": 1396
    },
    {
      "epoch": 0.12491058655221746,
      "grad_norm": 0.14164119095664146,
      "learning_rate": 0.00019531638102609873,
      "loss": 0.7022,
      "step": 1397
    },
    {
      "epoch": 0.125,
      "grad_norm": 0.12816996465043579,
      "learning_rate": 0.00019530761791256097,
      "loss": 0.697,
      "step": 1398
    },
    {
      "epoch": 0.12508941344778254,
      "grad_norm": 0.15766773674347226,
      "learning_rate": 0.0001952988468056884,
      "loss": 0.7459,
      "step": 1399
    },
    {
      "epoch": 0.1251788268955651,
      "grad_norm": 0.13643026840722858,
      "learning_rate": 0.00019529006770621662,
      "loss": 0.7017,
      "step": 1400
    },
    {
      "epoch": 0.12526824034334763,
      "grad_norm": 0.1361158719042695,
      "learning_rate": 0.00019528128061488195,
      "loss": 0.7062,
      "step": 1401
    },
    {
      "epoch": 0.12535765379113017,
      "grad_norm": 0.13633638087430142,
      "learning_rate": 0.00019527248553242137,
      "loss": 0.7232,
      "step": 1402
    },
    {
      "epoch": 0.12544706723891275,
      "grad_norm": 0.1422224388103841,
      "learning_rate": 0.00019526368245957246,
      "loss": 0.7346,
      "step": 1403
    },
    {
      "epoch": 0.1255364806866953,
      "grad_norm": 0.14125158601886595,
      "learning_rate": 0.00019525487139707357,
      "loss": 0.7195,
      "step": 1404
    },
    {
      "epoch": 0.12562589413447783,
      "grad_norm": 0.15448541209316885,
      "learning_rate": 0.00019524605234566363,
      "loss": 0.7476,
      "step": 1405
    },
    {
      "epoch": 0.12571530758226038,
      "grad_norm": 0.14383006080116015,
      "learning_rate": 0.00019523722530608232,
      "loss": 0.6973,
      "step": 1406
    },
    {
      "epoch": 0.12580472103004292,
      "grad_norm": 0.1557785029001811,
      "learning_rate": 0.00019522839027906995,
      "loss": 0.7186,
      "step": 1407
    },
    {
      "epoch": 0.12589413447782546,
      "grad_norm": 0.1492040208291209,
      "learning_rate": 0.0001952195472653675,
      "loss": 0.7373,
      "step": 1408
    },
    {
      "epoch": 0.125983547925608,
      "grad_norm": 0.12317356530280849,
      "learning_rate": 0.0001952106962657166,
      "loss": 0.7204,
      "step": 1409
    },
    {
      "epoch": 0.12607296137339055,
      "grad_norm": 0.15568652659866797,
      "learning_rate": 0.0001952018372808596,
      "loss": 0.7109,
      "step": 1410
    },
    {
      "epoch": 0.1261623748211731,
      "grad_norm": 0.15131039649633782,
      "learning_rate": 0.00019519297031153946,
      "loss": 0.7262,
      "step": 1411
    },
    {
      "epoch": 0.12625178826895564,
      "grad_norm": 0.13111979479001054,
      "learning_rate": 0.0001951840953584999,
      "loss": 0.7008,
      "step": 1412
    },
    {
      "epoch": 0.1263412017167382,
      "grad_norm": 0.13089382592850432,
      "learning_rate": 0.0001951752124224852,
      "loss": 0.7218,
      "step": 1413
    },
    {
      "epoch": 0.12643061516452075,
      "grad_norm": 0.15475600462075506,
      "learning_rate": 0.00019516632150424034,
      "loss": 0.7263,
      "step": 1414
    },
    {
      "epoch": 0.1265200286123033,
      "grad_norm": 0.14245929152018377,
      "learning_rate": 0.00019515742260451107,
      "loss": 0.7453,
      "step": 1415
    },
    {
      "epoch": 0.12660944206008584,
      "grad_norm": 0.13403002409687073,
      "learning_rate": 0.00019514851572404368,
      "loss": 0.6735,
      "step": 1416
    },
    {
      "epoch": 0.12669885550786839,
      "grad_norm": 0.15005322292516207,
      "learning_rate": 0.0001951396008635852,
      "loss": 0.3358,
      "step": 1417
    },
    {
      "epoch": 0.12678826895565093,
      "grad_norm": 0.15607487922322458,
      "learning_rate": 0.00019513067802388325,
      "loss": 0.7193,
      "step": 1418
    },
    {
      "epoch": 0.12687768240343347,
      "grad_norm": 0.14374865802431502,
      "learning_rate": 0.00019512174720568627,
      "loss": 0.706,
      "step": 1419
    },
    {
      "epoch": 0.12696709585121602,
      "grad_norm": 0.14329219261539114,
      "learning_rate": 0.0001951128084097432,
      "loss": 0.7027,
      "step": 1420
    },
    {
      "epoch": 0.12705650929899856,
      "grad_norm": 0.11720243786712074,
      "learning_rate": 0.00019510386163680375,
      "loss": 0.6652,
      "step": 1421
    },
    {
      "epoch": 0.1271459227467811,
      "grad_norm": 0.1433078324858767,
      "learning_rate": 0.00019509490688761832,
      "loss": 0.7214,
      "step": 1422
    },
    {
      "epoch": 0.12723533619456368,
      "grad_norm": 0.13625312354762975,
      "learning_rate": 0.0001950859441629379,
      "loss": 0.6929,
      "step": 1423
    },
    {
      "epoch": 0.12732474964234622,
      "grad_norm": 0.13143092714195248,
      "learning_rate": 0.00019507697346351414,
      "loss": 0.6647,
      "step": 1424
    },
    {
      "epoch": 0.12741416309012876,
      "grad_norm": 0.13000578050796993,
      "learning_rate": 0.00019506799479009944,
      "loss": 0.6872,
      "step": 1425
    },
    {
      "epoch": 0.1275035765379113,
      "grad_norm": 0.12301420175999206,
      "learning_rate": 0.00019505900814344683,
      "loss": 0.6858,
      "step": 1426
    },
    {
      "epoch": 0.12759298998569385,
      "grad_norm": 0.14489684008145237,
      "learning_rate": 0.00019505001352431003,
      "loss": 0.719,
      "step": 1427
    },
    {
      "epoch": 0.1276824034334764,
      "grad_norm": 0.15353087702172757,
      "learning_rate": 0.00019504101093344338,
      "loss": 0.7417,
      "step": 1428
    },
    {
      "epoch": 0.12777181688125894,
      "grad_norm": 0.13196430007243573,
      "learning_rate": 0.00019503200037160193,
      "loss": 0.712,
      "step": 1429
    },
    {
      "epoch": 0.12786123032904148,
      "grad_norm": 0.14797978204757786,
      "learning_rate": 0.00019502298183954136,
      "loss": 0.7488,
      "step": 1430
    },
    {
      "epoch": 0.12795064377682402,
      "grad_norm": 0.13915956681473335,
      "learning_rate": 0.00019501395533801807,
      "loss": 0.7085,
      "step": 1431
    },
    {
      "epoch": 0.12804005722460657,
      "grad_norm": 0.12928515247017688,
      "learning_rate": 0.0001950049208677891,
      "loss": 0.6779,
      "step": 1432
    },
    {
      "epoch": 0.12812947067238914,
      "grad_norm": 0.11566255021953895,
      "learning_rate": 0.00019499587842961214,
      "loss": 0.6801,
      "step": 1433
    },
    {
      "epoch": 0.12821888412017168,
      "grad_norm": 0.14601664185316,
      "learning_rate": 0.0001949868280242456,
      "loss": 0.728,
      "step": 1434
    },
    {
      "epoch": 0.12830829756795423,
      "grad_norm": 0.15120079117033608,
      "learning_rate": 0.0001949777696524485,
      "loss": 0.7748,
      "step": 1435
    },
    {
      "epoch": 0.12839771101573677,
      "grad_norm": 0.14310790240174084,
      "learning_rate": 0.00019496870331498056,
      "loss": 0.7096,
      "step": 1436
    },
    {
      "epoch": 0.12848712446351931,
      "grad_norm": 0.12563507582719113,
      "learning_rate": 0.00019495962901260215,
      "loss": 0.7103,
      "step": 1437
    },
    {
      "epoch": 0.12857653791130186,
      "grad_norm": 0.130516716450135,
      "learning_rate": 0.00019495054674607438,
      "loss": 0.7097,
      "step": 1438
    },
    {
      "epoch": 0.1286659513590844,
      "grad_norm": 0.15138368090943072,
      "learning_rate": 0.00019494145651615888,
      "loss": 0.7049,
      "step": 1439
    },
    {
      "epoch": 0.12875536480686695,
      "grad_norm": 0.13904641130909048,
      "learning_rate": 0.0001949323583236181,
      "loss": 0.7232,
      "step": 1440
    },
    {
      "epoch": 0.1288447782546495,
      "grad_norm": 0.15741948738509637,
      "learning_rate": 0.00019492325216921506,
      "loss": 0.7497,
      "step": 1441
    },
    {
      "epoch": 0.12893419170243203,
      "grad_norm": 0.13509261630709563,
      "learning_rate": 0.00019491413805371356,
      "loss": 0.6884,
      "step": 1442
    },
    {
      "epoch": 0.1290236051502146,
      "grad_norm": 0.144436277193248,
      "learning_rate": 0.0001949050159778779,
      "loss": 0.7527,
      "step": 1443
    },
    {
      "epoch": 0.12911301859799715,
      "grad_norm": 0.14619262099475877,
      "learning_rate": 0.00019489588594247313,
      "loss": 0.7588,
      "step": 1444
    },
    {
      "epoch": 0.1292024320457797,
      "grad_norm": 0.1294578574293483,
      "learning_rate": 0.00019488674794826505,
      "loss": 0.6965,
      "step": 1445
    },
    {
      "epoch": 0.12929184549356224,
      "grad_norm": 0.13376248640094218,
      "learning_rate": 0.00019487760199602,
      "loss": 0.6647,
      "step": 1446
    },
    {
      "epoch": 0.12938125894134478,
      "grad_norm": 0.12755901481732582,
      "learning_rate": 0.00019486844808650503,
      "loss": 0.692,
      "step": 1447
    },
    {
      "epoch": 0.12947067238912732,
      "grad_norm": 0.13815926148995047,
      "learning_rate": 0.00019485928622048793,
      "loss": 0.7184,
      "step": 1448
    },
    {
      "epoch": 0.12956008583690987,
      "grad_norm": 0.13570197392845046,
      "learning_rate": 0.00019485011639873702,
      "loss": 0.703,
      "step": 1449
    },
    {
      "epoch": 0.1296494992846924,
      "grad_norm": 0.1367274621853015,
      "learning_rate": 0.0001948409386220214,
      "loss": 0.7007,
      "step": 1450
    },
    {
      "epoch": 0.12973891273247495,
      "grad_norm": 0.12177241945923821,
      "learning_rate": 0.00019483175289111083,
      "loss": 0.6752,
      "step": 1451
    },
    {
      "epoch": 0.1298283261802575,
      "grad_norm": 0.13639799199069205,
      "learning_rate": 0.00019482255920677565,
      "loss": 0.7002,
      "step": 1452
    },
    {
      "epoch": 0.12991773962804007,
      "grad_norm": 0.1286450484995104,
      "learning_rate": 0.00019481335756978696,
      "loss": 0.6993,
      "step": 1453
    },
    {
      "epoch": 0.1300071530758226,
      "grad_norm": 0.1393310686203547,
      "learning_rate": 0.00019480414798091647,
      "loss": 0.6939,
      "step": 1454
    },
    {
      "epoch": 0.13009656652360516,
      "grad_norm": 0.14234940690582046,
      "learning_rate": 0.00019479493044093657,
      "loss": 0.7346,
      "step": 1455
    },
    {
      "epoch": 0.1301859799713877,
      "grad_norm": 0.13573751105043216,
      "learning_rate": 0.00019478570495062037,
      "loss": 0.7478,
      "step": 1456
    },
    {
      "epoch": 0.13027539341917024,
      "grad_norm": 0.13455872595829907,
      "learning_rate": 0.00019477647151074155,
      "loss": 0.7255,
      "step": 1457
    },
    {
      "epoch": 0.1303648068669528,
      "grad_norm": 0.139902853291727,
      "learning_rate": 0.0001947672301220745,
      "loss": 0.7213,
      "step": 1458
    },
    {
      "epoch": 0.13045422031473533,
      "grad_norm": 0.1393152335949605,
      "learning_rate": 0.00019475798078539433,
      "loss": 0.7399,
      "step": 1459
    },
    {
      "epoch": 0.13054363376251787,
      "grad_norm": 0.1455355813345573,
      "learning_rate": 0.00019474872350147676,
      "loss": 0.7022,
      "step": 1460
    },
    {
      "epoch": 0.13063304721030042,
      "grad_norm": 0.1523763013909242,
      "learning_rate": 0.0001947394582710982,
      "loss": 0.7596,
      "step": 1461
    },
    {
      "epoch": 0.130722460658083,
      "grad_norm": 0.12061146296373551,
      "learning_rate": 0.00019473018509503565,
      "loss": 0.6445,
      "step": 1462
    },
    {
      "epoch": 0.13081187410586553,
      "grad_norm": 0.13510595775777418,
      "learning_rate": 0.00019472090397406686,
      "loss": 0.7011,
      "step": 1463
    },
    {
      "epoch": 0.13090128755364808,
      "grad_norm": 0.1210771856084422,
      "learning_rate": 0.00019471161490897029,
      "loss": 0.7052,
      "step": 1464
    },
    {
      "epoch": 0.13099070100143062,
      "grad_norm": 0.14199661722920226,
      "learning_rate": 0.00019470231790052496,
      "loss": 0.7151,
      "step": 1465
    },
    {
      "epoch": 0.13108011444921316,
      "grad_norm": 0.12249555042521,
      "learning_rate": 0.0001946930129495106,
      "loss": 0.6718,
      "step": 1466
    },
    {
      "epoch": 0.1311695278969957,
      "grad_norm": 0.1427428296476804,
      "learning_rate": 0.00019468370005670758,
      "loss": 0.7347,
      "step": 1467
    },
    {
      "epoch": 0.13125894134477825,
      "grad_norm": 0.1474576476804647,
      "learning_rate": 0.00019467437922289697,
      "loss": 0.7272,
      "step": 1468
    },
    {
      "epoch": 0.1313483547925608,
      "grad_norm": 0.13958997852992525,
      "learning_rate": 0.00019466505044886056,
      "loss": 0.7188,
      "step": 1469
    },
    {
      "epoch": 0.13143776824034334,
      "grad_norm": 0.13842796131798438,
      "learning_rate": 0.00019465571373538068,
      "loss": 0.6762,
      "step": 1470
    },
    {
      "epoch": 0.13152718168812588,
      "grad_norm": 0.13199091217208672,
      "learning_rate": 0.00019464636908324038,
      "loss": 0.7131,
      "step": 1471
    },
    {
      "epoch": 0.13161659513590845,
      "grad_norm": 0.14174980301978435,
      "learning_rate": 0.00019463701649322343,
      "loss": 0.7288,
      "step": 1472
    },
    {
      "epoch": 0.131706008583691,
      "grad_norm": 0.1523715418053003,
      "learning_rate": 0.0001946276559661142,
      "loss": 0.7341,
      "step": 1473
    },
    {
      "epoch": 0.13179542203147354,
      "grad_norm": 0.12027212678519646,
      "learning_rate": 0.00019461828750269775,
      "loss": 0.6902,
      "step": 1474
    },
    {
      "epoch": 0.13188483547925609,
      "grad_norm": 0.13526345633101577,
      "learning_rate": 0.00019460891110375977,
      "loss": 0.7073,
      "step": 1475
    },
    {
      "epoch": 0.13197424892703863,
      "grad_norm": 0.12075498377933423,
      "learning_rate": 0.00019459952677008672,
      "loss": 0.6897,
      "step": 1476
    },
    {
      "epoch": 0.13206366237482117,
      "grad_norm": 0.1485120055095325,
      "learning_rate": 0.00019459013450246558,
      "loss": 0.7166,
      "step": 1477
    },
    {
      "epoch": 0.13215307582260372,
      "grad_norm": 0.13851442149657334,
      "learning_rate": 0.0001945807343016841,
      "loss": 0.684,
      "step": 1478
    },
    {
      "epoch": 0.13224248927038626,
      "grad_norm": 0.12020290501295573,
      "learning_rate": 0.00019457132616853065,
      "loss": 0.6771,
      "step": 1479
    },
    {
      "epoch": 0.1323319027181688,
      "grad_norm": 0.1495493002196963,
      "learning_rate": 0.00019456191010379427,
      "loss": 0.3565,
      "step": 1480
    },
    {
      "epoch": 0.13242131616595135,
      "grad_norm": 0.138883835276027,
      "learning_rate": 0.00019455248610826474,
      "loss": 0.7068,
      "step": 1481
    },
    {
      "epoch": 0.13251072961373392,
      "grad_norm": 0.13389370340197834,
      "learning_rate": 0.00019454305418273234,
      "loss": 0.7324,
      "step": 1482
    },
    {
      "epoch": 0.13260014306151646,
      "grad_norm": 0.14873278298052728,
      "learning_rate": 0.0001945336143279882,
      "loss": 0.721,
      "step": 1483
    },
    {
      "epoch": 0.132689556509299,
      "grad_norm": 0.1459234830947023,
      "learning_rate": 0.000194524166544824,
      "loss": 0.7062,
      "step": 1484
    },
    {
      "epoch": 0.13277896995708155,
      "grad_norm": 0.16323290059339512,
      "learning_rate": 0.00019451471083403209,
      "loss": 0.7561,
      "step": 1485
    },
    {
      "epoch": 0.1328683834048641,
      "grad_norm": 0.15740942673850167,
      "learning_rate": 0.0001945052471964055,
      "loss": 0.6815,
      "step": 1486
    },
    {
      "epoch": 0.13295779685264664,
      "grad_norm": 0.12599536693912378,
      "learning_rate": 0.000194495775632738,
      "loss": 0.7136,
      "step": 1487
    },
    {
      "epoch": 0.13304721030042918,
      "grad_norm": 0.13367320643887906,
      "learning_rate": 0.0001944862961438239,
      "loss": 0.3449,
      "step": 1488
    },
    {
      "epoch": 0.13313662374821172,
      "grad_norm": 0.1833042944564259,
      "learning_rate": 0.0001944768087304583,
      "loss": 0.726,
      "step": 1489
    },
    {
      "epoch": 0.13322603719599427,
      "grad_norm": 0.1364073977650881,
      "learning_rate": 0.0001944673133934368,
      "loss": 0.7549,
      "step": 1490
    },
    {
      "epoch": 0.1333154506437768,
      "grad_norm": 0.13356642636488472,
      "learning_rate": 0.00019445781013355582,
      "loss": 0.7258,
      "step": 1491
    },
    {
      "epoch": 0.13340486409155938,
      "grad_norm": 0.14511297733581785,
      "learning_rate": 0.00019444829895161239,
      "loss": 0.6972,
      "step": 1492
    },
    {
      "epoch": 0.13349427753934193,
      "grad_norm": 0.14630697351645977,
      "learning_rate": 0.0001944387798484042,
      "loss": 0.7266,
      "step": 1493
    },
    {
      "epoch": 0.13358369098712447,
      "grad_norm": 0.13739540928291788,
      "learning_rate": 0.00019442925282472958,
      "loss": 0.6939,
      "step": 1494
    },
    {
      "epoch": 0.13367310443490701,
      "grad_norm": 0.1517128353630757,
      "learning_rate": 0.00019441971788138756,
      "loss": 0.7243,
      "step": 1495
    },
    {
      "epoch": 0.13376251788268956,
      "grad_norm": 0.13913032255768523,
      "learning_rate": 0.00019441017501917784,
      "loss": 0.6936,
      "step": 1496
    },
    {
      "epoch": 0.1338519313304721,
      "grad_norm": 0.13484458671527824,
      "learning_rate": 0.0001944006242389008,
      "loss": 0.7267,
      "step": 1497
    },
    {
      "epoch": 0.13394134477825465,
      "grad_norm": 0.1455989951183495,
      "learning_rate": 0.00019439106554135736,
      "loss": 0.7496,
      "step": 1498
    },
    {
      "epoch": 0.1340307582260372,
      "grad_norm": 0.14226502548822512,
      "learning_rate": 0.00019438149892734926,
      "loss": 0.754,
      "step": 1499
    },
    {
      "epoch": 0.13412017167381973,
      "grad_norm": 0.12209375779056106,
      "learning_rate": 0.00019437192439767883,
      "loss": 0.6829,
      "step": 1500
    },
    {
      "epoch": 0.13420958512160228,
      "grad_norm": 0.13354273220974755,
      "learning_rate": 0.00019436234195314907,
      "loss": 0.7368,
      "step": 1501
    },
    {
      "epoch": 0.13429899856938485,
      "grad_norm": 0.14669276690731375,
      "learning_rate": 0.00019435275159456364,
      "loss": 0.6896,
      "step": 1502
    },
    {
      "epoch": 0.1343884120171674,
      "grad_norm": 0.1376471172280157,
      "learning_rate": 0.00019434315332272692,
      "loss": 0.7196,
      "step": 1503
    },
    {
      "epoch": 0.13447782546494993,
      "grad_norm": 0.14024254514131893,
      "learning_rate": 0.00019433354713844386,
      "loss": 0.7094,
      "step": 1504
    },
    {
      "epoch": 0.13456723891273248,
      "grad_norm": 0.1516282846025737,
      "learning_rate": 0.00019432393304252013,
      "loss": 0.7665,
      "step": 1505
    },
    {
      "epoch": 0.13465665236051502,
      "grad_norm": 0.13789013191730504,
      "learning_rate": 0.00019431431103576202,
      "loss": 0.7258,
      "step": 1506
    },
    {
      "epoch": 0.13474606580829757,
      "grad_norm": 0.13609904095797595,
      "learning_rate": 0.00019430468111897656,
      "loss": 0.7174,
      "step": 1507
    },
    {
      "epoch": 0.1348354792560801,
      "grad_norm": 0.13048199094078536,
      "learning_rate": 0.0001942950432929714,
      "loss": 0.7358,
      "step": 1508
    },
    {
      "epoch": 0.13492489270386265,
      "grad_norm": 0.10856537354504009,
      "learning_rate": 0.00019428539755855483,
      "loss": 0.629,
      "step": 1509
    },
    {
      "epoch": 0.1350143061516452,
      "grad_norm": 0.14619060586631683,
      "learning_rate": 0.00019427574391653581,
      "loss": 0.7492,
      "step": 1510
    },
    {
      "epoch": 0.13510371959942774,
      "grad_norm": 0.11076552151145422,
      "learning_rate": 0.00019426608236772404,
      "loss": 0.7112,
      "step": 1511
    },
    {
      "epoch": 0.1351931330472103,
      "grad_norm": 0.12371372968154226,
      "learning_rate": 0.00019425641291292978,
      "loss": 0.7256,
      "step": 1512
    },
    {
      "epoch": 0.13528254649499286,
      "grad_norm": 0.13324843120493493,
      "learning_rate": 0.000194246735552964,
      "loss": 0.7058,
      "step": 1513
    },
    {
      "epoch": 0.1353719599427754,
      "grad_norm": 0.13587197822056613,
      "learning_rate": 0.00019423705028863832,
      "loss": 0.716,
      "step": 1514
    },
    {
      "epoch": 0.13546137339055794,
      "grad_norm": 0.14344418582802312,
      "learning_rate": 0.00019422735712076506,
      "loss": 0.7124,
      "step": 1515
    },
    {
      "epoch": 0.1355507868383405,
      "grad_norm": 0.14469177905573577,
      "learning_rate": 0.00019421765605015713,
      "loss": 0.7122,
      "step": 1516
    },
    {
      "epoch": 0.13564020028612303,
      "grad_norm": 0.150315319267287,
      "learning_rate": 0.0001942079470776282,
      "loss": 0.7069,
      "step": 1517
    },
    {
      "epoch": 0.13572961373390557,
      "grad_norm": 0.14884144618781936,
      "learning_rate": 0.0001941982302039925,
      "loss": 0.7096,
      "step": 1518
    },
    {
      "epoch": 0.13581902718168812,
      "grad_norm": 0.14373407347280923,
      "learning_rate": 0.000194188505430065,
      "loss": 0.7009,
      "step": 1519
    },
    {
      "epoch": 0.13590844062947066,
      "grad_norm": 0.13361451698422225,
      "learning_rate": 0.0001941787727566613,
      "loss": 0.6918,
      "step": 1520
    },
    {
      "epoch": 0.1359978540772532,
      "grad_norm": 0.13093761986697536,
      "learning_rate": 0.0001941690321845977,
      "loss": 0.6692,
      "step": 1521
    },
    {
      "epoch": 0.13608726752503578,
      "grad_norm": 0.147972606890441,
      "learning_rate": 0.00019415928371469105,
      "loss": 0.7208,
      "step": 1522
    },
    {
      "epoch": 0.13617668097281832,
      "grad_norm": 0.14022420837621608,
      "learning_rate": 0.000194149527347759,
      "loss": 0.6928,
      "step": 1523
    },
    {
      "epoch": 0.13626609442060086,
      "grad_norm": 0.13737219654454413,
      "learning_rate": 0.00019413976308461982,
      "loss": 0.6771,
      "step": 1524
    },
    {
      "epoch": 0.1363555078683834,
      "grad_norm": 0.14586656725027777,
      "learning_rate": 0.0001941299909260924,
      "loss": 0.7059,
      "step": 1525
    },
    {
      "epoch": 0.13644492131616595,
      "grad_norm": 0.14975920726727712,
      "learning_rate": 0.0001941202108729963,
      "loss": 0.7466,
      "step": 1526
    },
    {
      "epoch": 0.1365343347639485,
      "grad_norm": 0.15341980737314415,
      "learning_rate": 0.0001941104229261518,
      "loss": 0.7877,
      "step": 1527
    },
    {
      "epoch": 0.13662374821173104,
      "grad_norm": 0.13142071138000297,
      "learning_rate": 0.0001941006270863798,
      "loss": 0.6742,
      "step": 1528
    },
    {
      "epoch": 0.13671316165951358,
      "grad_norm": 0.14687429637838548,
      "learning_rate": 0.0001940908233545018,
      "loss": 0.7099,
      "step": 1529
    },
    {
      "epoch": 0.13680257510729613,
      "grad_norm": 0.1109215185936796,
      "learning_rate": 0.00019408101173134013,
      "loss": 0.682,
      "step": 1530
    },
    {
      "epoch": 0.1368919885550787,
      "grad_norm": 0.1444009289713663,
      "learning_rate": 0.00019407119221771758,
      "loss": 0.764,
      "step": 1531
    },
    {
      "epoch": 0.13698140200286124,
      "grad_norm": 0.13278558794040088,
      "learning_rate": 0.00019406136481445782,
      "loss": 0.6724,
      "step": 1532
    },
    {
      "epoch": 0.13707081545064378,
      "grad_norm": 0.13355317781006476,
      "learning_rate": 0.0001940515295223849,
      "loss": 0.6823,
      "step": 1533
    },
    {
      "epoch": 0.13716022889842633,
      "grad_norm": 0.14414480399344495,
      "learning_rate": 0.00019404168634232382,
      "loss": 0.7401,
      "step": 1534
    },
    {
      "epoch": 0.13724964234620887,
      "grad_norm": 0.11800942715293913,
      "learning_rate": 0.0001940318352751001,
      "loss": 0.7078,
      "step": 1535
    },
    {
      "epoch": 0.13733905579399142,
      "grad_norm": 0.14103414992588567,
      "learning_rate": 0.00019402197632153992,
      "loss": 0.6971,
      "step": 1536
    },
    {
      "epoch": 0.13742846924177396,
      "grad_norm": 0.13461459675617202,
      "learning_rate": 0.0001940121094824701,
      "loss": 0.72,
      "step": 1537
    },
    {
      "epoch": 0.1375178826895565,
      "grad_norm": 0.13896464770547515,
      "learning_rate": 0.00019400223475871825,
      "loss": 0.7339,
      "step": 1538
    },
    {
      "epoch": 0.13760729613733905,
      "grad_norm": 0.1275237136673671,
      "learning_rate": 0.00019399235215111245,
      "loss": 0.6804,
      "step": 1539
    },
    {
      "epoch": 0.1376967095851216,
      "grad_norm": 0.13977803019388146,
      "learning_rate": 0.00019398246166048159,
      "loss": 0.718,
      "step": 1540
    },
    {
      "epoch": 0.13778612303290416,
      "grad_norm": 0.1290716438653391,
      "learning_rate": 0.00019397256328765517,
      "loss": 0.7034,
      "step": 1541
    },
    {
      "epoch": 0.1378755364806867,
      "grad_norm": 0.1226717192852829,
      "learning_rate": 0.00019396265703346339,
      "loss": 0.664,
      "step": 1542
    },
    {
      "epoch": 0.13796494992846925,
      "grad_norm": 0.15330721251948862,
      "learning_rate": 0.00019395274289873705,
      "loss": 0.7055,
      "step": 1543
    },
    {
      "epoch": 0.1380543633762518,
      "grad_norm": 0.122906919469247,
      "learning_rate": 0.00019394282088430758,
      "loss": 0.6724,
      "step": 1544
    },
    {
      "epoch": 0.13814377682403434,
      "grad_norm": 0.14863813732801684,
      "learning_rate": 0.0001939328909910072,
      "loss": 0.7139,
      "step": 1545
    },
    {
      "epoch": 0.13823319027181688,
      "grad_norm": 0.12580199261917868,
      "learning_rate": 0.0001939229532196687,
      "loss": 0.6873,
      "step": 1546
    },
    {
      "epoch": 0.13832260371959942,
      "grad_norm": 0.1368978597695372,
      "learning_rate": 0.00019391300757112557,
      "loss": 0.7347,
      "step": 1547
    },
    {
      "epoch": 0.13841201716738197,
      "grad_norm": 0.1520780346420872,
      "learning_rate": 0.00019390305404621186,
      "loss": 0.7317,
      "step": 1548
    },
    {
      "epoch": 0.1385014306151645,
      "grad_norm": 0.1539626005499914,
      "learning_rate": 0.00019389309264576242,
      "loss": 0.7394,
      "step": 1549
    },
    {
      "epoch": 0.13859084406294706,
      "grad_norm": 0.14852290145331223,
      "learning_rate": 0.00019388312337061274,
      "loss": 0.6886,
      "step": 1550
    },
    {
      "epoch": 0.13868025751072963,
      "grad_norm": 0.12425265456757807,
      "learning_rate": 0.00019387314622159885,
      "loss": 0.71,
      "step": 1551
    },
    {
      "epoch": 0.13876967095851217,
      "grad_norm": 0.13083125495341302,
      "learning_rate": 0.00019386316119955756,
      "loss": 0.6485,
      "step": 1552
    },
    {
      "epoch": 0.1388590844062947,
      "grad_norm": 0.14254341949121363,
      "learning_rate": 0.0001938531683053263,
      "loss": 0.7148,
      "step": 1553
    },
    {
      "epoch": 0.13894849785407726,
      "grad_norm": 0.14316616041296362,
      "learning_rate": 0.00019384316753974314,
      "loss": 0.7325,
      "step": 1554
    },
    {
      "epoch": 0.1390379113018598,
      "grad_norm": 0.1469300041983612,
      "learning_rate": 0.00019383315890364689,
      "loss": 0.7597,
      "step": 1555
    },
    {
      "epoch": 0.13912732474964234,
      "grad_norm": 0.15498409328556204,
      "learning_rate": 0.00019382314239787691,
      "loss": 0.6786,
      "step": 1556
    },
    {
      "epoch": 0.1392167381974249,
      "grad_norm": 0.14255699523004597,
      "learning_rate": 0.00019381311802327327,
      "loss": 0.6883,
      "step": 1557
    },
    {
      "epoch": 0.13930615164520743,
      "grad_norm": 0.15035906799283383,
      "learning_rate": 0.00019380308578067674,
      "loss": 0.7324,
      "step": 1558
    },
    {
      "epoch": 0.13939556509298998,
      "grad_norm": 0.16215583693901844,
      "learning_rate": 0.00019379304567092867,
      "loss": 0.7301,
      "step": 1559
    },
    {
      "epoch": 0.13948497854077252,
      "grad_norm": 0.1413481100036493,
      "learning_rate": 0.00019378299769487117,
      "loss": 0.6957,
      "step": 1560
    },
    {
      "epoch": 0.1395743919885551,
      "grad_norm": 0.14391926004346933,
      "learning_rate": 0.0001937729418533469,
      "loss": 0.7133,
      "step": 1561
    },
    {
      "epoch": 0.13966380543633763,
      "grad_norm": 0.12639159675231582,
      "learning_rate": 0.0001937628781471992,
      "loss": 0.6894,
      "step": 1562
    },
    {
      "epoch": 0.13975321888412018,
      "grad_norm": 0.1306106319727696,
      "learning_rate": 0.0001937528065772722,
      "loss": 0.7542,
      "step": 1563
    },
    {
      "epoch": 0.13984263233190272,
      "grad_norm": 0.14821802164611184,
      "learning_rate": 0.0001937427271444105,
      "loss": 0.6687,
      "step": 1564
    },
    {
      "epoch": 0.13993204577968527,
      "grad_norm": 0.14085753630978148,
      "learning_rate": 0.00019373263984945953,
      "loss": 0.7427,
      "step": 1565
    },
    {
      "epoch": 0.1400214592274678,
      "grad_norm": 0.13051874069316488,
      "learning_rate": 0.00019372254469326522,
      "loss": 0.6856,
      "step": 1566
    },
    {
      "epoch": 0.14011087267525035,
      "grad_norm": 0.14901618780947307,
      "learning_rate": 0.0001937124416766743,
      "loss": 0.7355,
      "step": 1567
    },
    {
      "epoch": 0.1402002861230329,
      "grad_norm": 0.14865800130882637,
      "learning_rate": 0.00019370233080053407,
      "loss": 0.7241,
      "step": 1568
    },
    {
      "epoch": 0.14028969957081544,
      "grad_norm": 0.1587212397987338,
      "learning_rate": 0.0001936922120656925,
      "loss": 0.7074,
      "step": 1569
    },
    {
      "epoch": 0.14037911301859798,
      "grad_norm": 0.13784333591082212,
      "learning_rate": 0.00019368208547299826,
      "loss": 0.699,
      "step": 1570
    },
    {
      "epoch": 0.14046852646638056,
      "grad_norm": 0.13927852251403244,
      "learning_rate": 0.00019367195102330066,
      "loss": 0.7292,
      "step": 1571
    },
    {
      "epoch": 0.1405579399141631,
      "grad_norm": 0.13529867142972252,
      "learning_rate": 0.00019366180871744964,
      "loss": 0.6774,
      "step": 1572
    },
    {
      "epoch": 0.14064735336194564,
      "grad_norm": 0.1296885952880117,
      "learning_rate": 0.00019365165855629587,
      "loss": 0.6915,
      "step": 1573
    },
    {
      "epoch": 0.1407367668097282,
      "grad_norm": 0.14133743584293648,
      "learning_rate": 0.00019364150054069059,
      "loss": 0.6782,
      "step": 1574
    },
    {
      "epoch": 0.14082618025751073,
      "grad_norm": 0.13414327283006017,
      "learning_rate": 0.00019363133467148572,
      "loss": 0.7,
      "step": 1575
    },
    {
      "epoch": 0.14091559370529327,
      "grad_norm": 0.12225082406687365,
      "learning_rate": 0.00019362116094953391,
      "loss": 0.6827,
      "step": 1576
    },
    {
      "epoch": 0.14100500715307582,
      "grad_norm": 0.12200763465091374,
      "learning_rate": 0.0001936109793756884,
      "loss": 0.6906,
      "step": 1577
    },
    {
      "epoch": 0.14109442060085836,
      "grad_norm": 0.14995356745676336,
      "learning_rate": 0.00019360078995080308,
      "loss": 0.7328,
      "step": 1578
    },
    {
      "epoch": 0.1411838340486409,
      "grad_norm": 0.13442006389274758,
      "learning_rate": 0.0001935905926757326,
      "loss": 0.7311,
      "step": 1579
    },
    {
      "epoch": 0.14127324749642345,
      "grad_norm": 0.13022096918463652,
      "learning_rate": 0.0001935803875513321,
      "loss": 0.6745,
      "step": 1580
    },
    {
      "epoch": 0.14136266094420602,
      "grad_norm": 0.15401860587995206,
      "learning_rate": 0.0001935701745784575,
      "loss": 0.7783,
      "step": 1581
    },
    {
      "epoch": 0.14145207439198856,
      "grad_norm": 0.14103447469491676,
      "learning_rate": 0.0001935599537579654,
      "loss": 0.698,
      "step": 1582
    },
    {
      "epoch": 0.1415414878397711,
      "grad_norm": 0.14933738606285013,
      "learning_rate": 0.00019354972509071295,
      "loss": 0.7328,
      "step": 1583
    },
    {
      "epoch": 0.14163090128755365,
      "grad_norm": 0.12807954162312718,
      "learning_rate": 0.00019353948857755803,
      "loss": 0.7021,
      "step": 1584
    },
    {
      "epoch": 0.1417203147353362,
      "grad_norm": 0.12854246792035795,
      "learning_rate": 0.00019352924421935916,
      "loss": 0.6948,
      "step": 1585
    },
    {
      "epoch": 0.14180972818311874,
      "grad_norm": 0.11664982426716324,
      "learning_rate": 0.00019351899201697556,
      "loss": 0.6901,
      "step": 1586
    },
    {
      "epoch": 0.14189914163090128,
      "grad_norm": 0.13618524115494243,
      "learning_rate": 0.00019350873197126705,
      "loss": 0.7058,
      "step": 1587
    },
    {
      "epoch": 0.14198855507868383,
      "grad_norm": 0.12807513324086897,
      "learning_rate": 0.0001934984640830941,
      "loss": 0.7026,
      "step": 1588
    },
    {
      "epoch": 0.14207796852646637,
      "grad_norm": 0.12551898374311649,
      "learning_rate": 0.00019348818835331788,
      "loss": 0.6807,
      "step": 1589
    },
    {
      "epoch": 0.14216738197424894,
      "grad_norm": 0.12752133605197813,
      "learning_rate": 0.0001934779047828002,
      "loss": 0.7233,
      "step": 1590
    },
    {
      "epoch": 0.14225679542203148,
      "grad_norm": 0.14222445409423481,
      "learning_rate": 0.00019346761337240355,
      "loss": 0.7304,
      "step": 1591
    },
    {
      "epoch": 0.14234620886981403,
      "grad_norm": 0.13905468671649734,
      "learning_rate": 0.00019345731412299106,
      "loss": 0.693,
      "step": 1592
    },
    {
      "epoch": 0.14243562231759657,
      "grad_norm": 0.11137987974546473,
      "learning_rate": 0.0001934470070354265,
      "loss": 0.6817,
      "step": 1593
    },
    {
      "epoch": 0.14252503576537912,
      "grad_norm": 0.15693008726301688,
      "learning_rate": 0.00019343669211057432,
      "loss": 0.7793,
      "step": 1594
    },
    {
      "epoch": 0.14261444921316166,
      "grad_norm": 0.12385538487778659,
      "learning_rate": 0.00019342636934929959,
      "loss": 0.6535,
      "step": 1595
    },
    {
      "epoch": 0.1427038626609442,
      "grad_norm": 0.1434075498745092,
      "learning_rate": 0.0001934160387524681,
      "loss": 0.7331,
      "step": 1596
    },
    {
      "epoch": 0.14279327610872675,
      "grad_norm": 0.15088652757987353,
      "learning_rate": 0.00019340570032094626,
      "loss": 0.6572,
      "step": 1597
    },
    {
      "epoch": 0.1428826895565093,
      "grad_norm": 0.14481146084903268,
      "learning_rate": 0.00019339535405560115,
      "loss": 0.7264,
      "step": 1598
    },
    {
      "epoch": 0.14297210300429183,
      "grad_norm": 0.15930964977078896,
      "learning_rate": 0.00019338499995730048,
      "loss": 0.7403,
      "step": 1599
    },
    {
      "epoch": 0.1430615164520744,
      "grad_norm": 0.1518299913896726,
      "learning_rate": 0.00019337463802691264,
      "loss": 0.7412,
      "step": 1600
    },
    {
      "epoch": 0.14315092989985695,
      "grad_norm": 0.15756999328974414,
      "learning_rate": 0.00019336426826530668,
      "loss": 0.736,
      "step": 1601
    },
    {
      "epoch": 0.1432403433476395,
      "grad_norm": 0.15453024273086233,
      "learning_rate": 0.0001933538906733523,
      "loss": 0.7184,
      "step": 1602
    },
    {
      "epoch": 0.14332975679542204,
      "grad_norm": 0.13030983222293646,
      "learning_rate": 0.00019334350525191987,
      "loss": 0.648,
      "step": 1603
    },
    {
      "epoch": 0.14341917024320458,
      "grad_norm": 0.1492077834867817,
      "learning_rate": 0.00019333311200188036,
      "loss": 0.7034,
      "step": 1604
    },
    {
      "epoch": 0.14350858369098712,
      "grad_norm": 0.15153258075896522,
      "learning_rate": 0.00019332271092410545,
      "loss": 0.753,
      "step": 1605
    },
    {
      "epoch": 0.14359799713876967,
      "grad_norm": 0.14628412980626201,
      "learning_rate": 0.0001933123020194675,
      "loss": 0.7095,
      "step": 1606
    },
    {
      "epoch": 0.1436874105865522,
      "grad_norm": 0.1445700786589755,
      "learning_rate": 0.00019330188528883947,
      "loss": 0.733,
      "step": 1607
    },
    {
      "epoch": 0.14377682403433475,
      "grad_norm": 0.14387141979947926,
      "learning_rate": 0.00019329146073309504,
      "loss": 0.7245,
      "step": 1608
    },
    {
      "epoch": 0.1438662374821173,
      "grad_norm": 0.10947564788824286,
      "learning_rate": 0.00019328102835310842,
      "loss": 0.6765,
      "step": 1609
    },
    {
      "epoch": 0.14395565092989987,
      "grad_norm": 0.13308221546401822,
      "learning_rate": 0.00019327058814975462,
      "loss": 0.7164,
      "step": 1610
    },
    {
      "epoch": 0.1440450643776824,
      "grad_norm": 0.13880564556847197,
      "learning_rate": 0.00019326014012390922,
      "loss": 0.7225,
      "step": 1611
    },
    {
      "epoch": 0.14413447782546496,
      "grad_norm": 0.12753380926047014,
      "learning_rate": 0.00019324968427644848,
      "loss": 0.7109,
      "step": 1612
    },
    {
      "epoch": 0.1442238912732475,
      "grad_norm": 0.13738010645576149,
      "learning_rate": 0.00019323922060824939,
      "loss": 0.7084,
      "step": 1613
    },
    {
      "epoch": 0.14431330472103004,
      "grad_norm": 0.13179620846473375,
      "learning_rate": 0.00019322874912018945,
      "loss": 0.7043,
      "step": 1614
    },
    {
      "epoch": 0.1444027181688126,
      "grad_norm": 0.13702123522780515,
      "learning_rate": 0.00019321826981314691,
      "loss": 0.7073,
      "step": 1615
    },
    {
      "epoch": 0.14449213161659513,
      "grad_norm": 0.13928388634257102,
      "learning_rate": 0.00019320778268800066,
      "loss": 0.7003,
      "step": 1616
    },
    {
      "epoch": 0.14458154506437768,
      "grad_norm": 0.1295095808151373,
      "learning_rate": 0.00019319728774563023,
      "loss": 0.7098,
      "step": 1617
    },
    {
      "epoch": 0.14467095851216022,
      "grad_norm": 0.11986917136549131,
      "learning_rate": 0.00019318678498691586,
      "loss": 0.6992,
      "step": 1618
    },
    {
      "epoch": 0.14476037195994276,
      "grad_norm": 0.15318757714555045,
      "learning_rate": 0.00019317627441273836,
      "loss": 0.7503,
      "step": 1619
    },
    {
      "epoch": 0.14484978540772533,
      "grad_norm": 0.15758067240780096,
      "learning_rate": 0.00019316575602397923,
      "loss": 0.7041,
      "step": 1620
    },
    {
      "epoch": 0.14493919885550788,
      "grad_norm": 0.14403993652316954,
      "learning_rate": 0.0001931552298215207,
      "loss": 0.734,
      "step": 1621
    },
    {
      "epoch": 0.14502861230329042,
      "grad_norm": 0.1215097356742273,
      "learning_rate": 0.0001931446958062455,
      "loss": 0.6751,
      "step": 1622
    },
    {
      "epoch": 0.14511802575107297,
      "grad_norm": 0.15386015184025884,
      "learning_rate": 0.0001931341539790372,
      "loss": 0.7474,
      "step": 1623
    },
    {
      "epoch": 0.1452074391988555,
      "grad_norm": 0.1359600005262594,
      "learning_rate": 0.00019312360434077985,
      "loss": 0.7339,
      "step": 1624
    },
    {
      "epoch": 0.14529685264663805,
      "grad_norm": 0.11648471816414267,
      "learning_rate": 0.0001931130468923583,
      "loss": 0.6501,
      "step": 1625
    },
    {
      "epoch": 0.1453862660944206,
      "grad_norm": 0.1598285304326715,
      "learning_rate": 0.00019310248163465795,
      "loss": 0.73,
      "step": 1626
    },
    {
      "epoch": 0.14547567954220314,
      "grad_norm": 0.14347909641019121,
      "learning_rate": 0.00019309190856856486,
      "loss": 0.7324,
      "step": 1627
    },
    {
      "epoch": 0.14556509298998568,
      "grad_norm": 0.13647723094035974,
      "learning_rate": 0.0001930813276949659,
      "loss": 0.7014,
      "step": 1628
    },
    {
      "epoch": 0.14565450643776823,
      "grad_norm": 0.1426108490421542,
      "learning_rate": 0.00019307073901474834,
      "loss": 0.7417,
      "step": 1629
    },
    {
      "epoch": 0.1457439198855508,
      "grad_norm": 0.15850592933160507,
      "learning_rate": 0.00019306014252880034,
      "loss": 0.7472,
      "step": 1630
    },
    {
      "epoch": 0.14583333333333334,
      "grad_norm": 0.1167851434898761,
      "learning_rate": 0.00019304953823801055,
      "loss": 0.7095,
      "step": 1631
    },
    {
      "epoch": 0.1459227467811159,
      "grad_norm": 0.13159298832630784,
      "learning_rate": 0.00019303892614326836,
      "loss": 0.7045,
      "step": 1632
    },
    {
      "epoch": 0.14601216022889843,
      "grad_norm": 0.14460459816160842,
      "learning_rate": 0.0001930283062454638,
      "loss": 0.6929,
      "step": 1633
    },
    {
      "epoch": 0.14610157367668097,
      "grad_norm": 0.13616664858663305,
      "learning_rate": 0.00019301767854548756,
      "loss": 0.6937,
      "step": 1634
    },
    {
      "epoch": 0.14619098712446352,
      "grad_norm": 0.1360924773113797,
      "learning_rate": 0.00019300704304423094,
      "loss": 0.6926,
      "step": 1635
    },
    {
      "epoch": 0.14628040057224606,
      "grad_norm": 0.14131594219848498,
      "learning_rate": 0.00019299639974258598,
      "loss": 0.6883,
      "step": 1636
    },
    {
      "epoch": 0.1463698140200286,
      "grad_norm": 0.137594413534711,
      "learning_rate": 0.00019298574864144523,
      "loss": 0.7261,
      "step": 1637
    },
    {
      "epoch": 0.14645922746781115,
      "grad_norm": 0.13049043043114364,
      "learning_rate": 0.00019297508974170207,
      "loss": 0.6757,
      "step": 1638
    },
    {
      "epoch": 0.1465486409155937,
      "grad_norm": 0.11898963768320717,
      "learning_rate": 0.0001929644230442504,
      "loss": 0.6382,
      "step": 1639
    },
    {
      "epoch": 0.14663805436337626,
      "grad_norm": 0.1402691795243587,
      "learning_rate": 0.00019295374854998488,
      "loss": 0.7332,
      "step": 1640
    },
    {
      "epoch": 0.1467274678111588,
      "grad_norm": 0.14917472002251358,
      "learning_rate": 0.0001929430662598007,
      "loss": 0.7026,
      "step": 1641
    },
    {
      "epoch": 0.14681688125894135,
      "grad_norm": 0.1249109423963242,
      "learning_rate": 0.00019293237617459382,
      "loss": 0.672,
      "step": 1642
    },
    {
      "epoch": 0.1469062947067239,
      "grad_norm": 0.13594286876680375,
      "learning_rate": 0.00019292167829526076,
      "loss": 0.7203,
      "step": 1643
    },
    {
      "epoch": 0.14699570815450644,
      "grad_norm": 0.14397723296524637,
      "learning_rate": 0.00019291097262269874,
      "loss": 0.7247,
      "step": 1644
    },
    {
      "epoch": 0.14708512160228898,
      "grad_norm": 0.12660112503812526,
      "learning_rate": 0.0001929002591578057,
      "loss": 0.6972,
      "step": 1645
    },
    {
      "epoch": 0.14717453505007153,
      "grad_norm": 0.14227978327814048,
      "learning_rate": 0.00019288953790148013,
      "loss": 0.7556,
      "step": 1646
    },
    {
      "epoch": 0.14726394849785407,
      "grad_norm": 0.12479746448228211,
      "learning_rate": 0.00019287880885462115,
      "loss": 0.6866,
      "step": 1647
    },
    {
      "epoch": 0.1473533619456366,
      "grad_norm": 0.12816109045809837,
      "learning_rate": 0.00019286807201812867,
      "loss": 0.6828,
      "step": 1648
    },
    {
      "epoch": 0.14744277539341916,
      "grad_norm": 0.1376676987830108,
      "learning_rate": 0.00019285732739290315,
      "loss": 0.6977,
      "step": 1649
    },
    {
      "epoch": 0.14753218884120173,
      "grad_norm": 0.1289893040928445,
      "learning_rate": 0.0001928465749798457,
      "loss": 0.7095,
      "step": 1650
    },
    {
      "epoch": 0.14762160228898427,
      "grad_norm": 0.1287522136472953,
      "learning_rate": 0.00019283581477985817,
      "loss": 0.6981,
      "step": 1651
    },
    {
      "epoch": 0.14771101573676682,
      "grad_norm": 0.13322908918310142,
      "learning_rate": 0.00019282504679384293,
      "loss": 0.7192,
      "step": 1652
    },
    {
      "epoch": 0.14780042918454936,
      "grad_norm": 0.14785473941228794,
      "learning_rate": 0.00019281427102270314,
      "loss": 0.7073,
      "step": 1653
    },
    {
      "epoch": 0.1478898426323319,
      "grad_norm": 0.12404406645116284,
      "learning_rate": 0.00019280348746734255,
      "loss": 0.6918,
      "step": 1654
    },
    {
      "epoch": 0.14797925608011445,
      "grad_norm": 0.14840125020902814,
      "learning_rate": 0.00019279269612866554,
      "loss": 0.7203,
      "step": 1655
    },
    {
      "epoch": 0.148068669527897,
      "grad_norm": 0.1412731894922006,
      "learning_rate": 0.00019278189700757715,
      "loss": 0.6693,
      "step": 1656
    },
    {
      "epoch": 0.14815808297567953,
      "grad_norm": 0.13821198595289222,
      "learning_rate": 0.0001927710901049831,
      "loss": 0.7324,
      "step": 1657
    },
    {
      "epoch": 0.14824749642346208,
      "grad_norm": 0.14450623764735301,
      "learning_rate": 0.00019276027542178978,
      "loss": 0.7682,
      "step": 1658
    },
    {
      "epoch": 0.14833690987124465,
      "grad_norm": 0.15987711349975137,
      "learning_rate": 0.0001927494529589042,
      "loss": 0.7031,
      "step": 1659
    },
    {
      "epoch": 0.1484263233190272,
      "grad_norm": 0.12410481606769129,
      "learning_rate": 0.000192738622717234,
      "loss": 0.6761,
      "step": 1660
    },
    {
      "epoch": 0.14851573676680974,
      "grad_norm": 0.14616804415128418,
      "learning_rate": 0.0001927277846976875,
      "loss": 0.7149,
      "step": 1661
    },
    {
      "epoch": 0.14860515021459228,
      "grad_norm": 0.14627854716440822,
      "learning_rate": 0.00019271693890117372,
      "loss": 0.7123,
      "step": 1662
    },
    {
      "epoch": 0.14869456366237482,
      "grad_norm": 0.12463221051566686,
      "learning_rate": 0.00019270608532860224,
      "loss": 0.7259,
      "step": 1663
    },
    {
      "epoch": 0.14878397711015737,
      "grad_norm": 0.14485537125606218,
      "learning_rate": 0.00019269522398088332,
      "loss": 0.708,
      "step": 1664
    },
    {
      "epoch": 0.1488733905579399,
      "grad_norm": 0.13815240127056386,
      "learning_rate": 0.0001926843548589279,
      "loss": 0.7195,
      "step": 1665
    },
    {
      "epoch": 0.14896280400572245,
      "grad_norm": 0.14444007983457857,
      "learning_rate": 0.0001926734779636476,
      "loss": 0.7036,
      "step": 1666
    },
    {
      "epoch": 0.149052217453505,
      "grad_norm": 0.12777772103050333,
      "learning_rate": 0.00019266259329595462,
      "loss": 0.6854,
      "step": 1667
    },
    {
      "epoch": 0.14914163090128754,
      "grad_norm": 0.11755564615047968,
      "learning_rate": 0.00019265170085676185,
      "loss": 0.6889,
      "step": 1668
    },
    {
      "epoch": 0.1492310443490701,
      "grad_norm": 0.1324806147036349,
      "learning_rate": 0.00019264080064698282,
      "loss": 0.7029,
      "step": 1669
    },
    {
      "epoch": 0.14932045779685266,
      "grad_norm": 0.14545248447178402,
      "learning_rate": 0.00019262989266753173,
      "loss": 0.7261,
      "step": 1670
    },
    {
      "epoch": 0.1494098712446352,
      "grad_norm": 0.12598821093788293,
      "learning_rate": 0.0001926189769193234,
      "loss": 0.6905,
      "step": 1671
    },
    {
      "epoch": 0.14949928469241774,
      "grad_norm": 0.1516523696077971,
      "learning_rate": 0.00019260805340327335,
      "loss": 0.3875,
      "step": 1672
    },
    {
      "epoch": 0.1495886981402003,
      "grad_norm": 0.1441642901863055,
      "learning_rate": 0.00019259712212029765,
      "loss": 0.7293,
      "step": 1673
    },
    {
      "epoch": 0.14967811158798283,
      "grad_norm": 0.12783821033990778,
      "learning_rate": 0.0001925861830713132,
      "loss": 0.637,
      "step": 1674
    },
    {
      "epoch": 0.14976752503576538,
      "grad_norm": 0.14223781881652217,
      "learning_rate": 0.00019257523625723736,
      "loss": 0.756,
      "step": 1675
    },
    {
      "epoch": 0.14985693848354792,
      "grad_norm": 0.16796388297568202,
      "learning_rate": 0.0001925642816789883,
      "loss": 0.7383,
      "step": 1676
    },
    {
      "epoch": 0.14994635193133046,
      "grad_norm": 0.1543555422620976,
      "learning_rate": 0.00019255331933748472,
      "loss": 0.7048,
      "step": 1677
    },
    {
      "epoch": 0.150035765379113,
      "grad_norm": 0.15512782073722922,
      "learning_rate": 0.000192542349233646,
      "loss": 0.743,
      "step": 1678
    },
    {
      "epoch": 0.15012517882689558,
      "grad_norm": 0.14924409008170925,
      "learning_rate": 0.0001925313713683922,
      "loss": 0.7224,
      "step": 1679
    },
    {
      "epoch": 0.15021459227467812,
      "grad_norm": 0.12914866072890183,
      "learning_rate": 0.00019252038574264405,
      "loss": 0.6903,
      "step": 1680
    },
    {
      "epoch": 0.15030400572246067,
      "grad_norm": 0.1368287681112742,
      "learning_rate": 0.00019250939235732287,
      "loss": 0.6743,
      "step": 1681
    },
    {
      "epoch": 0.1503934191702432,
      "grad_norm": 0.1390657067906296,
      "learning_rate": 0.00019249839121335068,
      "loss": 0.6802,
      "step": 1682
    },
    {
      "epoch": 0.15048283261802575,
      "grad_norm": 0.12225107541897251,
      "learning_rate": 0.00019248738231165017,
      "loss": 0.6623,
      "step": 1683
    },
    {
      "epoch": 0.1505722460658083,
      "grad_norm": 0.156278918443858,
      "learning_rate": 0.00019247636565314453,
      "loss": 0.7516,
      "step": 1684
    },
    {
      "epoch": 0.15066165951359084,
      "grad_norm": 0.14306108641008325,
      "learning_rate": 0.00019246534123875783,
      "loss": 0.6944,
      "step": 1685
    },
    {
      "epoch": 0.15075107296137338,
      "grad_norm": 0.12570131474192606,
      "learning_rate": 0.00019245430906941464,
      "loss": 0.6947,
      "step": 1686
    },
    {
      "epoch": 0.15084048640915593,
      "grad_norm": 0.1220762750913468,
      "learning_rate": 0.00019244326914604019,
      "loss": 0.6707,
      "step": 1687
    },
    {
      "epoch": 0.15092989985693847,
      "grad_norm": 0.12292563296399214,
      "learning_rate": 0.00019243222146956039,
      "loss": 0.6746,
      "step": 1688
    },
    {
      "epoch": 0.15101931330472104,
      "grad_norm": 0.1321015653287652,
      "learning_rate": 0.0001924211660409018,
      "loss": 0.6916,
      "step": 1689
    },
    {
      "epoch": 0.15110872675250359,
      "grad_norm": 0.13412389847303377,
      "learning_rate": 0.00019241010286099165,
      "loss": 0.6913,
      "step": 1690
    },
    {
      "epoch": 0.15119814020028613,
      "grad_norm": 0.13582797913944114,
      "learning_rate": 0.00019239903193075776,
      "loss": 0.6862,
      "step": 1691
    },
    {
      "epoch": 0.15128755364806867,
      "grad_norm": 0.151224534622904,
      "learning_rate": 0.0001923879532511287,
      "loss": 0.7537,
      "step": 1692
    },
    {
      "epoch": 0.15137696709585122,
      "grad_norm": 0.29034537622890133,
      "learning_rate": 0.0001923768668230335,
      "loss": 0.3233,
      "step": 1693
    },
    {
      "epoch": 0.15146638054363376,
      "grad_norm": 0.13976840463679346,
      "learning_rate": 0.0001923657726474021,
      "loss": 0.7255,
      "step": 1694
    },
    {
      "epoch": 0.1515557939914163,
      "grad_norm": 0.1513582456360357,
      "learning_rate": 0.00019235467072516488,
      "loss": 0.7426,
      "step": 1695
    },
    {
      "epoch": 0.15164520743919885,
      "grad_norm": 0.12935303294877176,
      "learning_rate": 0.00019234356105725297,
      "loss": 0.6773,
      "step": 1696
    },
    {
      "epoch": 0.1517346208869814,
      "grad_norm": 0.14484678544963636,
      "learning_rate": 0.00019233244364459814,
      "loss": 0.7047,
      "step": 1697
    },
    {
      "epoch": 0.15182403433476394,
      "grad_norm": 0.13679765009985692,
      "learning_rate": 0.00019232131848813272,
      "loss": 0.6832,
      "step": 1698
    },
    {
      "epoch": 0.1519134477825465,
      "grad_norm": 0.13840280947844275,
      "learning_rate": 0.00019231018558878984,
      "loss": 0.7371,
      "step": 1699
    },
    {
      "epoch": 0.15200286123032905,
      "grad_norm": 0.13047727850053462,
      "learning_rate": 0.00019229904494750315,
      "loss": 0.7174,
      "step": 1700
    },
    {
      "epoch": 0.1520922746781116,
      "grad_norm": 0.13071136308730372,
      "learning_rate": 0.00019228789656520708,
      "loss": 0.7431,
      "step": 1701
    },
    {
      "epoch": 0.15218168812589414,
      "grad_norm": 0.12787239835806327,
      "learning_rate": 0.00019227674044283653,
      "loss": 0.6733,
      "step": 1702
    },
    {
      "epoch": 0.15227110157367668,
      "grad_norm": 0.13031937054378462,
      "learning_rate": 0.00019226557658132723,
      "loss": 0.6845,
      "step": 1703
    },
    {
      "epoch": 0.15236051502145923,
      "grad_norm": 0.13121705523226931,
      "learning_rate": 0.00019225440498161546,
      "loss": 0.6852,
      "step": 1704
    },
    {
      "epoch": 0.15244992846924177,
      "grad_norm": 0.15110426775992344,
      "learning_rate": 0.00019224322564463813,
      "loss": 0.7876,
      "step": 1705
    },
    {
      "epoch": 0.1525393419170243,
      "grad_norm": 0.12964733690015143,
      "learning_rate": 0.00019223203857133287,
      "loss": 0.6865,
      "step": 1706
    },
    {
      "epoch": 0.15262875536480686,
      "grad_norm": 0.130066807255658,
      "learning_rate": 0.00019222084376263794,
      "loss": 0.6793,
      "step": 1707
    },
    {
      "epoch": 0.1527181688125894,
      "grad_norm": 0.14997665577773528,
      "learning_rate": 0.0001922096412194922,
      "loss": 0.7492,
      "step": 1708
    },
    {
      "epoch": 0.15280758226037197,
      "grad_norm": 0.13141365940776242,
      "learning_rate": 0.00019219843094283524,
      "loss": 0.6602,
      "step": 1709
    },
    {
      "epoch": 0.15289699570815452,
      "grad_norm": 0.16329157853653337,
      "learning_rate": 0.00019218721293360718,
      "loss": 0.7415,
      "step": 1710
    },
    {
      "epoch": 0.15298640915593706,
      "grad_norm": 0.14519503936879743,
      "learning_rate": 0.00019217598719274896,
      "loss": 0.6952,
      "step": 1711
    },
    {
      "epoch": 0.1530758226037196,
      "grad_norm": 0.12107065976622236,
      "learning_rate": 0.00019216475372120197,
      "loss": 0.6739,
      "step": 1712
    },
    {
      "epoch": 0.15316523605150215,
      "grad_norm": 0.2209501206210533,
      "learning_rate": 0.0001921535125199084,
      "loss": 0.3811,
      "step": 1713
    },
    {
      "epoch": 0.1532546494992847,
      "grad_norm": 0.17115485088853077,
      "learning_rate": 0.00019214226358981105,
      "loss": 0.745,
      "step": 1714
    },
    {
      "epoch": 0.15334406294706723,
      "grad_norm": 0.14110213308992636,
      "learning_rate": 0.00019213100693185332,
      "loss": 0.7191,
      "step": 1715
    },
    {
      "epoch": 0.15343347639484978,
      "grad_norm": 0.13060852071040646,
      "learning_rate": 0.00019211974254697932,
      "loss": 0.6888,
      "step": 1716
    },
    {
      "epoch": 0.15352288984263232,
      "grad_norm": 0.15547176187225056,
      "learning_rate": 0.00019210847043613373,
      "loss": 0.7298,
      "step": 1717
    },
    {
      "epoch": 0.1536123032904149,
      "grad_norm": 0.14418287452251308,
      "learning_rate": 0.000192097190600262,
      "loss": 0.7049,
      "step": 1718
    },
    {
      "epoch": 0.15370171673819744,
      "grad_norm": 0.12995447780024305,
      "learning_rate": 0.0001920859030403101,
      "loss": 0.6969,
      "step": 1719
    },
    {
      "epoch": 0.15379113018597998,
      "grad_norm": 0.13577630527667792,
      "learning_rate": 0.00019207460775722473,
      "loss": 0.6969,
      "step": 1720
    },
    {
      "epoch": 0.15388054363376252,
      "grad_norm": 0.13951460282848271,
      "learning_rate": 0.00019206330475195319,
      "loss": 0.6875,
      "step": 1721
    },
    {
      "epoch": 0.15396995708154507,
      "grad_norm": 0.1238137698347345,
      "learning_rate": 0.0001920519940254435,
      "loss": 0.7101,
      "step": 1722
    },
    {
      "epoch": 0.1540593705293276,
      "grad_norm": 0.1629568609900895,
      "learning_rate": 0.0001920406755786442,
      "loss": 0.371,
      "step": 1723
    },
    {
      "epoch": 0.15414878397711015,
      "grad_norm": 0.13102617664524813,
      "learning_rate": 0.0001920293494125046,
      "loss": 0.6766,
      "step": 1724
    },
    {
      "epoch": 0.1542381974248927,
      "grad_norm": 0.13786426676602145,
      "learning_rate": 0.00019201801552797462,
      "loss": 0.7285,
      "step": 1725
    },
    {
      "epoch": 0.15432761087267524,
      "grad_norm": 0.12388243535010857,
      "learning_rate": 0.0001920066739260048,
      "loss": 0.6618,
      "step": 1726
    },
    {
      "epoch": 0.15441702432045779,
      "grad_norm": 0.13552439774929073,
      "learning_rate": 0.0001919953246075464,
      "loss": 0.7131,
      "step": 1727
    },
    {
      "epoch": 0.15450643776824036,
      "grad_norm": 0.13580443243076654,
      "learning_rate": 0.00019198396757355118,
      "loss": 0.7132,
      "step": 1728
    },
    {
      "epoch": 0.1545958512160229,
      "grad_norm": 0.12876339047715257,
      "learning_rate": 0.00019197260282497171,
      "loss": 0.6405,
      "step": 1729
    },
    {
      "epoch": 0.15468526466380544,
      "grad_norm": 0.15386416373421025,
      "learning_rate": 0.0001919612303627611,
      "loss": 0.3336,
      "step": 1730
    },
    {
      "epoch": 0.154774678111588,
      "grad_norm": 0.15436741336215168,
      "learning_rate": 0.00019194985018787316,
      "loss": 0.6997,
      "step": 1731
    },
    {
      "epoch": 0.15486409155937053,
      "grad_norm": 0.15168819243034698,
      "learning_rate": 0.00019193846230126233,
      "loss": 0.7253,
      "step": 1732
    },
    {
      "epoch": 0.15495350500715308,
      "grad_norm": 0.1634705467513724,
      "learning_rate": 0.00019192706670388373,
      "loss": 0.7503,
      "step": 1733
    },
    {
      "epoch": 0.15504291845493562,
      "grad_norm": 0.11996652791803562,
      "learning_rate": 0.00019191566339669302,
      "loss": 0.6901,
      "step": 1734
    },
    {
      "epoch": 0.15513233190271816,
      "grad_norm": 0.1296126553060153,
      "learning_rate": 0.00019190425238064667,
      "loss": 0.7349,
      "step": 1735
    },
    {
      "epoch": 0.1552217453505007,
      "grad_norm": 0.13274519663694062,
      "learning_rate": 0.00019189283365670163,
      "loss": 0.6759,
      "step": 1736
    },
    {
      "epoch": 0.15531115879828325,
      "grad_norm": 0.13213488704828347,
      "learning_rate": 0.00019188140722581562,
      "loss": 0.7172,
      "step": 1737
    },
    {
      "epoch": 0.15540057224606582,
      "grad_norm": 0.16885468066899267,
      "learning_rate": 0.00019186997308894696,
      "loss": 0.7188,
      "step": 1738
    },
    {
      "epoch": 0.15548998569384836,
      "grad_norm": 0.143905198246205,
      "learning_rate": 0.0001918585312470546,
      "loss": 0.7449,
      "step": 1739
    },
    {
      "epoch": 0.1555793991416309,
      "grad_norm": 0.1345938699878257,
      "learning_rate": 0.00019184708170109818,
      "loss": 0.7278,
      "step": 1740
    },
    {
      "epoch": 0.15566881258941345,
      "grad_norm": 0.1526473103581837,
      "learning_rate": 0.00019183562445203794,
      "loss": 0.7273,
      "step": 1741
    },
    {
      "epoch": 0.155758226037196,
      "grad_norm": 0.13101238077800667,
      "learning_rate": 0.00019182415950083477,
      "loss": 0.694,
      "step": 1742
    },
    {
      "epoch": 0.15584763948497854,
      "grad_norm": 0.15562756897799446,
      "learning_rate": 0.0001918126868484502,
      "loss": 0.7124,
      "step": 1743
    },
    {
      "epoch": 0.15593705293276108,
      "grad_norm": 0.16074216543363665,
      "learning_rate": 0.00019180120649584653,
      "loss": 0.7603,
      "step": 1744
    },
    {
      "epoch": 0.15602646638054363,
      "grad_norm": 0.13763077777324695,
      "learning_rate": 0.00019178971844398653,
      "loss": 0.6979,
      "step": 1745
    },
    {
      "epoch": 0.15611587982832617,
      "grad_norm": 0.13261497171231112,
      "learning_rate": 0.00019177822269383368,
      "loss": 0.7007,
      "step": 1746
    },
    {
      "epoch": 0.15620529327610871,
      "grad_norm": 0.14396580894016037,
      "learning_rate": 0.00019176671924635215,
      "loss": 0.7329,
      "step": 1747
    },
    {
      "epoch": 0.15629470672389129,
      "grad_norm": 0.14396249910223766,
      "learning_rate": 0.00019175520810250666,
      "loss": 0.7089,
      "step": 1748
    },
    {
      "epoch": 0.15638412017167383,
      "grad_norm": 0.1534322173010951,
      "learning_rate": 0.00019174368926326273,
      "loss": 0.6888,
      "step": 1749
    },
    {
      "epoch": 0.15647353361945637,
      "grad_norm": 0.1257669354884055,
      "learning_rate": 0.00019173216272958633,
      "loss": 0.6878,
      "step": 1750
    },
    {
      "epoch": 0.15656294706723892,
      "grad_norm": 0.1454164195206384,
      "learning_rate": 0.00019172062850244425,
      "loss": 0.7358,
      "step": 1751
    },
    {
      "epoch": 0.15665236051502146,
      "grad_norm": 0.14068695200118955,
      "learning_rate": 0.00019170908658280386,
      "loss": 0.6926,
      "step": 1752
    },
    {
      "epoch": 0.156741773962804,
      "grad_norm": 0.15654845316884056,
      "learning_rate": 0.0001916975369716331,
      "loss": 0.7931,
      "step": 1753
    },
    {
      "epoch": 0.15683118741058655,
      "grad_norm": 0.13774472900557144,
      "learning_rate": 0.00019168597966990065,
      "loss": 0.7066,
      "step": 1754
    },
    {
      "epoch": 0.1569206008583691,
      "grad_norm": 0.12571861095808662,
      "learning_rate": 0.00019167441467857584,
      "loss": 0.6876,
      "step": 1755
    },
    {
      "epoch": 0.15701001430615164,
      "grad_norm": 0.12743305558594423,
      "learning_rate": 0.00019166284199862856,
      "loss": 0.6812,
      "step": 1756
    },
    {
      "epoch": 0.15709942775393418,
      "grad_norm": 0.12016813637316079,
      "learning_rate": 0.00019165126163102943,
      "loss": 0.6997,
      "step": 1757
    },
    {
      "epoch": 0.15718884120171675,
      "grad_norm": 0.12580913485928585,
      "learning_rate": 0.0001916396735767497,
      "loss": 0.7012,
      "step": 1758
    },
    {
      "epoch": 0.1572782546494993,
      "grad_norm": 0.14552393991920415,
      "learning_rate": 0.00019162807783676118,
      "loss": 0.7098,
      "step": 1759
    },
    {
      "epoch": 0.15736766809728184,
      "grad_norm": 0.1252466740052438,
      "learning_rate": 0.00019161647441203646,
      "loss": 0.6886,
      "step": 1760
    },
    {
      "epoch": 0.15745708154506438,
      "grad_norm": 0.1289512433469938,
      "learning_rate": 0.0001916048633035487,
      "loss": 0.7079,
      "step": 1761
    },
    {
      "epoch": 0.15754649499284692,
      "grad_norm": 0.11951518511936308,
      "learning_rate": 0.00019159324451227164,
      "loss": 0.6808,
      "step": 1762
    },
    {
      "epoch": 0.15763590844062947,
      "grad_norm": 0.14863096606784157,
      "learning_rate": 0.00019158161803917975,
      "loss": 0.7075,
      "step": 1763
    },
    {
      "epoch": 0.157725321888412,
      "grad_norm": 0.1263822433079371,
      "learning_rate": 0.0001915699838852482,
      "loss": 0.7338,
      "step": 1764
    },
    {
      "epoch": 0.15781473533619456,
      "grad_norm": 0.12805944121824503,
      "learning_rate": 0.0001915583420514527,
      "loss": 0.6982,
      "step": 1765
    },
    {
      "epoch": 0.1579041487839771,
      "grad_norm": 0.1344984324508085,
      "learning_rate": 0.00019154669253876962,
      "loss": 0.6958,
      "step": 1766
    },
    {
      "epoch": 0.15799356223175964,
      "grad_norm": 0.14334561715237332,
      "learning_rate": 0.000191535035348176,
      "loss": 0.714,
      "step": 1767
    },
    {
      "epoch": 0.15808297567954221,
      "grad_norm": 0.12517920155516787,
      "learning_rate": 0.00019152337048064947,
      "loss": 0.6937,
      "step": 1768
    },
    {
      "epoch": 0.15817238912732476,
      "grad_norm": 0.1270528731607595,
      "learning_rate": 0.00019151169793716843,
      "loss": 0.674,
      "step": 1769
    },
    {
      "epoch": 0.1582618025751073,
      "grad_norm": 0.15270818767416866,
      "learning_rate": 0.0001915000177187118,
      "loss": 0.7318,
      "step": 1770
    },
    {
      "epoch": 0.15835121602288985,
      "grad_norm": 0.10989448520685886,
      "learning_rate": 0.00019148832982625918,
      "loss": 0.6471,
      "step": 1771
    },
    {
      "epoch": 0.1584406294706724,
      "grad_norm": 0.13890266815833577,
      "learning_rate": 0.00019147663426079083,
      "loss": 0.7189,
      "step": 1772
    },
    {
      "epoch": 0.15853004291845493,
      "grad_norm": 0.1448665769443341,
      "learning_rate": 0.00019146493102328765,
      "loss": 0.6532,
      "step": 1773
    },
    {
      "epoch": 0.15861945636623748,
      "grad_norm": 0.14547842717860984,
      "learning_rate": 0.00019145322011473117,
      "loss": 0.7022,
      "step": 1774
    },
    {
      "epoch": 0.15870886981402002,
      "grad_norm": 0.1312542477033763,
      "learning_rate": 0.00019144150153610354,
      "loss": 0.7067,
      "step": 1775
    },
    {
      "epoch": 0.15879828326180256,
      "grad_norm": 0.13995054443000743,
      "learning_rate": 0.00019142977528838762,
      "loss": 0.7,
      "step": 1776
    },
    {
      "epoch": 0.1588876967095851,
      "grad_norm": 0.14081966965126755,
      "learning_rate": 0.00019141804137256686,
      "loss": 0.6933,
      "step": 1777
    },
    {
      "epoch": 0.15897711015736768,
      "grad_norm": 0.1273427214419422,
      "learning_rate": 0.0001914062997896254,
      "loss": 0.6789,
      "step": 1778
    },
    {
      "epoch": 0.15906652360515022,
      "grad_norm": 0.12370281387720485,
      "learning_rate": 0.00019139455054054794,
      "loss": 0.6589,
      "step": 1779
    },
    {
      "epoch": 0.15915593705293277,
      "grad_norm": 0.1374338685214712,
      "learning_rate": 0.0001913827936263199,
      "loss": 0.7225,
      "step": 1780
    },
    {
      "epoch": 0.1592453505007153,
      "grad_norm": 0.13755715590239845,
      "learning_rate": 0.00019137102904792736,
      "loss": 0.7146,
      "step": 1781
    },
    {
      "epoch": 0.15933476394849785,
      "grad_norm": 0.1546258107722819,
      "learning_rate": 0.00019135925680635694,
      "loss": 0.7026,
      "step": 1782
    },
    {
      "epoch": 0.1594241773962804,
      "grad_norm": 0.147748425490426,
      "learning_rate": 0.00019134747690259597,
      "loss": 0.3308,
      "step": 1783
    },
    {
      "epoch": 0.15951359084406294,
      "grad_norm": 0.15184719812090766,
      "learning_rate": 0.00019133568933763244,
      "loss": 0.7514,
      "step": 1784
    },
    {
      "epoch": 0.15960300429184548,
      "grad_norm": 0.15094136844624947,
      "learning_rate": 0.00019132389411245497,
      "loss": 0.7758,
      "step": 1785
    },
    {
      "epoch": 0.15969241773962803,
      "grad_norm": 0.1268352500740089,
      "learning_rate": 0.00019131209122805277,
      "loss": 0.673,
      "step": 1786
    },
    {
      "epoch": 0.1597818311874106,
      "grad_norm": 0.1317075966119242,
      "learning_rate": 0.00019130028068541576,
      "loss": 0.6788,
      "step": 1787
    },
    {
      "epoch": 0.15987124463519314,
      "grad_norm": 0.12143735521665147,
      "learning_rate": 0.0001912884624855345,
      "loss": 0.6882,
      "step": 1788
    },
    {
      "epoch": 0.1599606580829757,
      "grad_norm": 0.13344409216030964,
      "learning_rate": 0.0001912766366294001,
      "loss": 0.7117,
      "step": 1789
    },
    {
      "epoch": 0.16005007153075823,
      "grad_norm": 0.1380497435711661,
      "learning_rate": 0.00019126480311800444,
      "loss": 0.6649,
      "step": 1790
    },
    {
      "epoch": 0.16013948497854077,
      "grad_norm": 0.13216613637678973,
      "learning_rate": 0.00019125296195233996,
      "loss": 0.7197,
      "step": 1791
    },
    {
      "epoch": 0.16022889842632332,
      "grad_norm": 0.13080907733343441,
      "learning_rate": 0.00019124111313339976,
      "loss": 0.6685,
      "step": 1792
    },
    {
      "epoch": 0.16031831187410586,
      "grad_norm": 0.12918372513561815,
      "learning_rate": 0.0001912292566621776,
      "loss": 0.7105,
      "step": 1793
    },
    {
      "epoch": 0.1604077253218884,
      "grad_norm": 0.1334621120237485,
      "learning_rate": 0.00019121739253966785,
      "loss": 0.6448,
      "step": 1794
    },
    {
      "epoch": 0.16049713876967095,
      "grad_norm": 0.13064333327481434,
      "learning_rate": 0.00019120552076686554,
      "loss": 0.6689,
      "step": 1795
    },
    {
      "epoch": 0.1605865522174535,
      "grad_norm": 0.14888913969465123,
      "learning_rate": 0.0001911936413447664,
      "loss": 0.7597,
      "step": 1796
    },
    {
      "epoch": 0.16067596566523606,
      "grad_norm": 0.1422009460159121,
      "learning_rate": 0.00019118175427436666,
      "loss": 0.6945,
      "step": 1797
    },
    {
      "epoch": 0.1607653791130186,
      "grad_norm": 0.11696237990657317,
      "learning_rate": 0.0001911698595566633,
      "loss": 0.7051,
      "step": 1798
    },
    {
      "epoch": 0.16085479256080115,
      "grad_norm": 0.1561448309465992,
      "learning_rate": 0.00019115795719265395,
      "loss": 0.7509,
      "step": 1799
    },
    {
      "epoch": 0.1609442060085837,
      "grad_norm": 0.144210111190638,
      "learning_rate": 0.0001911460471833368,
      "loss": 0.69,
      "step": 1800
    },
    {
      "epoch": 0.16103361945636624,
      "grad_norm": 0.14071456923859008,
      "learning_rate": 0.00019113412952971077,
      "loss": 0.7113,
      "step": 1801
    },
    {
      "epoch": 0.16112303290414878,
      "grad_norm": 0.13749824561794155,
      "learning_rate": 0.00019112220423277534,
      "loss": 0.6631,
      "step": 1802
    },
    {
      "epoch": 0.16121244635193133,
      "grad_norm": 0.15764334382330436,
      "learning_rate": 0.0001911102712935307,
      "loss": 0.728,
      "step": 1803
    },
    {
      "epoch": 0.16130185979971387,
      "grad_norm": 0.13041634639428254,
      "learning_rate": 0.00019109833071297763,
      "loss": 0.6892,
      "step": 1804
    },
    {
      "epoch": 0.16139127324749641,
      "grad_norm": 0.1411135698251852,
      "learning_rate": 0.00019108638249211758,
      "loss": 0.7139,
      "step": 1805
    },
    {
      "epoch": 0.16148068669527896,
      "grad_norm": 0.13847855754477142,
      "learning_rate": 0.00019107442663195265,
      "loss": 0.6942,
      "step": 1806
    },
    {
      "epoch": 0.16157010014306153,
      "grad_norm": 0.14129713162426927,
      "learning_rate": 0.00019106246313348554,
      "loss": 0.7073,
      "step": 1807
    },
    {
      "epoch": 0.16165951359084407,
      "grad_norm": 0.13449136125899658,
      "learning_rate": 0.00019105049199771962,
      "loss": 0.7277,
      "step": 1808
    },
    {
      "epoch": 0.16174892703862662,
      "grad_norm": 0.21667861882580067,
      "learning_rate": 0.00019103851322565892,
      "loss": 0.3834,
      "step": 1809
    },
    {
      "epoch": 0.16183834048640916,
      "grad_norm": 0.1397846094308107,
      "learning_rate": 0.00019102652681830804,
      "loss": 0.722,
      "step": 1810
    },
    {
      "epoch": 0.1619277539341917,
      "grad_norm": 0.14378116617921807,
      "learning_rate": 0.00019101453277667226,
      "loss": 0.719,
      "step": 1811
    },
    {
      "epoch": 0.16201716738197425,
      "grad_norm": 0.13598164368771673,
      "learning_rate": 0.00019100253110175758,
      "loss": 0.6888,
      "step": 1812
    },
    {
      "epoch": 0.1621065808297568,
      "grad_norm": 0.1425816289357811,
      "learning_rate": 0.00019099052179457054,
      "loss": 0.7328,
      "step": 1813
    },
    {
      "epoch": 0.16219599427753933,
      "grad_norm": 0.13639689867326754,
      "learning_rate": 0.00019097850485611827,
      "loss": 0.6938,
      "step": 1814
    },
    {
      "epoch": 0.16228540772532188,
      "grad_norm": 0.14141005987362446,
      "learning_rate": 0.00019096648028740868,
      "loss": 0.7068,
      "step": 1815
    },
    {
      "epoch": 0.16237482117310442,
      "grad_norm": 0.14412878679407717,
      "learning_rate": 0.00019095444808945027,
      "loss": 0.697,
      "step": 1816
    },
    {
      "epoch": 0.162464234620887,
      "grad_norm": 0.1297531381790781,
      "learning_rate": 0.00019094240826325213,
      "loss": 0.6525,
      "step": 1817
    },
    {
      "epoch": 0.16255364806866954,
      "grad_norm": 0.13805414557475318,
      "learning_rate": 0.00019093036080982404,
      "loss": 0.7258,
      "step": 1818
    },
    {
      "epoch": 0.16264306151645208,
      "grad_norm": 0.1509577605389202,
      "learning_rate": 0.0001909183057301764,
      "loss": 0.7204,
      "step": 1819
    },
    {
      "epoch": 0.16273247496423462,
      "grad_norm": 0.14589010602069286,
      "learning_rate": 0.0001909062430253203,
      "loss": 0.7161,
      "step": 1820
    },
    {
      "epoch": 0.16282188841201717,
      "grad_norm": 0.13030328530614863,
      "learning_rate": 0.00019089417269626733,
      "loss": 0.7224,
      "step": 1821
    },
    {
      "epoch": 0.1629113018597997,
      "grad_norm": 0.15145328840176675,
      "learning_rate": 0.00019088209474402992,
      "loss": 0.7254,
      "step": 1822
    },
    {
      "epoch": 0.16300071530758226,
      "grad_norm": 0.15521884387869153,
      "learning_rate": 0.00019087000916962095,
      "loss": 0.7143,
      "step": 1823
    },
    {
      "epoch": 0.1630901287553648,
      "grad_norm": 0.13633914567324024,
      "learning_rate": 0.00019085791597405404,
      "loss": 0.704,
      "step": 1824
    },
    {
      "epoch": 0.16317954220314734,
      "grad_norm": 0.12183496984900873,
      "learning_rate": 0.00019084581515834347,
      "loss": 0.6742,
      "step": 1825
    },
    {
      "epoch": 0.1632689556509299,
      "grad_norm": 0.12830150699161763,
      "learning_rate": 0.00019083370672350408,
      "loss": 0.714,
      "step": 1826
    },
    {
      "epoch": 0.16335836909871246,
      "grad_norm": 0.14800362144983936,
      "learning_rate": 0.0001908215906705514,
      "loss": 0.6904,
      "step": 1827
    },
    {
      "epoch": 0.163447782546495,
      "grad_norm": 0.12735795593657778,
      "learning_rate": 0.00019080946700050162,
      "loss": 0.6697,
      "step": 1828
    },
    {
      "epoch": 0.16353719599427755,
      "grad_norm": 0.14505065766060504,
      "learning_rate": 0.00019079733571437154,
      "loss": 0.7003,
      "step": 1829
    },
    {
      "epoch": 0.1636266094420601,
      "grad_norm": 0.16426316328468307,
      "learning_rate": 0.0001907851968131785,
      "loss": 0.7621,
      "step": 1830
    },
    {
      "epoch": 0.16371602288984263,
      "grad_norm": 0.1515242290162983,
      "learning_rate": 0.00019077305029794068,
      "loss": 0.7141,
      "step": 1831
    },
    {
      "epoch": 0.16380543633762518,
      "grad_norm": 0.14275641866827238,
      "learning_rate": 0.00019076089616967677,
      "loss": 0.6956,
      "step": 1832
    },
    {
      "epoch": 0.16389484978540772,
      "grad_norm": 0.14835485971715032,
      "learning_rate": 0.0001907487344294061,
      "loss": 0.721,
      "step": 1833
    },
    {
      "epoch": 0.16398426323319026,
      "grad_norm": 0.1488544606072828,
      "learning_rate": 0.00019073656507814866,
      "loss": 0.7221,
      "step": 1834
    },
    {
      "epoch": 0.1640736766809728,
      "grad_norm": 0.13715960448332704,
      "learning_rate": 0.00019072438811692507,
      "loss": 0.7121,
      "step": 1835
    },
    {
      "epoch": 0.16416309012875535,
      "grad_norm": 0.13487639296666018,
      "learning_rate": 0.00019071220354675665,
      "loss": 0.7117,
      "step": 1836
    },
    {
      "epoch": 0.16425250357653792,
      "grad_norm": 0.14683959952203623,
      "learning_rate": 0.00019070001136866526,
      "loss": 0.7062,
      "step": 1837
    },
    {
      "epoch": 0.16434191702432047,
      "grad_norm": 0.14624403204793493,
      "learning_rate": 0.00019068781158367346,
      "loss": 0.7365,
      "step": 1838
    },
    {
      "epoch": 0.164431330472103,
      "grad_norm": 0.14124939665157496,
      "learning_rate": 0.00019067560419280442,
      "loss": 0.7013,
      "step": 1839
    },
    {
      "epoch": 0.16452074391988555,
      "grad_norm": 0.1569295451096544,
      "learning_rate": 0.00019066338919708197,
      "loss": 0.7067,
      "step": 1840
    },
    {
      "epoch": 0.1646101573676681,
      "grad_norm": 0.12163512449351954,
      "learning_rate": 0.00019065116659753054,
      "loss": 0.708,
      "step": 1841
    },
    {
      "epoch": 0.16469957081545064,
      "grad_norm": 0.13305137890553814,
      "learning_rate": 0.00019063893639517527,
      "loss": 0.6893,
      "step": 1842
    },
    {
      "epoch": 0.16478898426323318,
      "grad_norm": 0.13119458930028732,
      "learning_rate": 0.00019062669859104187,
      "loss": 0.7213,
      "step": 1843
    },
    {
      "epoch": 0.16487839771101573,
      "grad_norm": 0.14452735689773166,
      "learning_rate": 0.0001906144531861567,
      "loss": 0.6816,
      "step": 1844
    },
    {
      "epoch": 0.16496781115879827,
      "grad_norm": 0.12836972858209122,
      "learning_rate": 0.00019060220018154677,
      "loss": 0.6465,
      "step": 1845
    },
    {
      "epoch": 0.16505722460658084,
      "grad_norm": 0.1221677757306901,
      "learning_rate": 0.00019058993957823974,
      "loss": 0.6847,
      "step": 1846
    },
    {
      "epoch": 0.1651466380543634,
      "grad_norm": 0.12846065117298816,
      "learning_rate": 0.00019057767137726388,
      "loss": 0.6659,
      "step": 1847
    },
    {
      "epoch": 0.16523605150214593,
      "grad_norm": 0.14108494783658648,
      "learning_rate": 0.00019056539557964813,
      "loss": 0.6855,
      "step": 1848
    },
    {
      "epoch": 0.16532546494992847,
      "grad_norm": 0.14860078426998485,
      "learning_rate": 0.000190553112186422,
      "loss": 0.7334,
      "step": 1849
    },
    {
      "epoch": 0.16541487839771102,
      "grad_norm": 0.16009964413600059,
      "learning_rate": 0.00019054082119861573,
      "loss": 0.7358,
      "step": 1850
    },
    {
      "epoch": 0.16550429184549356,
      "grad_norm": 0.15309138875145498,
      "learning_rate": 0.0001905285226172601,
      "loss": 0.7129,
      "step": 1851
    },
    {
      "epoch": 0.1655937052932761,
      "grad_norm": 0.1350951873052478,
      "learning_rate": 0.00019051621644338665,
      "loss": 0.7279,
      "step": 1852
    },
    {
      "epoch": 0.16568311874105865,
      "grad_norm": 0.1398810763308177,
      "learning_rate": 0.0001905039026780274,
      "loss": 0.7341,
      "step": 1853
    },
    {
      "epoch": 0.1657725321888412,
      "grad_norm": 0.162025878915607,
      "learning_rate": 0.00019049158132221515,
      "loss": 0.7087,
      "step": 1854
    },
    {
      "epoch": 0.16586194563662374,
      "grad_norm": 0.13436353097367548,
      "learning_rate": 0.0001904792523769833,
      "loss": 0.7282,
      "step": 1855
    },
    {
      "epoch": 0.1659513590844063,
      "grad_norm": 0.14522010163706728,
      "learning_rate": 0.00019046691584336577,
      "loss": 0.6865,
      "step": 1856
    },
    {
      "epoch": 0.16604077253218885,
      "grad_norm": 0.13486788911924313,
      "learning_rate": 0.0001904545717223973,
      "loss": 0.7168,
      "step": 1857
    },
    {
      "epoch": 0.1661301859799714,
      "grad_norm": 0.13614999564214772,
      "learning_rate": 0.00019044222001511312,
      "loss": 0.7286,
      "step": 1858
    },
    {
      "epoch": 0.16621959942775394,
      "grad_norm": 0.1453152811879836,
      "learning_rate": 0.00019042986072254919,
      "loss": 0.6852,
      "step": 1859
    },
    {
      "epoch": 0.16630901287553648,
      "grad_norm": 0.13198032471984797,
      "learning_rate": 0.00019041749384574204,
      "loss": 0.7061,
      "step": 1860
    },
    {
      "epoch": 0.16639842632331903,
      "grad_norm": 0.13592459758845307,
      "learning_rate": 0.0001904051193857289,
      "loss": 0.7335,
      "step": 1861
    },
    {
      "epoch": 0.16648783977110157,
      "grad_norm": 0.14008177323292498,
      "learning_rate": 0.00019039273734354755,
      "loss": 0.6575,
      "step": 1862
    },
    {
      "epoch": 0.1665772532188841,
      "grad_norm": 0.14173387190514572,
      "learning_rate": 0.00019038034772023648,
      "loss": 0.7547,
      "step": 1863
    },
    {
      "epoch": 0.16666666666666666,
      "grad_norm": 0.13333509840960123,
      "learning_rate": 0.00019036795051683483,
      "loss": 0.6657,
      "step": 1864
    },
    {
      "epoch": 0.1667560801144492,
      "grad_norm": 0.12392969546243836,
      "learning_rate": 0.0001903555457343823,
      "loss": 0.6657,
      "step": 1865
    },
    {
      "epoch": 0.16684549356223177,
      "grad_norm": 0.13346819086629905,
      "learning_rate": 0.00019034313337391924,
      "loss": 0.6822,
      "step": 1866
    },
    {
      "epoch": 0.16693490701001432,
      "grad_norm": 0.14933455671642992,
      "learning_rate": 0.00019033071343648673,
      "loss": 0.7042,
      "step": 1867
    },
    {
      "epoch": 0.16702432045779686,
      "grad_norm": 0.15138473993500515,
      "learning_rate": 0.00019031828592312635,
      "loss": 0.706,
      "step": 1868
    },
    {
      "epoch": 0.1671137339055794,
      "grad_norm": 0.13149597193300816,
      "learning_rate": 0.00019030585083488043,
      "loss": 0.6992,
      "step": 1869
    },
    {
      "epoch": 0.16720314735336195,
      "grad_norm": 0.13840166437436394,
      "learning_rate": 0.00019029340817279183,
      "loss": 0.6744,
      "step": 1870
    },
    {
      "epoch": 0.1672925608011445,
      "grad_norm": 0.13163064769182542,
      "learning_rate": 0.00019028095793790416,
      "loss": 0.6869,
      "step": 1871
    },
    {
      "epoch": 0.16738197424892703,
      "grad_norm": 0.13518112240697522,
      "learning_rate": 0.00019026850013126157,
      "loss": 0.7188,
      "step": 1872
    },
    {
      "epoch": 0.16747138769670958,
      "grad_norm": 0.13568467158982056,
      "learning_rate": 0.0001902560347539089,
      "loss": 0.7115,
      "step": 1873
    },
    {
      "epoch": 0.16756080114449212,
      "grad_norm": 0.13886391337851528,
      "learning_rate": 0.00019024356180689158,
      "loss": 0.6917,
      "step": 1874
    },
    {
      "epoch": 0.16765021459227467,
      "grad_norm": 0.14402654208259533,
      "learning_rate": 0.00019023108129125572,
      "loss": 0.7393,
      "step": 1875
    },
    {
      "epoch": 0.16773962804005724,
      "grad_norm": 0.17744533358280393,
      "learning_rate": 0.00019021859320804806,
      "loss": 0.7611,
      "step": 1876
    },
    {
      "epoch": 0.16782904148783978,
      "grad_norm": 0.12278472470997691,
      "learning_rate": 0.00019020609755831592,
      "loss": 0.7032,
      "step": 1877
    },
    {
      "epoch": 0.16791845493562232,
      "grad_norm": 0.1553477737281354,
      "learning_rate": 0.00019019359434310738,
      "loss": 0.6938,
      "step": 1878
    },
    {
      "epoch": 0.16800786838340487,
      "grad_norm": 0.13004404900697225,
      "learning_rate": 0.00019018108356347094,
      "loss": 0.6649,
      "step": 1879
    },
    {
      "epoch": 0.1680972818311874,
      "grad_norm": 0.14066991659995215,
      "learning_rate": 0.00019016856522045597,
      "loss": 0.6674,
      "step": 1880
    },
    {
      "epoch": 0.16818669527896996,
      "grad_norm": 0.1157438802485788,
      "learning_rate": 0.0001901560393151123,
      "loss": 0.6596,
      "step": 1881
    },
    {
      "epoch": 0.1682761087267525,
      "grad_norm": 0.15518747906237604,
      "learning_rate": 0.00019014350584849052,
      "loss": 0.7236,
      "step": 1882
    },
    {
      "epoch": 0.16836552217453504,
      "grad_norm": 0.14667716433586309,
      "learning_rate": 0.00019013096482164177,
      "loss": 0.7213,
      "step": 1883
    },
    {
      "epoch": 0.1684549356223176,
      "grad_norm": 0.18090443186914368,
      "learning_rate": 0.00019011841623561783,
      "loss": 0.3547,
      "step": 1884
    },
    {
      "epoch": 0.16854434907010013,
      "grad_norm": 0.16385569601652006,
      "learning_rate": 0.00019010586009147117,
      "loss": 0.3631,
      "step": 1885
    },
    {
      "epoch": 0.1686337625178827,
      "grad_norm": 0.15363193183318083,
      "learning_rate": 0.00019009329639025483,
      "loss": 0.699,
      "step": 1886
    },
    {
      "epoch": 0.16872317596566525,
      "grad_norm": 0.17062132649394154,
      "learning_rate": 0.00019008072513302255,
      "loss": 0.7405,
      "step": 1887
    },
    {
      "epoch": 0.1688125894134478,
      "grad_norm": 0.15244910967808345,
      "learning_rate": 0.00019006814632082863,
      "loss": 0.74,
      "step": 1888
    },
    {
      "epoch": 0.16890200286123033,
      "grad_norm": 0.18189849748202264,
      "learning_rate": 0.00019005555995472805,
      "loss": 0.7664,
      "step": 1889
    },
    {
      "epoch": 0.16899141630901288,
      "grad_norm": 0.1481259239474397,
      "learning_rate": 0.00019004296603577646,
      "loss": 0.6813,
      "step": 1890
    },
    {
      "epoch": 0.16908082975679542,
      "grad_norm": 0.23154505231513967,
      "learning_rate": 0.00019003036456503,
      "loss": 0.3371,
      "step": 1891
    },
    {
      "epoch": 0.16917024320457796,
      "grad_norm": 0.1403586822850786,
      "learning_rate": 0.0001900177555435456,
      "loss": 0.7094,
      "step": 1892
    },
    {
      "epoch": 0.1692596566523605,
      "grad_norm": 0.1333523103509462,
      "learning_rate": 0.00019000513897238076,
      "loss": 0.7469,
      "step": 1893
    },
    {
      "epoch": 0.16934907010014305,
      "grad_norm": 0.12314012285684356,
      "learning_rate": 0.00018999251485259363,
      "loss": 0.6823,
      "step": 1894
    },
    {
      "epoch": 0.1694384835479256,
      "grad_norm": 0.12376780505841439,
      "learning_rate": 0.00018997988318524293,
      "loss": 0.7243,
      "step": 1895
    },
    {
      "epoch": 0.16952789699570817,
      "grad_norm": 0.12801821038584238,
      "learning_rate": 0.00018996724397138813,
      "loss": 0.7239,
      "step": 1896
    },
    {
      "epoch": 0.1696173104434907,
      "grad_norm": 0.1582826738894331,
      "learning_rate": 0.0001899545972120892,
      "loss": 0.6759,
      "step": 1897
    },
    {
      "epoch": 0.16970672389127325,
      "grad_norm": 0.15208900674912276,
      "learning_rate": 0.00018994194290840687,
      "loss": 0.733,
      "step": 1898
    },
    {
      "epoch": 0.1697961373390558,
      "grad_norm": 0.1360453262404092,
      "learning_rate": 0.0001899292810614024,
      "loss": 0.6878,
      "step": 1899
    },
    {
      "epoch": 0.16988555078683834,
      "grad_norm": 0.1515404701887519,
      "learning_rate": 0.00018991661167213773,
      "loss": 0.7608,
      "step": 1900
    },
    {
      "epoch": 0.16997496423462088,
      "grad_norm": 0.1429142822375736,
      "learning_rate": 0.00018990393474167542,
      "loss": 0.7296,
      "step": 1901
    },
    {
      "epoch": 0.17006437768240343,
      "grad_norm": 0.13641716468062345,
      "learning_rate": 0.0001898912502710787,
      "loss": 0.7243,
      "step": 1902
    },
    {
      "epoch": 0.17015379113018597,
      "grad_norm": 0.13359079710398591,
      "learning_rate": 0.00018987855826141137,
      "loss": 0.709,
      "step": 1903
    },
    {
      "epoch": 0.17024320457796852,
      "grad_norm": 0.13159833116729072,
      "learning_rate": 0.0001898658587137379,
      "loss": 0.6835,
      "step": 1904
    },
    {
      "epoch": 0.17033261802575106,
      "grad_norm": 0.14938050648241996,
      "learning_rate": 0.0001898531516291234,
      "loss": 0.7213,
      "step": 1905
    },
    {
      "epoch": 0.17042203147353363,
      "grad_norm": 0.12510442878498326,
      "learning_rate": 0.00018984043700863356,
      "loss": 0.6602,
      "step": 1906
    },
    {
      "epoch": 0.17051144492131617,
      "grad_norm": 0.14364565149992056,
      "learning_rate": 0.0001898277148533348,
      "loss": 0.7327,
      "step": 1907
    },
    {
      "epoch": 0.17060085836909872,
      "grad_norm": 0.14250639649146493,
      "learning_rate": 0.0001898149851642941,
      "loss": 0.6648,
      "step": 1908
    },
    {
      "epoch": 0.17069027181688126,
      "grad_norm": 0.13439656416578402,
      "learning_rate": 0.00018980224794257905,
      "loss": 0.736,
      "step": 1909
    },
    {
      "epoch": 0.1707796852646638,
      "grad_norm": 0.2689100499229044,
      "learning_rate": 0.0001897895031892579,
      "loss": 0.3852,
      "step": 1910
    },
    {
      "epoch": 0.17086909871244635,
      "grad_norm": 0.13763202976545713,
      "learning_rate": 0.00018977675090539955,
      "loss": 0.73,
      "step": 1911
    },
    {
      "epoch": 0.1709585121602289,
      "grad_norm": 0.14800401185529435,
      "learning_rate": 0.00018976399109207353,
      "loss": 0.7155,
      "step": 1912
    },
    {
      "epoch": 0.17104792560801144,
      "grad_norm": 0.1465238924007225,
      "learning_rate": 0.00018975122375035,
      "loss": 0.7315,
      "step": 1913
    },
    {
      "epoch": 0.17113733905579398,
      "grad_norm": 0.14138045378257122,
      "learning_rate": 0.0001897384488812997,
      "loss": 0.71,
      "step": 1914
    },
    {
      "epoch": 0.17122675250357655,
      "grad_norm": 0.13014800452716563,
      "learning_rate": 0.00018972566648599407,
      "loss": 0.6945,
      "step": 1915
    },
    {
      "epoch": 0.1713161659513591,
      "grad_norm": 0.13153822022418016,
      "learning_rate": 0.0001897128765655052,
      "loss": 0.6597,
      "step": 1916
    },
    {
      "epoch": 0.17140557939914164,
      "grad_norm": 0.1663729141333768,
      "learning_rate": 0.00018970007912090566,
      "loss": 0.728,
      "step": 1917
    },
    {
      "epoch": 0.17149499284692418,
      "grad_norm": 0.1439411548836202,
      "learning_rate": 0.00018968727415326884,
      "loss": 0.7289,
      "step": 1918
    },
    {
      "epoch": 0.17158440629470673,
      "grad_norm": 0.15269004620552287,
      "learning_rate": 0.00018967446166366867,
      "loss": 0.7172,
      "step": 1919
    },
    {
      "epoch": 0.17167381974248927,
      "grad_norm": 0.14799671867773245,
      "learning_rate": 0.00018966164165317966,
      "loss": 0.7324,
      "step": 1920
    },
    {
      "epoch": 0.1717632331902718,
      "grad_norm": 0.12305284039740448,
      "learning_rate": 0.00018964881412287708,
      "loss": 0.6903,
      "step": 1921
    },
    {
      "epoch": 0.17185264663805436,
      "grad_norm": 0.14443617587523397,
      "learning_rate": 0.00018963597907383672,
      "loss": 0.7142,
      "step": 1922
    },
    {
      "epoch": 0.1719420600858369,
      "grad_norm": 0.1278594886211318,
      "learning_rate": 0.00018962313650713503,
      "loss": 0.6825,
      "step": 1923
    },
    {
      "epoch": 0.17203147353361944,
      "grad_norm": 0.1490208163903855,
      "learning_rate": 0.00018961028642384915,
      "loss": 0.6969,
      "step": 1924
    },
    {
      "epoch": 0.17212088698140202,
      "grad_norm": 0.14584435805663443,
      "learning_rate": 0.00018959742882505674,
      "loss": 0.7131,
      "step": 1925
    },
    {
      "epoch": 0.17221030042918456,
      "grad_norm": 0.14159903970094168,
      "learning_rate": 0.00018958456371183618,
      "loss": 0.6856,
      "step": 1926
    },
    {
      "epoch": 0.1722997138769671,
      "grad_norm": 0.12892504114710804,
      "learning_rate": 0.00018957169108526646,
      "loss": 0.6765,
      "step": 1927
    },
    {
      "epoch": 0.17238912732474965,
      "grad_norm": 0.15865122504200363,
      "learning_rate": 0.00018955881094642721,
      "loss": 0.7138,
      "step": 1928
    },
    {
      "epoch": 0.1724785407725322,
      "grad_norm": 0.14989929504760183,
      "learning_rate": 0.00018954592329639862,
      "loss": 0.7035,
      "step": 1929
    },
    {
      "epoch": 0.17256795422031473,
      "grad_norm": 0.14115466955204106,
      "learning_rate": 0.00018953302813626158,
      "loss": 0.6929,
      "step": 1930
    },
    {
      "epoch": 0.17265736766809728,
      "grad_norm": 0.13888780712628981,
      "learning_rate": 0.00018952012546709764,
      "loss": 0.6715,
      "step": 1931
    },
    {
      "epoch": 0.17274678111587982,
      "grad_norm": 0.12980784344095633,
      "learning_rate": 0.00018950721528998885,
      "loss": 0.6544,
      "step": 1932
    },
    {
      "epoch": 0.17283619456366237,
      "grad_norm": 0.16595440761292155,
      "learning_rate": 0.00018949429760601802,
      "loss": 0.7627,
      "step": 1933
    },
    {
      "epoch": 0.1729256080114449,
      "grad_norm": 0.14346121939292023,
      "learning_rate": 0.00018948137241626853,
      "loss": 0.7016,
      "step": 1934
    },
    {
      "epoch": 0.17301502145922748,
      "grad_norm": 0.12928381231740416,
      "learning_rate": 0.0001894684397218244,
      "loss": 0.6792,
      "step": 1935
    },
    {
      "epoch": 0.17310443490701002,
      "grad_norm": 0.13383783281316353,
      "learning_rate": 0.0001894554995237703,
      "loss": 0.6825,
      "step": 1936
    },
    {
      "epoch": 0.17319384835479257,
      "grad_norm": 0.15078676388624265,
      "learning_rate": 0.00018944255182319148,
      "loss": 0.7253,
      "step": 1937
    },
    {
      "epoch": 0.1732832618025751,
      "grad_norm": 0.15211359086842913,
      "learning_rate": 0.00018942959662117384,
      "loss": 0.7637,
      "step": 1938
    },
    {
      "epoch": 0.17337267525035766,
      "grad_norm": 0.13841436598048923,
      "learning_rate": 0.00018941663391880396,
      "loss": 0.666,
      "step": 1939
    },
    {
      "epoch": 0.1734620886981402,
      "grad_norm": 0.13691743734338369,
      "learning_rate": 0.00018940366371716897,
      "loss": 0.7018,
      "step": 1940
    },
    {
      "epoch": 0.17355150214592274,
      "grad_norm": 0.14980082428368652,
      "learning_rate": 0.00018939068601735666,
      "loss": 0.7357,
      "step": 1941
    },
    {
      "epoch": 0.1736409155937053,
      "grad_norm": 0.1424072936723062,
      "learning_rate": 0.0001893777008204555,
      "loss": 0.7126,
      "step": 1942
    },
    {
      "epoch": 0.17373032904148783,
      "grad_norm": 0.14058397568819733,
      "learning_rate": 0.0001893647081275545,
      "loss": 0.695,
      "step": 1943
    },
    {
      "epoch": 0.17381974248927037,
      "grad_norm": 0.11427936905251707,
      "learning_rate": 0.00018935170793974335,
      "loss": 0.6849,
      "step": 1944
    },
    {
      "epoch": 0.17390915593705294,
      "grad_norm": 0.13495065627538014,
      "learning_rate": 0.00018933870025811237,
      "loss": 0.6861,
      "step": 1945
    },
    {
      "epoch": 0.1739985693848355,
      "grad_norm": 0.13710836951589717,
      "learning_rate": 0.0001893256850837525,
      "loss": 0.716,
      "step": 1946
    },
    {
      "epoch": 0.17408798283261803,
      "grad_norm": 0.13195124205923467,
      "learning_rate": 0.0001893126624177553,
      "loss": 0.6939,
      "step": 1947
    },
    {
      "epoch": 0.17417739628040058,
      "grad_norm": 0.13184217369225695,
      "learning_rate": 0.00018929963226121295,
      "loss": 0.7196,
      "step": 1948
    },
    {
      "epoch": 0.17426680972818312,
      "grad_norm": 0.14552033414245535,
      "learning_rate": 0.0001892865946152183,
      "loss": 0.7214,
      "step": 1949
    },
    {
      "epoch": 0.17435622317596566,
      "grad_norm": 0.12398998753385358,
      "learning_rate": 0.0001892735494808648,
      "loss": 0.728,
      "step": 1950
    },
    {
      "epoch": 0.1744456366237482,
      "grad_norm": 0.10984633636000705,
      "learning_rate": 0.0001892604968592465,
      "loss": 0.6415,
      "step": 1951
    },
    {
      "epoch": 0.17453505007153075,
      "grad_norm": 0.12444883628727174,
      "learning_rate": 0.00018924743675145813,
      "loss": 0.6685,
      "step": 1952
    },
    {
      "epoch": 0.1746244635193133,
      "grad_norm": 0.12758912337174064,
      "learning_rate": 0.00018923436915859503,
      "loss": 0.6525,
      "step": 1953
    },
    {
      "epoch": 0.17471387696709584,
      "grad_norm": 0.19966903418931606,
      "learning_rate": 0.00018922129408175314,
      "loss": 0.4025,
      "step": 1954
    },
    {
      "epoch": 0.1748032904148784,
      "grad_norm": 0.1409288642677228,
      "learning_rate": 0.0001892082115220291,
      "loss": 0.7209,
      "step": 1955
    },
    {
      "epoch": 0.17489270386266095,
      "grad_norm": 0.15508506769450098,
      "learning_rate": 0.00018919512148052005,
      "loss": 0.743,
      "step": 1956
    },
    {
      "epoch": 0.1749821173104435,
      "grad_norm": 0.13944403060063573,
      "learning_rate": 0.0001891820239583239,
      "loss": 0.7147,
      "step": 1957
    },
    {
      "epoch": 0.17507153075822604,
      "grad_norm": 0.15248583880061167,
      "learning_rate": 0.00018916891895653915,
      "loss": 0.7328,
      "step": 1958
    },
    {
      "epoch": 0.17516094420600858,
      "grad_norm": 0.13950410677874375,
      "learning_rate": 0.0001891558064762648,
      "loss": 0.6934,
      "step": 1959
    },
    {
      "epoch": 0.17525035765379113,
      "grad_norm": 0.14309147650767592,
      "learning_rate": 0.00018914268651860067,
      "loss": 0.6833,
      "step": 1960
    },
    {
      "epoch": 0.17533977110157367,
      "grad_norm": 0.15636479627106495,
      "learning_rate": 0.00018912955908464708,
      "loss": 0.7147,
      "step": 1961
    },
    {
      "epoch": 0.17542918454935622,
      "grad_norm": 0.1762426443185663,
      "learning_rate": 0.00018911642417550497,
      "loss": 0.7497,
      "step": 1962
    },
    {
      "epoch": 0.17551859799713876,
      "grad_norm": 0.15275895586975038,
      "learning_rate": 0.00018910328179227605,
      "loss": 0.3639,
      "step": 1963
    },
    {
      "epoch": 0.1756080114449213,
      "grad_norm": 0.1496003128470242,
      "learning_rate": 0.0001890901319360624,
      "loss": 0.7326,
      "step": 1964
    },
    {
      "epoch": 0.17569742489270387,
      "grad_norm": 0.162235304321082,
      "learning_rate": 0.00018907697460796707,
      "loss": 0.7005,
      "step": 1965
    },
    {
      "epoch": 0.17578683834048642,
      "grad_norm": 0.14164544560211234,
      "learning_rate": 0.00018906380980909343,
      "loss": 0.7062,
      "step": 1966
    },
    {
      "epoch": 0.17587625178826896,
      "grad_norm": 0.1473997391277559,
      "learning_rate": 0.00018905063754054563,
      "loss": 0.7023,
      "step": 1967
    },
    {
      "epoch": 0.1759656652360515,
      "grad_norm": 0.14304659021973307,
      "learning_rate": 0.00018903745780342839,
      "loss": 0.7222,
      "step": 1968
    },
    {
      "epoch": 0.17605507868383405,
      "grad_norm": 0.13626850230361906,
      "learning_rate": 0.00018902427059884708,
      "loss": 0.7057,
      "step": 1969
    },
    {
      "epoch": 0.1761444921316166,
      "grad_norm": 0.13630505413990693,
      "learning_rate": 0.00018901107592790776,
      "loss": 0.6938,
      "step": 1970
    },
    {
      "epoch": 0.17623390557939914,
      "grad_norm": 0.13698641117655597,
      "learning_rate": 0.00018899787379171693,
      "loss": 0.6724,
      "step": 1971
    },
    {
      "epoch": 0.17632331902718168,
      "grad_norm": 0.14379089238157702,
      "learning_rate": 0.00018898466419138197,
      "loss": 0.722,
      "step": 1972
    },
    {
      "epoch": 0.17641273247496422,
      "grad_norm": 0.1275220111909541,
      "learning_rate": 0.00018897144712801066,
      "loss": 0.7051,
      "step": 1973
    },
    {
      "epoch": 0.1765021459227468,
      "grad_norm": 0.13792432144803998,
      "learning_rate": 0.00018895822260271152,
      "loss": 0.6864,
      "step": 1974
    },
    {
      "epoch": 0.17659155937052934,
      "grad_norm": 0.13961083838284194,
      "learning_rate": 0.0001889449906165937,
      "loss": 0.7333,
      "step": 1975
    },
    {
      "epoch": 0.17668097281831188,
      "grad_norm": 0.13722370402617565,
      "learning_rate": 0.00018893175117076693,
      "loss": 0.6947,
      "step": 1976
    },
    {
      "epoch": 0.17677038626609443,
      "grad_norm": 0.1552414653681275,
      "learning_rate": 0.00018891850426634162,
      "loss": 0.7568,
      "step": 1977
    },
    {
      "epoch": 0.17685979971387697,
      "grad_norm": 0.12830168224990832,
      "learning_rate": 0.00018890524990442873,
      "loss": 0.6651,
      "step": 1978
    },
    {
      "epoch": 0.1769492131616595,
      "grad_norm": 0.13207290549849626,
      "learning_rate": 0.00018889198808613985,
      "loss": 0.6806,
      "step": 1979
    },
    {
      "epoch": 0.17703862660944206,
      "grad_norm": 0.15445083588569014,
      "learning_rate": 0.00018887871881258735,
      "loss": 0.7541,
      "step": 1980
    },
    {
      "epoch": 0.1771280400572246,
      "grad_norm": 0.14131930883705762,
      "learning_rate": 0.000188865442084884,
      "loss": 0.6699,
      "step": 1981
    },
    {
      "epoch": 0.17721745350500714,
      "grad_norm": 0.13779202973946686,
      "learning_rate": 0.00018885215790414336,
      "loss": 0.7016,
      "step": 1982
    },
    {
      "epoch": 0.1773068669527897,
      "grad_norm": 0.13500865641345788,
      "learning_rate": 0.00018883886627147955,
      "loss": 0.7133,
      "step": 1983
    },
    {
      "epoch": 0.17739628040057226,
      "grad_norm": 0.13984893099616744,
      "learning_rate": 0.0001888255671880073,
      "loss": 0.6844,
      "step": 1984
    },
    {
      "epoch": 0.1774856938483548,
      "grad_norm": 0.13611911571228502,
      "learning_rate": 0.00018881226065484204,
      "loss": 0.686,
      "step": 1985
    },
    {
      "epoch": 0.17757510729613735,
      "grad_norm": 0.1475551281307728,
      "learning_rate": 0.0001887989466730997,
      "loss": 0.7316,
      "step": 1986
    },
    {
      "epoch": 0.1776645207439199,
      "grad_norm": 0.15050157833531447,
      "learning_rate": 0.00018878562524389696,
      "loss": 0.7379,
      "step": 1987
    },
    {
      "epoch": 0.17775393419170243,
      "grad_norm": 0.1466763668377586,
      "learning_rate": 0.00018877229636835106,
      "loss": 0.6969,
      "step": 1988
    },
    {
      "epoch": 0.17784334763948498,
      "grad_norm": 0.1507659189435184,
      "learning_rate": 0.00018875896004757984,
      "loss": 0.7479,
      "step": 1989
    },
    {
      "epoch": 0.17793276108726752,
      "grad_norm": 0.1263087734977256,
      "learning_rate": 0.00018874561628270188,
      "loss": 0.6956,
      "step": 1990
    },
    {
      "epoch": 0.17802217453505007,
      "grad_norm": 0.15805177350354438,
      "learning_rate": 0.00018873226507483623,
      "loss": 0.7743,
      "step": 1991
    },
    {
      "epoch": 0.1781115879828326,
      "grad_norm": 0.12704082193727603,
      "learning_rate": 0.0001887189064251027,
      "loss": 0.6543,
      "step": 1992
    },
    {
      "epoch": 0.17820100143061515,
      "grad_norm": 0.13717757949379905,
      "learning_rate": 0.00018870554033462159,
      "loss": 0.7261,
      "step": 1993
    },
    {
      "epoch": 0.17829041487839772,
      "grad_norm": 0.15703076551654543,
      "learning_rate": 0.00018869216680451398,
      "loss": 0.7363,
      "step": 1994
    },
    {
      "epoch": 0.17837982832618027,
      "grad_norm": 0.12197886350506336,
      "learning_rate": 0.0001886787858359014,
      "loss": 0.6897,
      "step": 1995
    },
    {
      "epoch": 0.1784692417739628,
      "grad_norm": 0.14319555029297243,
      "learning_rate": 0.0001886653974299062,
      "loss": 0.7281,
      "step": 1996
    },
    {
      "epoch": 0.17855865522174535,
      "grad_norm": 0.1295808054112932,
      "learning_rate": 0.0001886520015876512,
      "loss": 0.6617,
      "step": 1997
    },
    {
      "epoch": 0.1786480686695279,
      "grad_norm": 0.11956136484493923,
      "learning_rate": 0.00018863859831025988,
      "loss": 0.6612,
      "step": 1998
    },
    {
      "epoch": 0.17873748211731044,
      "grad_norm": 0.13670743207960281,
      "learning_rate": 0.00018862518759885636,
      "loss": 0.6721,
      "step": 1999
    },
    {
      "epoch": 0.17882689556509299,
      "grad_norm": 0.14847487241450905,
      "learning_rate": 0.0001886117694545654,
      "loss": 0.7257,
      "step": 2000
    },
    {
      "epoch": 0.17891630901287553,
      "grad_norm": 0.13011752833188384,
      "learning_rate": 0.00018859834387851233,
      "loss": 0.6872,
      "step": 2001
    },
    {
      "epoch": 0.17900572246065807,
      "grad_norm": 0.13282299352747867,
      "learning_rate": 0.00018858491087182317,
      "loss": 0.6995,
      "step": 2002
    },
    {
      "epoch": 0.17909513590844062,
      "grad_norm": 0.1509498431992379,
      "learning_rate": 0.00018857147043562452,
      "loss": 0.7105,
      "step": 2003
    },
    {
      "epoch": 0.1791845493562232,
      "grad_norm": 0.12290866110058155,
      "learning_rate": 0.00018855802257104363,
      "loss": 0.6568,
      "step": 2004
    },
    {
      "epoch": 0.17927396280400573,
      "grad_norm": 0.14972356403356873,
      "learning_rate": 0.0001885445672792083,
      "loss": 0.7232,
      "step": 2005
    },
    {
      "epoch": 0.17936337625178828,
      "grad_norm": 0.13160783400394474,
      "learning_rate": 0.00018853110456124709,
      "loss": 0.7052,
      "step": 2006
    },
    {
      "epoch": 0.17945278969957082,
      "grad_norm": 0.1336450835844608,
      "learning_rate": 0.00018851763441828903,
      "loss": 0.6858,
      "step": 2007
    },
    {
      "epoch": 0.17954220314735336,
      "grad_norm": 0.13553286277640023,
      "learning_rate": 0.00018850415685146387,
      "loss": 0.7037,
      "step": 2008
    },
    {
      "epoch": 0.1796316165951359,
      "grad_norm": 0.1557549681826165,
      "learning_rate": 0.00018849067186190198,
      "loss": 0.6919,
      "step": 2009
    },
    {
      "epoch": 0.17972103004291845,
      "grad_norm": 0.13682749802386315,
      "learning_rate": 0.0001884771794507343,
      "loss": 0.7115,
      "step": 2010
    },
    {
      "epoch": 0.179810443490701,
      "grad_norm": 0.14928979185675206,
      "learning_rate": 0.00018846367961909244,
      "loss": 0.7037,
      "step": 2011
    },
    {
      "epoch": 0.17989985693848354,
      "grad_norm": 0.158535869881409,
      "learning_rate": 0.0001884501723681086,
      "loss": 0.7175,
      "step": 2012
    },
    {
      "epoch": 0.17998927038626608,
      "grad_norm": 0.12117293087417409,
      "learning_rate": 0.00018843665769891562,
      "loss": 0.6639,
      "step": 2013
    },
    {
      "epoch": 0.18007868383404865,
      "grad_norm": 0.15352361485062038,
      "learning_rate": 0.00018842313561264696,
      "loss": 0.7148,
      "step": 2014
    },
    {
      "epoch": 0.1801680972818312,
      "grad_norm": 0.12671890178527215,
      "learning_rate": 0.0001884096061104367,
      "loss": 0.6966,
      "step": 2015
    },
    {
      "epoch": 0.18025751072961374,
      "grad_norm": 0.14782648954712202,
      "learning_rate": 0.0001883960691934196,
      "loss": 0.7625,
      "step": 2016
    },
    {
      "epoch": 0.18034692417739628,
      "grad_norm": 0.1315406639500855,
      "learning_rate": 0.00018838252486273087,
      "loss": 0.6853,
      "step": 2017
    },
    {
      "epoch": 0.18043633762517883,
      "grad_norm": 0.12129308950853686,
      "learning_rate": 0.00018836897311950653,
      "loss": 0.6841,
      "step": 2018
    },
    {
      "epoch": 0.18052575107296137,
      "grad_norm": 0.128631678912637,
      "learning_rate": 0.00018835541396488315,
      "loss": 0.6608,
      "step": 2019
    },
    {
      "epoch": 0.18061516452074391,
      "grad_norm": 0.12390902213940819,
      "learning_rate": 0.00018834184739999793,
      "loss": 0.6562,
      "step": 2020
    },
    {
      "epoch": 0.18070457796852646,
      "grad_norm": 0.13572227103912557,
      "learning_rate": 0.00018832827342598861,
      "loss": 0.6992,
      "step": 2021
    },
    {
      "epoch": 0.180793991416309,
      "grad_norm": 0.14783430064459677,
      "learning_rate": 0.0001883146920439937,
      "loss": 0.7044,
      "step": 2022
    },
    {
      "epoch": 0.18088340486409155,
      "grad_norm": 0.1281577641091319,
      "learning_rate": 0.00018830110325515222,
      "loss": 0.6783,
      "step": 2023
    },
    {
      "epoch": 0.18097281831187412,
      "grad_norm": 0.14549038052400018,
      "learning_rate": 0.00018828750706060385,
      "loss": 0.708,
      "step": 2024
    },
    {
      "epoch": 0.18106223175965666,
      "grad_norm": 0.14122158216657574,
      "learning_rate": 0.00018827390346148887,
      "loss": 0.6883,
      "step": 2025
    },
    {
      "epoch": 0.1811516452074392,
      "grad_norm": 0.14859866683344897,
      "learning_rate": 0.00018826029245894827,
      "loss": 0.7369,
      "step": 2026
    },
    {
      "epoch": 0.18124105865522175,
      "grad_norm": 0.1736841334252909,
      "learning_rate": 0.00018824667405412348,
      "loss": 0.7332,
      "step": 2027
    },
    {
      "epoch": 0.1813304721030043,
      "grad_norm": 0.13359384321873244,
      "learning_rate": 0.00018823304824815672,
      "loss": 0.7102,
      "step": 2028
    },
    {
      "epoch": 0.18141988555078684,
      "grad_norm": 0.1269249908471165,
      "learning_rate": 0.0001882194150421908,
      "loss": 0.6342,
      "step": 2029
    },
    {
      "epoch": 0.18150929899856938,
      "grad_norm": 0.13283807743465034,
      "learning_rate": 0.00018820577443736904,
      "loss": 0.7116,
      "step": 2030
    },
    {
      "epoch": 0.18159871244635192,
      "grad_norm": 0.15180721406134443,
      "learning_rate": 0.0001881921264348355,
      "loss": 0.7077,
      "step": 2031
    },
    {
      "epoch": 0.18168812589413447,
      "grad_norm": 0.13806582296281866,
      "learning_rate": 0.00018817847103573486,
      "loss": 0.6862,
      "step": 2032
    },
    {
      "epoch": 0.181777539341917,
      "grad_norm": 0.1333884746788651,
      "learning_rate": 0.00018816480824121232,
      "loss": 0.6837,
      "step": 2033
    },
    {
      "epoch": 0.18186695278969958,
      "grad_norm": 0.1339490176023313,
      "learning_rate": 0.0001881511380524138,
      "loss": 0.686,
      "step": 2034
    },
    {
      "epoch": 0.18195636623748213,
      "grad_norm": 0.14339396569269744,
      "learning_rate": 0.0001881374604704858,
      "loss": 0.7111,
      "step": 2035
    },
    {
      "epoch": 0.18204577968526467,
      "grad_norm": 0.1332122103532693,
      "learning_rate": 0.0001881237754965754,
      "loss": 0.6629,
      "step": 2036
    },
    {
      "epoch": 0.1821351931330472,
      "grad_norm": 0.1497310670078888,
      "learning_rate": 0.0001881100831318304,
      "loss": 0.7358,
      "step": 2037
    },
    {
      "epoch": 0.18222460658082976,
      "grad_norm": 0.1470671334239069,
      "learning_rate": 0.00018809638337739915,
      "loss": 0.7212,
      "step": 2038
    },
    {
      "epoch": 0.1823140200286123,
      "grad_norm": 0.15023701501482328,
      "learning_rate": 0.0001880826762344306,
      "loss": 0.7052,
      "step": 2039
    },
    {
      "epoch": 0.18240343347639484,
      "grad_norm": 0.13402008262337917,
      "learning_rate": 0.00018806896170407437,
      "loss": 0.7014,
      "step": 2040
    },
    {
      "epoch": 0.1824928469241774,
      "grad_norm": 0.1374521205468929,
      "learning_rate": 0.00018805523978748068,
      "loss": 0.7075,
      "step": 2041
    },
    {
      "epoch": 0.18258226037195993,
      "grad_norm": 0.12938908720973288,
      "learning_rate": 0.0001880415104858004,
      "loss": 0.6825,
      "step": 2042
    },
    {
      "epoch": 0.1826716738197425,
      "grad_norm": 0.1303059537318243,
      "learning_rate": 0.00018802777380018496,
      "loss": 0.709,
      "step": 2043
    },
    {
      "epoch": 0.18276108726752505,
      "grad_norm": 0.14478935313664537,
      "learning_rate": 0.00018801402973178642,
      "loss": 0.679,
      "step": 2044
    },
    {
      "epoch": 0.1828505007153076,
      "grad_norm": 0.12550131074229612,
      "learning_rate": 0.0001880002782817575,
      "loss": 0.6707,
      "step": 2045
    },
    {
      "epoch": 0.18293991416309013,
      "grad_norm": 0.1409139660584533,
      "learning_rate": 0.00018798651945125153,
      "loss": 0.6546,
      "step": 2046
    },
    {
      "epoch": 0.18302932761087268,
      "grad_norm": 0.14470698163288656,
      "learning_rate": 0.00018797275324142242,
      "loss": 0.7121,
      "step": 2047
    },
    {
      "epoch": 0.18311874105865522,
      "grad_norm": 0.13139773377781744,
      "learning_rate": 0.00018795897965342474,
      "loss": 0.7008,
      "step": 2048
    },
    {
      "epoch": 0.18320815450643776,
      "grad_norm": 0.15047520353232713,
      "learning_rate": 0.00018794519868841367,
      "loss": 0.7072,
      "step": 2049
    },
    {
      "epoch": 0.1832975679542203,
      "grad_norm": 0.14711783653906635,
      "learning_rate": 0.000187931410347545,
      "loss": 0.6881,
      "step": 2050
    },
    {
      "epoch": 0.18338698140200285,
      "grad_norm": 0.13810464378697312,
      "learning_rate": 0.00018791761463197513,
      "loss": 0.6964,
      "step": 2051
    },
    {
      "epoch": 0.1834763948497854,
      "grad_norm": 0.13767781737888535,
      "learning_rate": 0.00018790381154286113,
      "loss": 0.6751,
      "step": 2052
    },
    {
      "epoch": 0.18356580829756797,
      "grad_norm": 0.14947563914777162,
      "learning_rate": 0.00018789000108136058,
      "loss": 0.7184,
      "step": 2053
    },
    {
      "epoch": 0.1836552217453505,
      "grad_norm": 0.14102776617806406,
      "learning_rate": 0.0001878761832486318,
      "loss": 0.6991,
      "step": 2054
    },
    {
      "epoch": 0.18374463519313305,
      "grad_norm": 0.13491865701662972,
      "learning_rate": 0.00018786235804583366,
      "loss": 0.7062,
      "step": 2055
    },
    {
      "epoch": 0.1838340486409156,
      "grad_norm": 0.13325932310718638,
      "learning_rate": 0.00018784852547412565,
      "loss": 0.7431,
      "step": 2056
    },
    {
      "epoch": 0.18392346208869814,
      "grad_norm": 0.1306233344179176,
      "learning_rate": 0.0001878346855346679,
      "loss": 0.7131,
      "step": 2057
    },
    {
      "epoch": 0.18401287553648069,
      "grad_norm": 0.12548346181392,
      "learning_rate": 0.00018782083822862114,
      "loss": 0.6763,
      "step": 2058
    },
    {
      "epoch": 0.18410228898426323,
      "grad_norm": 0.15187167377627053,
      "learning_rate": 0.0001878069835571468,
      "loss": 0.6964,
      "step": 2059
    },
    {
      "epoch": 0.18419170243204577,
      "grad_norm": 0.1269803610644461,
      "learning_rate": 0.00018779312152140674,
      "loss": 0.6982,
      "step": 2060
    },
    {
      "epoch": 0.18428111587982832,
      "grad_norm": 0.12792824643890996,
      "learning_rate": 0.0001877792521225636,
      "loss": 0.6854,
      "step": 2061
    },
    {
      "epoch": 0.18437052932761086,
      "grad_norm": 0.15558294860878188,
      "learning_rate": 0.00018776537536178064,
      "loss": 0.6827,
      "step": 2062
    },
    {
      "epoch": 0.18445994277539343,
      "grad_norm": 0.12487399223160707,
      "learning_rate": 0.00018775149124022162,
      "loss": 0.6973,
      "step": 2063
    },
    {
      "epoch": 0.18454935622317598,
      "grad_norm": 0.14422679135989191,
      "learning_rate": 0.00018773759975905098,
      "loss": 0.6858,
      "step": 2064
    },
    {
      "epoch": 0.18463876967095852,
      "grad_norm": 0.12737944441652463,
      "learning_rate": 0.00018772370091943384,
      "loss": 0.6892,
      "step": 2065
    },
    {
      "epoch": 0.18472818311874106,
      "grad_norm": 0.14469361121773283,
      "learning_rate": 0.00018770979472253581,
      "loss": 0.7199,
      "step": 2066
    },
    {
      "epoch": 0.1848175965665236,
      "grad_norm": 0.13760652667168027,
      "learning_rate": 0.0001876958811695233,
      "loss": 0.6646,
      "step": 2067
    },
    {
      "epoch": 0.18490701001430615,
      "grad_norm": 0.13697706197598442,
      "learning_rate": 0.00018768196026156306,
      "loss": 0.6799,
      "step": 2068
    },
    {
      "epoch": 0.1849964234620887,
      "grad_norm": 0.13819172119533862,
      "learning_rate": 0.00018766803199982273,
      "loss": 0.652,
      "step": 2069
    },
    {
      "epoch": 0.18508583690987124,
      "grad_norm": 0.1501927372422546,
      "learning_rate": 0.00018765409638547048,
      "loss": 0.7168,
      "step": 2070
    },
    {
      "epoch": 0.18517525035765378,
      "grad_norm": 0.1679680987052416,
      "learning_rate": 0.00018764015341967498,
      "loss": 0.6571,
      "step": 2071
    },
    {
      "epoch": 0.18526466380543632,
      "grad_norm": 0.14314050348149948,
      "learning_rate": 0.00018762620310360567,
      "loss": 0.6932,
      "step": 2072
    },
    {
      "epoch": 0.1853540772532189,
      "grad_norm": 0.1465730752210987,
      "learning_rate": 0.00018761224543843255,
      "loss": 0.7524,
      "step": 2073
    },
    {
      "epoch": 0.18544349070100144,
      "grad_norm": 0.14224402919562107,
      "learning_rate": 0.00018759828042532616,
      "loss": 0.645,
      "step": 2074
    },
    {
      "epoch": 0.18553290414878398,
      "grad_norm": 0.13482778798076983,
      "learning_rate": 0.00018758430806545783,
      "loss": 0.7232,
      "step": 2075
    },
    {
      "epoch": 0.18562231759656653,
      "grad_norm": 0.1537972789867172,
      "learning_rate": 0.00018757032835999931,
      "loss": 0.7283,
      "step": 2076
    },
    {
      "epoch": 0.18571173104434907,
      "grad_norm": 0.1397559906110671,
      "learning_rate": 0.00018755634131012317,
      "loss": 0.7202,
      "step": 2077
    },
    {
      "epoch": 0.18580114449213161,
      "grad_norm": 0.15993331803570968,
      "learning_rate": 0.00018754234691700238,
      "loss": 0.7651,
      "step": 2078
    },
    {
      "epoch": 0.18589055793991416,
      "grad_norm": 0.13929874460611547,
      "learning_rate": 0.00018752834518181072,
      "loss": 0.698,
      "step": 2079
    },
    {
      "epoch": 0.1859799713876967,
      "grad_norm": 0.14928449425090834,
      "learning_rate": 0.00018751433610572242,
      "loss": 0.7363,
      "step": 2080
    },
    {
      "epoch": 0.18606938483547925,
      "grad_norm": 0.1467089874610228,
      "learning_rate": 0.00018750031968991243,
      "loss": 0.7148,
      "step": 2081
    },
    {
      "epoch": 0.1861587982832618,
      "grad_norm": 0.1650589102329835,
      "learning_rate": 0.00018748629593555633,
      "loss": 0.7129,
      "step": 2082
    },
    {
      "epoch": 0.18624821173104436,
      "grad_norm": 0.12462652622475333,
      "learning_rate": 0.00018747226484383024,
      "loss": 0.6688,
      "step": 2083
    },
    {
      "epoch": 0.1863376251788269,
      "grad_norm": 0.13376766601580406,
      "learning_rate": 0.00018745822641591094,
      "loss": 0.7021,
      "step": 2084
    },
    {
      "epoch": 0.18642703862660945,
      "grad_norm": 0.13666049924717763,
      "learning_rate": 0.00018744418065297583,
      "loss": 0.7059,
      "step": 2085
    },
    {
      "epoch": 0.186516452074392,
      "grad_norm": 0.14285697227808222,
      "learning_rate": 0.00018743012755620286,
      "loss": 0.6863,
      "step": 2086
    },
    {
      "epoch": 0.18660586552217454,
      "grad_norm": 0.13969264183119515,
      "learning_rate": 0.0001874160671267707,
      "loss": 0.7167,
      "step": 2087
    },
    {
      "epoch": 0.18669527896995708,
      "grad_norm": 0.12561007565925753,
      "learning_rate": 0.00018740199936585853,
      "loss": 0.6945,
      "step": 2088
    },
    {
      "epoch": 0.18678469241773962,
      "grad_norm": 0.13511994880299316,
      "learning_rate": 0.00018738792427464625,
      "loss": 0.6822,
      "step": 2089
    },
    {
      "epoch": 0.18687410586552217,
      "grad_norm": 0.1301193760790992,
      "learning_rate": 0.00018737384185431432,
      "loss": 0.6874,
      "step": 2090
    },
    {
      "epoch": 0.1869635193133047,
      "grad_norm": 0.1431655595411482,
      "learning_rate": 0.00018735975210604376,
      "loss": 0.7241,
      "step": 2091
    },
    {
      "epoch": 0.18705293276108725,
      "grad_norm": 0.16937814264241044,
      "learning_rate": 0.00018734565503101636,
      "loss": 0.7406,
      "step": 2092
    },
    {
      "epoch": 0.18714234620886983,
      "grad_norm": 0.1195528152295847,
      "learning_rate": 0.0001873315506304143,
      "loss": 0.6476,
      "step": 2093
    },
    {
      "epoch": 0.18723175965665237,
      "grad_norm": 0.125250429013572,
      "learning_rate": 0.00018731743890542058,
      "loss": 0.6895,
      "step": 2094
    },
    {
      "epoch": 0.1873211731044349,
      "grad_norm": 0.13707561568692936,
      "learning_rate": 0.0001873033198572187,
      "loss": 0.6886,
      "step": 2095
    },
    {
      "epoch": 0.18741058655221746,
      "grad_norm": 0.14784455004772565,
      "learning_rate": 0.00018728919348699283,
      "loss": 0.7229,
      "step": 2096
    },
    {
      "epoch": 0.1875,
      "grad_norm": 0.1354849594077841,
      "learning_rate": 0.0001872750597959277,
      "loss": 0.7269,
      "step": 2097
    },
    {
      "epoch": 0.18758941344778254,
      "grad_norm": 0.14508924401247625,
      "learning_rate": 0.00018726091878520871,
      "loss": 0.6824,
      "step": 2098
    },
    {
      "epoch": 0.1876788268955651,
      "grad_norm": 0.13547125840505891,
      "learning_rate": 0.00018724677045602186,
      "loss": 0.6715,
      "step": 2099
    },
    {
      "epoch": 0.18776824034334763,
      "grad_norm": 0.12698168423328068,
      "learning_rate": 0.00018723261480955373,
      "loss": 0.6952,
      "step": 2100
    },
    {
      "epoch": 0.18785765379113017,
      "grad_norm": 0.13708076990865595,
      "learning_rate": 0.00018721845184699158,
      "loss": 0.6649,
      "step": 2101
    },
    {
      "epoch": 0.18794706723891275,
      "grad_norm": 0.13017568618240932,
      "learning_rate": 0.00018720428156952316,
      "loss": 0.652,
      "step": 2102
    },
    {
      "epoch": 0.1880364806866953,
      "grad_norm": 0.14491923513047336,
      "learning_rate": 0.00018719010397833698,
      "loss": 0.6914,
      "step": 2103
    },
    {
      "epoch": 0.18812589413447783,
      "grad_norm": 0.14508522230755985,
      "learning_rate": 0.00018717591907462208,
      "loss": 0.6571,
      "step": 2104
    },
    {
      "epoch": 0.18821530758226038,
      "grad_norm": 0.1422122654599022,
      "learning_rate": 0.00018716172685956815,
      "loss": 0.6837,
      "step": 2105
    },
    {
      "epoch": 0.18830472103004292,
      "grad_norm": 0.11777134817871526,
      "learning_rate": 0.0001871475273343654,
      "loss": 0.6588,
      "step": 2106
    },
    {
      "epoch": 0.18839413447782546,
      "grad_norm": 0.15269021114671152,
      "learning_rate": 0.00018713332050020482,
      "loss": 0.7209,
      "step": 2107
    },
    {
      "epoch": 0.188483547925608,
      "grad_norm": 0.14331643673658717,
      "learning_rate": 0.00018711910635827787,
      "loss": 0.6682,
      "step": 2108
    },
    {
      "epoch": 0.18857296137339055,
      "grad_norm": 0.1530259062325271,
      "learning_rate": 0.0001871048849097767,
      "loss": 0.7375,
      "step": 2109
    },
    {
      "epoch": 0.1886623748211731,
      "grad_norm": 0.13765362775585033,
      "learning_rate": 0.000187090656155894,
      "loss": 0.7004,
      "step": 2110
    },
    {
      "epoch": 0.18875178826895564,
      "grad_norm": 0.13412206234620236,
      "learning_rate": 0.00018707642009782317,
      "loss": 0.7333,
      "step": 2111
    },
    {
      "epoch": 0.1888412017167382,
      "grad_norm": 0.16264089748876445,
      "learning_rate": 0.00018706217673675811,
      "loss": 0.7522,
      "step": 2112
    },
    {
      "epoch": 0.18893061516452075,
      "grad_norm": 0.14493713509787504,
      "learning_rate": 0.00018704792607389346,
      "loss": 0.7345,
      "step": 2113
    },
    {
      "epoch": 0.1890200286123033,
      "grad_norm": 0.13447441930990198,
      "learning_rate": 0.00018703366811042438,
      "loss": 0.6602,
      "step": 2114
    },
    {
      "epoch": 0.18910944206008584,
      "grad_norm": 0.1462040181977366,
      "learning_rate": 0.00018701940284754665,
      "loss": 0.7034,
      "step": 2115
    },
    {
      "epoch": 0.18919885550786839,
      "grad_norm": 0.12072475263503349,
      "learning_rate": 0.00018700513028645672,
      "loss": 0.6886,
      "step": 2116
    },
    {
      "epoch": 0.18928826895565093,
      "grad_norm": 0.12572310331740236,
      "learning_rate": 0.00018699085042835157,
      "loss": 0.6747,
      "step": 2117
    },
    {
      "epoch": 0.18937768240343347,
      "grad_norm": 0.1515479904248112,
      "learning_rate": 0.00018697656327442888,
      "loss": 0.3554,
      "step": 2118
    },
    {
      "epoch": 0.18946709585121602,
      "grad_norm": 0.13563833324568217,
      "learning_rate": 0.00018696226882588683,
      "loss": 0.6914,
      "step": 2119
    },
    {
      "epoch": 0.18955650929899856,
      "grad_norm": 0.1429252880360971,
      "learning_rate": 0.00018694796708392436,
      "loss": 0.6814,
      "step": 2120
    },
    {
      "epoch": 0.1896459227467811,
      "grad_norm": 0.1449040824215457,
      "learning_rate": 0.00018693365804974086,
      "loss": 0.7141,
      "step": 2121
    },
    {
      "epoch": 0.18973533619456368,
      "grad_norm": 0.13933731522846685,
      "learning_rate": 0.00018691934172453646,
      "loss": 0.7005,
      "step": 2122
    },
    {
      "epoch": 0.18982474964234622,
      "grad_norm": 0.1503129799915789,
      "learning_rate": 0.00018690501810951182,
      "loss": 0.6848,
      "step": 2123
    },
    {
      "epoch": 0.18991416309012876,
      "grad_norm": 0.1447388249544256,
      "learning_rate": 0.0001868906872058683,
      "loss": 0.6646,
      "step": 2124
    },
    {
      "epoch": 0.1900035765379113,
      "grad_norm": 0.13263773059667328,
      "learning_rate": 0.00018687634901480777,
      "loss": 0.6731,
      "step": 2125
    },
    {
      "epoch": 0.19009298998569385,
      "grad_norm": 0.1460384126803639,
      "learning_rate": 0.00018686200353753275,
      "loss": 0.6942,
      "step": 2126
    },
    {
      "epoch": 0.1901824034334764,
      "grad_norm": 0.12410113856390267,
      "learning_rate": 0.00018684765077524643,
      "loss": 0.6967,
      "step": 2127
    },
    {
      "epoch": 0.19027181688125894,
      "grad_norm": 0.14688987466461625,
      "learning_rate": 0.00018683329072915252,
      "loss": 0.7008,
      "step": 2128
    },
    {
      "epoch": 0.19036123032904148,
      "grad_norm": 0.14658759945342092,
      "learning_rate": 0.00018681892340045538,
      "loss": 0.7417,
      "step": 2129
    },
    {
      "epoch": 0.19045064377682402,
      "grad_norm": 0.13596463876496487,
      "learning_rate": 0.00018680454879035997,
      "loss": 0.7086,
      "step": 2130
    },
    {
      "epoch": 0.19054005722460657,
      "grad_norm": 0.14367575273932698,
      "learning_rate": 0.0001867901669000719,
      "loss": 0.6633,
      "step": 2131
    },
    {
      "epoch": 0.19062947067238914,
      "grad_norm": 0.1549236626240954,
      "learning_rate": 0.00018677577773079733,
      "loss": 0.7016,
      "step": 2132
    },
    {
      "epoch": 0.19071888412017168,
      "grad_norm": 0.14161602485670294,
      "learning_rate": 0.00018676138128374313,
      "loss": 0.7027,
      "step": 2133
    },
    {
      "epoch": 0.19080829756795423,
      "grad_norm": 0.1554841949013603,
      "learning_rate": 0.0001867469775601166,
      "loss": 0.7142,
      "step": 2134
    },
    {
      "epoch": 0.19089771101573677,
      "grad_norm": 0.13956273645385883,
      "learning_rate": 0.00018673256656112584,
      "loss": 0.697,
      "step": 2135
    },
    {
      "epoch": 0.19098712446351931,
      "grad_norm": 0.14225641344773454,
      "learning_rate": 0.0001867181482879795,
      "loss": 0.7145,
      "step": 2136
    },
    {
      "epoch": 0.19107653791130186,
      "grad_norm": 0.19233249668422797,
      "learning_rate": 0.00018670372274188677,
      "loss": 0.3732,
      "step": 2137
    },
    {
      "epoch": 0.1911659513590844,
      "grad_norm": 0.13231451932082328,
      "learning_rate": 0.00018668928992405755,
      "loss": 0.656,
      "step": 2138
    },
    {
      "epoch": 0.19125536480686695,
      "grad_norm": 0.15423799092373972,
      "learning_rate": 0.00018667484983570223,
      "loss": 0.7098,
      "step": 2139
    },
    {
      "epoch": 0.1913447782546495,
      "grad_norm": 0.14977845084172506,
      "learning_rate": 0.00018666040247803195,
      "loss": 0.7244,
      "step": 2140
    },
    {
      "epoch": 0.19143419170243203,
      "grad_norm": 0.13368105315021714,
      "learning_rate": 0.0001866459478522584,
      "loss": 0.6342,
      "step": 2141
    },
    {
      "epoch": 0.1915236051502146,
      "grad_norm": 0.12571203835491593,
      "learning_rate": 0.0001866314859595938,
      "loss": 0.7033,
      "step": 2142
    },
    {
      "epoch": 0.19161301859799715,
      "grad_norm": 0.12727984468161832,
      "learning_rate": 0.00018661701680125115,
      "loss": 0.6732,
      "step": 2143
    },
    {
      "epoch": 0.1917024320457797,
      "grad_norm": 0.1467619624176278,
      "learning_rate": 0.00018660254037844388,
      "loss": 0.7396,
      "step": 2144
    },
    {
      "epoch": 0.19179184549356224,
      "grad_norm": 0.14801226863750797,
      "learning_rate": 0.00018658805669238612,
      "loss": 0.7223,
      "step": 2145
    },
    {
      "epoch": 0.19188125894134478,
      "grad_norm": 0.12738200885328693,
      "learning_rate": 0.00018657356574429266,
      "loss": 0.6792,
      "step": 2146
    },
    {
      "epoch": 0.19197067238912732,
      "grad_norm": 0.14751873805149476,
      "learning_rate": 0.00018655906753537878,
      "loss": 0.6907,
      "step": 2147
    },
    {
      "epoch": 0.19206008583690987,
      "grad_norm": 0.13585514935560616,
      "learning_rate": 0.00018654456206686042,
      "loss": 0.6855,
      "step": 2148
    },
    {
      "epoch": 0.1921494992846924,
      "grad_norm": 0.13641650167033675,
      "learning_rate": 0.00018653004933995418,
      "loss": 0.7145,
      "step": 2149
    },
    {
      "epoch": 0.19223891273247495,
      "grad_norm": 0.13675260399086506,
      "learning_rate": 0.00018651552935587717,
      "loss": 0.6976,
      "step": 2150
    },
    {
      "epoch": 0.1923283261802575,
      "grad_norm": 0.14636219804517428,
      "learning_rate": 0.00018650100211584723,
      "loss": 0.6726,
      "step": 2151
    },
    {
      "epoch": 0.19241773962804007,
      "grad_norm": 0.1464845996163211,
      "learning_rate": 0.00018648646762108273,
      "loss": 0.6282,
      "step": 2152
    },
    {
      "epoch": 0.1925071530758226,
      "grad_norm": 0.13393700639278602,
      "learning_rate": 0.0001864719258728026,
      "loss": 0.6881,
      "step": 2153
    },
    {
      "epoch": 0.19259656652360516,
      "grad_norm": 0.15085300698697604,
      "learning_rate": 0.0001864573768722265,
      "loss": 0.6947,
      "step": 2154
    },
    {
      "epoch": 0.1926859799713877,
      "grad_norm": 0.13692638817276884,
      "learning_rate": 0.0001864428206205746,
      "loss": 0.6825,
      "step": 2155
    },
    {
      "epoch": 0.19277539341917024,
      "grad_norm": 0.13141800020175673,
      "learning_rate": 0.00018642825711906772,
      "loss": 0.6893,
      "step": 2156
    },
    {
      "epoch": 0.1928648068669528,
      "grad_norm": 0.13425835179339754,
      "learning_rate": 0.00018641368636892734,
      "loss": 0.6678,
      "step": 2157
    },
    {
      "epoch": 0.19295422031473533,
      "grad_norm": 0.15486000023960142,
      "learning_rate": 0.00018639910837137542,
      "loss": 0.7424,
      "step": 2158
    },
    {
      "epoch": 0.19304363376251787,
      "grad_norm": 0.14037453215092124,
      "learning_rate": 0.0001863845231276346,
      "loss": 0.6571,
      "step": 2159
    },
    {
      "epoch": 0.19313304721030042,
      "grad_norm": 0.16247061003679547,
      "learning_rate": 0.0001863699306389282,
      "loss": 0.7299,
      "step": 2160
    },
    {
      "epoch": 0.193222460658083,
      "grad_norm": 0.14671979561882348,
      "learning_rate": 0.00018635533090647998,
      "loss": 0.68,
      "step": 2161
    },
    {
      "epoch": 0.19331187410586553,
      "grad_norm": 0.14556734550239808,
      "learning_rate": 0.00018634072393151446,
      "loss": 0.7013,
      "step": 2162
    },
    {
      "epoch": 0.19340128755364808,
      "grad_norm": 0.14370654763662757,
      "learning_rate": 0.00018632610971525671,
      "loss": 0.6779,
      "step": 2163
    },
    {
      "epoch": 0.19349070100143062,
      "grad_norm": 0.16285468629624758,
      "learning_rate": 0.00018631148825893238,
      "loss": 0.7083,
      "step": 2164
    },
    {
      "epoch": 0.19358011444921316,
      "grad_norm": 0.1501102740714492,
      "learning_rate": 0.00018629685956376779,
      "loss": 0.6898,
      "step": 2165
    },
    {
      "epoch": 0.1936695278969957,
      "grad_norm": 0.15093552379089792,
      "learning_rate": 0.0001862822236309898,
      "loss": 0.7369,
      "step": 2166
    },
    {
      "epoch": 0.19375894134477825,
      "grad_norm": 0.15253394102338172,
      "learning_rate": 0.0001862675804618259,
      "loss": 0.7052,
      "step": 2167
    },
    {
      "epoch": 0.1938483547925608,
      "grad_norm": 0.1434780273179734,
      "learning_rate": 0.00018625293005750424,
      "loss": 0.6928,
      "step": 2168
    },
    {
      "epoch": 0.19393776824034334,
      "grad_norm": 0.15325501556642207,
      "learning_rate": 0.00018623827241925347,
      "loss": 0.7167,
      "step": 2169
    },
    {
      "epoch": 0.19402718168812588,
      "grad_norm": 0.15009720704447105,
      "learning_rate": 0.000186223607548303,
      "loss": 0.7201,
      "step": 2170
    },
    {
      "epoch": 0.19411659513590845,
      "grad_norm": 0.1392638716937916,
      "learning_rate": 0.00018620893544588264,
      "loss": 0.6914,
      "step": 2171
    },
    {
      "epoch": 0.194206008583691,
      "grad_norm": 0.14106351781429455,
      "learning_rate": 0.00018619425611322298,
      "loss": 0.6678,
      "step": 2172
    },
    {
      "epoch": 0.19429542203147354,
      "grad_norm": 0.15219726120643706,
      "learning_rate": 0.0001861795695515552,
      "loss": 0.7252,
      "step": 2173
    },
    {
      "epoch": 0.19438483547925609,
      "grad_norm": 0.14732138739907008,
      "learning_rate": 0.00018616487576211092,
      "loss": 0.7439,
      "step": 2174
    },
    {
      "epoch": 0.19447424892703863,
      "grad_norm": 0.13896401261291316,
      "learning_rate": 0.00018615017474612265,
      "loss": 0.7166,
      "step": 2175
    },
    {
      "epoch": 0.19456366237482117,
      "grad_norm": 0.14625265606298143,
      "learning_rate": 0.00018613546650482322,
      "loss": 0.7196,
      "step": 2176
    },
    {
      "epoch": 0.19465307582260372,
      "grad_norm": 0.15924798296393697,
      "learning_rate": 0.00018612075103944625,
      "loss": 0.7441,
      "step": 2177
    },
    {
      "epoch": 0.19474248927038626,
      "grad_norm": 0.14491365053169167,
      "learning_rate": 0.00018610602835122592,
      "loss": 0.6764,
      "step": 2178
    },
    {
      "epoch": 0.1948319027181688,
      "grad_norm": 0.14817281459566392,
      "learning_rate": 0.00018609129844139697,
      "loss": 0.7405,
      "step": 2179
    },
    {
      "epoch": 0.19492131616595135,
      "grad_norm": 0.12168514620509115,
      "learning_rate": 0.00018607656131119476,
      "loss": 0.6819,
      "step": 2180
    },
    {
      "epoch": 0.19501072961373392,
      "grad_norm": 0.13081544877557613,
      "learning_rate": 0.00018606181696185535,
      "loss": 0.7004,
      "step": 2181
    },
    {
      "epoch": 0.19510014306151646,
      "grad_norm": 0.13767861873360854,
      "learning_rate": 0.00018604706539461526,
      "loss": 0.6955,
      "step": 2182
    },
    {
      "epoch": 0.195189556509299,
      "grad_norm": 0.137921001942002,
      "learning_rate": 0.00018603230661071174,
      "loss": 0.7151,
      "step": 2183
    },
    {
      "epoch": 0.19527896995708155,
      "grad_norm": 0.13540582387605313,
      "learning_rate": 0.00018601754061138256,
      "loss": 0.7043,
      "step": 2184
    },
    {
      "epoch": 0.1953683834048641,
      "grad_norm": 0.15451015641459487,
      "learning_rate": 0.00018600276739786612,
      "loss": 0.6955,
      "step": 2185
    },
    {
      "epoch": 0.19545779685264664,
      "grad_norm": 0.1342195504412982,
      "learning_rate": 0.00018598798697140145,
      "loss": 0.6812,
      "step": 2186
    },
    {
      "epoch": 0.19554721030042918,
      "grad_norm": 0.13680044497330068,
      "learning_rate": 0.00018597319933322815,
      "loss": 0.6713,
      "step": 2187
    },
    {
      "epoch": 0.19563662374821172,
      "grad_norm": 0.11899745957653163,
      "learning_rate": 0.0001859584044845865,
      "loss": 0.6472,
      "step": 2188
    },
    {
      "epoch": 0.19572603719599427,
      "grad_norm": 0.14243035104116097,
      "learning_rate": 0.0001859436024267172,
      "loss": 0.7133,
      "step": 2189
    },
    {
      "epoch": 0.1958154506437768,
      "grad_norm": 0.17098913154580453,
      "learning_rate": 0.0001859287931608618,
      "loss": 0.3777,
      "step": 2190
    },
    {
      "epoch": 0.19590486409155938,
      "grad_norm": 0.16160173909783843,
      "learning_rate": 0.00018591397668826228,
      "loss": 0.7335,
      "step": 2191
    },
    {
      "epoch": 0.19599427753934193,
      "grad_norm": 0.1279305326689755,
      "learning_rate": 0.0001858991530101613,
      "loss": 0.6808,
      "step": 2192
    },
    {
      "epoch": 0.19608369098712447,
      "grad_norm": 0.1407654706855708,
      "learning_rate": 0.00018588432212780212,
      "loss": 0.6797,
      "step": 2193
    },
    {
      "epoch": 0.19617310443490701,
      "grad_norm": 0.1382815929999753,
      "learning_rate": 0.00018586948404242853,
      "loss": 0.6867,
      "step": 2194
    },
    {
      "epoch": 0.19626251788268956,
      "grad_norm": 0.15658557904678183,
      "learning_rate": 0.00018585463875528505,
      "loss": 0.6947,
      "step": 2195
    },
    {
      "epoch": 0.1963519313304721,
      "grad_norm": 0.1365234543099332,
      "learning_rate": 0.00018583978626761667,
      "loss": 0.7173,
      "step": 2196
    },
    {
      "epoch": 0.19644134477825465,
      "grad_norm": 0.15521911859140775,
      "learning_rate": 0.00018582492658066909,
      "loss": 0.7536,
      "step": 2197
    },
    {
      "epoch": 0.1965307582260372,
      "grad_norm": 0.13271025638948267,
      "learning_rate": 0.00018581005969568856,
      "loss": 0.6558,
      "step": 2198
    },
    {
      "epoch": 0.19662017167381973,
      "grad_norm": 0.1539993383639679,
      "learning_rate": 0.00018579518561392198,
      "loss": 0.7418,
      "step": 2199
    },
    {
      "epoch": 0.19670958512160228,
      "grad_norm": 0.1500695375041176,
      "learning_rate": 0.00018578030433661678,
      "loss": 0.7098,
      "step": 2200
    },
    {
      "epoch": 0.19679899856938485,
      "grad_norm": 0.15429887649551235,
      "learning_rate": 0.00018576541586502106,
      "loss": 0.756,
      "step": 2201
    },
    {
      "epoch": 0.1968884120171674,
      "grad_norm": 0.13096417774835376,
      "learning_rate": 0.00018575052020038352,
      "loss": 0.6987,
      "step": 2202
    },
    {
      "epoch": 0.19697782546494993,
      "grad_norm": 0.17113254770583425,
      "learning_rate": 0.00018573561734395338,
      "loss": 0.3724,
      "step": 2203
    },
    {
      "epoch": 0.19706723891273248,
      "grad_norm": 0.14020895255404545,
      "learning_rate": 0.0001857207072969805,
      "loss": 0.713,
      "step": 2204
    },
    {
      "epoch": 0.19715665236051502,
      "grad_norm": 0.13903544248235705,
      "learning_rate": 0.0001857057900607155,
      "loss": 0.7048,
      "step": 2205
    },
    {
      "epoch": 0.19724606580829757,
      "grad_norm": 0.12488326246548861,
      "learning_rate": 0.0001856908656364094,
      "loss": 0.6733,
      "step": 2206
    },
    {
      "epoch": 0.1973354792560801,
      "grad_norm": 0.12862936921335164,
      "learning_rate": 0.00018567593402531385,
      "loss": 0.6835,
      "step": 2207
    },
    {
      "epoch": 0.19742489270386265,
      "grad_norm": 0.14271472094110244,
      "learning_rate": 0.00018566099522868119,
      "loss": 0.7111,
      "step": 2208
    },
    {
      "epoch": 0.1975143061516452,
      "grad_norm": 0.1586614069004843,
      "learning_rate": 0.00018564604924776432,
      "loss": 0.7641,
      "step": 2209
    },
    {
      "epoch": 0.19760371959942774,
      "grad_norm": 0.14404075390232526,
      "learning_rate": 0.00018563109608381675,
      "loss": 0.7131,
      "step": 2210
    },
    {
      "epoch": 0.1976931330472103,
      "grad_norm": 0.14184280751584158,
      "learning_rate": 0.00018561613573809253,
      "loss": 0.6568,
      "step": 2211
    },
    {
      "epoch": 0.19778254649499286,
      "grad_norm": 0.1553029697827383,
      "learning_rate": 0.00018560116821184642,
      "loss": 0.7468,
      "step": 2212
    },
    {
      "epoch": 0.1978719599427754,
      "grad_norm": 0.12880217681717535,
      "learning_rate": 0.0001855861935063337,
      "loss": 0.6442,
      "step": 2213
    },
    {
      "epoch": 0.19796137339055794,
      "grad_norm": 0.13815872785905986,
      "learning_rate": 0.00018557121162281033,
      "loss": 0.6648,
      "step": 2214
    },
    {
      "epoch": 0.1980507868383405,
      "grad_norm": 0.1578749269289152,
      "learning_rate": 0.00018555622256253274,
      "loss": 0.7168,
      "step": 2215
    },
    {
      "epoch": 0.19814020028612303,
      "grad_norm": 0.13811306107253862,
      "learning_rate": 0.00018554122632675815,
      "loss": 0.6738,
      "step": 2216
    },
    {
      "epoch": 0.19822961373390557,
      "grad_norm": 0.12425008356670977,
      "learning_rate": 0.00018552622291674416,
      "loss": 0.6851,
      "step": 2217
    },
    {
      "epoch": 0.19831902718168812,
      "grad_norm": 0.1400230062245868,
      "learning_rate": 0.00018551121233374915,
      "loss": 0.7074,
      "step": 2218
    },
    {
      "epoch": 0.19840844062947066,
      "grad_norm": 0.1269640581167216,
      "learning_rate": 0.00018549619457903206,
      "loss": 0.667,
      "step": 2219
    },
    {
      "epoch": 0.1984978540772532,
      "grad_norm": 0.12501733124546976,
      "learning_rate": 0.00018548116965385236,
      "loss": 0.6807,
      "step": 2220
    },
    {
      "epoch": 0.19858726752503578,
      "grad_norm": 0.147384984848208,
      "learning_rate": 0.0001854661375594702,
      "loss": 0.7214,
      "step": 2221
    },
    {
      "epoch": 0.19867668097281832,
      "grad_norm": 0.1610514959299636,
      "learning_rate": 0.0001854510982971463,
      "loss": 0.3464,
      "step": 2222
    },
    {
      "epoch": 0.19876609442060086,
      "grad_norm": 0.1444377742541057,
      "learning_rate": 0.000185436051868142,
      "loss": 0.6756,
      "step": 2223
    },
    {
      "epoch": 0.1988555078683834,
      "grad_norm": 0.11436242813222489,
      "learning_rate": 0.0001854209982737192,
      "loss": 0.6567,
      "step": 2224
    },
    {
      "epoch": 0.19894492131616595,
      "grad_norm": 0.15103124882626207,
      "learning_rate": 0.00018540593751514042,
      "loss": 0.7155,
      "step": 2225
    },
    {
      "epoch": 0.1990343347639485,
      "grad_norm": 0.14867406512819328,
      "learning_rate": 0.00018539086959366881,
      "loss": 0.6748,
      "step": 2226
    },
    {
      "epoch": 0.19912374821173104,
      "grad_norm": 0.15418603907914735,
      "learning_rate": 0.00018537579451056811,
      "loss": 0.6816,
      "step": 2227
    },
    {
      "epoch": 0.19921316165951358,
      "grad_norm": 0.15350534860628431,
      "learning_rate": 0.00018536071226710267,
      "loss": 0.3247,
      "step": 2228
    },
    {
      "epoch": 0.19930257510729613,
      "grad_norm": 0.13418929244953948,
      "learning_rate": 0.0001853456228645373,
      "loss": 0.6896,
      "step": 2229
    },
    {
      "epoch": 0.1993919885550787,
      "grad_norm": 0.15195167947891855,
      "learning_rate": 0.00018533052630413766,
      "loss": 0.6876,
      "step": 2230
    },
    {
      "epoch": 0.19948140200286124,
      "grad_norm": 0.1265279398487721,
      "learning_rate": 0.00018531542258716982,
      "loss": 0.6541,
      "step": 2231
    },
    {
      "epoch": 0.19957081545064378,
      "grad_norm": 0.11321262158084117,
      "learning_rate": 0.00018530031171490053,
      "loss": 0.7014,
      "step": 2232
    },
    {
      "epoch": 0.19966022889842633,
      "grad_norm": 0.14393798565379176,
      "learning_rate": 0.0001852851936885971,
      "loss": 0.7502,
      "step": 2233
    },
    {
      "epoch": 0.19974964234620887,
      "grad_norm": 0.12694384450183635,
      "learning_rate": 0.00018527006850952747,
      "loss": 0.6955,
      "step": 2234
    },
    {
      "epoch": 0.19983905579399142,
      "grad_norm": 0.1375984653980164,
      "learning_rate": 0.0001852549361789602,
      "loss": 0.7314,
      "step": 2235
    },
    {
      "epoch": 0.19992846924177396,
      "grad_norm": 0.15665872185032367,
      "learning_rate": 0.00018523979669816438,
      "loss": 0.7022,
      "step": 2236
    },
    {
      "epoch": 0.2000178826895565,
      "grad_norm": 0.1546219407688764,
      "learning_rate": 0.00018522465006840975,
      "loss": 0.7373,
      "step": 2237
    },
    {
      "epoch": 0.20010729613733905,
      "grad_norm": 0.1475225821745555,
      "learning_rate": 0.00018520949629096664,
      "loss": 0.7336,
      "step": 2238
    },
    {
      "epoch": 0.2001967095851216,
      "grad_norm": 0.14338310534470491,
      "learning_rate": 0.000185194335367106,
      "loss": 0.6969,
      "step": 2239
    },
    {
      "epoch": 0.20028612303290416,
      "grad_norm": 0.14078846497469133,
      "learning_rate": 0.0001851791672980993,
      "loss": 0.7236,
      "step": 2240
    },
    {
      "epoch": 0.2003755364806867,
      "grad_norm": 0.1385399104637739,
      "learning_rate": 0.0001851639920852188,
      "loss": 0.6511,
      "step": 2241
    },
    {
      "epoch": 0.20046494992846925,
      "grad_norm": 0.15255810982691448,
      "learning_rate": 0.00018514880972973706,
      "loss": 0.7157,
      "step": 2242
    },
    {
      "epoch": 0.2005543633762518,
      "grad_norm": 0.1278984774787839,
      "learning_rate": 0.0001851336202329275,
      "loss": 0.6727,
      "step": 2243
    },
    {
      "epoch": 0.20064377682403434,
      "grad_norm": 0.14189718158123832,
      "learning_rate": 0.00018511842359606403,
      "loss": 0.7461,
      "step": 2244
    },
    {
      "epoch": 0.20073319027181688,
      "grad_norm": 0.15981122884582225,
      "learning_rate": 0.00018510321982042116,
      "loss": 0.7212,
      "step": 2245
    },
    {
      "epoch": 0.20082260371959942,
      "grad_norm": 0.18524770092695683,
      "learning_rate": 0.00018508800890727403,
      "loss": 0.3738,
      "step": 2246
    },
    {
      "epoch": 0.20091201716738197,
      "grad_norm": 0.16176074524265613,
      "learning_rate": 0.00018507279085789834,
      "loss": 0.7563,
      "step": 2247
    },
    {
      "epoch": 0.2010014306151645,
      "grad_norm": 0.13777655075858478,
      "learning_rate": 0.00018505756567357046,
      "loss": 0.6951,
      "step": 2248
    },
    {
      "epoch": 0.20109084406294706,
      "grad_norm": 0.14624547146704467,
      "learning_rate": 0.00018504233335556723,
      "loss": 0.7182,
      "step": 2249
    },
    {
      "epoch": 0.20118025751072963,
      "grad_norm": 0.14736295405763447,
      "learning_rate": 0.00018502709390516624,
      "loss": 0.6989,
      "step": 2250
    },
    {
      "epoch": 0.20126967095851217,
      "grad_norm": 0.14562600907135886,
      "learning_rate": 0.00018501184732364553,
      "loss": 0.6964,
      "step": 2251
    },
    {
      "epoch": 0.2013590844062947,
      "grad_norm": 0.1425213914090051,
      "learning_rate": 0.0001849965936122839,
      "loss": 0.6759,
      "step": 2252
    },
    {
      "epoch": 0.20144849785407726,
      "grad_norm": 0.13861993964402566,
      "learning_rate": 0.00018498133277236058,
      "loss": 0.6806,
      "step": 2253
    },
    {
      "epoch": 0.2015379113018598,
      "grad_norm": 0.1552716933132898,
      "learning_rate": 0.00018496606480515552,
      "loss": 0.7144,
      "step": 2254
    },
    {
      "epoch": 0.20162732474964234,
      "grad_norm": 0.1426350731686644,
      "learning_rate": 0.0001849507897119492,
      "loss": 0.6897,
      "step": 2255
    },
    {
      "epoch": 0.2017167381974249,
      "grad_norm": 0.13124844512489894,
      "learning_rate": 0.00018493550749402278,
      "loss": 0.6566,
      "step": 2256
    },
    {
      "epoch": 0.20180615164520743,
      "grad_norm": 0.13504041734403002,
      "learning_rate": 0.0001849202181526579,
      "loss": 0.6831,
      "step": 2257
    },
    {
      "epoch": 0.20189556509298998,
      "grad_norm": 0.12893387083479732,
      "learning_rate": 0.00018490492168913688,
      "loss": 0.6914,
      "step": 2258
    },
    {
      "epoch": 0.20198497854077252,
      "grad_norm": 0.17422618584001914,
      "learning_rate": 0.00018488961810474264,
      "loss": 0.3708,
      "step": 2259
    },
    {
      "epoch": 0.2020743919885551,
      "grad_norm": 0.15811833143929038,
      "learning_rate": 0.00018487430740075862,
      "loss": 0.7391,
      "step": 2260
    },
    {
      "epoch": 0.20216380543633763,
      "grad_norm": 0.13310804939456036,
      "learning_rate": 0.00018485898957846896,
      "loss": 0.67,
      "step": 2261
    },
    {
      "epoch": 0.20225321888412018,
      "grad_norm": 0.12871316465284213,
      "learning_rate": 0.0001848436646391583,
      "loss": 0.6864,
      "step": 2262
    },
    {
      "epoch": 0.20234263233190272,
      "grad_norm": 0.1493021704581254,
      "learning_rate": 0.000184828332584112,
      "loss": 0.7296,
      "step": 2263
    },
    {
      "epoch": 0.20243204577968527,
      "grad_norm": 0.1532884218878011,
      "learning_rate": 0.00018481299341461583,
      "loss": 0.6769,
      "step": 2264
    },
    {
      "epoch": 0.2025214592274678,
      "grad_norm": 0.1333651091538974,
      "learning_rate": 0.0001847976471319564,
      "loss": 0.6933,
      "step": 2265
    },
    {
      "epoch": 0.20261087267525035,
      "grad_norm": 0.1700613872194853,
      "learning_rate": 0.00018478229373742065,
      "loss": 0.7004,
      "step": 2266
    },
    {
      "epoch": 0.2027002861230329,
      "grad_norm": 0.1466664696662762,
      "learning_rate": 0.00018476693323229637,
      "loss": 0.7036,
      "step": 2267
    },
    {
      "epoch": 0.20278969957081544,
      "grad_norm": 0.15338630038222828,
      "learning_rate": 0.00018475156561787172,
      "loss": 0.7314,
      "step": 2268
    },
    {
      "epoch": 0.20287911301859798,
      "grad_norm": 0.1285716763562768,
      "learning_rate": 0.00018473619089543565,
      "loss": 0.6721,
      "step": 2269
    },
    {
      "epoch": 0.20296852646638056,
      "grad_norm": 0.13083639756383675,
      "learning_rate": 0.00018472080906627758,
      "loss": 0.7198,
      "step": 2270
    },
    {
      "epoch": 0.2030579399141631,
      "grad_norm": 0.14578957571735082,
      "learning_rate": 0.00018470542013168757,
      "loss": 0.6938,
      "step": 2271
    },
    {
      "epoch": 0.20314735336194564,
      "grad_norm": 0.14749169122377298,
      "learning_rate": 0.00018469002409295628,
      "loss": 0.755,
      "step": 2272
    },
    {
      "epoch": 0.2032367668097282,
      "grad_norm": 0.135323958084595,
      "learning_rate": 0.00018467462095137494,
      "loss": 0.7102,
      "step": 2273
    },
    {
      "epoch": 0.20332618025751073,
      "grad_norm": 0.13705757025037948,
      "learning_rate": 0.0001846592107082354,
      "loss": 0.6992,
      "step": 2274
    },
    {
      "epoch": 0.20341559370529327,
      "grad_norm": 0.14122734523840325,
      "learning_rate": 0.0001846437933648301,
      "loss": 0.7216,
      "step": 2275
    },
    {
      "epoch": 0.20350500715307582,
      "grad_norm": 0.12173424591397322,
      "learning_rate": 0.00018462836892245207,
      "loss": 0.7136,
      "step": 2276
    },
    {
      "epoch": 0.20359442060085836,
      "grad_norm": 0.13236005826833933,
      "learning_rate": 0.00018461293738239495,
      "loss": 0.7154,
      "step": 2277
    },
    {
      "epoch": 0.2036838340486409,
      "grad_norm": 0.18139036700563874,
      "learning_rate": 0.00018459749874595298,
      "loss": 0.4145,
      "step": 2278
    },
    {
      "epoch": 0.20377324749642345,
      "grad_norm": 0.1490155431616601,
      "learning_rate": 0.00018458205301442093,
      "loss": 0.712,
      "step": 2279
    },
    {
      "epoch": 0.20386266094420602,
      "grad_norm": 0.13957885910535714,
      "learning_rate": 0.00018456660018909425,
      "loss": 0.6818,
      "step": 2280
    },
    {
      "epoch": 0.20395207439198856,
      "grad_norm": 0.15046938200094873,
      "learning_rate": 0.0001845511402712689,
      "loss": 0.7517,
      "step": 2281
    },
    {
      "epoch": 0.2040414878397711,
      "grad_norm": 0.1303072716581252,
      "learning_rate": 0.0001845356732622416,
      "loss": 0.6961,
      "step": 2282
    },
    {
      "epoch": 0.20413090128755365,
      "grad_norm": 0.12095523571905148,
      "learning_rate": 0.00018452019916330944,
      "loss": 0.6794,
      "step": 2283
    },
    {
      "epoch": 0.2042203147353362,
      "grad_norm": 0.1466200213651253,
      "learning_rate": 0.00018450471797577028,
      "loss": 0.7246,
      "step": 2284
    },
    {
      "epoch": 0.20430972818311874,
      "grad_norm": 0.13406289491376527,
      "learning_rate": 0.00018448922970092243,
      "loss": 0.6695,
      "step": 2285
    },
    {
      "epoch": 0.20439914163090128,
      "grad_norm": 0.12605673924221253,
      "learning_rate": 0.00018447373434006496,
      "loss": 0.7269,
      "step": 2286
    },
    {
      "epoch": 0.20448855507868383,
      "grad_norm": 0.12495471275669286,
      "learning_rate": 0.0001844582318944974,
      "loss": 0.6689,
      "step": 2287
    },
    {
      "epoch": 0.20457796852646637,
      "grad_norm": 0.14428420757725846,
      "learning_rate": 0.0001844427223655199,
      "loss": 0.715,
      "step": 2288
    },
    {
      "epoch": 0.20466738197424894,
      "grad_norm": 0.1408354693010285,
      "learning_rate": 0.0001844272057544333,
      "loss": 0.6992,
      "step": 2289
    },
    {
      "epoch": 0.20475679542203148,
      "grad_norm": 0.11930170969150936,
      "learning_rate": 0.00018441168206253893,
      "loss": 0.6299,
      "step": 2290
    },
    {
      "epoch": 0.20484620886981403,
      "grad_norm": 0.15977573672432044,
      "learning_rate": 0.00018439615129113866,
      "loss": 0.7483,
      "step": 2291
    },
    {
      "epoch": 0.20493562231759657,
      "grad_norm": 0.18511718670074373,
      "learning_rate": 0.00018438061344153517,
      "loss": 0.3456,
      "step": 2292
    },
    {
      "epoch": 0.20502503576537912,
      "grad_norm": 0.15390650279232657,
      "learning_rate": 0.0001843650685150315,
      "loss": 0.717,
      "step": 2293
    },
    {
      "epoch": 0.20511444921316166,
      "grad_norm": 0.1913471860960656,
      "learning_rate": 0.00018434951651293143,
      "loss": 0.3736,
      "step": 2294
    },
    {
      "epoch": 0.2052038626609442,
      "grad_norm": 0.14594336091648685,
      "learning_rate": 0.0001843339574365393,
      "loss": 0.6678,
      "step": 2295
    },
    {
      "epoch": 0.20529327610872675,
      "grad_norm": 0.12228526070913905,
      "learning_rate": 0.00018431839128715997,
      "loss": 0.6798,
      "step": 2296
    },
    {
      "epoch": 0.2053826895565093,
      "grad_norm": 0.135562226405388,
      "learning_rate": 0.000184302818066099,
      "loss": 0.6678,
      "step": 2297
    },
    {
      "epoch": 0.20547210300429183,
      "grad_norm": 0.16043255733486522,
      "learning_rate": 0.00018428723777466253,
      "loss": 0.7248,
      "step": 2298
    },
    {
      "epoch": 0.2055615164520744,
      "grad_norm": 0.1612857649212811,
      "learning_rate": 0.0001842716504141572,
      "loss": 0.7293,
      "step": 2299
    },
    {
      "epoch": 0.20565092989985695,
      "grad_norm": 0.13788231151303318,
      "learning_rate": 0.00018425605598589031,
      "loss": 0.7079,
      "step": 2300
    },
    {
      "epoch": 0.2057403433476395,
      "grad_norm": 0.14329089351441854,
      "learning_rate": 0.00018424045449116978,
      "loss": 0.758,
      "step": 2301
    },
    {
      "epoch": 0.20582975679542204,
      "grad_norm": 0.13735651326146267,
      "learning_rate": 0.000184224845931304,
      "loss": 0.6771,
      "step": 2302
    },
    {
      "epoch": 0.20591917024320458,
      "grad_norm": 0.13685861563460824,
      "learning_rate": 0.0001842092303076022,
      "loss": 0.6974,
      "step": 2303
    },
    {
      "epoch": 0.20600858369098712,
      "grad_norm": 0.14688347597961088,
      "learning_rate": 0.00018419360762137395,
      "loss": 0.7254,
      "step": 2304
    },
    {
      "epoch": 0.20609799713876967,
      "grad_norm": 0.21697973175184782,
      "learning_rate": 0.00018417797787392948,
      "loss": 0.3764,
      "step": 2305
    },
    {
      "epoch": 0.2061874105865522,
      "grad_norm": 0.18219440307944673,
      "learning_rate": 0.00018416234106657963,
      "loss": 0.3372,
      "step": 2306
    },
    {
      "epoch": 0.20627682403433475,
      "grad_norm": 0.17483912663813966,
      "learning_rate": 0.00018414669720063592,
      "loss": 0.6789,
      "step": 2307
    },
    {
      "epoch": 0.2063662374821173,
      "grad_norm": 0.15106154131507682,
      "learning_rate": 0.00018413104627741035,
      "loss": 0.6544,
      "step": 2308
    },
    {
      "epoch": 0.20645565092989987,
      "grad_norm": 0.15115901100293033,
      "learning_rate": 0.00018411538829821552,
      "loss": 0.6862,
      "step": 2309
    },
    {
      "epoch": 0.2065450643776824,
      "grad_norm": 0.1649593966283964,
      "learning_rate": 0.00018409972326436465,
      "loss": 0.7549,
      "step": 2310
    },
    {
      "epoch": 0.20663447782546496,
      "grad_norm": 0.16872055217937199,
      "learning_rate": 0.00018408405117717154,
      "loss": 0.7112,
      "step": 2311
    },
    {
      "epoch": 0.2067238912732475,
      "grad_norm": 0.23959092807925048,
      "learning_rate": 0.00018406837203795067,
      "loss": 0.3995,
      "step": 2312
    },
    {
      "epoch": 0.20681330472103004,
      "grad_norm": 0.1401682657346661,
      "learning_rate": 0.0001840526858480169,
      "loss": 0.6841,
      "step": 2313
    },
    {
      "epoch": 0.2069027181688126,
      "grad_norm": 0.1397663124848186,
      "learning_rate": 0.0001840369926086859,
      "loss": 0.6871,
      "step": 2314
    },
    {
      "epoch": 0.20699213161659513,
      "grad_norm": 0.13305515399505544,
      "learning_rate": 0.00018402129232127383,
      "loss": 0.6538,
      "step": 2315
    },
    {
      "epoch": 0.20708154506437768,
      "grad_norm": 0.15554856698058495,
      "learning_rate": 0.00018400558498709744,
      "loss": 0.708,
      "step": 2316
    },
    {
      "epoch": 0.20717095851216022,
      "grad_norm": 0.13646456796765724,
      "learning_rate": 0.00018398987060747407,
      "loss": 0.6793,
      "step": 2317
    },
    {
      "epoch": 0.20726037195994276,
      "grad_norm": 0.12771851151140032,
      "learning_rate": 0.00018397414918372172,
      "loss": 0.6964,
      "step": 2318
    },
    {
      "epoch": 0.20734978540772533,
      "grad_norm": 0.1527489391205136,
      "learning_rate": 0.00018395842071715888,
      "loss": 0.7511,
      "step": 2319
    },
    {
      "epoch": 0.20743919885550788,
      "grad_norm": 0.12818098276122455,
      "learning_rate": 0.00018394268520910466,
      "loss": 0.6343,
      "step": 2320
    },
    {
      "epoch": 0.20752861230329042,
      "grad_norm": 0.1267541369705419,
      "learning_rate": 0.00018392694266087885,
      "loss": 0.6679,
      "step": 2321
    },
    {
      "epoch": 0.20761802575107297,
      "grad_norm": 0.14052779531206916,
      "learning_rate": 0.00018391119307380172,
      "loss": 0.72,
      "step": 2322
    },
    {
      "epoch": 0.2077074391988555,
      "grad_norm": 0.1571863023646491,
      "learning_rate": 0.00018389543644919414,
      "loss": 0.7193,
      "step": 2323
    },
    {
      "epoch": 0.20779685264663805,
      "grad_norm": 0.1537227574896807,
      "learning_rate": 0.00018387967278837763,
      "loss": 0.7309,
      "step": 2324
    },
    {
      "epoch": 0.2078862660944206,
      "grad_norm": 0.1426036764210673,
      "learning_rate": 0.00018386390209267428,
      "loss": 0.715,
      "step": 2325
    },
    {
      "epoch": 0.20797567954220314,
      "grad_norm": 0.13708042827989594,
      "learning_rate": 0.00018384812436340672,
      "loss": 0.6972,
      "step": 2326
    },
    {
      "epoch": 0.20806509298998568,
      "grad_norm": 0.1337524010523437,
      "learning_rate": 0.00018383233960189826,
      "loss": 0.6737,
      "step": 2327
    },
    {
      "epoch": 0.20815450643776823,
      "grad_norm": 0.12432086036397709,
      "learning_rate": 0.0001838165478094727,
      "loss": 0.6252,
      "step": 2328
    },
    {
      "epoch": 0.2082439198855508,
      "grad_norm": 0.13660365246166298,
      "learning_rate": 0.0001838007489874545,
      "loss": 0.6806,
      "step": 2329
    },
    {
      "epoch": 0.20833333333333334,
      "grad_norm": 0.14043145939184232,
      "learning_rate": 0.0001837849431371687,
      "loss": 0.693,
      "step": 2330
    },
    {
      "epoch": 0.2084227467811159,
      "grad_norm": 0.1451574959561786,
      "learning_rate": 0.0001837691302599409,
      "loss": 0.7061,
      "step": 2331
    },
    {
      "epoch": 0.20851216022889843,
      "grad_norm": 0.1458248768715619,
      "learning_rate": 0.0001837533103570973,
      "loss": 0.7051,
      "step": 2332
    },
    {
      "epoch": 0.20860157367668097,
      "grad_norm": 0.1304935895228124,
      "learning_rate": 0.00018373748342996474,
      "loss": 0.6981,
      "step": 2333
    },
    {
      "epoch": 0.20869098712446352,
      "grad_norm": 0.14382262234267515,
      "learning_rate": 0.00018372164947987054,
      "loss": 0.6767,
      "step": 2334
    },
    {
      "epoch": 0.20878040057224606,
      "grad_norm": 0.134963244864096,
      "learning_rate": 0.00018370580850814272,
      "loss": 0.6806,
      "step": 2335
    },
    {
      "epoch": 0.2088698140200286,
      "grad_norm": 0.13754124669277765,
      "learning_rate": 0.00018368996051610986,
      "loss": 0.6574,
      "step": 2336
    },
    {
      "epoch": 0.20895922746781115,
      "grad_norm": 0.13763608133041125,
      "learning_rate": 0.00018367410550510104,
      "loss": 0.6791,
      "step": 2337
    },
    {
      "epoch": 0.2090486409155937,
      "grad_norm": 0.14600582844336335,
      "learning_rate": 0.00018365824347644607,
      "loss": 0.7147,
      "step": 2338
    },
    {
      "epoch": 0.20913805436337626,
      "grad_norm": 0.14122665278189175,
      "learning_rate": 0.00018364237443147525,
      "loss": 0.6801,
      "step": 2339
    },
    {
      "epoch": 0.2092274678111588,
      "grad_norm": 0.1496642847233718,
      "learning_rate": 0.00018362649837151947,
      "loss": 0.7074,
      "step": 2340
    },
    {
      "epoch": 0.20931688125894135,
      "grad_norm": 0.14014289085359863,
      "learning_rate": 0.0001836106152979103,
      "loss": 0.6858,
      "step": 2341
    },
    {
      "epoch": 0.2094062947067239,
      "grad_norm": 0.14756118049367822,
      "learning_rate": 0.0001835947252119798,
      "loss": 0.7091,
      "step": 2342
    },
    {
      "epoch": 0.20949570815450644,
      "grad_norm": 0.13175201041490142,
      "learning_rate": 0.00018357882811506065,
      "loss": 0.6728,
      "step": 2343
    },
    {
      "epoch": 0.20958512160228898,
      "grad_norm": 0.14042692501762585,
      "learning_rate": 0.00018356292400848611,
      "loss": 0.7362,
      "step": 2344
    },
    {
      "epoch": 0.20967453505007153,
      "grad_norm": 0.15539437553705143,
      "learning_rate": 0.00018354701289359005,
      "loss": 0.7096,
      "step": 2345
    },
    {
      "epoch": 0.20976394849785407,
      "grad_norm": 0.1394652941041713,
      "learning_rate": 0.00018353109477170696,
      "loss": 0.7131,
      "step": 2346
    },
    {
      "epoch": 0.2098533619456366,
      "grad_norm": 0.11884460763998003,
      "learning_rate": 0.0001835151696441718,
      "loss": 0.6927,
      "step": 2347
    },
    {
      "epoch": 0.20994277539341916,
      "grad_norm": 0.1382500750613389,
      "learning_rate": 0.00018349923751232022,
      "loss": 0.6698,
      "step": 2348
    },
    {
      "epoch": 0.21003218884120173,
      "grad_norm": 0.16486801110118288,
      "learning_rate": 0.00018348329837748843,
      "loss": 0.7036,
      "step": 2349
    },
    {
      "epoch": 0.21012160228898427,
      "grad_norm": 0.15123092542340646,
      "learning_rate": 0.00018346735224101325,
      "loss": 0.6815,
      "step": 2350
    },
    {
      "epoch": 0.21021101573676682,
      "grad_norm": 0.1345690567844832,
      "learning_rate": 0.000183451399104232,
      "loss": 0.7068,
      "step": 2351
    },
    {
      "epoch": 0.21030042918454936,
      "grad_norm": 0.13446104504442694,
      "learning_rate": 0.00018343543896848273,
      "loss": 0.6822,
      "step": 2352
    },
    {
      "epoch": 0.2103898426323319,
      "grad_norm": 0.13624718718814682,
      "learning_rate": 0.00018341947183510393,
      "loss": 0.6714,
      "step": 2353
    },
    {
      "epoch": 0.21047925608011445,
      "grad_norm": 0.15125533812419129,
      "learning_rate": 0.00018340349770543481,
      "loss": 0.7062,
      "step": 2354
    },
    {
      "epoch": 0.210568669527897,
      "grad_norm": 0.13006544566633801,
      "learning_rate": 0.00018338751658081504,
      "loss": 0.6809,
      "step": 2355
    },
    {
      "epoch": 0.21065808297567953,
      "grad_norm": 0.15593223121138608,
      "learning_rate": 0.00018337152846258493,
      "loss": 0.7281,
      "step": 2356
    },
    {
      "epoch": 0.21074749642346208,
      "grad_norm": 0.1364616773113954,
      "learning_rate": 0.00018335553335208546,
      "loss": 0.6691,
      "step": 2357
    },
    {
      "epoch": 0.21083690987124465,
      "grad_norm": 0.14245346668506334,
      "learning_rate": 0.00018333953125065805,
      "loss": 0.719,
      "step": 2358
    },
    {
      "epoch": 0.2109263233190272,
      "grad_norm": 0.14517888279702218,
      "learning_rate": 0.0001833235221596448,
      "loss": 0.659,
      "step": 2359
    },
    {
      "epoch": 0.21101573676680974,
      "grad_norm": 0.14379967575513744,
      "learning_rate": 0.00018330750608038844,
      "loss": 0.7199,
      "step": 2360
    },
    {
      "epoch": 0.21110515021459228,
      "grad_norm": 0.12799870738524136,
      "learning_rate": 0.0001832914830142321,
      "loss": 0.643,
      "step": 2361
    },
    {
      "epoch": 0.21119456366237482,
      "grad_norm": 0.12761293291186826,
      "learning_rate": 0.00018327545296251968,
      "loss": 0.7179,
      "step": 2362
    },
    {
      "epoch": 0.21128397711015737,
      "grad_norm": 0.14289796663769383,
      "learning_rate": 0.00018325941592659553,
      "loss": 0.6861,
      "step": 2363
    },
    {
      "epoch": 0.2113733905579399,
      "grad_norm": 0.14322597141779853,
      "learning_rate": 0.0001832433719078048,
      "loss": 0.7047,
      "step": 2364
    },
    {
      "epoch": 0.21146280400572245,
      "grad_norm": 0.13755798108309142,
      "learning_rate": 0.00018322732090749296,
      "loss": 0.6574,
      "step": 2365
    },
    {
      "epoch": 0.211552217453505,
      "grad_norm": 0.1718608892004533,
      "learning_rate": 0.00018321126292700628,
      "loss": 0.7105,
      "step": 2366
    },
    {
      "epoch": 0.21164163090128754,
      "grad_norm": 0.16094567351397698,
      "learning_rate": 0.00018319519796769143,
      "loss": 0.7353,
      "step": 2367
    },
    {
      "epoch": 0.2117310443490701,
      "grad_norm": 0.15863282684994026,
      "learning_rate": 0.0001831791260308958,
      "loss": 0.7136,
      "step": 2368
    },
    {
      "epoch": 0.21182045779685266,
      "grad_norm": 0.13926051535739536,
      "learning_rate": 0.00018316304711796732,
      "loss": 0.685,
      "step": 2369
    },
    {
      "epoch": 0.2119098712446352,
      "grad_norm": 0.132918005867686,
      "learning_rate": 0.00018314696123025454,
      "loss": 0.7143,
      "step": 2370
    },
    {
      "epoch": 0.21199928469241774,
      "grad_norm": 0.14391167321594553,
      "learning_rate": 0.0001831308683691065,
      "loss": 0.6976,
      "step": 2371
    },
    {
      "epoch": 0.2120886981402003,
      "grad_norm": 0.13440277882954552,
      "learning_rate": 0.00018311476853587297,
      "loss": 0.7212,
      "step": 2372
    },
    {
      "epoch": 0.21217811158798283,
      "grad_norm": 0.11748857254632875,
      "learning_rate": 0.00018309866173190416,
      "loss": 0.6431,
      "step": 2373
    },
    {
      "epoch": 0.21226752503576538,
      "grad_norm": 0.15983530480151928,
      "learning_rate": 0.00018308254795855095,
      "loss": 0.7468,
      "step": 2374
    },
    {
      "epoch": 0.21235693848354792,
      "grad_norm": 0.12954517875965424,
      "learning_rate": 0.00018306642721716476,
      "loss": 0.674,
      "step": 2375
    },
    {
      "epoch": 0.21244635193133046,
      "grad_norm": 0.12655611156408467,
      "learning_rate": 0.00018305029950909768,
      "loss": 0.661,
      "step": 2376
    },
    {
      "epoch": 0.212535765379113,
      "grad_norm": 0.13945774325105278,
      "learning_rate": 0.00018303416483570227,
      "loss": 0.6954,
      "step": 2377
    },
    {
      "epoch": 0.21262517882689558,
      "grad_norm": 0.13147693701817265,
      "learning_rate": 0.0001830180231983317,
      "loss": 0.6962,
      "step": 2378
    },
    {
      "epoch": 0.21271459227467812,
      "grad_norm": 0.14912408177029038,
      "learning_rate": 0.00018300187459833981,
      "loss": 0.6904,
      "step": 2379
    },
    {
      "epoch": 0.21280400572246067,
      "grad_norm": 0.1372589776323331,
      "learning_rate": 0.00018298571903708092,
      "loss": 0.6485,
      "step": 2380
    },
    {
      "epoch": 0.2128934191702432,
      "grad_norm": 0.13923436150224708,
      "learning_rate": 0.00018296955651591002,
      "loss": 0.7175,
      "step": 2381
    },
    {
      "epoch": 0.21298283261802575,
      "grad_norm": 0.12901078216305595,
      "learning_rate": 0.00018295338703618258,
      "loss": 0.6745,
      "step": 2382
    },
    {
      "epoch": 0.2130722460658083,
      "grad_norm": 0.13463470697931598,
      "learning_rate": 0.0001829372105992548,
      "loss": 0.6623,
      "step": 2383
    },
    {
      "epoch": 0.21316165951359084,
      "grad_norm": 0.15070543044719278,
      "learning_rate": 0.00018292102720648333,
      "loss": 0.6948,
      "step": 2384
    },
    {
      "epoch": 0.21325107296137338,
      "grad_norm": 0.1490145635932194,
      "learning_rate": 0.0001829048368592254,
      "loss": 0.6814,
      "step": 2385
    },
    {
      "epoch": 0.21334048640915593,
      "grad_norm": 0.16323410074072212,
      "learning_rate": 0.00018288863955883897,
      "loss": 0.6966,
      "step": 2386
    },
    {
      "epoch": 0.21342989985693847,
      "grad_norm": 0.15715976194574682,
      "learning_rate": 0.00018287243530668243,
      "loss": 0.6987,
      "step": 2387
    },
    {
      "epoch": 0.21351931330472104,
      "grad_norm": 0.1452523691124335,
      "learning_rate": 0.00018285622410411484,
      "loss": 0.6843,
      "step": 2388
    },
    {
      "epoch": 0.21360872675250359,
      "grad_norm": 0.14808864728320253,
      "learning_rate": 0.00018284000595249577,
      "loss": 0.6829,
      "step": 2389
    },
    {
      "epoch": 0.21369814020028613,
      "grad_norm": 0.14526749988621443,
      "learning_rate": 0.00018282378085318545,
      "loss": 0.694,
      "step": 2390
    },
    {
      "epoch": 0.21378755364806867,
      "grad_norm": 0.14049789966854323,
      "learning_rate": 0.00018280754880754468,
      "loss": 0.6679,
      "step": 2391
    },
    {
      "epoch": 0.21387696709585122,
      "grad_norm": 0.1324598593359374,
      "learning_rate": 0.0001827913098169348,
      "loss": 0.652,
      "step": 2392
    },
    {
      "epoch": 0.21396638054363376,
      "grad_norm": 0.1475225259199232,
      "learning_rate": 0.00018277506388271773,
      "loss": 0.72,
      "step": 2393
    },
    {
      "epoch": 0.2140557939914163,
      "grad_norm": 0.1538998181957495,
      "learning_rate": 0.000182758811006256,
      "loss": 0.7253,
      "step": 2394
    },
    {
      "epoch": 0.21414520743919885,
      "grad_norm": 0.1400840278295501,
      "learning_rate": 0.0001827425511889128,
      "loss": 0.7236,
      "step": 2395
    },
    {
      "epoch": 0.2142346208869814,
      "grad_norm": 0.12086241151743651,
      "learning_rate": 0.00018272628443205172,
      "loss": 0.6809,
      "step": 2396
    },
    {
      "epoch": 0.21432403433476394,
      "grad_norm": 0.11593277382106208,
      "learning_rate": 0.00018271001073703706,
      "loss": 0.6664,
      "step": 2397
    },
    {
      "epoch": 0.2144134477825465,
      "grad_norm": 0.13753193274782277,
      "learning_rate": 0.0001826937301052337,
      "loss": 0.6944,
      "step": 2398
    },
    {
      "epoch": 0.21450286123032905,
      "grad_norm": 0.16527123624082035,
      "learning_rate": 0.00018267744253800707,
      "loss": 0.7014,
      "step": 2399
    },
    {
      "epoch": 0.2145922746781116,
      "grad_norm": 0.13997575203568657,
      "learning_rate": 0.00018266114803672318,
      "loss": 0.722,
      "step": 2400
    },
    {
      "epoch": 0.21468168812589414,
      "grad_norm": 0.1353051446860857,
      "learning_rate": 0.00018264484660274866,
      "loss": 0.7157,
      "step": 2401
    },
    {
      "epoch": 0.21477110157367668,
      "grad_norm": 0.14272567380130338,
      "learning_rate": 0.00018262853823745062,
      "loss": 0.6929,
      "step": 2402
    },
    {
      "epoch": 0.21486051502145923,
      "grad_norm": 0.11259239845478786,
      "learning_rate": 0.0001826122229421969,
      "loss": 0.6346,
      "step": 2403
    },
    {
      "epoch": 0.21494992846924177,
      "grad_norm": 0.14831733173991374,
      "learning_rate": 0.0001825959007183558,
      "loss": 0.7617,
      "step": 2404
    },
    {
      "epoch": 0.2150393419170243,
      "grad_norm": 0.15777314604853812,
      "learning_rate": 0.00018257957156729625,
      "loss": 0.718,
      "step": 2405
    },
    {
      "epoch": 0.21512875536480686,
      "grad_norm": 0.17353046214865664,
      "learning_rate": 0.00018256323549038778,
      "loss": 0.396,
      "step": 2406
    },
    {
      "epoch": 0.2152181688125894,
      "grad_norm": 0.15212612197354078,
      "learning_rate": 0.00018254689248900047,
      "loss": 0.6859,
      "step": 2407
    },
    {
      "epoch": 0.21530758226037197,
      "grad_norm": 0.1375703923803857,
      "learning_rate": 0.00018253054256450494,
      "loss": 0.6792,
      "step": 2408
    },
    {
      "epoch": 0.21539699570815452,
      "grad_norm": 0.13203588992673698,
      "learning_rate": 0.0001825141857182725,
      "loss": 0.6945,
      "step": 2409
    },
    {
      "epoch": 0.21548640915593706,
      "grad_norm": 0.1481363005291274,
      "learning_rate": 0.00018249782195167496,
      "loss": 0.6867,
      "step": 2410
    },
    {
      "epoch": 0.2155758226037196,
      "grad_norm": 0.13859846440429271,
      "learning_rate": 0.0001824814512660847,
      "loss": 0.7105,
      "step": 2411
    },
    {
      "epoch": 0.21566523605150215,
      "grad_norm": 0.15835042402349164,
      "learning_rate": 0.00018246507366287475,
      "loss": 0.7966,
      "step": 2412
    },
    {
      "epoch": 0.2157546494992847,
      "grad_norm": 0.14272538430430015,
      "learning_rate": 0.0001824486891434187,
      "loss": 0.6688,
      "step": 2413
    },
    {
      "epoch": 0.21584406294706723,
      "grad_norm": 0.13831797614680055,
      "learning_rate": 0.0001824322977090906,
      "loss": 0.7088,
      "step": 2414
    },
    {
      "epoch": 0.21593347639484978,
      "grad_norm": 0.14114273654756862,
      "learning_rate": 0.0001824158993612653,
      "loss": 0.6608,
      "step": 2415
    },
    {
      "epoch": 0.21602288984263232,
      "grad_norm": 0.13404807342992103,
      "learning_rate": 0.00018239949410131802,
      "loss": 0.677,
      "step": 2416
    },
    {
      "epoch": 0.2161123032904149,
      "grad_norm": 0.14808405065346988,
      "learning_rate": 0.0001823830819306247,
      "loss": 0.693,
      "step": 2417
    },
    {
      "epoch": 0.21620171673819744,
      "grad_norm": 0.17052266255264,
      "learning_rate": 0.0001823666628505618,
      "loss": 0.7299,
      "step": 2418
    },
    {
      "epoch": 0.21629113018597998,
      "grad_norm": 0.13907997437447464,
      "learning_rate": 0.00018235023686250635,
      "loss": 0.6732,
      "step": 2419
    },
    {
      "epoch": 0.21638054363376252,
      "grad_norm": 0.1545584950305787,
      "learning_rate": 0.00018233380396783595,
      "loss": 0.6731,
      "step": 2420
    },
    {
      "epoch": 0.21646995708154507,
      "grad_norm": 0.1996294928138227,
      "learning_rate": 0.0001823173641679289,
      "loss": 0.3523,
      "step": 2421
    },
    {
      "epoch": 0.2165593705293276,
      "grad_norm": 0.14183535500502065,
      "learning_rate": 0.0001823009174641639,
      "loss": 0.7307,
      "step": 2422
    },
    {
      "epoch": 0.21664878397711015,
      "grad_norm": 0.13044739980491937,
      "learning_rate": 0.00018228446385792037,
      "loss": 0.7019,
      "step": 2423
    },
    {
      "epoch": 0.2167381974248927,
      "grad_norm": 0.15470430481449032,
      "learning_rate": 0.00018226800335057822,
      "loss": 0.7246,
      "step": 2424
    },
    {
      "epoch": 0.21682761087267524,
      "grad_norm": 0.1398264415717194,
      "learning_rate": 0.00018225153594351795,
      "loss": 0.691,
      "step": 2425
    },
    {
      "epoch": 0.21691702432045779,
      "grad_norm": 0.14976027254980104,
      "learning_rate": 0.00018223506163812076,
      "loss": 0.6746,
      "step": 2426
    },
    {
      "epoch": 0.21700643776824036,
      "grad_norm": 0.1327442424551766,
      "learning_rate": 0.0001822185804357682,
      "loss": 0.6771,
      "step": 2427
    },
    {
      "epoch": 0.2170958512160229,
      "grad_norm": 0.16892975156980952,
      "learning_rate": 0.00018220209233784266,
      "loss": 0.7276,
      "step": 2428
    },
    {
      "epoch": 0.21718526466380544,
      "grad_norm": 0.1530588970282083,
      "learning_rate": 0.00018218559734572686,
      "loss": 0.7156,
      "step": 2429
    },
    {
      "epoch": 0.217274678111588,
      "grad_norm": 0.12915124266428482,
      "learning_rate": 0.00018216909546080428,
      "loss": 0.7039,
      "step": 2430
    },
    {
      "epoch": 0.21736409155937053,
      "grad_norm": 0.13554726015399504,
      "learning_rate": 0.00018215258668445892,
      "loss": 0.6942,
      "step": 2431
    },
    {
      "epoch": 0.21745350500715308,
      "grad_norm": 0.13449109558693131,
      "learning_rate": 0.00018213607101807527,
      "loss": 0.6782,
      "step": 2432
    },
    {
      "epoch": 0.21754291845493562,
      "grad_norm": 0.16099738159614746,
      "learning_rate": 0.0001821195484630386,
      "loss": 0.7494,
      "step": 2433
    },
    {
      "epoch": 0.21763233190271816,
      "grad_norm": 0.1295361308795125,
      "learning_rate": 0.00018210301902073456,
      "loss": 0.7041,
      "step": 2434
    },
    {
      "epoch": 0.2177217453505007,
      "grad_norm": 0.14149376927745172,
      "learning_rate": 0.00018208648269254946,
      "loss": 0.6833,
      "step": 2435
    },
    {
      "epoch": 0.21781115879828325,
      "grad_norm": 0.14964914993148512,
      "learning_rate": 0.0001820699394798702,
      "loss": 0.674,
      "step": 2436
    },
    {
      "epoch": 0.21790057224606582,
      "grad_norm": 0.13919300036427562,
      "learning_rate": 0.00018205338938408425,
      "loss": 0.6814,
      "step": 2437
    },
    {
      "epoch": 0.21798998569384836,
      "grad_norm": 0.16327526289016064,
      "learning_rate": 0.0001820368324065796,
      "loss": 0.7256,
      "step": 2438
    },
    {
      "epoch": 0.2180793991416309,
      "grad_norm": 0.1494512251380565,
      "learning_rate": 0.00018202026854874487,
      "loss": 0.692,
      "step": 2439
    },
    {
      "epoch": 0.21816881258941345,
      "grad_norm": 0.15191283342949216,
      "learning_rate": 0.00018200369781196934,
      "loss": 0.6505,
      "step": 2440
    },
    {
      "epoch": 0.218258226037196,
      "grad_norm": 0.15375177975026405,
      "learning_rate": 0.00018198712019764266,
      "loss": 0.7061,
      "step": 2441
    },
    {
      "epoch": 0.21834763948497854,
      "grad_norm": 0.13643463348976967,
      "learning_rate": 0.00018197053570715523,
      "loss": 0.6807,
      "step": 2442
    },
    {
      "epoch": 0.21843705293276108,
      "grad_norm": 0.17165035943057758,
      "learning_rate": 0.00018195394434189797,
      "loss": 0.7007,
      "step": 2443
    },
    {
      "epoch": 0.21852646638054363,
      "grad_norm": 0.1485056616569075,
      "learning_rate": 0.00018193734610326239,
      "loss": 0.7268,
      "step": 2444
    },
    {
      "epoch": 0.21861587982832617,
      "grad_norm": 0.1413169775783055,
      "learning_rate": 0.0001819207409926405,
      "loss": 0.6861,
      "step": 2445
    },
    {
      "epoch": 0.21870529327610871,
      "grad_norm": 0.13388911438926251,
      "learning_rate": 0.00018190412901142504,
      "loss": 0.7247,
      "step": 2446
    },
    {
      "epoch": 0.21879470672389129,
      "grad_norm": 0.14826376556842188,
      "learning_rate": 0.00018188751016100918,
      "loss": 0.6975,
      "step": 2447
    },
    {
      "epoch": 0.21888412017167383,
      "grad_norm": 0.1350321931814116,
      "learning_rate": 0.00018187088444278674,
      "loss": 0.7178,
      "step": 2448
    },
    {
      "epoch": 0.21897353361945637,
      "grad_norm": 0.1449607740235393,
      "learning_rate": 0.0001818542518581521,
      "loss": 0.7291,
      "step": 2449
    },
    {
      "epoch": 0.21906294706723892,
      "grad_norm": 0.14291858141559607,
      "learning_rate": 0.0001818376124085002,
      "loss": 0.7067,
      "step": 2450
    },
    {
      "epoch": 0.21915236051502146,
      "grad_norm": 0.13083083448594904,
      "learning_rate": 0.0001818209660952266,
      "loss": 0.6707,
      "step": 2451
    },
    {
      "epoch": 0.219241773962804,
      "grad_norm": 0.13194757849886057,
      "learning_rate": 0.00018180431291972738,
      "loss": 0.6884,
      "step": 2452
    },
    {
      "epoch": 0.21933118741058655,
      "grad_norm": 0.1125140767278647,
      "learning_rate": 0.00018178765288339924,
      "loss": 0.6361,
      "step": 2453
    },
    {
      "epoch": 0.2194206008583691,
      "grad_norm": 0.13260913185144924,
      "learning_rate": 0.00018177098598763942,
      "loss": 0.6904,
      "step": 2454
    },
    {
      "epoch": 0.21951001430615164,
      "grad_norm": 0.14756740668972498,
      "learning_rate": 0.00018175431223384575,
      "loss": 0.6922,
      "step": 2455
    },
    {
      "epoch": 0.21959942775393418,
      "grad_norm": 0.17028780504233038,
      "learning_rate": 0.00018173763162341667,
      "loss": 0.3532,
      "step": 2456
    },
    {
      "epoch": 0.21968884120171675,
      "grad_norm": 0.15427892746206429,
      "learning_rate": 0.00018172094415775113,
      "loss": 0.7148,
      "step": 2457
    },
    {
      "epoch": 0.2197782546494993,
      "grad_norm": 0.1441952352452872,
      "learning_rate": 0.00018170424983824868,
      "loss": 0.7106,
      "step": 2458
    },
    {
      "epoch": 0.21986766809728184,
      "grad_norm": 0.14896332989762373,
      "learning_rate": 0.00018168754866630947,
      "loss": 0.7095,
      "step": 2459
    },
    {
      "epoch": 0.21995708154506438,
      "grad_norm": 0.16098394120592616,
      "learning_rate": 0.00018167084064333423,
      "loss": 0.3716,
      "step": 2460
    },
    {
      "epoch": 0.22004649499284692,
      "grad_norm": 0.15574947493062172,
      "learning_rate": 0.0001816541257707242,
      "loss": 0.7002,
      "step": 2461
    },
    {
      "epoch": 0.22013590844062947,
      "grad_norm": 0.14482158653869032,
      "learning_rate": 0.00018163740404988126,
      "loss": 0.7158,
      "step": 2462
    },
    {
      "epoch": 0.220225321888412,
      "grad_norm": 0.1428756727866067,
      "learning_rate": 0.00018162067548220786,
      "loss": 0.6874,
      "step": 2463
    },
    {
      "epoch": 0.22031473533619456,
      "grad_norm": 0.1378543480332668,
      "learning_rate": 0.00018160394006910694,
      "loss": 0.6697,
      "step": 2464
    },
    {
      "epoch": 0.2204041487839771,
      "grad_norm": 0.1484072617127026,
      "learning_rate": 0.00018158719781198213,
      "loss": 0.668,
      "step": 2465
    },
    {
      "epoch": 0.22049356223175964,
      "grad_norm": 0.15904113562437275,
      "learning_rate": 0.00018157044871223757,
      "loss": 0.7285,
      "step": 2466
    },
    {
      "epoch": 0.22058297567954221,
      "grad_norm": 0.139259323261461,
      "learning_rate": 0.00018155369277127802,
      "loss": 0.7215,
      "step": 2467
    },
    {
      "epoch": 0.22067238912732476,
      "grad_norm": 0.156516954606737,
      "learning_rate": 0.00018153692999050872,
      "loss": 0.7055,
      "step": 2468
    },
    {
      "epoch": 0.2207618025751073,
      "grad_norm": 0.13262803687395142,
      "learning_rate": 0.00018152016037133558,
      "loss": 0.7046,
      "step": 2469
    },
    {
      "epoch": 0.22085121602288985,
      "grad_norm": 0.142239664722969,
      "learning_rate": 0.00018150338391516505,
      "loss": 0.6919,
      "step": 2470
    },
    {
      "epoch": 0.2209406294706724,
      "grad_norm": 0.13658096474691564,
      "learning_rate": 0.0001814866006234041,
      "loss": 0.6847,
      "step": 2471
    },
    {
      "epoch": 0.22103004291845493,
      "grad_norm": 0.12789104711207808,
      "learning_rate": 0.00018146981049746043,
      "loss": 0.6925,
      "step": 2472
    },
    {
      "epoch": 0.22111945636623748,
      "grad_norm": 0.14123648989921705,
      "learning_rate": 0.0001814530135387421,
      "loss": 0.6908,
      "step": 2473
    },
    {
      "epoch": 0.22120886981402002,
      "grad_norm": 0.20149663867052922,
      "learning_rate": 0.0001814362097486579,
      "loss": 0.3498,
      "step": 2474
    },
    {
      "epoch": 0.22129828326180256,
      "grad_norm": 0.142227846009428,
      "learning_rate": 0.00018141939912861717,
      "loss": 0.6837,
      "step": 2475
    },
    {
      "epoch": 0.2213876967095851,
      "grad_norm": 0.13545411665038054,
      "learning_rate": 0.00018140258168002971,
      "loss": 0.6566,
      "step": 2476
    },
    {
      "epoch": 0.22147711015736768,
      "grad_norm": 0.1601575769488017,
      "learning_rate": 0.0001813857574043061,
      "loss": 0.7253,
      "step": 2477
    },
    {
      "epoch": 0.22156652360515022,
      "grad_norm": 0.13778720579547335,
      "learning_rate": 0.00018136892630285726,
      "loss": 0.6829,
      "step": 2478
    },
    {
      "epoch": 0.22165593705293277,
      "grad_norm": 0.15070058084560803,
      "learning_rate": 0.00018135208837709486,
      "loss": 0.6924,
      "step": 2479
    },
    {
      "epoch": 0.2217453505007153,
      "grad_norm": 0.1338772674522229,
      "learning_rate": 0.00018133524362843104,
      "loss": 0.7045,
      "step": 2480
    },
    {
      "epoch": 0.22183476394849785,
      "grad_norm": 0.1870383969104017,
      "learning_rate": 0.00018131839205827856,
      "loss": 0.357,
      "step": 2481
    },
    {
      "epoch": 0.2219241773962804,
      "grad_norm": 0.13995388144793294,
      "learning_rate": 0.00018130153366805075,
      "loss": 0.6823,
      "step": 2482
    },
    {
      "epoch": 0.22201359084406294,
      "grad_norm": 0.15997015212829188,
      "learning_rate": 0.00018128466845916154,
      "loss": 0.6966,
      "step": 2483
    },
    {
      "epoch": 0.22210300429184548,
      "grad_norm": 0.12495762039898872,
      "learning_rate": 0.00018126779643302528,
      "loss": 0.6464,
      "step": 2484
    },
    {
      "epoch": 0.22219241773962803,
      "grad_norm": 0.13649464996179553,
      "learning_rate": 0.00018125091759105713,
      "loss": 0.7074,
      "step": 2485
    },
    {
      "epoch": 0.2222818311874106,
      "grad_norm": 0.12487092916355591,
      "learning_rate": 0.00018123403193467266,
      "loss": 0.6574,
      "step": 2486
    },
    {
      "epoch": 0.22237124463519314,
      "grad_norm": 0.15444125165669836,
      "learning_rate": 0.000181217139465288,
      "loss": 0.7273,
      "step": 2487
    },
    {
      "epoch": 0.2224606580829757,
      "grad_norm": 0.13964664580569017,
      "learning_rate": 0.00018120024018431998,
      "loss": 0.6876,
      "step": 2488
    },
    {
      "epoch": 0.22255007153075823,
      "grad_norm": 0.12257657188863955,
      "learning_rate": 0.00018118333409318583,
      "loss": 0.6381,
      "step": 2489
    },
    {
      "epoch": 0.22263948497854077,
      "grad_norm": 0.14964094559489868,
      "learning_rate": 0.00018116642119330354,
      "loss": 0.7076,
      "step": 2490
    },
    {
      "epoch": 0.22272889842632332,
      "grad_norm": 0.14914853330572572,
      "learning_rate": 0.0001811495014860915,
      "loss": 0.7233,
      "step": 2491
    },
    {
      "epoch": 0.22281831187410586,
      "grad_norm": 0.14102770014237245,
      "learning_rate": 0.00018113257497296879,
      "loss": 0.7198,
      "step": 2492
    },
    {
      "epoch": 0.2229077253218884,
      "grad_norm": 0.1470656581570032,
      "learning_rate": 0.000181115641655355,
      "loss": 0.6521,
      "step": 2493
    },
    {
      "epoch": 0.22299713876967095,
      "grad_norm": 0.14226374725233137,
      "learning_rate": 0.00018109870153467031,
      "loss": 0.7333,
      "step": 2494
    },
    {
      "epoch": 0.2230865522174535,
      "grad_norm": 0.15102109841603162,
      "learning_rate": 0.00018108175461233544,
      "loss": 0.739,
      "step": 2495
    },
    {
      "epoch": 0.22317596566523606,
      "grad_norm": 0.14331292494899664,
      "learning_rate": 0.00018106480088977172,
      "loss": 0.6992,
      "step": 2496
    },
    {
      "epoch": 0.2232653791130186,
      "grad_norm": 0.1577624225803511,
      "learning_rate": 0.0001810478403684011,
      "loss": 0.6985,
      "step": 2497
    },
    {
      "epoch": 0.22335479256080115,
      "grad_norm": 0.1563656920755062,
      "learning_rate": 0.00018103087304964597,
      "loss": 0.7058,
      "step": 2498
    },
    {
      "epoch": 0.2234442060085837,
      "grad_norm": 0.1527915790084579,
      "learning_rate": 0.00018101389893492937,
      "loss": 0.726,
      "step": 2499
    },
    {
      "epoch": 0.22353361945636624,
      "grad_norm": 0.15689317967400832,
      "learning_rate": 0.0001809969180256749,
      "loss": 0.3653,
      "step": 2500
    },
    {
      "epoch": 0.22362303290414878,
      "grad_norm": 0.14315011758568144,
      "learning_rate": 0.00018097993032330676,
      "loss": 0.7237,
      "step": 2501
    },
    {
      "epoch": 0.22371244635193133,
      "grad_norm": 0.14002814392335658,
      "learning_rate": 0.00018096293582924963,
      "loss": 0.6864,
      "step": 2502
    },
    {
      "epoch": 0.22380185979971387,
      "grad_norm": 0.16370416056782122,
      "learning_rate": 0.00018094593454492887,
      "loss": 0.7291,
      "step": 2503
    },
    {
      "epoch": 0.22389127324749641,
      "grad_norm": 0.16287571691413016,
      "learning_rate": 0.00018092892647177035,
      "loss": 0.7381,
      "step": 2504
    },
    {
      "epoch": 0.22398068669527896,
      "grad_norm": 0.15048024639792648,
      "learning_rate": 0.0001809119116112005,
      "loss": 0.7129,
      "step": 2505
    },
    {
      "epoch": 0.22407010014306153,
      "grad_norm": 0.15834693624132187,
      "learning_rate": 0.00018089488996464632,
      "loss": 0.7136,
      "step": 2506
    },
    {
      "epoch": 0.22415951359084407,
      "grad_norm": 0.12096981330768912,
      "learning_rate": 0.00018087786153353543,
      "loss": 0.663,
      "step": 2507
    },
    {
      "epoch": 0.22424892703862662,
      "grad_norm": 0.1563866457576495,
      "learning_rate": 0.00018086082631929595,
      "loss": 0.7126,
      "step": 2508
    },
    {
      "epoch": 0.22433834048640916,
      "grad_norm": 0.1370030632509915,
      "learning_rate": 0.00018084378432335667,
      "loss": 0.6718,
      "step": 2509
    },
    {
      "epoch": 0.2244277539341917,
      "grad_norm": 0.15449126038262986,
      "learning_rate": 0.00018082673554714677,
      "loss": 0.6708,
      "step": 2510
    },
    {
      "epoch": 0.22451716738197425,
      "grad_norm": 0.17266935194766223,
      "learning_rate": 0.00018080967999209622,
      "loss": 0.6886,
      "step": 2511
    },
    {
      "epoch": 0.2246065808297568,
      "grad_norm": 0.1485379236386676,
      "learning_rate": 0.00018079261765963537,
      "loss": 0.7449,
      "step": 2512
    },
    {
      "epoch": 0.22469599427753933,
      "grad_norm": 0.14264801706377098,
      "learning_rate": 0.00018077554855119526,
      "loss": 0.7054,
      "step": 2513
    },
    {
      "epoch": 0.22478540772532188,
      "grad_norm": 0.15254892583729893,
      "learning_rate": 0.00018075847266820746,
      "loss": 0.6811,
      "step": 2514
    },
    {
      "epoch": 0.22487482117310442,
      "grad_norm": 0.13689264164404472,
      "learning_rate": 0.0001807413900121041,
      "loss": 0.663,
      "step": 2515
    },
    {
      "epoch": 0.224964234620887,
      "grad_norm": 0.14257878316955677,
      "learning_rate": 0.00018072430058431783,
      "loss": 0.7127,
      "step": 2516
    },
    {
      "epoch": 0.22505364806866954,
      "grad_norm": 0.1307786471368666,
      "learning_rate": 0.000180707204386282,
      "loss": 0.6472,
      "step": 2517
    },
    {
      "epoch": 0.22514306151645208,
      "grad_norm": 0.15913960419094592,
      "learning_rate": 0.00018069010141943037,
      "loss": 0.7083,
      "step": 2518
    },
    {
      "epoch": 0.22523247496423462,
      "grad_norm": 0.13344766205129868,
      "learning_rate": 0.00018067299168519741,
      "loss": 0.6781,
      "step": 2519
    },
    {
      "epoch": 0.22532188841201717,
      "grad_norm": 0.15554621859456685,
      "learning_rate": 0.00018065587518501804,
      "loss": 0.7248,
      "step": 2520
    },
    {
      "epoch": 0.2254113018597997,
      "grad_norm": 0.14632416964663428,
      "learning_rate": 0.00018063875192032787,
      "loss": 0.6744,
      "step": 2521
    },
    {
      "epoch": 0.22550071530758226,
      "grad_norm": 0.13492542276744984,
      "learning_rate": 0.00018062162189256292,
      "loss": 0.6497,
      "step": 2522
    },
    {
      "epoch": 0.2255901287553648,
      "grad_norm": 0.1360845407856306,
      "learning_rate": 0.00018060448510315993,
      "loss": 0.654,
      "step": 2523
    },
    {
      "epoch": 0.22567954220314734,
      "grad_norm": 0.1484470188244813,
      "learning_rate": 0.00018058734155355612,
      "loss": 0.7057,
      "step": 2524
    },
    {
      "epoch": 0.2257689556509299,
      "grad_norm": 0.13840642072383882,
      "learning_rate": 0.00018057019124518927,
      "loss": 0.7107,
      "step": 2525
    },
    {
      "epoch": 0.22585836909871246,
      "grad_norm": 0.1328392961676354,
      "learning_rate": 0.00018055303417949782,
      "loss": 0.6962,
      "step": 2526
    },
    {
      "epoch": 0.225947782546495,
      "grad_norm": 0.14302922630599796,
      "learning_rate": 0.00018053587035792067,
      "loss": 0.6703,
      "step": 2527
    },
    {
      "epoch": 0.22603719599427755,
      "grad_norm": 0.13652618378744838,
      "learning_rate": 0.00018051869978189731,
      "loss": 0.706,
      "step": 2528
    },
    {
      "epoch": 0.2261266094420601,
      "grad_norm": 0.14528901358946747,
      "learning_rate": 0.0001805015224528679,
      "loss": 0.7481,
      "step": 2529
    },
    {
      "epoch": 0.22621602288984263,
      "grad_norm": 0.1328409809826814,
      "learning_rate": 0.00018048433837227295,
      "loss": 0.6348,
      "step": 2530
    },
    {
      "epoch": 0.22630543633762518,
      "grad_norm": 0.1371761782812585,
      "learning_rate": 0.0001804671475415538,
      "loss": 0.6426,
      "step": 2531
    },
    {
      "epoch": 0.22639484978540772,
      "grad_norm": 0.15304113675265454,
      "learning_rate": 0.00018044994996215213,
      "loss": 0.7012,
      "step": 2532
    },
    {
      "epoch": 0.22648426323319026,
      "grad_norm": 0.13639462975806121,
      "learning_rate": 0.00018043274563551035,
      "loss": 0.6878,
      "step": 2533
    },
    {
      "epoch": 0.2265736766809728,
      "grad_norm": 0.16673212185786748,
      "learning_rate": 0.00018041553456307128,
      "loss": 0.3865,
      "step": 2534
    },
    {
      "epoch": 0.22666309012875535,
      "grad_norm": 0.15199915092802097,
      "learning_rate": 0.00018039831674627847,
      "loss": 0.6995,
      "step": 2535
    },
    {
      "epoch": 0.22675250357653792,
      "grad_norm": 0.14680715979899542,
      "learning_rate": 0.00018038109218657594,
      "loss": 0.6899,
      "step": 2536
    },
    {
      "epoch": 0.22684191702432047,
      "grad_norm": 0.15600902253385354,
      "learning_rate": 0.00018036386088540827,
      "loss": 0.7353,
      "step": 2537
    },
    {
      "epoch": 0.226931330472103,
      "grad_norm": 0.14431164100601573,
      "learning_rate": 0.00018034662284422065,
      "loss": 0.6707,
      "step": 2538
    },
    {
      "epoch": 0.22702074391988555,
      "grad_norm": 0.14105761054562,
      "learning_rate": 0.00018032937806445882,
      "loss": 0.7056,
      "step": 2539
    },
    {
      "epoch": 0.2271101573676681,
      "grad_norm": 0.1670689762077898,
      "learning_rate": 0.00018031212654756905,
      "loss": 0.6982,
      "step": 2540
    },
    {
      "epoch": 0.22719957081545064,
      "grad_norm": 0.1428175023796533,
      "learning_rate": 0.00018029486829499822,
      "loss": 0.7288,
      "step": 2541
    },
    {
      "epoch": 0.22728898426323318,
      "grad_norm": 0.1509633908296661,
      "learning_rate": 0.00018027760330819375,
      "loss": 0.722,
      "step": 2542
    },
    {
      "epoch": 0.22737839771101573,
      "grad_norm": 0.1400895074235753,
      "learning_rate": 0.00018026033158860365,
      "loss": 0.721,
      "step": 2543
    },
    {
      "epoch": 0.22746781115879827,
      "grad_norm": 0.14640281509104291,
      "learning_rate": 0.00018024305313767646,
      "loss": 0.7167,
      "step": 2544
    },
    {
      "epoch": 0.22755722460658084,
      "grad_norm": 0.1310641234961346,
      "learning_rate": 0.00018022576795686133,
      "loss": 0.6595,
      "step": 2545
    },
    {
      "epoch": 0.2276466380543634,
      "grad_norm": 0.13267925342602235,
      "learning_rate": 0.00018020847604760794,
      "loss": 0.7089,
      "step": 2546
    },
    {
      "epoch": 0.22773605150214593,
      "grad_norm": 0.18200809877082322,
      "learning_rate": 0.00018019117741136648,
      "loss": 0.3471,
      "step": 2547
    },
    {
      "epoch": 0.22782546494992847,
      "grad_norm": 0.1449650566009701,
      "learning_rate": 0.00018017387204958784,
      "loss": 0.6835,
      "step": 2548
    },
    {
      "epoch": 0.22791487839771102,
      "grad_norm": 0.14714952591291103,
      "learning_rate": 0.0001801565599637234,
      "loss": 0.71,
      "step": 2549
    },
    {
      "epoch": 0.22800429184549356,
      "grad_norm": 0.13648297675318424,
      "learning_rate": 0.00018013924115522508,
      "loss": 0.7087,
      "step": 2550
    },
    {
      "epoch": 0.2280937052932761,
      "grad_norm": 0.13215019980012682,
      "learning_rate": 0.00018012191562554537,
      "loss": 0.6575,
      "step": 2551
    },
    {
      "epoch": 0.22818311874105865,
      "grad_norm": 0.14161886766462575,
      "learning_rate": 0.00018010458337613735,
      "loss": 0.6999,
      "step": 2552
    },
    {
      "epoch": 0.2282725321888412,
      "grad_norm": 0.16484244409192178,
      "learning_rate": 0.00018008724440845468,
      "loss": 0.7548,
      "step": 2553
    },
    {
      "epoch": 0.22836194563662374,
      "grad_norm": 0.14670710444723933,
      "learning_rate": 0.00018006989872395156,
      "loss": 0.706,
      "step": 2554
    },
    {
      "epoch": 0.2284513590844063,
      "grad_norm": 0.1340505107846717,
      "learning_rate": 0.0001800525463240827,
      "loss": 0.6694,
      "step": 2555
    },
    {
      "epoch": 0.22854077253218885,
      "grad_norm": 0.14580273706635283,
      "learning_rate": 0.00018003518721030349,
      "loss": 0.6402,
      "step": 2556
    },
    {
      "epoch": 0.2286301859799714,
      "grad_norm": 0.1472762700955265,
      "learning_rate": 0.00018001782138406976,
      "loss": 0.6942,
      "step": 2557
    },
    {
      "epoch": 0.22871959942775394,
      "grad_norm": 0.12982209650631002,
      "learning_rate": 0.000180000448846838,
      "loss": 0.6691,
      "step": 2558
    },
    {
      "epoch": 0.22880901287553648,
      "grad_norm": 0.13932793375117875,
      "learning_rate": 0.0001799830696000652,
      "loss": 0.6942,
      "step": 2559
    },
    {
      "epoch": 0.22889842632331903,
      "grad_norm": 0.147665030581173,
      "learning_rate": 0.00017996568364520897,
      "loss": 0.7126,
      "step": 2560
    },
    {
      "epoch": 0.22898783977110157,
      "grad_norm": 0.15517794652706407,
      "learning_rate": 0.00017994829098372738,
      "loss": 0.6992,
      "step": 2561
    },
    {
      "epoch": 0.2290772532188841,
      "grad_norm": 0.14107245529871842,
      "learning_rate": 0.0001799308916170792,
      "loss": 0.7148,
      "step": 2562
    },
    {
      "epoch": 0.22916666666666666,
      "grad_norm": 0.12986722967459854,
      "learning_rate": 0.00017991348554672373,
      "loss": 0.6909,
      "step": 2563
    },
    {
      "epoch": 0.2292560801144492,
      "grad_norm": 0.15366804455404873,
      "learning_rate": 0.00017989607277412066,
      "loss": 0.7262,
      "step": 2564
    },
    {
      "epoch": 0.22934549356223177,
      "grad_norm": 0.14158130913724082,
      "learning_rate": 0.00017987865330073048,
      "loss": 0.691,
      "step": 2565
    },
    {
      "epoch": 0.22943490701001432,
      "grad_norm": 0.17084756864484185,
      "learning_rate": 0.00017986122712801414,
      "loss": 0.3532,
      "step": 2566
    },
    {
      "epoch": 0.22952432045779686,
      "grad_norm": 0.14749012606573175,
      "learning_rate": 0.0001798437942574331,
      "loss": 0.7073,
      "step": 2567
    },
    {
      "epoch": 0.2296137339055794,
      "grad_norm": 0.1128256729614427,
      "learning_rate": 0.0001798263546904495,
      "loss": 0.624,
      "step": 2568
    },
    {
      "epoch": 0.22970314735336195,
      "grad_norm": 0.14411262514889495,
      "learning_rate": 0.0001798089084285259,
      "loss": 0.6744,
      "step": 2569
    },
    {
      "epoch": 0.2297925608011445,
      "grad_norm": 0.14390225749304383,
      "learning_rate": 0.00017979145547312555,
      "loss": 0.7189,
      "step": 2570
    },
    {
      "epoch": 0.22988197424892703,
      "grad_norm": 0.1500449566824962,
      "learning_rate": 0.0001797739958257122,
      "loss": 0.7279,
      "step": 2571
    },
    {
      "epoch": 0.22997138769670958,
      "grad_norm": 0.16778813942183515,
      "learning_rate": 0.00017975652948775013,
      "loss": 0.7487,
      "step": 2572
    },
    {
      "epoch": 0.23006080114449212,
      "grad_norm": 0.15051577754600082,
      "learning_rate": 0.0001797390564607043,
      "loss": 0.7428,
      "step": 2573
    },
    {
      "epoch": 0.23015021459227467,
      "grad_norm": 0.14855926945934406,
      "learning_rate": 0.00017972157674604007,
      "loss": 0.714,
      "step": 2574
    },
    {
      "epoch": 0.23023962804005724,
      "grad_norm": 0.13041838693609645,
      "learning_rate": 0.00017970409034522348,
      "loss": 0.7088,
      "step": 2575
    },
    {
      "epoch": 0.23032904148783978,
      "grad_norm": 0.14252591585301755,
      "learning_rate": 0.00017968659725972112,
      "loss": 0.6957,
      "step": 2576
    },
    {
      "epoch": 0.23041845493562232,
      "grad_norm": 0.1295698446658738,
      "learning_rate": 0.00017966909749100006,
      "loss": 0.6808,
      "step": 2577
    },
    {
      "epoch": 0.23050786838340487,
      "grad_norm": 0.14364197917409985,
      "learning_rate": 0.00017965159104052803,
      "loss": 0.6797,
      "step": 2578
    },
    {
      "epoch": 0.2305972818311874,
      "grad_norm": 0.1633742112146949,
      "learning_rate": 0.00017963407790977322,
      "loss": 0.746,
      "step": 2579
    },
    {
      "epoch": 0.23068669527896996,
      "grad_norm": 0.18994548772201766,
      "learning_rate": 0.00017961655810020452,
      "loss": 0.3647,
      "step": 2580
    },
    {
      "epoch": 0.2307761087267525,
      "grad_norm": 0.14930226595154394,
      "learning_rate": 0.00017959903161329118,
      "loss": 0.7219,
      "step": 2581
    },
    {
      "epoch": 0.23086552217453504,
      "grad_norm": 0.14280841725583737,
      "learning_rate": 0.00017958149845050323,
      "loss": 0.7022,
      "step": 2582
    },
    {
      "epoch": 0.2309549356223176,
      "grad_norm": 0.13808833791735495,
      "learning_rate": 0.0001795639586133111,
      "loss": 0.6885,
      "step": 2583
    },
    {
      "epoch": 0.23104434907010013,
      "grad_norm": 0.16239292150082957,
      "learning_rate": 0.00017954641210318588,
      "loss": 0.7112,
      "step": 2584
    },
    {
      "epoch": 0.2311337625178827,
      "grad_norm": 0.14658609183793425,
      "learning_rate": 0.0001795288589215991,
      "loss": 0.6975,
      "step": 2585
    },
    {
      "epoch": 0.23122317596566525,
      "grad_norm": 0.15509894733031754,
      "learning_rate": 0.000179511299070023,
      "loss": 0.6952,
      "step": 2586
    },
    {
      "epoch": 0.2313125894134478,
      "grad_norm": 0.12480324620024635,
      "learning_rate": 0.00017949373254993027,
      "loss": 0.687,
      "step": 2587
    },
    {
      "epoch": 0.23140200286123033,
      "grad_norm": 0.13803332047614805,
      "learning_rate": 0.00017947615936279417,
      "loss": 0.6945,
      "step": 2588
    },
    {
      "epoch": 0.23149141630901288,
      "grad_norm": 0.13615577313570013,
      "learning_rate": 0.00017945857951008859,
      "loss": 0.6823,
      "step": 2589
    },
    {
      "epoch": 0.23158082975679542,
      "grad_norm": 0.15224695748971673,
      "learning_rate": 0.00017944099299328791,
      "loss": 0.6957,
      "step": 2590
    },
    {
      "epoch": 0.23167024320457796,
      "grad_norm": 0.1562076326550832,
      "learning_rate": 0.00017942339981386708,
      "loss": 0.6916,
      "step": 2591
    },
    {
      "epoch": 0.2317596566523605,
      "grad_norm": 0.16196265014225916,
      "learning_rate": 0.00017940579997330165,
      "loss": 0.7057,
      "step": 2592
    },
    {
      "epoch": 0.23184907010014305,
      "grad_norm": 0.1348100324505487,
      "learning_rate": 0.00017938819347306764,
      "loss": 0.6871,
      "step": 2593
    },
    {
      "epoch": 0.2319384835479256,
      "grad_norm": 0.15082999784967044,
      "learning_rate": 0.00017937058031464173,
      "loss": 0.7346,
      "step": 2594
    },
    {
      "epoch": 0.23202789699570817,
      "grad_norm": 0.14276778261034972,
      "learning_rate": 0.0001793529604995011,
      "loss": 0.6878,
      "step": 2595
    },
    {
      "epoch": 0.2321173104434907,
      "grad_norm": 0.16232522392712517,
      "learning_rate": 0.00017933533402912354,
      "loss": 0.7371,
      "step": 2596
    },
    {
      "epoch": 0.23220672389127325,
      "grad_norm": 0.14488801652716748,
      "learning_rate": 0.0001793177009049873,
      "loss": 0.724,
      "step": 2597
    },
    {
      "epoch": 0.2322961373390558,
      "grad_norm": 0.15322034287768704,
      "learning_rate": 0.00017930006112857127,
      "loss": 0.705,
      "step": 2598
    },
    {
      "epoch": 0.23238555078683834,
      "grad_norm": 0.16098312065214804,
      "learning_rate": 0.0001792824147013549,
      "loss": 0.693,
      "step": 2599
    },
    {
      "epoch": 0.23247496423462088,
      "grad_norm": 0.14206288934489758,
      "learning_rate": 0.00017926476162481817,
      "loss": 0.6726,
      "step": 2600
    },
    {
      "epoch": 0.23256437768240343,
      "grad_norm": 0.12344052004395455,
      "learning_rate": 0.00017924710190044156,
      "loss": 0.7013,
      "step": 2601
    },
    {
      "epoch": 0.23265379113018597,
      "grad_norm": 0.15381350343865627,
      "learning_rate": 0.00017922943552970625,
      "loss": 0.6998,
      "step": 2602
    },
    {
      "epoch": 0.23274320457796852,
      "grad_norm": 0.16266112798621007,
      "learning_rate": 0.0001792117625140939,
      "loss": 0.6886,
      "step": 2603
    },
    {
      "epoch": 0.23283261802575106,
      "grad_norm": 0.1576045525753794,
      "learning_rate": 0.00017919408285508662,
      "loss": 0.7112,
      "step": 2604
    },
    {
      "epoch": 0.23292203147353363,
      "grad_norm": 0.13163380703094626,
      "learning_rate": 0.0001791763965541673,
      "loss": 0.6536,
      "step": 2605
    },
    {
      "epoch": 0.23301144492131617,
      "grad_norm": 0.13368610840296394,
      "learning_rate": 0.00017915870361281922,
      "loss": 0.7045,
      "step": 2606
    },
    {
      "epoch": 0.23310085836909872,
      "grad_norm": 0.14943882935962394,
      "learning_rate": 0.00017914100403252628,
      "loss": 0.7412,
      "step": 2607
    },
    {
      "epoch": 0.23319027181688126,
      "grad_norm": 0.1303430293052636,
      "learning_rate": 0.00017912329781477287,
      "loss": 0.6987,
      "step": 2608
    },
    {
      "epoch": 0.2332796852646638,
      "grad_norm": 0.12794959786897675,
      "learning_rate": 0.00017910558496104403,
      "loss": 0.7003,
      "step": 2609
    },
    {
      "epoch": 0.23336909871244635,
      "grad_norm": 0.139998691350694,
      "learning_rate": 0.00017908786547282538,
      "loss": 0.7118,
      "step": 2610
    },
    {
      "epoch": 0.2334585121602289,
      "grad_norm": 0.13370825787147583,
      "learning_rate": 0.0001790701393516029,
      "loss": 0.6909,
      "step": 2611
    },
    {
      "epoch": 0.23354792560801144,
      "grad_norm": 0.14790068965057113,
      "learning_rate": 0.00017905240659886335,
      "loss": 0.682,
      "step": 2612
    },
    {
      "epoch": 0.23363733905579398,
      "grad_norm": 0.14113504106086827,
      "learning_rate": 0.00017903466721609393,
      "loss": 0.7341,
      "step": 2613
    },
    {
      "epoch": 0.23372675250357655,
      "grad_norm": 0.1499182292835216,
      "learning_rate": 0.0001790169212047824,
      "loss": 0.6665,
      "step": 2614
    },
    {
      "epoch": 0.2338161659513591,
      "grad_norm": 0.1538757322331032,
      "learning_rate": 0.00017899916856641714,
      "loss": 0.7053,
      "step": 2615
    },
    {
      "epoch": 0.23390557939914164,
      "grad_norm": 0.16044007761804738,
      "learning_rate": 0.00017898140930248704,
      "loss": 0.7504,
      "step": 2616
    },
    {
      "epoch": 0.23399499284692418,
      "grad_norm": 0.1349252867306173,
      "learning_rate": 0.0001789636434144815,
      "loss": 0.6515,
      "step": 2617
    },
    {
      "epoch": 0.23408440629470673,
      "grad_norm": 0.13666377806497212,
      "learning_rate": 0.00017894587090389052,
      "loss": 0.6829,
      "step": 2618
    },
    {
      "epoch": 0.23417381974248927,
      "grad_norm": 0.1458941526640156,
      "learning_rate": 0.00017892809177220474,
      "loss": 0.7098,
      "step": 2619
    },
    {
      "epoch": 0.2342632331902718,
      "grad_norm": 0.15995542742962596,
      "learning_rate": 0.00017891030602091519,
      "loss": 0.7204,
      "step": 2620
    },
    {
      "epoch": 0.23435264663805436,
      "grad_norm": 0.16110662859794073,
      "learning_rate": 0.0001788925136515136,
      "loss": 0.7365,
      "step": 2621
    },
    {
      "epoch": 0.2344420600858369,
      "grad_norm": 0.16146557371693404,
      "learning_rate": 0.00017887471466549216,
      "loss": 0.7294,
      "step": 2622
    },
    {
      "epoch": 0.23453147353361944,
      "grad_norm": 0.1314676795326647,
      "learning_rate": 0.00017885690906434365,
      "loss": 0.6776,
      "step": 2623
    },
    {
      "epoch": 0.23462088698140202,
      "grad_norm": 0.1500145571349966,
      "learning_rate": 0.0001788390968495614,
      "loss": 0.7024,
      "step": 2624
    },
    {
      "epoch": 0.23471030042918456,
      "grad_norm": 0.15052028558881897,
      "learning_rate": 0.00017882127802263935,
      "loss": 0.7253,
      "step": 2625
    },
    {
      "epoch": 0.2347997138769671,
      "grad_norm": 0.123238086587457,
      "learning_rate": 0.00017880345258507188,
      "loss": 0.658,
      "step": 2626
    },
    {
      "epoch": 0.23488912732474965,
      "grad_norm": 0.1340578177569348,
      "learning_rate": 0.000178785620538354,
      "loss": 0.6957,
      "step": 2627
    },
    {
      "epoch": 0.2349785407725322,
      "grad_norm": 0.12915081306004733,
      "learning_rate": 0.00017876778188398128,
      "loss": 0.6764,
      "step": 2628
    },
    {
      "epoch": 0.23506795422031473,
      "grad_norm": 0.11990258430356956,
      "learning_rate": 0.00017874993662344983,
      "loss": 0.659,
      "step": 2629
    },
    {
      "epoch": 0.23515736766809728,
      "grad_norm": 0.12446734904888666,
      "learning_rate": 0.00017873208475825632,
      "loss": 0.6672,
      "step": 2630
    },
    {
      "epoch": 0.23524678111587982,
      "grad_norm": 0.13681412761981482,
      "learning_rate": 0.0001787142262898979,
      "loss": 0.6847,
      "step": 2631
    },
    {
      "epoch": 0.23533619456366237,
      "grad_norm": 0.15092097546751895,
      "learning_rate": 0.00017869636121987243,
      "loss": 0.6903,
      "step": 2632
    },
    {
      "epoch": 0.2354256080114449,
      "grad_norm": 0.13336894619108808,
      "learning_rate": 0.00017867848954967815,
      "loss": 0.6974,
      "step": 2633
    },
    {
      "epoch": 0.23551502145922748,
      "grad_norm": 0.13934975814447012,
      "learning_rate": 0.000178660611280814,
      "loss": 0.6366,
      "step": 2634
    },
    {
      "epoch": 0.23560443490701002,
      "grad_norm": 0.14433978614440718,
      "learning_rate": 0.00017864272641477936,
      "loss": 0.6815,
      "step": 2635
    },
    {
      "epoch": 0.23569384835479257,
      "grad_norm": 0.12358861046998088,
      "learning_rate": 0.00017862483495307424,
      "loss": 0.6685,
      "step": 2636
    },
    {
      "epoch": 0.2357832618025751,
      "grad_norm": 0.12443970530664723,
      "learning_rate": 0.00017860693689719916,
      "loss": 0.6745,
      "step": 2637
    },
    {
      "epoch": 0.23587267525035766,
      "grad_norm": 0.15424346157323088,
      "learning_rate": 0.0001785890322486552,
      "loss": 0.7336,
      "step": 2638
    },
    {
      "epoch": 0.2359620886981402,
      "grad_norm": 0.15077948338503494,
      "learning_rate": 0.00017857112100894406,
      "loss": 0.7276,
      "step": 2639
    },
    {
      "epoch": 0.23605150214592274,
      "grad_norm": 0.1406662516425555,
      "learning_rate": 0.00017855320317956784,
      "loss": 0.6919,
      "step": 2640
    },
    {
      "epoch": 0.2361409155937053,
      "grad_norm": 0.14204238481930476,
      "learning_rate": 0.0001785352787620294,
      "loss": 0.6844,
      "step": 2641
    },
    {
      "epoch": 0.23623032904148783,
      "grad_norm": 0.12693441877056832,
      "learning_rate": 0.00017851734775783194,
      "loss": 0.6799,
      "step": 2642
    },
    {
      "epoch": 0.23631974248927037,
      "grad_norm": 0.13480375442464645,
      "learning_rate": 0.00017849941016847933,
      "loss": 0.6765,
      "step": 2643
    },
    {
      "epoch": 0.23640915593705294,
      "grad_norm": 0.1380620830253065,
      "learning_rate": 0.000178481465995476,
      "loss": 0.6767,
      "step": 2644
    },
    {
      "epoch": 0.2364985693848355,
      "grad_norm": 0.14412356796062986,
      "learning_rate": 0.00017846351524032693,
      "loss": 0.6718,
      "step": 2645
    },
    {
      "epoch": 0.23658798283261803,
      "grad_norm": 0.15920093398049806,
      "learning_rate": 0.0001784455579045376,
      "loss": 0.7183,
      "step": 2646
    },
    {
      "epoch": 0.23667739628040058,
      "grad_norm": 0.15598629364664854,
      "learning_rate": 0.00017842759398961405,
      "loss": 0.7265,
      "step": 2647
    },
    {
      "epoch": 0.23676680972818312,
      "grad_norm": 0.1224826897971211,
      "learning_rate": 0.00017840962349706288,
      "loss": 0.6752,
      "step": 2648
    },
    {
      "epoch": 0.23685622317596566,
      "grad_norm": 0.14841259780148325,
      "learning_rate": 0.00017839164642839133,
      "loss": 0.6898,
      "step": 2649
    },
    {
      "epoch": 0.2369456366237482,
      "grad_norm": 0.13380501543802004,
      "learning_rate": 0.000178373662785107,
      "loss": 0.6969,
      "step": 2650
    },
    {
      "epoch": 0.23703505007153075,
      "grad_norm": 0.14215401636484676,
      "learning_rate": 0.00017835567256871827,
      "loss": 0.6804,
      "step": 2651
    },
    {
      "epoch": 0.2371244635193133,
      "grad_norm": 0.12586871157099122,
      "learning_rate": 0.00017833767578073393,
      "loss": 0.6652,
      "step": 2652
    },
    {
      "epoch": 0.23721387696709584,
      "grad_norm": 0.14140719018242864,
      "learning_rate": 0.0001783196724226633,
      "loss": 0.6951,
      "step": 2653
    },
    {
      "epoch": 0.2373032904148784,
      "grad_norm": 0.1456785988888078,
      "learning_rate": 0.00017830166249601637,
      "loss": 0.7137,
      "step": 2654
    },
    {
      "epoch": 0.23739270386266095,
      "grad_norm": 0.16151722563419552,
      "learning_rate": 0.00017828364600230352,
      "loss": 0.6917,
      "step": 2655
    },
    {
      "epoch": 0.2374821173104435,
      "grad_norm": 0.15916318760610923,
      "learning_rate": 0.00017826562294303585,
      "loss": 0.7067,
      "step": 2656
    },
    {
      "epoch": 0.23757153075822604,
      "grad_norm": 0.1458423223534626,
      "learning_rate": 0.0001782475933197249,
      "loss": 0.723,
      "step": 2657
    },
    {
      "epoch": 0.23766094420600858,
      "grad_norm": 0.13718634023143864,
      "learning_rate": 0.00017822955713388277,
      "loss": 0.6699,
      "step": 2658
    },
    {
      "epoch": 0.23775035765379113,
      "grad_norm": 0.1433113061443509,
      "learning_rate": 0.0001782115143870222,
      "loss": 0.6787,
      "step": 2659
    },
    {
      "epoch": 0.23783977110157367,
      "grad_norm": 0.1427527013926923,
      "learning_rate": 0.00017819346508065635,
      "loss": 0.7032,
      "step": 2660
    },
    {
      "epoch": 0.23792918454935622,
      "grad_norm": 0.1416543778575411,
      "learning_rate": 0.00017817540921629904,
      "loss": 0.7152,
      "step": 2661
    },
    {
      "epoch": 0.23801859799713876,
      "grad_norm": 0.15071178500622567,
      "learning_rate": 0.00017815734679546457,
      "loss": 0.6785,
      "step": 2662
    },
    {
      "epoch": 0.2381080114449213,
      "grad_norm": 0.13050077606777746,
      "learning_rate": 0.00017813927781966778,
      "loss": 0.6438,
      "step": 2663
    },
    {
      "epoch": 0.23819742489270387,
      "grad_norm": 0.13777460721744816,
      "learning_rate": 0.00017812120229042416,
      "loss": 0.6693,
      "step": 2664
    },
    {
      "epoch": 0.23828683834048642,
      "grad_norm": 0.14959609799695628,
      "learning_rate": 0.00017810312020924963,
      "loss": 0.7079,
      "step": 2665
    },
    {
      "epoch": 0.23837625178826896,
      "grad_norm": 0.1371713389096146,
      "learning_rate": 0.00017808503157766073,
      "loss": 0.7005,
      "step": 2666
    },
    {
      "epoch": 0.2384656652360515,
      "grad_norm": 0.14512419797221224,
      "learning_rate": 0.00017806693639717456,
      "loss": 0.7233,
      "step": 2667
    },
    {
      "epoch": 0.23855507868383405,
      "grad_norm": 0.13912444602240878,
      "learning_rate": 0.0001780488346693087,
      "loss": 0.6932,
      "step": 2668
    },
    {
      "epoch": 0.2386444921316166,
      "grad_norm": 0.1393923752380072,
      "learning_rate": 0.00017803072639558133,
      "loss": 0.6812,
      "step": 2669
    },
    {
      "epoch": 0.23873390557939914,
      "grad_norm": 0.12946506335866587,
      "learning_rate": 0.0001780126115775112,
      "loss": 0.6665,
      "step": 2670
    },
    {
      "epoch": 0.23882331902718168,
      "grad_norm": 0.1249035040704714,
      "learning_rate": 0.00017799449021661752,
      "loss": 0.6692,
      "step": 2671
    },
    {
      "epoch": 0.23891273247496422,
      "grad_norm": 0.12093761693212907,
      "learning_rate": 0.00017797636231442016,
      "loss": 0.6409,
      "step": 2672
    },
    {
      "epoch": 0.2390021459227468,
      "grad_norm": 0.1427500684952455,
      "learning_rate": 0.00017795822787243946,
      "loss": 0.6948,
      "step": 2673
    },
    {
      "epoch": 0.23909155937052934,
      "grad_norm": 0.14423814309593308,
      "learning_rate": 0.0001779400868921963,
      "loss": 0.7227,
      "step": 2674
    },
    {
      "epoch": 0.23918097281831188,
      "grad_norm": 0.15657427302440194,
      "learning_rate": 0.00017792193937521224,
      "loss": 0.7219,
      "step": 2675
    },
    {
      "epoch": 0.23927038626609443,
      "grad_norm": 0.16638751121600115,
      "learning_rate": 0.0001779037853230092,
      "loss": 0.7384,
      "step": 2676
    },
    {
      "epoch": 0.23935979971387697,
      "grad_norm": 0.1400802392586747,
      "learning_rate": 0.00017788562473710978,
      "loss": 0.6998,
      "step": 2677
    },
    {
      "epoch": 0.2394492131616595,
      "grad_norm": 0.12703916684455346,
      "learning_rate": 0.00017786745761903708,
      "loss": 0.6687,
      "step": 2678
    },
    {
      "epoch": 0.23953862660944206,
      "grad_norm": 0.13086852196323137,
      "learning_rate": 0.00017784928397031476,
      "loss": 0.6916,
      "step": 2679
    },
    {
      "epoch": 0.2396280400572246,
      "grad_norm": 0.13328141311886435,
      "learning_rate": 0.00017783110379246696,
      "loss": 0.6373,
      "step": 2680
    },
    {
      "epoch": 0.23971745350500714,
      "grad_norm": 0.16936550991342586,
      "learning_rate": 0.00017781291708701853,
      "loss": 0.7157,
      "step": 2681
    },
    {
      "epoch": 0.2398068669527897,
      "grad_norm": 0.16097922302972464,
      "learning_rate": 0.0001777947238554947,
      "loss": 0.7285,
      "step": 2682
    },
    {
      "epoch": 0.23989628040057226,
      "grad_norm": 0.14989344416815,
      "learning_rate": 0.00017777652409942132,
      "loss": 0.6821,
      "step": 2683
    },
    {
      "epoch": 0.2399856938483548,
      "grad_norm": 0.1398235105329101,
      "learning_rate": 0.00017775831782032483,
      "loss": 0.7264,
      "step": 2684
    },
    {
      "epoch": 0.24007510729613735,
      "grad_norm": 0.13881650468983503,
      "learning_rate": 0.00017774010501973208,
      "loss": 0.6934,
      "step": 2685
    },
    {
      "epoch": 0.2401645207439199,
      "grad_norm": 0.16873997652732634,
      "learning_rate": 0.00017772188569917065,
      "loss": 0.3668,
      "step": 2686
    },
    {
      "epoch": 0.24025393419170243,
      "grad_norm": 0.14638462993454765,
      "learning_rate": 0.00017770365986016852,
      "loss": 0.679,
      "step": 2687
    },
    {
      "epoch": 0.24034334763948498,
      "grad_norm": 0.15222932429560185,
      "learning_rate": 0.00017768542750425426,
      "loss": 0.6897,
      "step": 2688
    },
    {
      "epoch": 0.24043276108726752,
      "grad_norm": 0.15702507830021029,
      "learning_rate": 0.00017766718863295705,
      "loss": 0.6972,
      "step": 2689
    },
    {
      "epoch": 0.24052217453505007,
      "grad_norm": 0.16035121586849935,
      "learning_rate": 0.00017764894324780653,
      "loss": 0.7446,
      "step": 2690
    },
    {
      "epoch": 0.2406115879828326,
      "grad_norm": 0.1431429116100712,
      "learning_rate": 0.0001776306913503329,
      "loss": 0.6848,
      "step": 2691
    },
    {
      "epoch": 0.24070100143061515,
      "grad_norm": 0.14905214008986836,
      "learning_rate": 0.00017761243294206694,
      "loss": 0.679,
      "step": 2692
    },
    {
      "epoch": 0.24079041487839772,
      "grad_norm": 0.1384039395194759,
      "learning_rate": 0.00017759416802453997,
      "loss": 0.6774,
      "step": 2693
    },
    {
      "epoch": 0.24087982832618027,
      "grad_norm": 0.14793516121135655,
      "learning_rate": 0.0001775758965992838,
      "loss": 0.7127,
      "step": 2694
    },
    {
      "epoch": 0.2409692417739628,
      "grad_norm": 0.16274371965533926,
      "learning_rate": 0.0001775576186678309,
      "loss": 0.7074,
      "step": 2695
    },
    {
      "epoch": 0.24105865522174535,
      "grad_norm": 0.17216981077937077,
      "learning_rate": 0.00017753933423171421,
      "loss": 0.7445,
      "step": 2696
    },
    {
      "epoch": 0.2411480686695279,
      "grad_norm": 0.15560831291122404,
      "learning_rate": 0.00017752104329246717,
      "loss": 0.715,
      "step": 2697
    },
    {
      "epoch": 0.24123748211731044,
      "grad_norm": 0.12780862905107662,
      "learning_rate": 0.00017750274585162385,
      "loss": 0.6766,
      "step": 2698
    },
    {
      "epoch": 0.24132689556509299,
      "grad_norm": 0.15213829329838843,
      "learning_rate": 0.00017748444191071884,
      "loss": 0.6982,
      "step": 2699
    },
    {
      "epoch": 0.24141630901287553,
      "grad_norm": 0.1361233253363342,
      "learning_rate": 0.00017746613147128726,
      "loss": 0.6915,
      "step": 2700
    },
    {
      "epoch": 0.24150572246065807,
      "grad_norm": 0.1814165148252674,
      "learning_rate": 0.0001774478145348648,
      "loss": 0.3781,
      "step": 2701
    },
    {
      "epoch": 0.24159513590844062,
      "grad_norm": 0.13500043145747306,
      "learning_rate": 0.00017742949110298767,
      "loss": 0.6575,
      "step": 2702
    },
    {
      "epoch": 0.2416845493562232,
      "grad_norm": 0.13202490984690393,
      "learning_rate": 0.00017741116117719262,
      "loss": 0.6746,
      "step": 2703
    },
    {
      "epoch": 0.24177396280400573,
      "grad_norm": 0.15539178374329046,
      "learning_rate": 0.000177392824759017,
      "loss": 0.7515,
      "step": 2704
    },
    {
      "epoch": 0.24186337625178828,
      "grad_norm": 0.12118365306488918,
      "learning_rate": 0.0001773744818499986,
      "loss": 0.6459,
      "step": 2705
    },
    {
      "epoch": 0.24195278969957082,
      "grad_norm": 0.1377215820630961,
      "learning_rate": 0.00017735613245167586,
      "loss": 0.7002,
      "step": 2706
    },
    {
      "epoch": 0.24204220314735336,
      "grad_norm": 0.15202185529100176,
      "learning_rate": 0.00017733777656558773,
      "loss": 0.7059,
      "step": 2707
    },
    {
      "epoch": 0.2421316165951359,
      "grad_norm": 0.1303147705201079,
      "learning_rate": 0.00017731941419327365,
      "loss": 0.6841,
      "step": 2708
    },
    {
      "epoch": 0.24222103004291845,
      "grad_norm": 0.13726596867033192,
      "learning_rate": 0.0001773010453362737,
      "loss": 0.6882,
      "step": 2709
    },
    {
      "epoch": 0.242310443490701,
      "grad_norm": 0.13747231568435142,
      "learning_rate": 0.00017728266999612844,
      "loss": 0.6797,
      "step": 2710
    },
    {
      "epoch": 0.24239985693848354,
      "grad_norm": 0.14146397598467345,
      "learning_rate": 0.000177264288174379,
      "loss": 0.6902,
      "step": 2711
    },
    {
      "epoch": 0.24248927038626608,
      "grad_norm": 0.1405870494572857,
      "learning_rate": 0.00017724589987256698,
      "loss": 0.6879,
      "step": 2712
    },
    {
      "epoch": 0.24257868383404865,
      "grad_norm": 0.13273199254012327,
      "learning_rate": 0.00017722750509223465,
      "loss": 0.7069,
      "step": 2713
    },
    {
      "epoch": 0.2426680972818312,
      "grad_norm": 0.15637228163399586,
      "learning_rate": 0.0001772091038349247,
      "loss": 0.7172,
      "step": 2714
    },
    {
      "epoch": 0.24275751072961374,
      "grad_norm": 0.14172178503053293,
      "learning_rate": 0.00017719069610218048,
      "loss": 0.698,
      "step": 2715
    },
    {
      "epoch": 0.24284692417739628,
      "grad_norm": 0.14932116638091553,
      "learning_rate": 0.00017717228189554582,
      "loss": 0.7204,
      "step": 2716
    },
    {
      "epoch": 0.24293633762517883,
      "grad_norm": 0.13227759719717747,
      "learning_rate": 0.00017715386121656507,
      "loss": 0.685,
      "step": 2717
    },
    {
      "epoch": 0.24302575107296137,
      "grad_norm": 0.14438547051978995,
      "learning_rate": 0.00017713543406678315,
      "loss": 0.6801,
      "step": 2718
    },
    {
      "epoch": 0.24311516452074391,
      "grad_norm": 0.1489804771906694,
      "learning_rate": 0.0001771170004477455,
      "loss": 0.6878,
      "step": 2719
    },
    {
      "epoch": 0.24320457796852646,
      "grad_norm": 0.12751207426593864,
      "learning_rate": 0.0001770985603609982,
      "loss": 0.6807,
      "step": 2720
    },
    {
      "epoch": 0.243293991416309,
      "grad_norm": 0.13705071023133514,
      "learning_rate": 0.00017708011380808774,
      "loss": 0.6768,
      "step": 2721
    },
    {
      "epoch": 0.24338340486409155,
      "grad_norm": 0.14490672120269865,
      "learning_rate": 0.00017706166079056124,
      "loss": 0.6812,
      "step": 2722
    },
    {
      "epoch": 0.24347281831187412,
      "grad_norm": 0.15292627546619683,
      "learning_rate": 0.0001770432013099663,
      "loss": 0.7602,
      "step": 2723
    },
    {
      "epoch": 0.24356223175965666,
      "grad_norm": 0.13909586233709842,
      "learning_rate": 0.0001770247353678511,
      "loss": 0.7001,
      "step": 2724
    },
    {
      "epoch": 0.2436516452074392,
      "grad_norm": 0.15224789327494084,
      "learning_rate": 0.0001770062629657644,
      "loss": 0.7173,
      "step": 2725
    },
    {
      "epoch": 0.24374105865522175,
      "grad_norm": 0.13977723293287084,
      "learning_rate": 0.0001769877841052554,
      "loss": 0.6984,
      "step": 2726
    },
    {
      "epoch": 0.2438304721030043,
      "grad_norm": 0.1609788233110359,
      "learning_rate": 0.00017696929878787394,
      "loss": 0.6947,
      "step": 2727
    },
    {
      "epoch": 0.24391988555078684,
      "grad_norm": 0.1410111621556879,
      "learning_rate": 0.00017695080701517034,
      "loss": 0.6869,
      "step": 2728
    },
    {
      "epoch": 0.24400929899856938,
      "grad_norm": 0.16427607139197115,
      "learning_rate": 0.00017693230878869547,
      "loss": 0.4224,
      "step": 2729
    },
    {
      "epoch": 0.24409871244635192,
      "grad_norm": 0.14685858585973746,
      "learning_rate": 0.00017691380411000079,
      "loss": 0.6937,
      "step": 2730
    },
    {
      "epoch": 0.24418812589413447,
      "grad_norm": 0.16323716763508422,
      "learning_rate": 0.00017689529298063822,
      "loss": 0.6815,
      "step": 2731
    },
    {
      "epoch": 0.244277539341917,
      "grad_norm": 0.13791109067317608,
      "learning_rate": 0.00017687677540216033,
      "loss": 0.6905,
      "step": 2732
    },
    {
      "epoch": 0.24436695278969958,
      "grad_norm": 0.14547842119568483,
      "learning_rate": 0.00017685825137612012,
      "loss": 0.6912,
      "step": 2733
    },
    {
      "epoch": 0.24445636623748213,
      "grad_norm": 0.1539490639929358,
      "learning_rate": 0.00017683972090407123,
      "loss": 0.6763,
      "step": 2734
    },
    {
      "epoch": 0.24454577968526467,
      "grad_norm": 0.14035247493087888,
      "learning_rate": 0.00017682118398756766,
      "loss": 0.6509,
      "step": 2735
    },
    {
      "epoch": 0.2446351931330472,
      "grad_norm": 0.14983013137909645,
      "learning_rate": 0.0001768026406281642,
      "loss": 0.7132,
      "step": 2736
    },
    {
      "epoch": 0.24472460658082976,
      "grad_norm": 0.15817437707986107,
      "learning_rate": 0.00017678409082741604,
      "loss": 0.7108,
      "step": 2737
    },
    {
      "epoch": 0.2448140200286123,
      "grad_norm": 0.13599517172036427,
      "learning_rate": 0.00017676553458687892,
      "loss": 0.6571,
      "step": 2738
    },
    {
      "epoch": 0.24490343347639484,
      "grad_norm": 0.1374381071887382,
      "learning_rate": 0.00017674697190810912,
      "loss": 0.7032,
      "step": 2739
    },
    {
      "epoch": 0.2449928469241774,
      "grad_norm": 0.1666429485102303,
      "learning_rate": 0.00017672840279266345,
      "loss": 0.377,
      "step": 2740
    },
    {
      "epoch": 0.24508226037195993,
      "grad_norm": 0.14700957543860313,
      "learning_rate": 0.00017670982724209933,
      "loss": 0.7062,
      "step": 2741
    },
    {
      "epoch": 0.2451716738197425,
      "grad_norm": 0.14456743113389567,
      "learning_rate": 0.00017669124525797463,
      "loss": 0.7141,
      "step": 2742
    },
    {
      "epoch": 0.24526108726752505,
      "grad_norm": 0.1560917016746954,
      "learning_rate": 0.0001766726568418478,
      "loss": 0.6759,
      "step": 2743
    },
    {
      "epoch": 0.2453505007153076,
      "grad_norm": 0.1476843848654852,
      "learning_rate": 0.00017665406199527785,
      "loss": 0.7169,
      "step": 2744
    },
    {
      "epoch": 0.24543991416309013,
      "grad_norm": 0.1411391661192345,
      "learning_rate": 0.00017663546071982432,
      "loss": 0.6813,
      "step": 2745
    },
    {
      "epoch": 0.24552932761087268,
      "grad_norm": 0.13026487572206888,
      "learning_rate": 0.0001766168530170472,
      "loss": 0.6559,
      "step": 2746
    },
    {
      "epoch": 0.24561874105865522,
      "grad_norm": 0.1376770197357828,
      "learning_rate": 0.00017659823888850715,
      "loss": 0.7107,
      "step": 2747
    },
    {
      "epoch": 0.24570815450643776,
      "grad_norm": 0.15648318902291203,
      "learning_rate": 0.00017657961833576535,
      "loss": 0.7154,
      "step": 2748
    },
    {
      "epoch": 0.2457975679542203,
      "grad_norm": 0.1593964895846504,
      "learning_rate": 0.0001765609913603834,
      "loss": 0.7395,
      "step": 2749
    },
    {
      "epoch": 0.24588698140200285,
      "grad_norm": 0.1428844496281555,
      "learning_rate": 0.00017654235796392363,
      "loss": 0.6769,
      "step": 2750
    },
    {
      "epoch": 0.2459763948497854,
      "grad_norm": 0.15159777791190546,
      "learning_rate": 0.0001765237181479487,
      "loss": 0.6773,
      "step": 2751
    },
    {
      "epoch": 0.24606580829756797,
      "grad_norm": 0.14063829529287714,
      "learning_rate": 0.00017650507191402194,
      "loss": 0.6981,
      "step": 2752
    },
    {
      "epoch": 0.2461552217453505,
      "grad_norm": 0.12981362971196542,
      "learning_rate": 0.0001764864192637072,
      "loss": 0.6541,
      "step": 2753
    },
    {
      "epoch": 0.24624463519313305,
      "grad_norm": 0.15264619060383403,
      "learning_rate": 0.00017646776019856884,
      "loss": 0.6847,
      "step": 2754
    },
    {
      "epoch": 0.2463340486409156,
      "grad_norm": 0.16607172275671675,
      "learning_rate": 0.0001764490947201718,
      "loss": 0.7163,
      "step": 2755
    },
    {
      "epoch": 0.24642346208869814,
      "grad_norm": 0.1477874176854611,
      "learning_rate": 0.00017643042283008148,
      "loss": 0.7395,
      "step": 2756
    },
    {
      "epoch": 0.24651287553648069,
      "grad_norm": 0.1292215641455561,
      "learning_rate": 0.00017641174452986396,
      "loss": 0.6767,
      "step": 2757
    },
    {
      "epoch": 0.24660228898426323,
      "grad_norm": 0.14819700129236943,
      "learning_rate": 0.00017639305982108567,
      "loss": 0.6896,
      "step": 2758
    },
    {
      "epoch": 0.24669170243204577,
      "grad_norm": 0.16247545082475334,
      "learning_rate": 0.0001763743687053137,
      "loss": 0.7383,
      "step": 2759
    },
    {
      "epoch": 0.24678111587982832,
      "grad_norm": 0.12766056490926492,
      "learning_rate": 0.0001763556711841157,
      "loss": 0.6924,
      "step": 2760
    },
    {
      "epoch": 0.24687052932761086,
      "grad_norm": 0.1300834420584698,
      "learning_rate": 0.00017633696725905974,
      "loss": 0.6537,
      "step": 2761
    },
    {
      "epoch": 0.24695994277539343,
      "grad_norm": 0.17386639734393075,
      "learning_rate": 0.00017631825693171453,
      "loss": 0.396,
      "step": 2762
    },
    {
      "epoch": 0.24704935622317598,
      "grad_norm": 0.14178402764465786,
      "learning_rate": 0.0001762995402036493,
      "loss": 0.6686,
      "step": 2763
    },
    {
      "epoch": 0.24713876967095852,
      "grad_norm": 0.13634712261336848,
      "learning_rate": 0.00017628081707643376,
      "loss": 0.6686,
      "step": 2764
    },
    {
      "epoch": 0.24722818311874106,
      "grad_norm": 0.14268118871234117,
      "learning_rate": 0.00017626208755163822,
      "loss": 0.7048,
      "step": 2765
    },
    {
      "epoch": 0.2473175965665236,
      "grad_norm": 0.1313085665061281,
      "learning_rate": 0.0001762433516308335,
      "loss": 0.6657,
      "step": 2766
    },
    {
      "epoch": 0.24740701001430615,
      "grad_norm": 0.12500294959489502,
      "learning_rate": 0.00017622460931559098,
      "loss": 0.7107,
      "step": 2767
    },
    {
      "epoch": 0.2474964234620887,
      "grad_norm": 0.14665764803183282,
      "learning_rate": 0.00017620586060748252,
      "loss": 0.7382,
      "step": 2768
    },
    {
      "epoch": 0.24758583690987124,
      "grad_norm": 0.1197763277031223,
      "learning_rate": 0.00017618710550808056,
      "loss": 0.6581,
      "step": 2769
    },
    {
      "epoch": 0.24767525035765378,
      "grad_norm": 0.14185208025040358,
      "learning_rate": 0.00017616834401895805,
      "loss": 0.6957,
      "step": 2770
    },
    {
      "epoch": 0.24776466380543632,
      "grad_norm": 0.12204885750043876,
      "learning_rate": 0.0001761495761416885,
      "loss": 0.686,
      "step": 2771
    },
    {
      "epoch": 0.2478540772532189,
      "grad_norm": 0.15572524495121712,
      "learning_rate": 0.00017613080187784603,
      "loss": 0.7294,
      "step": 2772
    },
    {
      "epoch": 0.24794349070100144,
      "grad_norm": 0.1333320540392987,
      "learning_rate": 0.00017611202122900512,
      "loss": 0.6775,
      "step": 2773
    },
    {
      "epoch": 0.24803290414878398,
      "grad_norm": 0.11920753136428579,
      "learning_rate": 0.0001760932341967409,
      "loss": 0.667,
      "step": 2774
    },
    {
      "epoch": 0.24812231759656653,
      "grad_norm": 0.14959141779244947,
      "learning_rate": 0.00017607444078262903,
      "loss": 0.6681,
      "step": 2775
    },
    {
      "epoch": 0.24821173104434907,
      "grad_norm": 0.14171918690509863,
      "learning_rate": 0.00017605564098824568,
      "loss": 0.7198,
      "step": 2776
    },
    {
      "epoch": 0.24830114449213161,
      "grad_norm": 0.13868002210863276,
      "learning_rate": 0.00017603683481516762,
      "loss": 0.6698,
      "step": 2777
    },
    {
      "epoch": 0.24839055793991416,
      "grad_norm": 0.137423293555516,
      "learning_rate": 0.000176018022264972,
      "loss": 0.6666,
      "step": 2778
    },
    {
      "epoch": 0.2484799713876967,
      "grad_norm": 0.14342253812891986,
      "learning_rate": 0.00017599920333923668,
      "loss": 0.6506,
      "step": 2779
    },
    {
      "epoch": 0.24856938483547925,
      "grad_norm": 0.133219731432541,
      "learning_rate": 0.00017598037803953994,
      "loss": 0.6991,
      "step": 2780
    },
    {
      "epoch": 0.2486587982832618,
      "grad_norm": 0.1390698333692157,
      "learning_rate": 0.00017596154636746066,
      "loss": 0.6699,
      "step": 2781
    },
    {
      "epoch": 0.24874821173104436,
      "grad_norm": 0.16549320828228692,
      "learning_rate": 0.00017594270832457825,
      "loss": 0.72,
      "step": 2782
    },
    {
      "epoch": 0.2488376251788269,
      "grad_norm": 0.1668409990540998,
      "learning_rate": 0.0001759238639124726,
      "loss": 0.7357,
      "step": 2783
    },
    {
      "epoch": 0.24892703862660945,
      "grad_norm": 0.12496562388533684,
      "learning_rate": 0.00017590501313272415,
      "loss": 0.7019,
      "step": 2784
    },
    {
      "epoch": 0.249016452074392,
      "grad_norm": 0.13244229884228087,
      "learning_rate": 0.00017588615598691397,
      "loss": 0.6874,
      "step": 2785
    },
    {
      "epoch": 0.24910586552217454,
      "grad_norm": 0.140388551482037,
      "learning_rate": 0.00017586729247662345,
      "loss": 0.7133,
      "step": 2786
    },
    {
      "epoch": 0.24919527896995708,
      "grad_norm": 0.15787316721098418,
      "learning_rate": 0.00017584842260343482,
      "loss": 0.7271,
      "step": 2787
    },
    {
      "epoch": 0.24928469241773962,
      "grad_norm": 0.12724487582476152,
      "learning_rate": 0.00017582954636893055,
      "loss": 0.6721,
      "step": 2788
    },
    {
      "epoch": 0.24937410586552217,
      "grad_norm": 0.1289929127606433,
      "learning_rate": 0.0001758106637746938,
      "loss": 0.6474,
      "step": 2789
    },
    {
      "epoch": 0.2494635193133047,
      "grad_norm": 0.16665430429067166,
      "learning_rate": 0.00017579177482230824,
      "loss": 0.7408,
      "step": 2790
    },
    {
      "epoch": 0.24955293276108725,
      "grad_norm": 0.14874824329648154,
      "learning_rate": 0.00017577287951335807,
      "loss": 0.675,
      "step": 2791
    },
    {
      "epoch": 0.24964234620886983,
      "grad_norm": 0.15398668378667085,
      "learning_rate": 0.00017575397784942799,
      "loss": 0.7123,
      "step": 2792
    },
    {
      "epoch": 0.24973175965665237,
      "grad_norm": 0.1374473029273038,
      "learning_rate": 0.00017573506983210329,
      "loss": 0.6521,
      "step": 2793
    },
    {
      "epoch": 0.2498211731044349,
      "grad_norm": 0.16058539835860858,
      "learning_rate": 0.00017571615546296972,
      "loss": 0.6916,
      "step": 2794
    },
    {
      "epoch": 0.24991058655221746,
      "grad_norm": 0.14910416558429362,
      "learning_rate": 0.00017569723474361365,
      "loss": 0.7072,
      "step": 2795
    },
    {
      "epoch": 0.25,
      "grad_norm": 0.13044897086572352,
      "learning_rate": 0.00017567830767562198,
      "loss": 0.6929,
      "step": 2796
    },
    {
      "epoch": 0.25008941344778257,
      "grad_norm": 0.1482496064075168,
      "learning_rate": 0.00017565937426058196,
      "loss": 0.7281,
      "step": 2797
    },
    {
      "epoch": 0.2501788268955651,
      "grad_norm": 0.15272335201642676,
      "learning_rate": 0.00017564043450008163,
      "loss": 0.6937,
      "step": 2798
    },
    {
      "epoch": 0.25026824034334766,
      "grad_norm": 0.15010761617291016,
      "learning_rate": 0.0001756214883957094,
      "loss": 0.7029,
      "step": 2799
    },
    {
      "epoch": 0.2503576537911302,
      "grad_norm": 0.19338405767753455,
      "learning_rate": 0.00017560253594905425,
      "loss": 0.3872,
      "step": 2800
    },
    {
      "epoch": 0.25044706723891275,
      "grad_norm": 0.15041122247336483,
      "learning_rate": 0.00017558357716170573,
      "loss": 0.6462,
      "step": 2801
    },
    {
      "epoch": 0.25053648068669526,
      "grad_norm": 0.1492690560692534,
      "learning_rate": 0.00017556461203525387,
      "loss": 0.6738,
      "step": 2802
    },
    {
      "epoch": 0.25062589413447783,
      "grad_norm": 0.15743452768723185,
      "learning_rate": 0.00017554564057128928,
      "loss": 0.6674,
      "step": 2803
    },
    {
      "epoch": 0.25071530758226035,
      "grad_norm": 0.13376343230906412,
      "learning_rate": 0.00017552666277140304,
      "loss": 0.6845,
      "step": 2804
    },
    {
      "epoch": 0.2508047210300429,
      "grad_norm": 0.13136956726756818,
      "learning_rate": 0.0001755076786371868,
      "loss": 0.6655,
      "step": 2805
    },
    {
      "epoch": 0.2508941344778255,
      "grad_norm": 0.14949291730820857,
      "learning_rate": 0.00017548868817023275,
      "loss": 0.6875,
      "step": 2806
    },
    {
      "epoch": 0.250983547925608,
      "grad_norm": 0.15827058265798002,
      "learning_rate": 0.00017546969137213357,
      "loss": 0.6956,
      "step": 2807
    },
    {
      "epoch": 0.2510729613733906,
      "grad_norm": 0.1416948067604923,
      "learning_rate": 0.00017545068824448255,
      "loss": 0.686,
      "step": 2808
    },
    {
      "epoch": 0.2511623748211731,
      "grad_norm": 0.1439605616071136,
      "learning_rate": 0.0001754316787888734,
      "loss": 0.6574,
      "step": 2809
    },
    {
      "epoch": 0.25125178826895567,
      "grad_norm": 0.124800306791925,
      "learning_rate": 0.00017541266300690047,
      "loss": 0.6675,
      "step": 2810
    },
    {
      "epoch": 0.2513412017167382,
      "grad_norm": 0.13585338164098934,
      "learning_rate": 0.00017539364090015855,
      "loss": 0.717,
      "step": 2811
    },
    {
      "epoch": 0.25143061516452075,
      "grad_norm": 0.135572293267392,
      "learning_rate": 0.00017537461247024304,
      "loss": 0.6433,
      "step": 2812
    },
    {
      "epoch": 0.25152002861230327,
      "grad_norm": 0.15198741560520146,
      "learning_rate": 0.0001753555777187498,
      "loss": 0.7273,
      "step": 2813
    },
    {
      "epoch": 0.25160944206008584,
      "grad_norm": 0.15288289867752064,
      "learning_rate": 0.00017533653664727529,
      "loss": 0.6849,
      "step": 2814
    },
    {
      "epoch": 0.25169885550786836,
      "grad_norm": 0.14547531045725834,
      "learning_rate": 0.0001753174892574164,
      "loss": 0.7303,
      "step": 2815
    },
    {
      "epoch": 0.25178826895565093,
      "grad_norm": 0.1609862178127862,
      "learning_rate": 0.00017529843555077066,
      "loss": 0.7377,
      "step": 2816
    },
    {
      "epoch": 0.2518776824034335,
      "grad_norm": 0.12319905589242681,
      "learning_rate": 0.00017527937552893605,
      "loss": 0.6438,
      "step": 2817
    },
    {
      "epoch": 0.251967095851216,
      "grad_norm": 0.14366566410435877,
      "learning_rate": 0.00017526030919351113,
      "loss": 0.7106,
      "step": 2818
    },
    {
      "epoch": 0.2520565092989986,
      "grad_norm": 0.15098702819590976,
      "learning_rate": 0.000175241236546095,
      "loss": 0.713,
      "step": 2819
    },
    {
      "epoch": 0.2521459227467811,
      "grad_norm": 0.21535901635174398,
      "learning_rate": 0.00017522215758828722,
      "loss": 0.4094,
      "step": 2820
    },
    {
      "epoch": 0.2522353361945637,
      "grad_norm": 0.15704302359404357,
      "learning_rate": 0.0001752030723216879,
      "loss": 0.7307,
      "step": 2821
    },
    {
      "epoch": 0.2523247496423462,
      "grad_norm": 0.14512494769786316,
      "learning_rate": 0.00017518398074789775,
      "loss": 0.6884,
      "step": 2822
    },
    {
      "epoch": 0.25241416309012876,
      "grad_norm": 0.14518058059701003,
      "learning_rate": 0.00017516488286851794,
      "loss": 0.6872,
      "step": 2823
    },
    {
      "epoch": 0.2525035765379113,
      "grad_norm": 0.1345526016305527,
      "learning_rate": 0.00017514577868515016,
      "loss": 0.6896,
      "step": 2824
    },
    {
      "epoch": 0.25259298998569385,
      "grad_norm": 0.1417652759457063,
      "learning_rate": 0.0001751266681993967,
      "loss": 0.7024,
      "step": 2825
    },
    {
      "epoch": 0.2526824034334764,
      "grad_norm": 0.13438109660715017,
      "learning_rate": 0.00017510755141286028,
      "loss": 0.6634,
      "step": 2826
    },
    {
      "epoch": 0.25277181688125894,
      "grad_norm": 0.1367762030211471,
      "learning_rate": 0.00017508842832714426,
      "loss": 0.6738,
      "step": 2827
    },
    {
      "epoch": 0.2528612303290415,
      "grad_norm": 0.139599240556439,
      "learning_rate": 0.0001750692989438524,
      "loss": 0.7099,
      "step": 2828
    },
    {
      "epoch": 0.252950643776824,
      "grad_norm": 0.14401352698747336,
      "learning_rate": 0.00017505016326458913,
      "loss": 0.7207,
      "step": 2829
    },
    {
      "epoch": 0.2530400572246066,
      "grad_norm": 0.1440878774599023,
      "learning_rate": 0.00017503102129095928,
      "loss": 0.7312,
      "step": 2830
    },
    {
      "epoch": 0.2531294706723891,
      "grad_norm": 0.13056571884820528,
      "learning_rate": 0.0001750118730245683,
      "loss": 0.689,
      "step": 2831
    },
    {
      "epoch": 0.2532188841201717,
      "grad_norm": 0.14077601975848517,
      "learning_rate": 0.00017499271846702213,
      "loss": 0.6782,
      "step": 2832
    },
    {
      "epoch": 0.2533082975679542,
      "grad_norm": 0.13728168710638564,
      "learning_rate": 0.00017497355761992724,
      "loss": 0.6742,
      "step": 2833
    },
    {
      "epoch": 0.25339771101573677,
      "grad_norm": 0.15858639967501023,
      "learning_rate": 0.00017495439048489063,
      "loss": 0.72,
      "step": 2834
    },
    {
      "epoch": 0.2534871244635193,
      "grad_norm": 0.16070572546576792,
      "learning_rate": 0.00017493521706351975,
      "loss": 0.7324,
      "step": 2835
    },
    {
      "epoch": 0.25357653791130186,
      "grad_norm": 0.12444827141305179,
      "learning_rate": 0.00017491603735742277,
      "loss": 0.6619,
      "step": 2836
    },
    {
      "epoch": 0.25366595135908443,
      "grad_norm": 0.14917880737560105,
      "learning_rate": 0.0001748968513682082,
      "loss": 0.6886,
      "step": 2837
    },
    {
      "epoch": 0.25375536480686695,
      "grad_norm": 0.12987000193339363,
      "learning_rate": 0.00017487765909748513,
      "loss": 0.6621,
      "step": 2838
    },
    {
      "epoch": 0.2538447782546495,
      "grad_norm": 0.14976846765352922,
      "learning_rate": 0.00017485846054686324,
      "loss": 0.7205,
      "step": 2839
    },
    {
      "epoch": 0.25393419170243203,
      "grad_norm": 0.13443037466109994,
      "learning_rate": 0.00017483925571795268,
      "loss": 0.6566,
      "step": 2840
    },
    {
      "epoch": 0.2540236051502146,
      "grad_norm": 0.1450227298013294,
      "learning_rate": 0.00017482004461236413,
      "loss": 0.717,
      "step": 2841
    },
    {
      "epoch": 0.2541130185979971,
      "grad_norm": 0.13162393395716115,
      "learning_rate": 0.00017480082723170877,
      "loss": 0.6846,
      "step": 2842
    },
    {
      "epoch": 0.2542024320457797,
      "grad_norm": 0.12885049469895074,
      "learning_rate": 0.00017478160357759838,
      "loss": 0.6754,
      "step": 2843
    },
    {
      "epoch": 0.2542918454935622,
      "grad_norm": 0.153971275501914,
      "learning_rate": 0.00017476237365164523,
      "loss": 0.6971,
      "step": 2844
    },
    {
      "epoch": 0.2543812589413448,
      "grad_norm": 0.15188124058687633,
      "learning_rate": 0.00017474313745546204,
      "loss": 0.6975,
      "step": 2845
    },
    {
      "epoch": 0.25447067238912735,
      "grad_norm": 0.1295833436967907,
      "learning_rate": 0.00017472389499066223,
      "loss": 0.6886,
      "step": 2846
    },
    {
      "epoch": 0.25456008583690987,
      "grad_norm": 0.1479391107957662,
      "learning_rate": 0.00017470464625885958,
      "loss": 0.6878,
      "step": 2847
    },
    {
      "epoch": 0.25464949928469244,
      "grad_norm": 0.1578613509060748,
      "learning_rate": 0.00017468539126166846,
      "loss": 0.7182,
      "step": 2848
    },
    {
      "epoch": 0.25473891273247495,
      "grad_norm": 0.16514047017905872,
      "learning_rate": 0.0001746661300007038,
      "loss": 0.6984,
      "step": 2849
    },
    {
      "epoch": 0.2548283261802575,
      "grad_norm": 0.13830167840300747,
      "learning_rate": 0.00017464686247758095,
      "loss": 0.7119,
      "step": 2850
    },
    {
      "epoch": 0.25491773962804004,
      "grad_norm": 0.15887864271440624,
      "learning_rate": 0.00017462758869391591,
      "loss": 0.7095,
      "step": 2851
    },
    {
      "epoch": 0.2550071530758226,
      "grad_norm": 0.14683048957058636,
      "learning_rate": 0.00017460830865132513,
      "loss": 0.7167,
      "step": 2852
    },
    {
      "epoch": 0.25509656652360513,
      "grad_norm": 0.1739463151007428,
      "learning_rate": 0.00017458902235142562,
      "loss": 0.7127,
      "step": 2853
    },
    {
      "epoch": 0.2551859799713877,
      "grad_norm": 0.15438446453451704,
      "learning_rate": 0.00017456972979583486,
      "loss": 0.7029,
      "step": 2854
    },
    {
      "epoch": 0.2552753934191702,
      "grad_norm": 0.14093270315151055,
      "learning_rate": 0.00017455043098617097,
      "loss": 0.7223,
      "step": 2855
    },
    {
      "epoch": 0.2553648068669528,
      "grad_norm": 0.12806737835564994,
      "learning_rate": 0.00017453112592405242,
      "loss": 0.687,
      "step": 2856
    },
    {
      "epoch": 0.25545422031473536,
      "grad_norm": 0.14384808981940475,
      "learning_rate": 0.00017451181461109835,
      "loss": 0.7064,
      "step": 2857
    },
    {
      "epoch": 0.2555436337625179,
      "grad_norm": 0.12288785302919569,
      "learning_rate": 0.0001744924970489284,
      "loss": 0.6835,
      "step": 2858
    },
    {
      "epoch": 0.25563304721030045,
      "grad_norm": 0.1523810152500668,
      "learning_rate": 0.00017447317323916267,
      "loss": 0.7001,
      "step": 2859
    },
    {
      "epoch": 0.25572246065808296,
      "grad_norm": 0.14309458998124208,
      "learning_rate": 0.00017445384318342185,
      "loss": 0.7001,
      "step": 2860
    },
    {
      "epoch": 0.25581187410586553,
      "grad_norm": 0.13843187882691388,
      "learning_rate": 0.00017443450688332712,
      "loss": 0.664,
      "step": 2861
    },
    {
      "epoch": 0.25590128755364805,
      "grad_norm": 0.14116085475559326,
      "learning_rate": 0.00017441516434050017,
      "loss": 0.6854,
      "step": 2862
    },
    {
      "epoch": 0.2559907010014306,
      "grad_norm": 0.1292579096400266,
      "learning_rate": 0.0001743958155565633,
      "loss": 0.6734,
      "step": 2863
    },
    {
      "epoch": 0.25608011444921314,
      "grad_norm": 0.14168359270342887,
      "learning_rate": 0.0001743764605331392,
      "loss": 0.6794,
      "step": 2864
    },
    {
      "epoch": 0.2561695278969957,
      "grad_norm": 0.1598043373105665,
      "learning_rate": 0.0001743570992718512,
      "loss": 0.7675,
      "step": 2865
    },
    {
      "epoch": 0.2562589413447783,
      "grad_norm": 0.1543529298940379,
      "learning_rate": 0.00017433773177432307,
      "loss": 0.6928,
      "step": 2866
    },
    {
      "epoch": 0.2563483547925608,
      "grad_norm": 0.12973193787804124,
      "learning_rate": 0.00017431835804217912,
      "loss": 0.6631,
      "step": 2867
    },
    {
      "epoch": 0.25643776824034337,
      "grad_norm": 0.13405587169102023,
      "learning_rate": 0.00017429897807704427,
      "loss": 0.6369,
      "step": 2868
    },
    {
      "epoch": 0.2565271816881259,
      "grad_norm": 0.14539843998417917,
      "learning_rate": 0.00017427959188054385,
      "loss": 0.715,
      "step": 2869
    },
    {
      "epoch": 0.25661659513590845,
      "grad_norm": 0.13555039172647895,
      "learning_rate": 0.0001742601994543038,
      "loss": 0.681,
      "step": 2870
    },
    {
      "epoch": 0.25670600858369097,
      "grad_norm": 0.14767190496631735,
      "learning_rate": 0.00017424080079995045,
      "loss": 0.685,
      "step": 2871
    },
    {
      "epoch": 0.25679542203147354,
      "grad_norm": 0.15751519529676403,
      "learning_rate": 0.00017422139591911085,
      "loss": 0.7367,
      "step": 2872
    },
    {
      "epoch": 0.25688483547925606,
      "grad_norm": 0.14850858962472488,
      "learning_rate": 0.00017420198481341237,
      "loss": 0.7053,
      "step": 2873
    },
    {
      "epoch": 0.25697424892703863,
      "grad_norm": 0.13582487724994605,
      "learning_rate": 0.00017418256748448304,
      "loss": 0.6526,
      "step": 2874
    },
    {
      "epoch": 0.2570636623748212,
      "grad_norm": 0.16066326776085885,
      "learning_rate": 0.0001741631439339514,
      "loss": 0.6845,
      "step": 2875
    },
    {
      "epoch": 0.2571530758226037,
      "grad_norm": 0.1602601869239172,
      "learning_rate": 0.0001741437141634464,
      "loss": 0.7148,
      "step": 2876
    },
    {
      "epoch": 0.2572424892703863,
      "grad_norm": 0.14642951248672884,
      "learning_rate": 0.00017412427817459767,
      "loss": 0.6797,
      "step": 2877
    },
    {
      "epoch": 0.2573319027181688,
      "grad_norm": 0.206574494518093,
      "learning_rate": 0.00017410483596903525,
      "loss": 0.3863,
      "step": 2878
    },
    {
      "epoch": 0.2574213161659514,
      "grad_norm": 0.15513443829582008,
      "learning_rate": 0.0001740853875483897,
      "loss": 0.6944,
      "step": 2879
    },
    {
      "epoch": 0.2575107296137339,
      "grad_norm": 0.16708977756829715,
      "learning_rate": 0.00017406593291429217,
      "loss": 0.7039,
      "step": 2880
    },
    {
      "epoch": 0.25760014306151646,
      "grad_norm": 0.1273735256614427,
      "learning_rate": 0.00017404647206837432,
      "loss": 0.6776,
      "step": 2881
    },
    {
      "epoch": 0.257689556509299,
      "grad_norm": 0.14717595772128164,
      "learning_rate": 0.00017402700501226826,
      "loss": 0.7112,
      "step": 2882
    },
    {
      "epoch": 0.25777896995708155,
      "grad_norm": 0.12744324754829817,
      "learning_rate": 0.00017400753174760672,
      "loss": 0.6564,
      "step": 2883
    },
    {
      "epoch": 0.25786838340486407,
      "grad_norm": 0.1477755090526267,
      "learning_rate": 0.0001739880522760229,
      "loss": 0.6872,
      "step": 2884
    },
    {
      "epoch": 0.25795779685264664,
      "grad_norm": 0.1451795013311464,
      "learning_rate": 0.00017396856659915045,
      "loss": 0.6558,
      "step": 2885
    },
    {
      "epoch": 0.2580472103004292,
      "grad_norm": 0.12613844332276872,
      "learning_rate": 0.00017394907471862363,
      "loss": 0.6642,
      "step": 2886
    },
    {
      "epoch": 0.2581366237482117,
      "grad_norm": 0.15726952379829898,
      "learning_rate": 0.00017392957663607723,
      "loss": 0.7092,
      "step": 2887
    },
    {
      "epoch": 0.2582260371959943,
      "grad_norm": 0.150019536354702,
      "learning_rate": 0.00017391007235314655,
      "loss": 0.7135,
      "step": 2888
    },
    {
      "epoch": 0.2583154506437768,
      "grad_norm": 0.14310837610994767,
      "learning_rate": 0.00017389056187146733,
      "loss": 0.6958,
      "step": 2889
    },
    {
      "epoch": 0.2584048640915594,
      "grad_norm": 0.14301213898068396,
      "learning_rate": 0.00017387104519267594,
      "loss": 0.7224,
      "step": 2890
    },
    {
      "epoch": 0.2584942775393419,
      "grad_norm": 0.1362537646777627,
      "learning_rate": 0.0001738515223184092,
      "loss": 0.6512,
      "step": 2891
    },
    {
      "epoch": 0.25858369098712447,
      "grad_norm": 0.13428512380819405,
      "learning_rate": 0.00017383199325030448,
      "loss": 0.7087,
      "step": 2892
    },
    {
      "epoch": 0.258673104434907,
      "grad_norm": 0.13770198702545505,
      "learning_rate": 0.00017381245798999965,
      "loss": 0.6717,
      "step": 2893
    },
    {
      "epoch": 0.25876251788268956,
      "grad_norm": 0.1335719937137214,
      "learning_rate": 0.00017379291653913311,
      "loss": 0.677,
      "step": 2894
    },
    {
      "epoch": 0.25885193133047213,
      "grad_norm": 0.15245481021166932,
      "learning_rate": 0.0001737733688993438,
      "loss": 0.669,
      "step": 2895
    },
    {
      "epoch": 0.25894134477825465,
      "grad_norm": 0.13812011380225164,
      "learning_rate": 0.00017375381507227108,
      "loss": 0.6845,
      "step": 2896
    },
    {
      "epoch": 0.2590307582260372,
      "grad_norm": 0.13086505367495732,
      "learning_rate": 0.000173734255059555,
      "loss": 0.6555,
      "step": 2897
    },
    {
      "epoch": 0.25912017167381973,
      "grad_norm": 0.15114940466344323,
      "learning_rate": 0.000173714688862836,
      "loss": 0.7208,
      "step": 2898
    },
    {
      "epoch": 0.2592095851216023,
      "grad_norm": 0.1421409078524336,
      "learning_rate": 0.00017369511648375507,
      "loss": 0.7123,
      "step": 2899
    },
    {
      "epoch": 0.2592989985693848,
      "grad_norm": 0.14404535328237528,
      "learning_rate": 0.00017367553792395373,
      "loss": 0.6888,
      "step": 2900
    },
    {
      "epoch": 0.2593884120171674,
      "grad_norm": 0.14232120136678633,
      "learning_rate": 0.00017365595318507397,
      "loss": 0.6884,
      "step": 2901
    },
    {
      "epoch": 0.2594778254649499,
      "grad_norm": 0.13810287551929604,
      "learning_rate": 0.00017363636226875836,
      "loss": 0.6748,
      "step": 2902
    },
    {
      "epoch": 0.2595672389127325,
      "grad_norm": 0.18645996164038453,
      "learning_rate": 0.00017361676517665001,
      "loss": 0.4125,
      "step": 2903
    },
    {
      "epoch": 0.259656652360515,
      "grad_norm": 0.1655007101676393,
      "learning_rate": 0.00017359716191039248,
      "loss": 0.6942,
      "step": 2904
    },
    {
      "epoch": 0.25974606580829757,
      "grad_norm": 0.15472984064551773,
      "learning_rate": 0.00017357755247162984,
      "loss": 0.7196,
      "step": 2905
    },
    {
      "epoch": 0.25983547925608014,
      "grad_norm": 0.16779403509535076,
      "learning_rate": 0.00017355793686200675,
      "loss": 0.744,
      "step": 2906
    },
    {
      "epoch": 0.25992489270386265,
      "grad_norm": 0.16516989028735704,
      "learning_rate": 0.00017353831508316834,
      "loss": 0.6859,
      "step": 2907
    },
    {
      "epoch": 0.2600143061516452,
      "grad_norm": 0.14879149365481661,
      "learning_rate": 0.00017351868713676023,
      "loss": 0.6817,
      "step": 2908
    },
    {
      "epoch": 0.26010371959942774,
      "grad_norm": 0.15288695501160612,
      "learning_rate": 0.00017349905302442863,
      "loss": 0.7046,
      "step": 2909
    },
    {
      "epoch": 0.2601931330472103,
      "grad_norm": 0.15477326164056932,
      "learning_rate": 0.0001734794127478202,
      "loss": 0.7302,
      "step": 2910
    },
    {
      "epoch": 0.26028254649499283,
      "grad_norm": 0.1492496929813918,
      "learning_rate": 0.00017345976630858218,
      "loss": 0.6985,
      "step": 2911
    },
    {
      "epoch": 0.2603719599427754,
      "grad_norm": 0.137200226750518,
      "learning_rate": 0.00017344011370836227,
      "loss": 0.7186,
      "step": 2912
    },
    {
      "epoch": 0.2604613733905579,
      "grad_norm": 0.13444784095521706,
      "learning_rate": 0.00017342045494880872,
      "loss": 0.6726,
      "step": 2913
    },
    {
      "epoch": 0.2605507868383405,
      "grad_norm": 0.14169656778310122,
      "learning_rate": 0.0001734007900315703,
      "loss": 0.694,
      "step": 2914
    },
    {
      "epoch": 0.26064020028612306,
      "grad_norm": 0.14463857291374826,
      "learning_rate": 0.00017338111895829624,
      "loss": 0.6699,
      "step": 2915
    },
    {
      "epoch": 0.2607296137339056,
      "grad_norm": 0.14413536495494994,
      "learning_rate": 0.00017336144173063636,
      "loss": 0.715,
      "step": 2916
    },
    {
      "epoch": 0.26081902718168815,
      "grad_norm": 0.1489422787169974,
      "learning_rate": 0.00017334175835024095,
      "loss": 0.7123,
      "step": 2917
    },
    {
      "epoch": 0.26090844062947066,
      "grad_norm": 0.13951432769999522,
      "learning_rate": 0.00017332206881876086,
      "loss": 0.701,
      "step": 2918
    },
    {
      "epoch": 0.26099785407725323,
      "grad_norm": 0.16335423674606622,
      "learning_rate": 0.0001733023731378474,
      "loss": 0.7536,
      "step": 2919
    },
    {
      "epoch": 0.26108726752503575,
      "grad_norm": 0.1412762839963171,
      "learning_rate": 0.00017328267130915244,
      "loss": 0.648,
      "step": 2920
    },
    {
      "epoch": 0.2611766809728183,
      "grad_norm": 0.151200519321709,
      "learning_rate": 0.00017326296333432833,
      "loss": 0.6844,
      "step": 2921
    },
    {
      "epoch": 0.26126609442060084,
      "grad_norm": 0.14771935284218732,
      "learning_rate": 0.000173243249215028,
      "loss": 0.6834,
      "step": 2922
    },
    {
      "epoch": 0.2613555078683834,
      "grad_norm": 0.13310823033229302,
      "learning_rate": 0.00017322352895290477,
      "loss": 0.6699,
      "step": 2923
    },
    {
      "epoch": 0.261444921316166,
      "grad_norm": 0.14152801004599735,
      "learning_rate": 0.0001732038025496126,
      "loss": 0.6868,
      "step": 2924
    },
    {
      "epoch": 0.2615343347639485,
      "grad_norm": 0.14889786385194584,
      "learning_rate": 0.0001731840700068059,
      "loss": 0.7071,
      "step": 2925
    },
    {
      "epoch": 0.26162374821173107,
      "grad_norm": 0.16955358680070903,
      "learning_rate": 0.00017316433132613969,
      "loss": 0.7212,
      "step": 2926
    },
    {
      "epoch": 0.2617131616595136,
      "grad_norm": 0.13817661850051072,
      "learning_rate": 0.00017314458650926934,
      "loss": 0.6792,
      "step": 2927
    },
    {
      "epoch": 0.26180257510729615,
      "grad_norm": 0.14689024218944863,
      "learning_rate": 0.00017312483555785086,
      "loss": 0.6938,
      "step": 2928
    },
    {
      "epoch": 0.26189198855507867,
      "grad_norm": 0.13405866643765268,
      "learning_rate": 0.00017310507847354077,
      "loss": 0.6856,
      "step": 2929
    },
    {
      "epoch": 0.26198140200286124,
      "grad_norm": 0.12094074428955043,
      "learning_rate": 0.00017308531525799597,
      "loss": 0.6528,
      "step": 2930
    },
    {
      "epoch": 0.26207081545064376,
      "grad_norm": 0.1608829214154607,
      "learning_rate": 0.0001730655459128741,
      "loss": 0.6743,
      "step": 2931
    },
    {
      "epoch": 0.26216022889842633,
      "grad_norm": 0.15406371641176075,
      "learning_rate": 0.0001730457704398331,
      "loss": 0.6893,
      "step": 2932
    },
    {
      "epoch": 0.26224964234620884,
      "grad_norm": 0.14704159700652175,
      "learning_rate": 0.00017302598884053153,
      "loss": 0.6858,
      "step": 2933
    },
    {
      "epoch": 0.2623390557939914,
      "grad_norm": 0.13358186887465362,
      "learning_rate": 0.00017300620111662852,
      "loss": 0.6311,
      "step": 2934
    },
    {
      "epoch": 0.262428469241774,
      "grad_norm": 0.15080576923867087,
      "learning_rate": 0.00017298640726978357,
      "loss": 0.7133,
      "step": 2935
    },
    {
      "epoch": 0.2625178826895565,
      "grad_norm": 0.15403661214706552,
      "learning_rate": 0.00017296660730165678,
      "loss": 0.6847,
      "step": 2936
    },
    {
      "epoch": 0.2626072961373391,
      "grad_norm": 0.1509613669742573,
      "learning_rate": 0.00017294680121390877,
      "loss": 0.6706,
      "step": 2937
    },
    {
      "epoch": 0.2626967095851216,
      "grad_norm": 0.1329111091797492,
      "learning_rate": 0.00017292698900820064,
      "loss": 0.6824,
      "step": 2938
    },
    {
      "epoch": 0.26278612303290416,
      "grad_norm": 0.13862295614627354,
      "learning_rate": 0.00017290717068619402,
      "loss": 0.666,
      "step": 2939
    },
    {
      "epoch": 0.2628755364806867,
      "grad_norm": 0.15291458602436617,
      "learning_rate": 0.00017288734624955102,
      "loss": 0.7074,
      "step": 2940
    },
    {
      "epoch": 0.26296494992846925,
      "grad_norm": 0.16093897216017342,
      "learning_rate": 0.00017286751569993433,
      "loss": 0.7191,
      "step": 2941
    },
    {
      "epoch": 0.26305436337625177,
      "grad_norm": 0.13275070822619656,
      "learning_rate": 0.0001728476790390071,
      "loss": 0.7283,
      "step": 2942
    },
    {
      "epoch": 0.26314377682403434,
      "grad_norm": 0.14899218591503294,
      "learning_rate": 0.00017282783626843302,
      "loss": 0.6964,
      "step": 2943
    },
    {
      "epoch": 0.2632331902718169,
      "grad_norm": 0.15775349171672098,
      "learning_rate": 0.00017280798738987624,
      "loss": 0.6839,
      "step": 2944
    },
    {
      "epoch": 0.2633226037195994,
      "grad_norm": 0.13010292969340556,
      "learning_rate": 0.00017278813240500154,
      "loss": 0.626,
      "step": 2945
    },
    {
      "epoch": 0.263412017167382,
      "grad_norm": 0.16057431042787515,
      "learning_rate": 0.000172768271315474,
      "loss": 0.7024,
      "step": 2946
    },
    {
      "epoch": 0.2635014306151645,
      "grad_norm": 0.13554938967304925,
      "learning_rate": 0.00017274840412295948,
      "loss": 0.6942,
      "step": 2947
    },
    {
      "epoch": 0.2635908440629471,
      "grad_norm": 0.13545640007933543,
      "learning_rate": 0.00017272853082912418,
      "loss": 0.6557,
      "step": 2948
    },
    {
      "epoch": 0.2636802575107296,
      "grad_norm": 0.16177583702046058,
      "learning_rate": 0.00017270865143563478,
      "loss": 0.7031,
      "step": 2949
    },
    {
      "epoch": 0.26376967095851217,
      "grad_norm": 0.16201030920916457,
      "learning_rate": 0.00017268876594415863,
      "loss": 0.6969,
      "step": 2950
    },
    {
      "epoch": 0.2638590844062947,
      "grad_norm": 0.14120036581388948,
      "learning_rate": 0.00017266887435636344,
      "loss": 0.6657,
      "step": 2951
    },
    {
      "epoch": 0.26394849785407726,
      "grad_norm": 0.12888124344561655,
      "learning_rate": 0.00017264897667391754,
      "loss": 0.6524,
      "step": 2952
    },
    {
      "epoch": 0.2640379113018598,
      "grad_norm": 0.15857592441364882,
      "learning_rate": 0.0001726290728984897,
      "loss": 0.6927,
      "step": 2953
    },
    {
      "epoch": 0.26412732474964234,
      "grad_norm": 0.18216298196088757,
      "learning_rate": 0.00017260916303174923,
      "loss": 0.7644,
      "step": 2954
    },
    {
      "epoch": 0.2642167381974249,
      "grad_norm": 0.14554697147169526,
      "learning_rate": 0.00017258924707536596,
      "loss": 0.6993,
      "step": 2955
    },
    {
      "epoch": 0.26430615164520743,
      "grad_norm": 0.13243297191352338,
      "learning_rate": 0.00017256932503101018,
      "loss": 0.6999,
      "step": 2956
    },
    {
      "epoch": 0.26439556509299,
      "grad_norm": 0.13590967495620587,
      "learning_rate": 0.00017254939690035276,
      "loss": 0.692,
      "step": 2957
    },
    {
      "epoch": 0.2644849785407725,
      "grad_norm": 0.1673042737195656,
      "learning_rate": 0.00017252946268506505,
      "loss": 0.7716,
      "step": 2958
    },
    {
      "epoch": 0.2645743919885551,
      "grad_norm": 0.15382135295826868,
      "learning_rate": 0.00017250952238681889,
      "loss": 0.719,
      "step": 2959
    },
    {
      "epoch": 0.2646638054363376,
      "grad_norm": 0.14527475254009437,
      "learning_rate": 0.00017248957600728664,
      "loss": 0.6906,
      "step": 2960
    },
    {
      "epoch": 0.2647532188841202,
      "grad_norm": 0.15476216779241864,
      "learning_rate": 0.0001724696235481412,
      "loss": 0.6924,
      "step": 2961
    },
    {
      "epoch": 0.2648426323319027,
      "grad_norm": 0.13472887379309523,
      "learning_rate": 0.00017244966501105596,
      "loss": 0.6895,
      "step": 2962
    },
    {
      "epoch": 0.26493204577968527,
      "grad_norm": 0.17272124381719492,
      "learning_rate": 0.0001724297003977048,
      "loss": 0.7271,
      "step": 2963
    },
    {
      "epoch": 0.26502145922746784,
      "grad_norm": 0.15427662002059667,
      "learning_rate": 0.0001724097297097622,
      "loss": 0.7461,
      "step": 2964
    },
    {
      "epoch": 0.26511087267525035,
      "grad_norm": 0.1351084563700634,
      "learning_rate": 0.00017238975294890297,
      "loss": 0.681,
      "step": 2965
    },
    {
      "epoch": 0.2652002861230329,
      "grad_norm": 0.14945581159916485,
      "learning_rate": 0.00017236977011680257,
      "loss": 0.6579,
      "step": 2966
    },
    {
      "epoch": 0.26528969957081544,
      "grad_norm": 0.151150116483559,
      "learning_rate": 0.00017234978121513699,
      "loss": 0.7094,
      "step": 2967
    },
    {
      "epoch": 0.265379113018598,
      "grad_norm": 0.14771403964593172,
      "learning_rate": 0.0001723297862455826,
      "loss": 0.7021,
      "step": 2968
    },
    {
      "epoch": 0.2654685264663805,
      "grad_norm": 0.13898584233863218,
      "learning_rate": 0.00017230978520981643,
      "loss": 0.6794,
      "step": 2969
    },
    {
      "epoch": 0.2655579399141631,
      "grad_norm": 0.13826807811127373,
      "learning_rate": 0.00017228977810951584,
      "loss": 0.6792,
      "step": 2970
    },
    {
      "epoch": 0.2656473533619456,
      "grad_norm": 0.14309794994856145,
      "learning_rate": 0.00017226976494635893,
      "loss": 0.6611,
      "step": 2971
    },
    {
      "epoch": 0.2657367668097282,
      "grad_norm": 0.14222886468527912,
      "learning_rate": 0.00017224974572202409,
      "loss": 0.6906,
      "step": 2972
    },
    {
      "epoch": 0.2658261802575107,
      "grad_norm": 0.15069427169831634,
      "learning_rate": 0.0001722297204381903,
      "loss": 0.6597,
      "step": 2973
    },
    {
      "epoch": 0.2659155937052933,
      "grad_norm": 0.12855579035609885,
      "learning_rate": 0.00017220968909653715,
      "loss": 0.6565,
      "step": 2974
    },
    {
      "epoch": 0.26600500715307585,
      "grad_norm": 0.1446856606634124,
      "learning_rate": 0.00017218965169874456,
      "loss": 0.708,
      "step": 2975
    },
    {
      "epoch": 0.26609442060085836,
      "grad_norm": 0.15555533380107547,
      "learning_rate": 0.00017216960824649303,
      "loss": 0.6894,
      "step": 2976
    },
    {
      "epoch": 0.26618383404864093,
      "grad_norm": 0.13833893567792852,
      "learning_rate": 0.00017214955874146363,
      "loss": 0.7113,
      "step": 2977
    },
    {
      "epoch": 0.26627324749642345,
      "grad_norm": 0.14687169497393243,
      "learning_rate": 0.00017212950318533788,
      "loss": 0.7023,
      "step": 2978
    },
    {
      "epoch": 0.266362660944206,
      "grad_norm": 0.16221987727408585,
      "learning_rate": 0.00017210944157979783,
      "loss": 0.7064,
      "step": 2979
    },
    {
      "epoch": 0.26645207439198854,
      "grad_norm": 0.166655886926531,
      "learning_rate": 0.00017208937392652594,
      "loss": 0.747,
      "step": 2980
    },
    {
      "epoch": 0.2665414878397711,
      "grad_norm": 0.14777011615127147,
      "learning_rate": 0.0001720693002272054,
      "loss": 0.7186,
      "step": 2981
    },
    {
      "epoch": 0.2666309012875536,
      "grad_norm": 0.15408684268656897,
      "learning_rate": 0.00017204922048351964,
      "loss": 0.6835,
      "step": 2982
    },
    {
      "epoch": 0.2667203147353362,
      "grad_norm": 0.1505140833999067,
      "learning_rate": 0.0001720291346971528,
      "loss": 0.7046,
      "step": 2983
    },
    {
      "epoch": 0.26680972818311877,
      "grad_norm": 0.1460038329662881,
      "learning_rate": 0.0001720090428697894,
      "loss": 0.717,
      "step": 2984
    },
    {
      "epoch": 0.2668991416309013,
      "grad_norm": 0.16983061612965708,
      "learning_rate": 0.00017198894500311453,
      "loss": 0.7186,
      "step": 2985
    },
    {
      "epoch": 0.26698855507868385,
      "grad_norm": 0.13896925784788058,
      "learning_rate": 0.0001719688410988138,
      "loss": 0.6875,
      "step": 2986
    },
    {
      "epoch": 0.26707796852646637,
      "grad_norm": 0.1409452490845876,
      "learning_rate": 0.00017194873115857328,
      "loss": 0.6987,
      "step": 2987
    },
    {
      "epoch": 0.26716738197424894,
      "grad_norm": 0.12750148624682983,
      "learning_rate": 0.00017192861518407958,
      "loss": 0.6797,
      "step": 2988
    },
    {
      "epoch": 0.26725679542203146,
      "grad_norm": 0.1550275852775564,
      "learning_rate": 0.00017190849317701975,
      "loss": 0.6918,
      "step": 2989
    },
    {
      "epoch": 0.26734620886981403,
      "grad_norm": 0.13961910891038415,
      "learning_rate": 0.00017188836513908152,
      "loss": 0.696,
      "step": 2990
    },
    {
      "epoch": 0.26743562231759654,
      "grad_norm": 0.1428960654236228,
      "learning_rate": 0.00017186823107195287,
      "loss": 0.7052,
      "step": 2991
    },
    {
      "epoch": 0.2675250357653791,
      "grad_norm": 0.14951276901738822,
      "learning_rate": 0.00017184809097732246,
      "loss": 0.7057,
      "step": 2992
    },
    {
      "epoch": 0.2676144492131617,
      "grad_norm": 0.1440632697850481,
      "learning_rate": 0.00017182794485687944,
      "loss": 0.6967,
      "step": 2993
    },
    {
      "epoch": 0.2677038626609442,
      "grad_norm": 0.13928929899608838,
      "learning_rate": 0.00017180779271231344,
      "loss": 0.6885,
      "step": 2994
    },
    {
      "epoch": 0.2677932761087268,
      "grad_norm": 0.14763274999337914,
      "learning_rate": 0.0001717876345453146,
      "loss": 0.6984,
      "step": 2995
    },
    {
      "epoch": 0.2678826895565093,
      "grad_norm": 0.13019155431523094,
      "learning_rate": 0.00017176747035757355,
      "loss": 0.6884,
      "step": 2996
    },
    {
      "epoch": 0.26797210300429186,
      "grad_norm": 0.16669533963084096,
      "learning_rate": 0.0001717473001507814,
      "loss": 0.719,
      "step": 2997
    },
    {
      "epoch": 0.2680615164520744,
      "grad_norm": 0.14969394822154167,
      "learning_rate": 0.00017172712392662988,
      "loss": 0.7455,
      "step": 2998
    },
    {
      "epoch": 0.26815092989985695,
      "grad_norm": 0.14092861336021292,
      "learning_rate": 0.00017170694168681106,
      "loss": 0.704,
      "step": 2999
    },
    {
      "epoch": 0.26824034334763946,
      "grad_norm": 0.15787853959333906,
      "learning_rate": 0.00017168675343301769,
      "loss": 0.7107,
      "step": 3000
    },
    {
      "epoch": 0.26832975679542204,
      "grad_norm": 0.15414850212508754,
      "learning_rate": 0.00017166655916694284,
      "loss": 0.7235,
      "step": 3001
    },
    {
      "epoch": 0.26841917024320455,
      "grad_norm": 0.16155944656106433,
      "learning_rate": 0.00017164635889028025,
      "loss": 0.699,
      "step": 3002
    },
    {
      "epoch": 0.2685085836909871,
      "grad_norm": 0.15031559382524592,
      "learning_rate": 0.00017162615260472402,
      "loss": 0.7273,
      "step": 3003
    },
    {
      "epoch": 0.2685979971387697,
      "grad_norm": 0.14845393941755114,
      "learning_rate": 0.00017160594031196894,
      "loss": 0.6779,
      "step": 3004
    },
    {
      "epoch": 0.2686874105865522,
      "grad_norm": 0.17437037000491126,
      "learning_rate": 0.00017158572201371008,
      "loss": 0.7265,
      "step": 3005
    },
    {
      "epoch": 0.2687768240343348,
      "grad_norm": 0.1314867768183433,
      "learning_rate": 0.00017156549771164318,
      "loss": 0.6907,
      "step": 3006
    },
    {
      "epoch": 0.2688662374821173,
      "grad_norm": 0.1414392964856527,
      "learning_rate": 0.00017154526740746442,
      "loss": 0.694,
      "step": 3007
    },
    {
      "epoch": 0.26895565092989987,
      "grad_norm": 0.14280567882975678,
      "learning_rate": 0.00017152503110287048,
      "loss": 0.6909,
      "step": 3008
    },
    {
      "epoch": 0.2690450643776824,
      "grad_norm": 0.1500251068443758,
      "learning_rate": 0.00017150478879955858,
      "loss": 0.6928,
      "step": 3009
    },
    {
      "epoch": 0.26913447782546496,
      "grad_norm": 0.14919950377350372,
      "learning_rate": 0.00017148454049922636,
      "loss": 0.6789,
      "step": 3010
    },
    {
      "epoch": 0.2692238912732475,
      "grad_norm": 0.13213968306460583,
      "learning_rate": 0.00017146428620357212,
      "loss": 0.7069,
      "step": 3011
    },
    {
      "epoch": 0.26931330472103004,
      "grad_norm": 0.15736063285353163,
      "learning_rate": 0.00017144402591429448,
      "loss": 0.6785,
      "step": 3012
    },
    {
      "epoch": 0.2694027181688126,
      "grad_norm": 0.13784372564470443,
      "learning_rate": 0.00017142375963309262,
      "loss": 0.6821,
      "step": 3013
    },
    {
      "epoch": 0.26949213161659513,
      "grad_norm": 0.14462870961444077,
      "learning_rate": 0.00017140348736166636,
      "loss": 0.6964,
      "step": 3014
    },
    {
      "epoch": 0.2695815450643777,
      "grad_norm": 0.13901799495740153,
      "learning_rate": 0.00017138320910171584,
      "loss": 0.709,
      "step": 3015
    },
    {
      "epoch": 0.2696709585121602,
      "grad_norm": 0.16689994516473652,
      "learning_rate": 0.00017136292485494175,
      "loss": 0.7061,
      "step": 3016
    },
    {
      "epoch": 0.2697603719599428,
      "grad_norm": 0.13015374080259548,
      "learning_rate": 0.00017134263462304533,
      "loss": 0.6956,
      "step": 3017
    },
    {
      "epoch": 0.2698497854077253,
      "grad_norm": 0.15083985159425414,
      "learning_rate": 0.00017132233840772836,
      "loss": 0.6921,
      "step": 3018
    },
    {
      "epoch": 0.2699391988555079,
      "grad_norm": 0.140345111359309,
      "learning_rate": 0.00017130203621069297,
      "loss": 0.6485,
      "step": 3019
    },
    {
      "epoch": 0.2700286123032904,
      "grad_norm": 0.1614487603449641,
      "learning_rate": 0.00017128172803364188,
      "loss": 0.7221,
      "step": 3020
    },
    {
      "epoch": 0.27011802575107297,
      "grad_norm": 0.14036437061656346,
      "learning_rate": 0.0001712614138782784,
      "loss": 0.685,
      "step": 3021
    },
    {
      "epoch": 0.2702074391988555,
      "grad_norm": 0.14975802623899856,
      "learning_rate": 0.00017124109374630616,
      "loss": 0.6967,
      "step": 3022
    },
    {
      "epoch": 0.27029685264663805,
      "grad_norm": 0.15702192980189453,
      "learning_rate": 0.00017122076763942946,
      "loss": 0.7081,
      "step": 3023
    },
    {
      "epoch": 0.2703862660944206,
      "grad_norm": 0.14451466596013987,
      "learning_rate": 0.00017120043555935298,
      "loss": 0.6703,
      "step": 3024
    },
    {
      "epoch": 0.27047567954220314,
      "grad_norm": 0.1494592826516494,
      "learning_rate": 0.00017118009750778196,
      "loss": 0.6758,
      "step": 3025
    },
    {
      "epoch": 0.2705650929899857,
      "grad_norm": 0.1294433155872974,
      "learning_rate": 0.00017115975348642212,
      "loss": 0.6895,
      "step": 3026
    },
    {
      "epoch": 0.2706545064377682,
      "grad_norm": 0.13843733370151765,
      "learning_rate": 0.00017113940349697967,
      "loss": 0.6598,
      "step": 3027
    },
    {
      "epoch": 0.2707439198855508,
      "grad_norm": 0.16024630266399276,
      "learning_rate": 0.00017111904754116142,
      "loss": 0.729,
      "step": 3028
    },
    {
      "epoch": 0.2708333333333333,
      "grad_norm": 0.11609175762793532,
      "learning_rate": 0.0001710986856206745,
      "loss": 0.6711,
      "step": 3029
    },
    {
      "epoch": 0.2709227467811159,
      "grad_norm": 0.13435678516250424,
      "learning_rate": 0.00017107831773722668,
      "loss": 0.6916,
      "step": 3030
    },
    {
      "epoch": 0.2710121602288984,
      "grad_norm": 0.1597750879404639,
      "learning_rate": 0.00017105794389252622,
      "loss": 0.676,
      "step": 3031
    },
    {
      "epoch": 0.271101573676681,
      "grad_norm": 0.17220979672898548,
      "learning_rate": 0.00017103756408828183,
      "loss": 0.7171,
      "step": 3032
    },
    {
      "epoch": 0.27119098712446355,
      "grad_norm": 0.14844956365145628,
      "learning_rate": 0.0001710171783262027,
      "loss": 0.7034,
      "step": 3033
    },
    {
      "epoch": 0.27128040057224606,
      "grad_norm": 0.13036211818930937,
      "learning_rate": 0.00017099678660799857,
      "loss": 0.6531,
      "step": 3034
    },
    {
      "epoch": 0.27136981402002863,
      "grad_norm": 0.13922887445759363,
      "learning_rate": 0.00017097638893537976,
      "loss": 0.6911,
      "step": 3035
    },
    {
      "epoch": 0.27145922746781115,
      "grad_norm": 0.14144349451118482,
      "learning_rate": 0.00017095598531005688,
      "loss": 0.6711,
      "step": 3036
    },
    {
      "epoch": 0.2715486409155937,
      "grad_norm": 0.1465744376118123,
      "learning_rate": 0.0001709355757337412,
      "loss": 0.6788,
      "step": 3037
    },
    {
      "epoch": 0.27163805436337624,
      "grad_norm": 0.15443794808834796,
      "learning_rate": 0.00017091516020814447,
      "loss": 0.7277,
      "step": 3038
    },
    {
      "epoch": 0.2717274678111588,
      "grad_norm": 0.15495396805207712,
      "learning_rate": 0.0001708947387349789,
      "loss": 0.7128,
      "step": 3039
    },
    {
      "epoch": 0.2718168812589413,
      "grad_norm": 0.1399453818107223,
      "learning_rate": 0.0001708743113159572,
      "loss": 0.6958,
      "step": 3040
    },
    {
      "epoch": 0.2719062947067239,
      "grad_norm": 0.14268596661474892,
      "learning_rate": 0.0001708538779527926,
      "loss": 0.6901,
      "step": 3041
    },
    {
      "epoch": 0.2719957081545064,
      "grad_norm": 0.15229408741500675,
      "learning_rate": 0.00017083343864719884,
      "loss": 0.756,
      "step": 3042
    },
    {
      "epoch": 0.272085121602289,
      "grad_norm": 0.14984653501505474,
      "learning_rate": 0.00017081299340089012,
      "loss": 0.692,
      "step": 3043
    },
    {
      "epoch": 0.27217453505007155,
      "grad_norm": 0.12012249724785408,
      "learning_rate": 0.00017079254221558115,
      "loss": 0.6485,
      "step": 3044
    },
    {
      "epoch": 0.27226394849785407,
      "grad_norm": 0.1370907572766557,
      "learning_rate": 0.00017077208509298718,
      "loss": 0.7071,
      "step": 3045
    },
    {
      "epoch": 0.27235336194563664,
      "grad_norm": 0.17014942627737947,
      "learning_rate": 0.0001707516220348239,
      "loss": 0.7153,
      "step": 3046
    },
    {
      "epoch": 0.27244277539341916,
      "grad_norm": 0.13150020055903408,
      "learning_rate": 0.00017073115304280754,
      "loss": 0.6368,
      "step": 3047
    },
    {
      "epoch": 0.27253218884120173,
      "grad_norm": 0.14915818572903874,
      "learning_rate": 0.00017071067811865476,
      "loss": 0.7192,
      "step": 3048
    },
    {
      "epoch": 0.27262160228898424,
      "grad_norm": 0.15722777980665067,
      "learning_rate": 0.00017069019726408282,
      "loss": 0.6959,
      "step": 3049
    },
    {
      "epoch": 0.2727110157367668,
      "grad_norm": 0.15679479565690455,
      "learning_rate": 0.0001706697104808094,
      "loss": 0.7359,
      "step": 3050
    },
    {
      "epoch": 0.27280042918454933,
      "grad_norm": 0.144871511879821,
      "learning_rate": 0.00017064921777055272,
      "loss": 0.659,
      "step": 3051
    },
    {
      "epoch": 0.2728898426323319,
      "grad_norm": 0.15900865787879728,
      "learning_rate": 0.00017062871913503148,
      "loss": 0.6833,
      "step": 3052
    },
    {
      "epoch": 0.2729792560801145,
      "grad_norm": 0.11439602256638631,
      "learning_rate": 0.00017060821457596487,
      "loss": 0.6551,
      "step": 3053
    },
    {
      "epoch": 0.273068669527897,
      "grad_norm": 0.15366007889717112,
      "learning_rate": 0.00017058770409507254,
      "loss": 0.7274,
      "step": 3054
    },
    {
      "epoch": 0.27315808297567956,
      "grad_norm": 0.13129308608118884,
      "learning_rate": 0.00017056718769407474,
      "loss": 0.6784,
      "step": 3055
    },
    {
      "epoch": 0.2732474964234621,
      "grad_norm": 0.1502049734907897,
      "learning_rate": 0.00017054666537469213,
      "loss": 0.7386,
      "step": 3056
    },
    {
      "epoch": 0.27333690987124465,
      "grad_norm": 0.15131325741179144,
      "learning_rate": 0.00017052613713864587,
      "loss": 0.7143,
      "step": 3057
    },
    {
      "epoch": 0.27342632331902716,
      "grad_norm": 0.13477717423407257,
      "learning_rate": 0.0001705056029876577,
      "loss": 0.6913,
      "step": 3058
    },
    {
      "epoch": 0.27351573676680974,
      "grad_norm": 0.1313530518714502,
      "learning_rate": 0.00017048506292344974,
      "loss": 0.6451,
      "step": 3059
    },
    {
      "epoch": 0.27360515021459225,
      "grad_norm": 0.1439544516565244,
      "learning_rate": 0.00017046451694774467,
      "loss": 0.6833,
      "step": 3060
    },
    {
      "epoch": 0.2736945636623748,
      "grad_norm": 0.13892232986182365,
      "learning_rate": 0.00017044396506226566,
      "loss": 0.6936,
      "step": 3061
    },
    {
      "epoch": 0.2737839771101574,
      "grad_norm": 0.14345242850163067,
      "learning_rate": 0.0001704234072687364,
      "loss": 0.6813,
      "step": 3062
    },
    {
      "epoch": 0.2738733905579399,
      "grad_norm": 0.14912648991825203,
      "learning_rate": 0.000170402843568881,
      "loss": 0.6923,
      "step": 3063
    },
    {
      "epoch": 0.2739628040057225,
      "grad_norm": 0.15599192757921138,
      "learning_rate": 0.00017038227396442415,
      "loss": 0.737,
      "step": 3064
    },
    {
      "epoch": 0.274052217453505,
      "grad_norm": 0.15917093980954514,
      "learning_rate": 0.00017036169845709097,
      "loss": 0.6908,
      "step": 3065
    },
    {
      "epoch": 0.27414163090128757,
      "grad_norm": 0.16034473666339752,
      "learning_rate": 0.00017034111704860712,
      "loss": 0.7764,
      "step": 3066
    },
    {
      "epoch": 0.2742310443490701,
      "grad_norm": 0.15441422032780788,
      "learning_rate": 0.00017032052974069874,
      "loss": 0.6786,
      "step": 3067
    },
    {
      "epoch": 0.27432045779685266,
      "grad_norm": 0.14927618245238586,
      "learning_rate": 0.00017029993653509243,
      "loss": 0.6909,
      "step": 3068
    },
    {
      "epoch": 0.2744098712446352,
      "grad_norm": 0.12558632757089275,
      "learning_rate": 0.0001702793374335154,
      "loss": 0.6421,
      "step": 3069
    },
    {
      "epoch": 0.27449928469241774,
      "grad_norm": 0.1440608442804218,
      "learning_rate": 0.00017025873243769517,
      "loss": 0.6723,
      "step": 3070
    },
    {
      "epoch": 0.27458869814020026,
      "grad_norm": 0.1689063202302735,
      "learning_rate": 0.0001702381215493599,
      "loss": 0.7551,
      "step": 3071
    },
    {
      "epoch": 0.27467811158798283,
      "grad_norm": 0.1501778162899368,
      "learning_rate": 0.0001702175047702382,
      "loss": 0.7003,
      "step": 3072
    },
    {
      "epoch": 0.2747675250357654,
      "grad_norm": 0.15534404028887064,
      "learning_rate": 0.00017019688210205918,
      "loss": 0.6876,
      "step": 3073
    },
    {
      "epoch": 0.2748569384835479,
      "grad_norm": 0.18693646105532882,
      "learning_rate": 0.00017017625354655245,
      "loss": 0.6934,
      "step": 3074
    },
    {
      "epoch": 0.2749463519313305,
      "grad_norm": 0.14734783172895943,
      "learning_rate": 0.00017015561910544807,
      "loss": 0.7111,
      "step": 3075
    },
    {
      "epoch": 0.275035765379113,
      "grad_norm": 0.18124189048857972,
      "learning_rate": 0.00017013497878047668,
      "loss": 0.7744,
      "step": 3076
    },
    {
      "epoch": 0.2751251788268956,
      "grad_norm": 0.1457591000778729,
      "learning_rate": 0.0001701143325733693,
      "loss": 0.7174,
      "step": 3077
    },
    {
      "epoch": 0.2752145922746781,
      "grad_norm": 0.16175360527029226,
      "learning_rate": 0.0001700936804858575,
      "loss": 0.7138,
      "step": 3078
    },
    {
      "epoch": 0.27530400572246067,
      "grad_norm": 0.148855421216041,
      "learning_rate": 0.00017007302251967338,
      "loss": 0.7334,
      "step": 3079
    },
    {
      "epoch": 0.2753934191702432,
      "grad_norm": 0.14913711754973127,
      "learning_rate": 0.0001700523586765495,
      "loss": 0.6621,
      "step": 3080
    },
    {
      "epoch": 0.27548283261802575,
      "grad_norm": 0.13626084077445166,
      "learning_rate": 0.00017003168895821888,
      "loss": 0.6467,
      "step": 3081
    },
    {
      "epoch": 0.2755722460658083,
      "grad_norm": 0.14206612068844823,
      "learning_rate": 0.00017001101336641512,
      "loss": 0.68,
      "step": 3082
    },
    {
      "epoch": 0.27566165951359084,
      "grad_norm": 0.14974840201544565,
      "learning_rate": 0.0001699903319028722,
      "loss": 0.7491,
      "step": 3083
    },
    {
      "epoch": 0.2757510729613734,
      "grad_norm": 0.1247744439686686,
      "learning_rate": 0.00016996964456932466,
      "loss": 0.6954,
      "step": 3084
    },
    {
      "epoch": 0.2758404864091559,
      "grad_norm": 0.13718906653832308,
      "learning_rate": 0.0001699489513675075,
      "loss": 0.68,
      "step": 3085
    },
    {
      "epoch": 0.2759298998569385,
      "grad_norm": 0.12561388035772997,
      "learning_rate": 0.00016992825229915636,
      "loss": 0.6666,
      "step": 3086
    },
    {
      "epoch": 0.276019313304721,
      "grad_norm": 0.1439679914841577,
      "learning_rate": 0.0001699075473660071,
      "loss": 0.691,
      "step": 3087
    },
    {
      "epoch": 0.2761087267525036,
      "grad_norm": 0.13647871412399132,
      "learning_rate": 0.00016988683656979624,
      "loss": 0.6974,
      "step": 3088
    },
    {
      "epoch": 0.2761981402002861,
      "grad_norm": 0.14194913674371312,
      "learning_rate": 0.00016986611991226086,
      "loss": 0.6613,
      "step": 3089
    },
    {
      "epoch": 0.2762875536480687,
      "grad_norm": 0.1534264338493931,
      "learning_rate": 0.00016984539739513835,
      "loss": 0.6891,
      "step": 3090
    },
    {
      "epoch": 0.2763769670958512,
      "grad_norm": 0.13417329663868707,
      "learning_rate": 0.0001698246690201667,
      "loss": 0.6741,
      "step": 3091
    },
    {
      "epoch": 0.27646638054363376,
      "grad_norm": 0.13822198496490617,
      "learning_rate": 0.00016980393478908438,
      "loss": 0.6761,
      "step": 3092
    },
    {
      "epoch": 0.27655579399141633,
      "grad_norm": 0.16529301766620244,
      "learning_rate": 0.00016978319470363035,
      "loss": 0.7218,
      "step": 3093
    },
    {
      "epoch": 0.27664520743919885,
      "grad_norm": 0.14328739317671704,
      "learning_rate": 0.0001697624487655441,
      "loss": 0.65,
      "step": 3094
    },
    {
      "epoch": 0.2767346208869814,
      "grad_norm": 0.13784829823553718,
      "learning_rate": 0.0001697416969765655,
      "loss": 0.7155,
      "step": 3095
    },
    {
      "epoch": 0.27682403433476394,
      "grad_norm": 0.13614726514598707,
      "learning_rate": 0.000169720939338435,
      "loss": 0.6575,
      "step": 3096
    },
    {
      "epoch": 0.2769134477825465,
      "grad_norm": 0.15752155119542058,
      "learning_rate": 0.0001697001758528935,
      "loss": 0.6716,
      "step": 3097
    },
    {
      "epoch": 0.277002861230329,
      "grad_norm": 0.13973257202443687,
      "learning_rate": 0.00016967940652168247,
      "loss": 0.6831,
      "step": 3098
    },
    {
      "epoch": 0.2770922746781116,
      "grad_norm": 0.13699170706933816,
      "learning_rate": 0.00016965863134654372,
      "loss": 0.6462,
      "step": 3099
    },
    {
      "epoch": 0.2771816881258941,
      "grad_norm": 0.1541871675781933,
      "learning_rate": 0.0001696378503292197,
      "loss": 0.6977,
      "step": 3100
    },
    {
      "epoch": 0.2772711015736767,
      "grad_norm": 0.1305960421974107,
      "learning_rate": 0.0001696170634714533,
      "loss": 0.6561,
      "step": 3101
    },
    {
      "epoch": 0.27736051502145925,
      "grad_norm": 0.15063001971144258,
      "learning_rate": 0.00016959627077498782,
      "loss": 0.719,
      "step": 3102
    },
    {
      "epoch": 0.27744992846924177,
      "grad_norm": 0.15272303609680682,
      "learning_rate": 0.00016957547224156718,
      "loss": 0.6758,
      "step": 3103
    },
    {
      "epoch": 0.27753934191702434,
      "grad_norm": 0.13647272729903825,
      "learning_rate": 0.00016955466787293576,
      "loss": 0.6542,
      "step": 3104
    },
    {
      "epoch": 0.27762875536480686,
      "grad_norm": 0.1302805248112124,
      "learning_rate": 0.00016953385767083827,
      "loss": 0.6697,
      "step": 3105
    },
    {
      "epoch": 0.2777181688125894,
      "grad_norm": 0.16444241343640578,
      "learning_rate": 0.00016951304163702013,
      "loss": 0.6967,
      "step": 3106
    },
    {
      "epoch": 0.27780758226037194,
      "grad_norm": 0.16542272417502243,
      "learning_rate": 0.00016949221977322716,
      "loss": 0.7484,
      "step": 3107
    },
    {
      "epoch": 0.2778969957081545,
      "grad_norm": 0.16137587345396304,
      "learning_rate": 0.00016947139208120564,
      "loss": 0.7043,
      "step": 3108
    },
    {
      "epoch": 0.27798640915593703,
      "grad_norm": 0.1515882697924654,
      "learning_rate": 0.00016945055856270236,
      "loss": 0.71,
      "step": 3109
    },
    {
      "epoch": 0.2780758226037196,
      "grad_norm": 0.18217283300126352,
      "learning_rate": 0.0001694297192194646,
      "loss": 0.3616,
      "step": 3110
    },
    {
      "epoch": 0.2781652360515021,
      "grad_norm": 0.14709778675312932,
      "learning_rate": 0.00016940887405324015,
      "loss": 0.7023,
      "step": 3111
    },
    {
      "epoch": 0.2782546494992847,
      "grad_norm": 0.1501493593671997,
      "learning_rate": 0.00016938802306577726,
      "loss": 0.6837,
      "step": 3112
    },
    {
      "epoch": 0.27834406294706726,
      "grad_norm": 0.15951164376448917,
      "learning_rate": 0.00016936716625882468,
      "loss": 0.7328,
      "step": 3113
    },
    {
      "epoch": 0.2784334763948498,
      "grad_norm": 0.1374081371034816,
      "learning_rate": 0.00016934630363413163,
      "loss": 0.6401,
      "step": 3114
    },
    {
      "epoch": 0.27852288984263235,
      "grad_norm": 0.13711679185101266,
      "learning_rate": 0.00016932543519344783,
      "loss": 0.6745,
      "step": 3115
    },
    {
      "epoch": 0.27861230329041486,
      "grad_norm": 0.14969740657163344,
      "learning_rate": 0.00016930456093852353,
      "loss": 0.7151,
      "step": 3116
    },
    {
      "epoch": 0.27870171673819744,
      "grad_norm": 0.13137549201485033,
      "learning_rate": 0.00016928368087110938,
      "loss": 0.6451,
      "step": 3117
    },
    {
      "epoch": 0.27879113018597995,
      "grad_norm": 0.16350914501346697,
      "learning_rate": 0.0001692627949929566,
      "loss": 0.7027,
      "step": 3118
    },
    {
      "epoch": 0.2788805436337625,
      "grad_norm": 0.15113547437122293,
      "learning_rate": 0.00016924190330581685,
      "loss": 0.7077,
      "step": 3119
    },
    {
      "epoch": 0.27896995708154504,
      "grad_norm": 0.129807373420497,
      "learning_rate": 0.00016922100581144228,
      "loss": 0.6228,
      "step": 3120
    },
    {
      "epoch": 0.2790593705293276,
      "grad_norm": 0.12660329653601496,
      "learning_rate": 0.0001692001025115856,
      "loss": 0.6477,
      "step": 3121
    },
    {
      "epoch": 0.2791487839771102,
      "grad_norm": 0.13819126574080665,
      "learning_rate": 0.00016917919340799986,
      "loss": 0.6815,
      "step": 3122
    },
    {
      "epoch": 0.2792381974248927,
      "grad_norm": 0.13217652286290657,
      "learning_rate": 0.00016915827850243868,
      "loss": 0.6537,
      "step": 3123
    },
    {
      "epoch": 0.27932761087267527,
      "grad_norm": 0.14697295257839044,
      "learning_rate": 0.00016913735779665627,
      "loss": 0.6833,
      "step": 3124
    },
    {
      "epoch": 0.2794170243204578,
      "grad_norm": 0.15293700187073958,
      "learning_rate": 0.00016911643129240714,
      "loss": 0.7357,
      "step": 3125
    },
    {
      "epoch": 0.27950643776824036,
      "grad_norm": 0.12809962681076856,
      "learning_rate": 0.00016909549899144635,
      "loss": 0.6685,
      "step": 3126
    },
    {
      "epoch": 0.2795958512160229,
      "grad_norm": 0.13445552125308233,
      "learning_rate": 0.00016907456089552953,
      "loss": 0.6708,
      "step": 3127
    },
    {
      "epoch": 0.27968526466380544,
      "grad_norm": 0.12443797727591427,
      "learning_rate": 0.00016905361700641271,
      "loss": 0.6669,
      "step": 3128
    },
    {
      "epoch": 0.27977467811158796,
      "grad_norm": 0.14020611105223452,
      "learning_rate": 0.00016903266732585243,
      "loss": 0.6802,
      "step": 3129
    },
    {
      "epoch": 0.27986409155937053,
      "grad_norm": 0.12999569862942315,
      "learning_rate": 0.00016901171185560574,
      "loss": 0.6762,
      "step": 3130
    },
    {
      "epoch": 0.2799535050071531,
      "grad_norm": 0.14833326845021239,
      "learning_rate": 0.00016899075059743007,
      "loss": 0.656,
      "step": 3131
    },
    {
      "epoch": 0.2800429184549356,
      "grad_norm": 0.1353365002584966,
      "learning_rate": 0.00016896978355308352,
      "loss": 0.6654,
      "step": 3132
    },
    {
      "epoch": 0.2801323319027182,
      "grad_norm": 0.1289519859548838,
      "learning_rate": 0.00016894881072432443,
      "loss": 0.6882,
      "step": 3133
    },
    {
      "epoch": 0.2802217453505007,
      "grad_norm": 0.13090984207909528,
      "learning_rate": 0.00016892783211291194,
      "loss": 0.654,
      "step": 3134
    },
    {
      "epoch": 0.2803111587982833,
      "grad_norm": 0.14166319935307112,
      "learning_rate": 0.00016890684772060538,
      "loss": 0.6547,
      "step": 3135
    },
    {
      "epoch": 0.2804005722460658,
      "grad_norm": 0.14395214049166472,
      "learning_rate": 0.00016888585754916476,
      "loss": 0.6889,
      "step": 3136
    },
    {
      "epoch": 0.28048998569384836,
      "grad_norm": 0.1459494475035178,
      "learning_rate": 0.0001688648616003504,
      "loss": 0.6981,
      "step": 3137
    },
    {
      "epoch": 0.2805793991416309,
      "grad_norm": 0.1715360648339952,
      "learning_rate": 0.0001688438598759233,
      "loss": 0.3632,
      "step": 3138
    },
    {
      "epoch": 0.28066881258941345,
      "grad_norm": 0.16534346457262827,
      "learning_rate": 0.00016882285237764482,
      "loss": 0.7128,
      "step": 3139
    },
    {
      "epoch": 0.28075822603719597,
      "grad_norm": 0.1410287377435854,
      "learning_rate": 0.0001688018391072768,
      "loss": 0.6822,
      "step": 3140
    },
    {
      "epoch": 0.28084763948497854,
      "grad_norm": 0.1679244326298919,
      "learning_rate": 0.00016878082006658164,
      "loss": 0.7284,
      "step": 3141
    },
    {
      "epoch": 0.2809370529327611,
      "grad_norm": 0.14555954639307628,
      "learning_rate": 0.00016875979525732214,
      "loss": 0.6864,
      "step": 3142
    },
    {
      "epoch": 0.2810264663805436,
      "grad_norm": 0.16473462461104252,
      "learning_rate": 0.0001687387646812617,
      "loss": 0.7411,
      "step": 3143
    },
    {
      "epoch": 0.2811158798283262,
      "grad_norm": 0.161204018408982,
      "learning_rate": 0.00016871772834016406,
      "loss": 0.7276,
      "step": 3144
    },
    {
      "epoch": 0.2812052932761087,
      "grad_norm": 0.13082255525943903,
      "learning_rate": 0.00016869668623579353,
      "loss": 0.6566,
      "step": 3145
    },
    {
      "epoch": 0.2812947067238913,
      "grad_norm": 0.15705238794225662,
      "learning_rate": 0.00016867563836991492,
      "loss": 0.6986,
      "step": 3146
    },
    {
      "epoch": 0.2813841201716738,
      "grad_norm": 0.13190846531124034,
      "learning_rate": 0.00016865458474429342,
      "loss": 0.7116,
      "step": 3147
    },
    {
      "epoch": 0.2814735336194564,
      "grad_norm": 0.14230890036419228,
      "learning_rate": 0.00016863352536069482,
      "loss": 0.7377,
      "step": 3148
    },
    {
      "epoch": 0.2815629470672389,
      "grad_norm": 0.13047200865118386,
      "learning_rate": 0.00016861246022088536,
      "loss": 0.6803,
      "step": 3149
    },
    {
      "epoch": 0.28165236051502146,
      "grad_norm": 0.13728972403596312,
      "learning_rate": 0.0001685913893266317,
      "loss": 0.6818,
      "step": 3150
    },
    {
      "epoch": 0.28174177396280403,
      "grad_norm": 0.1385436051055126,
      "learning_rate": 0.00016857031267970105,
      "loss": 0.69,
      "step": 3151
    },
    {
      "epoch": 0.28183118741058655,
      "grad_norm": 0.1397153599603963,
      "learning_rate": 0.00016854923028186111,
      "loss": 0.6843,
      "step": 3152
    },
    {
      "epoch": 0.2819206008583691,
      "grad_norm": 0.1681290119689101,
      "learning_rate": 0.00016852814213488,
      "loss": 0.7326,
      "step": 3153
    },
    {
      "epoch": 0.28201001430615164,
      "grad_norm": 0.14956279655126326,
      "learning_rate": 0.00016850704824052635,
      "loss": 0.6735,
      "step": 3154
    },
    {
      "epoch": 0.2820994277539342,
      "grad_norm": 0.14807158011291632,
      "learning_rate": 0.00016848594860056933,
      "loss": 0.6537,
      "step": 3155
    },
    {
      "epoch": 0.2821888412017167,
      "grad_norm": 0.16831453800479357,
      "learning_rate": 0.00016846484321677852,
      "loss": 0.7416,
      "step": 3156
    },
    {
      "epoch": 0.2822782546494993,
      "grad_norm": 0.154742617705706,
      "learning_rate": 0.00016844373209092396,
      "loss": 0.7247,
      "step": 3157
    },
    {
      "epoch": 0.2823676680972818,
      "grad_norm": 0.15205503927117353,
      "learning_rate": 0.00016842261522477628,
      "loss": 0.6913,
      "step": 3158
    },
    {
      "epoch": 0.2824570815450644,
      "grad_norm": 0.13331673954346515,
      "learning_rate": 0.00016840149262010648,
      "loss": 0.6474,
      "step": 3159
    },
    {
      "epoch": 0.2825464949928469,
      "grad_norm": 0.1385344257888011,
      "learning_rate": 0.00016838036427868608,
      "loss": 0.6967,
      "step": 3160
    },
    {
      "epoch": 0.28263590844062947,
      "grad_norm": 0.16731406218898284,
      "learning_rate": 0.00016835923020228712,
      "loss": 0.7329,
      "step": 3161
    },
    {
      "epoch": 0.28272532188841204,
      "grad_norm": 0.14792029392088324,
      "learning_rate": 0.0001683380903926821,
      "loss": 0.7229,
      "step": 3162
    },
    {
      "epoch": 0.28281473533619456,
      "grad_norm": 0.16242887177574736,
      "learning_rate": 0.00016831694485164398,
      "loss": 0.7116,
      "step": 3163
    },
    {
      "epoch": 0.2829041487839771,
      "grad_norm": 0.1407199903236207,
      "learning_rate": 0.00016829579358094616,
      "loss": 0.6987,
      "step": 3164
    },
    {
      "epoch": 0.28299356223175964,
      "grad_norm": 0.1460277942457369,
      "learning_rate": 0.00016827463658236264,
      "loss": 0.6645,
      "step": 3165
    },
    {
      "epoch": 0.2830829756795422,
      "grad_norm": 0.1566825813573471,
      "learning_rate": 0.0001682534738576678,
      "loss": 0.633,
      "step": 3166
    },
    {
      "epoch": 0.28317238912732473,
      "grad_norm": 0.14655816213829603,
      "learning_rate": 0.00016823230540863654,
      "loss": 0.6595,
      "step": 3167
    },
    {
      "epoch": 0.2832618025751073,
      "grad_norm": 0.13899773613984445,
      "learning_rate": 0.00016821113123704424,
      "loss": 0.6128,
      "step": 3168
    },
    {
      "epoch": 0.2833512160228898,
      "grad_norm": 0.136641133577222,
      "learning_rate": 0.0001681899513446667,
      "loss": 0.658,
      "step": 3169
    },
    {
      "epoch": 0.2834406294706724,
      "grad_norm": 0.1599868779834608,
      "learning_rate": 0.00016816876573328037,
      "loss": 0.6963,
      "step": 3170
    },
    {
      "epoch": 0.28353004291845496,
      "grad_norm": 0.1514293195740439,
      "learning_rate": 0.00016814757440466188,
      "loss": 0.6872,
      "step": 3171
    },
    {
      "epoch": 0.2836194563662375,
      "grad_norm": 0.12642011192286567,
      "learning_rate": 0.0001681263773605887,
      "loss": 0.6817,
      "step": 3172
    },
    {
      "epoch": 0.28370886981402005,
      "grad_norm": 0.1464894541066509,
      "learning_rate": 0.00016810517460283853,
      "loss": 0.6856,
      "step": 3173
    },
    {
      "epoch": 0.28379828326180256,
      "grad_norm": 0.17325620436267203,
      "learning_rate": 0.0001680839661331896,
      "loss": 0.4034,
      "step": 3174
    },
    {
      "epoch": 0.28388769670958514,
      "grad_norm": 0.15657322985262573,
      "learning_rate": 0.00016806275195342064,
      "loss": 0.6846,
      "step": 3175
    },
    {
      "epoch": 0.28397711015736765,
      "grad_norm": 0.16422101365537076,
      "learning_rate": 0.00016804153206531088,
      "loss": 0.7324,
      "step": 3176
    },
    {
      "epoch": 0.2840665236051502,
      "grad_norm": 0.16799811362727468,
      "learning_rate": 0.00016802030647064,
      "loss": 0.7211,
      "step": 3177
    },
    {
      "epoch": 0.28415593705293274,
      "grad_norm": 0.15611137989005114,
      "learning_rate": 0.00016799907517118818,
      "loss": 0.6818,
      "step": 3178
    },
    {
      "epoch": 0.2842453505007153,
      "grad_norm": 0.15854179097255594,
      "learning_rate": 0.00016797783816873603,
      "loss": 0.3611,
      "step": 3179
    },
    {
      "epoch": 0.2843347639484979,
      "grad_norm": 0.16376140633870048,
      "learning_rate": 0.00016795659546506468,
      "loss": 0.7226,
      "step": 3180
    },
    {
      "epoch": 0.2844241773962804,
      "grad_norm": 0.14612123401484187,
      "learning_rate": 0.00016793534706195575,
      "loss": 0.72,
      "step": 3181
    },
    {
      "epoch": 0.28451359084406297,
      "grad_norm": 0.14224477092062948,
      "learning_rate": 0.0001679140929611913,
      "loss": 0.6771,
      "step": 3182
    },
    {
      "epoch": 0.2846030042918455,
      "grad_norm": 0.17054824664273122,
      "learning_rate": 0.00016789283316455392,
      "loss": 0.7269,
      "step": 3183
    },
    {
      "epoch": 0.28469241773962806,
      "grad_norm": 0.15140948840419652,
      "learning_rate": 0.00016787156767382659,
      "loss": 0.7011,
      "step": 3184
    },
    {
      "epoch": 0.2847818311874106,
      "grad_norm": 0.15853843698845577,
      "learning_rate": 0.00016785029649079287,
      "loss": 0.6958,
      "step": 3185
    },
    {
      "epoch": 0.28487124463519314,
      "grad_norm": 0.129681393904681,
      "learning_rate": 0.0001678290196172367,
      "loss": 0.6377,
      "step": 3186
    },
    {
      "epoch": 0.28496065808297566,
      "grad_norm": 0.1683811399660816,
      "learning_rate": 0.0001678077370549426,
      "loss": 0.7441,
      "step": 3187
    },
    {
      "epoch": 0.28505007153075823,
      "grad_norm": 0.15524544954297648,
      "learning_rate": 0.00016778644880569544,
      "loss": 0.6749,
      "step": 3188
    },
    {
      "epoch": 0.28513948497854075,
      "grad_norm": 0.12838150151752584,
      "learning_rate": 0.00016776515487128073,
      "loss": 0.6275,
      "step": 3189
    },
    {
      "epoch": 0.2852288984263233,
      "grad_norm": 0.14455585924198058,
      "learning_rate": 0.00016774385525348428,
      "loss": 0.6728,
      "step": 3190
    },
    {
      "epoch": 0.2853183118741059,
      "grad_norm": 0.15215394112415156,
      "learning_rate": 0.00016772254995409255,
      "loss": 0.6758,
      "step": 3191
    },
    {
      "epoch": 0.2854077253218884,
      "grad_norm": 0.1360828144563967,
      "learning_rate": 0.00016770123897489228,
      "loss": 0.6526,
      "step": 3192
    },
    {
      "epoch": 0.285497138769671,
      "grad_norm": 0.13218559263192547,
      "learning_rate": 0.00016767992231767092,
      "loss": 0.6841,
      "step": 3193
    },
    {
      "epoch": 0.2855865522174535,
      "grad_norm": 0.15350630193498463,
      "learning_rate": 0.0001676585999842162,
      "loss": 0.6772,
      "step": 3194
    },
    {
      "epoch": 0.28567596566523606,
      "grad_norm": 0.12744520502231213,
      "learning_rate": 0.0001676372719763164,
      "loss": 0.6865,
      "step": 3195
    },
    {
      "epoch": 0.2857653791130186,
      "grad_norm": 0.14747802022757464,
      "learning_rate": 0.0001676159382957603,
      "loss": 0.7096,
      "step": 3196
    },
    {
      "epoch": 0.28585479256080115,
      "grad_norm": 0.1392620337529867,
      "learning_rate": 0.0001675945989443371,
      "loss": 0.6611,
      "step": 3197
    },
    {
      "epoch": 0.28594420600858367,
      "grad_norm": 0.1484137997492953,
      "learning_rate": 0.0001675732539238365,
      "loss": 0.6825,
      "step": 3198
    },
    {
      "epoch": 0.28603361945636624,
      "grad_norm": 0.13247075430160793,
      "learning_rate": 0.00016755190323604872,
      "loss": 0.6586,
      "step": 3199
    },
    {
      "epoch": 0.2861230329041488,
      "grad_norm": 0.15436881385330176,
      "learning_rate": 0.0001675305468827644,
      "loss": 0.7049,
      "step": 3200
    },
    {
      "epoch": 0.2862124463519313,
      "grad_norm": 0.1436221380681582,
      "learning_rate": 0.00016750918486577466,
      "loss": 0.659,
      "step": 3201
    },
    {
      "epoch": 0.2863018597997139,
      "grad_norm": 0.14144174678036892,
      "learning_rate": 0.00016748781718687111,
      "loss": 0.6945,
      "step": 3202
    },
    {
      "epoch": 0.2863912732474964,
      "grad_norm": 0.15704423406658596,
      "learning_rate": 0.00016746644384784586,
      "loss": 0.6911,
      "step": 3203
    },
    {
      "epoch": 0.286480686695279,
      "grad_norm": 0.25348913793126615,
      "learning_rate": 0.00016744506485049144,
      "loss": 0.4104,
      "step": 3204
    },
    {
      "epoch": 0.2865701001430615,
      "grad_norm": 0.15481767218580744,
      "learning_rate": 0.00016742368019660088,
      "loss": 0.6783,
      "step": 3205
    },
    {
      "epoch": 0.2866595135908441,
      "grad_norm": 0.1492943378198158,
      "learning_rate": 0.0001674022898879677,
      "loss": 0.6911,
      "step": 3206
    },
    {
      "epoch": 0.2867489270386266,
      "grad_norm": 0.13293114821389937,
      "learning_rate": 0.00016738089392638586,
      "loss": 0.6434,
      "step": 3207
    },
    {
      "epoch": 0.28683834048640916,
      "grad_norm": 0.13531618927827702,
      "learning_rate": 0.0001673594923136498,
      "loss": 0.6636,
      "step": 3208
    },
    {
      "epoch": 0.2869277539341917,
      "grad_norm": 0.1583302817185239,
      "learning_rate": 0.00016733808505155448,
      "loss": 0.7512,
      "step": 3209
    },
    {
      "epoch": 0.28701716738197425,
      "grad_norm": 0.13712906576954506,
      "learning_rate": 0.0001673166721418953,
      "loss": 0.6792,
      "step": 3210
    },
    {
      "epoch": 0.2871065808297568,
      "grad_norm": 0.15161625252556493,
      "learning_rate": 0.00016729525358646813,
      "loss": 0.7105,
      "step": 3211
    },
    {
      "epoch": 0.28719599427753933,
      "grad_norm": 0.14161486507566912,
      "learning_rate": 0.00016727382938706931,
      "loss": 0.679,
      "step": 3212
    },
    {
      "epoch": 0.2872854077253219,
      "grad_norm": 0.14221091094122695,
      "learning_rate": 0.00016725239954549565,
      "loss": 0.6514,
      "step": 3213
    },
    {
      "epoch": 0.2873748211731044,
      "grad_norm": 0.14596289287338562,
      "learning_rate": 0.00016723096406354447,
      "loss": 0.6893,
      "step": 3214
    },
    {
      "epoch": 0.287464234620887,
      "grad_norm": 0.13000540508941924,
      "learning_rate": 0.00016720952294301355,
      "loss": 0.7132,
      "step": 3215
    },
    {
      "epoch": 0.2875536480686695,
      "grad_norm": 0.13853712442254276,
      "learning_rate": 0.00016718807618570106,
      "loss": 0.6843,
      "step": 3216
    },
    {
      "epoch": 0.2876430615164521,
      "grad_norm": 0.1533170713063287,
      "learning_rate": 0.0001671666237934058,
      "loss": 0.7075,
      "step": 3217
    },
    {
      "epoch": 0.2877324749642346,
      "grad_norm": 0.13461951111093243,
      "learning_rate": 0.00016714516576792692,
      "loss": 0.6542,
      "step": 3218
    },
    {
      "epoch": 0.28782188841201717,
      "grad_norm": 0.16421648170019504,
      "learning_rate": 0.00016712370211106406,
      "loss": 0.6906,
      "step": 3219
    },
    {
      "epoch": 0.28791130185979974,
      "grad_norm": 0.15448881924803218,
      "learning_rate": 0.0001671022328246174,
      "loss": 0.7422,
      "step": 3220
    },
    {
      "epoch": 0.28800071530758226,
      "grad_norm": 0.1457808355160456,
      "learning_rate": 0.00016708075791038745,
      "loss": 0.6667,
      "step": 3221
    },
    {
      "epoch": 0.2880901287553648,
      "grad_norm": 0.16082030195500852,
      "learning_rate": 0.00016705927737017544,
      "loss": 0.7017,
      "step": 3222
    },
    {
      "epoch": 0.28817954220314734,
      "grad_norm": 0.14728191842853422,
      "learning_rate": 0.00016703779120578273,
      "loss": 0.6913,
      "step": 3223
    },
    {
      "epoch": 0.2882689556509299,
      "grad_norm": 0.13433480988319993,
      "learning_rate": 0.00016701629941901148,
      "loss": 0.6568,
      "step": 3224
    },
    {
      "epoch": 0.28835836909871243,
      "grad_norm": 0.16100435666237745,
      "learning_rate": 0.00016699480201166415,
      "loss": 0.6894,
      "step": 3225
    },
    {
      "epoch": 0.288447782546495,
      "grad_norm": 0.13375703132899575,
      "learning_rate": 0.00016697329898554365,
      "loss": 0.6839,
      "step": 3226
    },
    {
      "epoch": 0.2885371959942775,
      "grad_norm": 0.14504581229505265,
      "learning_rate": 0.00016695179034245346,
      "loss": 0.691,
      "step": 3227
    },
    {
      "epoch": 0.2886266094420601,
      "grad_norm": 0.18108116546952496,
      "learning_rate": 0.00016693027608419747,
      "loss": 0.6922,
      "step": 3228
    },
    {
      "epoch": 0.2887160228898426,
      "grad_norm": 0.16110664504482236,
      "learning_rate": 0.00016690875621258006,
      "loss": 0.7277,
      "step": 3229
    },
    {
      "epoch": 0.2888054363376252,
      "grad_norm": 0.15381375648339066,
      "learning_rate": 0.00016688723072940607,
      "loss": 0.7103,
      "step": 3230
    },
    {
      "epoch": 0.28889484978540775,
      "grad_norm": 0.13635523040055403,
      "learning_rate": 0.0001668656996364808,
      "loss": 0.6922,
      "step": 3231
    },
    {
      "epoch": 0.28898426323319026,
      "grad_norm": 0.139929508805701,
      "learning_rate": 0.0001668441629356101,
      "loss": 0.6967,
      "step": 3232
    },
    {
      "epoch": 0.28907367668097284,
      "grad_norm": 0.1500683602407743,
      "learning_rate": 0.00016682262062860014,
      "loss": 0.6304,
      "step": 3233
    },
    {
      "epoch": 0.28916309012875535,
      "grad_norm": 0.15759704704670865,
      "learning_rate": 0.0001668010727172577,
      "loss": 0.7065,
      "step": 3234
    },
    {
      "epoch": 0.2892525035765379,
      "grad_norm": 0.15625425216821043,
      "learning_rate": 0.00016677951920338995,
      "loss": 0.7381,
      "step": 3235
    },
    {
      "epoch": 0.28934191702432044,
      "grad_norm": 0.1447669621573613,
      "learning_rate": 0.00016675796008880462,
      "loss": 0.6886,
      "step": 3236
    },
    {
      "epoch": 0.289431330472103,
      "grad_norm": 0.14063487976801897,
      "learning_rate": 0.00016673639537530976,
      "loss": 0.6483,
      "step": 3237
    },
    {
      "epoch": 0.2895207439198855,
      "grad_norm": 0.1398168895205578,
      "learning_rate": 0.00016671482506471402,
      "loss": 0.6962,
      "step": 3238
    },
    {
      "epoch": 0.2896101573676681,
      "grad_norm": 0.13709667917813542,
      "learning_rate": 0.0001666932491588265,
      "loss": 0.6593,
      "step": 3239
    },
    {
      "epoch": 0.28969957081545067,
      "grad_norm": 0.1352071942753766,
      "learning_rate": 0.00016667166765945668,
      "loss": 0.6335,
      "step": 3240
    },
    {
      "epoch": 0.2897889842632332,
      "grad_norm": 0.14175945115426683,
      "learning_rate": 0.00016665008056841466,
      "loss": 0.6516,
      "step": 3241
    },
    {
      "epoch": 0.28987839771101576,
      "grad_norm": 0.15744674096801872,
      "learning_rate": 0.00016662848788751085,
      "loss": 0.702,
      "step": 3242
    },
    {
      "epoch": 0.28996781115879827,
      "grad_norm": 0.13588128715270095,
      "learning_rate": 0.00016660688961855623,
      "loss": 0.6818,
      "step": 3243
    },
    {
      "epoch": 0.29005722460658084,
      "grad_norm": 0.1499371629637078,
      "learning_rate": 0.0001665852857633622,
      "loss": 0.7026,
      "step": 3244
    },
    {
      "epoch": 0.29014663805436336,
      "grad_norm": 0.15888552490951546,
      "learning_rate": 0.0001665636763237407,
      "loss": 0.6976,
      "step": 3245
    },
    {
      "epoch": 0.29023605150214593,
      "grad_norm": 0.15885495069825106,
      "learning_rate": 0.00016654206130150404,
      "loss": 0.7175,
      "step": 3246
    },
    {
      "epoch": 0.29032546494992845,
      "grad_norm": 0.13642249014964392,
      "learning_rate": 0.00016652044069846505,
      "loss": 0.7098,
      "step": 3247
    },
    {
      "epoch": 0.290414878397711,
      "grad_norm": 0.12719996605109285,
      "learning_rate": 0.00016649881451643705,
      "loss": 0.6674,
      "step": 3248
    },
    {
      "epoch": 0.2905042918454936,
      "grad_norm": 0.14720439104981328,
      "learning_rate": 0.0001664771827572338,
      "loss": 0.7178,
      "step": 3249
    },
    {
      "epoch": 0.2905937052932761,
      "grad_norm": 0.16240900691741092,
      "learning_rate": 0.0001664555454226695,
      "loss": 0.7079,
      "step": 3250
    },
    {
      "epoch": 0.2906831187410587,
      "grad_norm": 0.1346357892408328,
      "learning_rate": 0.00016643390251455884,
      "loss": 0.6442,
      "step": 3251
    },
    {
      "epoch": 0.2907725321888412,
      "grad_norm": 0.14080986751695618,
      "learning_rate": 0.00016641225403471701,
      "loss": 0.6658,
      "step": 3252
    },
    {
      "epoch": 0.29086194563662376,
      "grad_norm": 0.13849664421425437,
      "learning_rate": 0.00016639059998495968,
      "loss": 0.6765,
      "step": 3253
    },
    {
      "epoch": 0.2909513590844063,
      "grad_norm": 0.1557930487988667,
      "learning_rate": 0.00016636894036710286,
      "loss": 0.7025,
      "step": 3254
    },
    {
      "epoch": 0.29104077253218885,
      "grad_norm": 0.13958136321319675,
      "learning_rate": 0.0001663472751829632,
      "loss": 0.7032,
      "step": 3255
    },
    {
      "epoch": 0.29113018597997137,
      "grad_norm": 0.13543368388413812,
      "learning_rate": 0.0001663256044343577,
      "loss": 0.6955,
      "step": 3256
    },
    {
      "epoch": 0.29121959942775394,
      "grad_norm": 0.12161759254076808,
      "learning_rate": 0.00016630392812310384,
      "loss": 0.6387,
      "step": 3257
    },
    {
      "epoch": 0.29130901287553645,
      "grad_norm": 0.12880093078718574,
      "learning_rate": 0.00016628224625101962,
      "loss": 0.6536,
      "step": 3258
    },
    {
      "epoch": 0.291398426323319,
      "grad_norm": 0.14023737051401855,
      "learning_rate": 0.00016626055881992344,
      "loss": 0.6745,
      "step": 3259
    },
    {
      "epoch": 0.2914878397711016,
      "grad_norm": 0.1460561739691667,
      "learning_rate": 0.00016623886583163423,
      "loss": 0.6625,
      "step": 3260
    },
    {
      "epoch": 0.2915772532188841,
      "grad_norm": 0.13649269559778954,
      "learning_rate": 0.00016621716728797132,
      "loss": 0.6713,
      "step": 3261
    },
    {
      "epoch": 0.2916666666666667,
      "grad_norm": 0.14280632564403212,
      "learning_rate": 0.00016619546319075455,
      "loss": 0.7514,
      "step": 3262
    },
    {
      "epoch": 0.2917560801144492,
      "grad_norm": 0.15711987277269387,
      "learning_rate": 0.00016617375354180424,
      "loss": 0.7145,
      "step": 3263
    },
    {
      "epoch": 0.2918454935622318,
      "grad_norm": 0.16134334831766162,
      "learning_rate": 0.00016615203834294119,
      "loss": 0.7039,
      "step": 3264
    },
    {
      "epoch": 0.2919349070100143,
      "grad_norm": 0.13407921816838353,
      "learning_rate": 0.0001661303175959865,
      "loss": 0.6713,
      "step": 3265
    },
    {
      "epoch": 0.29202432045779686,
      "grad_norm": 0.12187757256379789,
      "learning_rate": 0.00016610859130276198,
      "loss": 0.6686,
      "step": 3266
    },
    {
      "epoch": 0.2921137339055794,
      "grad_norm": 0.12397611429894764,
      "learning_rate": 0.00016608685946508972,
      "loss": 0.6802,
      "step": 3267
    },
    {
      "epoch": 0.29220314735336195,
      "grad_norm": 0.21309616772255047,
      "learning_rate": 0.00016606512208479238,
      "loss": 0.4172,
      "step": 3268
    },
    {
      "epoch": 0.2922925608011445,
      "grad_norm": 0.17212900004293935,
      "learning_rate": 0.00016604337916369306,
      "loss": 0.679,
      "step": 3269
    },
    {
      "epoch": 0.29238197424892703,
      "grad_norm": 0.13095410583574535,
      "learning_rate": 0.00016602163070361526,
      "loss": 0.6476,
      "step": 3270
    },
    {
      "epoch": 0.2924713876967096,
      "grad_norm": 0.16054825572602877,
      "learning_rate": 0.00016599987670638304,
      "loss": 0.6758,
      "step": 3271
    },
    {
      "epoch": 0.2925608011444921,
      "grad_norm": 0.16339250614388046,
      "learning_rate": 0.00016597811717382083,
      "loss": 0.6847,
      "step": 3272
    },
    {
      "epoch": 0.2926502145922747,
      "grad_norm": 0.20951850528536642,
      "learning_rate": 0.00016595635210775366,
      "loss": 0.3988,
      "step": 3273
    },
    {
      "epoch": 0.2927396280400572,
      "grad_norm": 0.17460821311591115,
      "learning_rate": 0.00016593458151000688,
      "loss": 0.668,
      "step": 3274
    },
    {
      "epoch": 0.2928290414878398,
      "grad_norm": 0.16318711657062956,
      "learning_rate": 0.0001659128053824064,
      "loss": 0.6837,
      "step": 3275
    },
    {
      "epoch": 0.2929184549356223,
      "grad_norm": 0.13325120848855296,
      "learning_rate": 0.0001658910237267785,
      "loss": 0.6738,
      "step": 3276
    },
    {
      "epoch": 0.29300786838340487,
      "grad_norm": 0.12124054166649555,
      "learning_rate": 0.00016586923654495004,
      "loss": 0.6703,
      "step": 3277
    },
    {
      "epoch": 0.2930972818311874,
      "grad_norm": 0.15783237894584182,
      "learning_rate": 0.00016584744383874825,
      "loss": 0.6916,
      "step": 3278
    },
    {
      "epoch": 0.29318669527896996,
      "grad_norm": 0.15606335663931836,
      "learning_rate": 0.00016582564561000088,
      "loss": 0.694,
      "step": 3279
    },
    {
      "epoch": 0.2932761087267525,
      "grad_norm": 0.1571570557944595,
      "learning_rate": 0.0001658038418605361,
      "loss": 0.7013,
      "step": 3280
    },
    {
      "epoch": 0.29336552217453504,
      "grad_norm": 0.14520684559653516,
      "learning_rate": 0.00016578203259218257,
      "loss": 0.6947,
      "step": 3281
    },
    {
      "epoch": 0.2934549356223176,
      "grad_norm": 0.15651741309080303,
      "learning_rate": 0.00016576021780676943,
      "loss": 0.6803,
      "step": 3282
    },
    {
      "epoch": 0.29354434907010013,
      "grad_norm": 0.15485479594677065,
      "learning_rate": 0.00016573839750612623,
      "loss": 0.7019,
      "step": 3283
    },
    {
      "epoch": 0.2936337625178827,
      "grad_norm": 0.15107423583182716,
      "learning_rate": 0.00016571657169208302,
      "loss": 0.6661,
      "step": 3284
    },
    {
      "epoch": 0.2937231759656652,
      "grad_norm": 0.13549228070267424,
      "learning_rate": 0.00016569474036647028,
      "loss": 0.6421,
      "step": 3285
    },
    {
      "epoch": 0.2938125894134478,
      "grad_norm": 0.15415544574929085,
      "learning_rate": 0.00016567290353111905,
      "loss": 0.6747,
      "step": 3286
    },
    {
      "epoch": 0.2939020028612303,
      "grad_norm": 0.21574746908671957,
      "learning_rate": 0.0001656510611878607,
      "loss": 0.3574,
      "step": 3287
    },
    {
      "epoch": 0.2939914163090129,
      "grad_norm": 0.16612127534363635,
      "learning_rate": 0.00016562921333852714,
      "loss": 0.7276,
      "step": 3288
    },
    {
      "epoch": 0.29408082975679545,
      "grad_norm": 0.1285121429501177,
      "learning_rate": 0.00016560735998495066,
      "loss": 0.6189,
      "step": 3289
    },
    {
      "epoch": 0.29417024320457796,
      "grad_norm": 0.14221683174703645,
      "learning_rate": 0.0001655855011289642,
      "loss": 0.6771,
      "step": 3290
    },
    {
      "epoch": 0.29425965665236054,
      "grad_norm": 0.1484882833997583,
      "learning_rate": 0.00016556363677240098,
      "loss": 0.6858,
      "step": 3291
    },
    {
      "epoch": 0.29434907010014305,
      "grad_norm": 0.14474471841725065,
      "learning_rate": 0.00016554176691709467,
      "loss": 0.6993,
      "step": 3292
    },
    {
      "epoch": 0.2944384835479256,
      "grad_norm": 0.15324390790355452,
      "learning_rate": 0.00016551989156487955,
      "loss": 0.7124,
      "step": 3293
    },
    {
      "epoch": 0.29452789699570814,
      "grad_norm": 0.1269523511025128,
      "learning_rate": 0.00016549801071759026,
      "loss": 0.6923,
      "step": 3294
    },
    {
      "epoch": 0.2946173104434907,
      "grad_norm": 0.12493671329644411,
      "learning_rate": 0.00016547612437706189,
      "loss": 0.647,
      "step": 3295
    },
    {
      "epoch": 0.2947067238912732,
      "grad_norm": 0.149087659447133,
      "learning_rate": 0.00016545423254513004,
      "loss": 0.6675,
      "step": 3296
    },
    {
      "epoch": 0.2947961373390558,
      "grad_norm": 0.14391433466529968,
      "learning_rate": 0.00016543233522363078,
      "loss": 0.6681,
      "step": 3297
    },
    {
      "epoch": 0.2948855507868383,
      "grad_norm": 0.15531267148507788,
      "learning_rate": 0.00016541043241440057,
      "loss": 0.7152,
      "step": 3298
    },
    {
      "epoch": 0.2949749642346209,
      "grad_norm": 0.12453561624411098,
      "learning_rate": 0.0001653885241192764,
      "loss": 0.6773,
      "step": 3299
    },
    {
      "epoch": 0.29506437768240346,
      "grad_norm": 0.14303778731531744,
      "learning_rate": 0.00016536661034009567,
      "loss": 0.6952,
      "step": 3300
    },
    {
      "epoch": 0.29515379113018597,
      "grad_norm": 0.11844199699177334,
      "learning_rate": 0.00016534469107869627,
      "loss": 0.6609,
      "step": 3301
    },
    {
      "epoch": 0.29524320457796854,
      "grad_norm": 0.15224035258984978,
      "learning_rate": 0.00016532276633691656,
      "loss": 0.6913,
      "step": 3302
    },
    {
      "epoch": 0.29533261802575106,
      "grad_norm": 0.15948219438139447,
      "learning_rate": 0.00016530083611659532,
      "loss": 0.7487,
      "step": 3303
    },
    {
      "epoch": 0.29542203147353363,
      "grad_norm": 0.1585445211373716,
      "learning_rate": 0.00016527890041957184,
      "loss": 0.7372,
      "step": 3304
    },
    {
      "epoch": 0.29551144492131615,
      "grad_norm": 0.15353219742702956,
      "learning_rate": 0.0001652569592476858,
      "loss": 0.7247,
      "step": 3305
    },
    {
      "epoch": 0.2956008583690987,
      "grad_norm": 0.14359396392214885,
      "learning_rate": 0.0001652350126027774,
      "loss": 0.6702,
      "step": 3306
    },
    {
      "epoch": 0.29569027181688123,
      "grad_norm": 0.1889543877589913,
      "learning_rate": 0.00016521306048668727,
      "loss": 0.4072,
      "step": 3307
    },
    {
      "epoch": 0.2957796852646638,
      "grad_norm": 0.12359313615764889,
      "learning_rate": 0.00016519110290125652,
      "loss": 0.6638,
      "step": 3308
    },
    {
      "epoch": 0.2958690987124464,
      "grad_norm": 0.14965222651800508,
      "learning_rate": 0.0001651691398483267,
      "loss": 0.6722,
      "step": 3309
    },
    {
      "epoch": 0.2959585121602289,
      "grad_norm": 0.14840912170652043,
      "learning_rate": 0.00016514717132973982,
      "loss": 0.6571,
      "step": 3310
    },
    {
      "epoch": 0.29604792560801146,
      "grad_norm": 0.15356042569886014,
      "learning_rate": 0.00016512519734733836,
      "loss": 0.6956,
      "step": 3311
    },
    {
      "epoch": 0.296137339055794,
      "grad_norm": 0.1511651236880466,
      "learning_rate": 0.00016510321790296525,
      "loss": 0.6994,
      "step": 3312
    },
    {
      "epoch": 0.29622675250357655,
      "grad_norm": 0.1634084985580763,
      "learning_rate": 0.0001650812329984639,
      "loss": 0.3668,
      "step": 3313
    },
    {
      "epoch": 0.29631616595135907,
      "grad_norm": 0.15282963508206124,
      "learning_rate": 0.0001650592426356781,
      "loss": 0.7069,
      "step": 3314
    },
    {
      "epoch": 0.29640557939914164,
      "grad_norm": 0.14721370416950721,
      "learning_rate": 0.00016503724681645222,
      "loss": 0.6719,
      "step": 3315
    },
    {
      "epoch": 0.29649499284692415,
      "grad_norm": 0.14089108435850334,
      "learning_rate": 0.000165015245542631,
      "loss": 0.6732,
      "step": 3316
    },
    {
      "epoch": 0.2965844062947067,
      "grad_norm": 0.16885989510536756,
      "learning_rate": 0.00016499323881605964,
      "loss": 0.7087,
      "step": 3317
    },
    {
      "epoch": 0.2966738197424893,
      "grad_norm": 0.1458883963846471,
      "learning_rate": 0.00016497122663858385,
      "loss": 0.7142,
      "step": 3318
    },
    {
      "epoch": 0.2967632331902718,
      "grad_norm": 0.13826221004134354,
      "learning_rate": 0.0001649492090120497,
      "loss": 0.6853,
      "step": 3319
    },
    {
      "epoch": 0.2968526466380544,
      "grad_norm": 0.17193134087027542,
      "learning_rate": 0.00016492718593830389,
      "loss": 0.7851,
      "step": 3320
    },
    {
      "epoch": 0.2969420600858369,
      "grad_norm": 0.16161884293016376,
      "learning_rate": 0.00016490515741919334,
      "loss": 0.6925,
      "step": 3321
    },
    {
      "epoch": 0.2970314735336195,
      "grad_norm": 0.1518300014840223,
      "learning_rate": 0.00016488312345656566,
      "loss": 0.6745,
      "step": 3322
    },
    {
      "epoch": 0.297120886981402,
      "grad_norm": 0.14522810072265016,
      "learning_rate": 0.0001648610840522688,
      "loss": 0.7028,
      "step": 3323
    },
    {
      "epoch": 0.29721030042918456,
      "grad_norm": 0.14927883470800996,
      "learning_rate": 0.00016483903920815111,
      "loss": 0.7054,
      "step": 3324
    },
    {
      "epoch": 0.2972997138769671,
      "grad_norm": 0.1561257305057558,
      "learning_rate": 0.0001648169889260615,
      "loss": 0.6982,
      "step": 3325
    },
    {
      "epoch": 0.29738912732474965,
      "grad_norm": 0.16871658487806804,
      "learning_rate": 0.00016479493320784938,
      "loss": 0.3634,
      "step": 3326
    },
    {
      "epoch": 0.29747854077253216,
      "grad_norm": 0.14879485139679524,
      "learning_rate": 0.0001647728720553644,
      "loss": 0.6578,
      "step": 3327
    },
    {
      "epoch": 0.29756795422031473,
      "grad_norm": 0.15913113919271635,
      "learning_rate": 0.00016475080547045687,
      "loss": 0.7086,
      "step": 3328
    },
    {
      "epoch": 0.2976573676680973,
      "grad_norm": 0.15241537294248197,
      "learning_rate": 0.0001647287334549775,
      "loss": 0.7014,
      "step": 3329
    },
    {
      "epoch": 0.2977467811158798,
      "grad_norm": 0.12466356468708302,
      "learning_rate": 0.00016470665601077742,
      "loss": 0.6922,
      "step": 3330
    },
    {
      "epoch": 0.2978361945636624,
      "grad_norm": 0.1712091311986305,
      "learning_rate": 0.00016468457313970826,
      "loss": 0.3563,
      "step": 3331
    },
    {
      "epoch": 0.2979256080114449,
      "grad_norm": 0.16241745934203722,
      "learning_rate": 0.00016466248484362208,
      "loss": 0.6441,
      "step": 3332
    },
    {
      "epoch": 0.2980150214592275,
      "grad_norm": 0.12642387808477104,
      "learning_rate": 0.00016464039112437138,
      "loss": 0.6712,
      "step": 3333
    },
    {
      "epoch": 0.29810443490701,
      "grad_norm": 0.14538714800462757,
      "learning_rate": 0.00016461829198380912,
      "loss": 0.6949,
      "step": 3334
    },
    {
      "epoch": 0.29819384835479257,
      "grad_norm": 0.1571101332461215,
      "learning_rate": 0.00016459618742378876,
      "loss": 0.7098,
      "step": 3335
    },
    {
      "epoch": 0.2982832618025751,
      "grad_norm": 0.14112545066103702,
      "learning_rate": 0.0001645740774461642,
      "loss": 0.6776,
      "step": 3336
    },
    {
      "epoch": 0.29837267525035766,
      "grad_norm": 0.1409341397631404,
      "learning_rate": 0.00016455196205278968,
      "loss": 0.7141,
      "step": 3337
    },
    {
      "epoch": 0.2984620886981402,
      "grad_norm": 0.1435979234198407,
      "learning_rate": 0.0001645298412455201,
      "loss": 0.7177,
      "step": 3338
    },
    {
      "epoch": 0.29855150214592274,
      "grad_norm": 0.14849157218712766,
      "learning_rate": 0.0001645077150262107,
      "loss": 0.7166,
      "step": 3339
    },
    {
      "epoch": 0.2986409155937053,
      "grad_norm": 0.1312303711407435,
      "learning_rate": 0.00016448558339671713,
      "loss": 0.6712,
      "step": 3340
    },
    {
      "epoch": 0.29873032904148783,
      "grad_norm": 0.15815901223500592,
      "learning_rate": 0.00016446344635889554,
      "loss": 0.7226,
      "step": 3341
    },
    {
      "epoch": 0.2988197424892704,
      "grad_norm": 0.15795608450212115,
      "learning_rate": 0.00016444130391460258,
      "loss": 0.6972,
      "step": 3342
    },
    {
      "epoch": 0.2989091559370529,
      "grad_norm": 0.15561791479297998,
      "learning_rate": 0.00016441915606569526,
      "loss": 0.7073,
      "step": 3343
    },
    {
      "epoch": 0.2989985693848355,
      "grad_norm": 0.13042455558827004,
      "learning_rate": 0.00016439700281403114,
      "loss": 0.6834,
      "step": 3344
    },
    {
      "epoch": 0.299087982832618,
      "grad_norm": 0.1276409086011988,
      "learning_rate": 0.00016437484416146817,
      "loss": 0.6386,
      "step": 3345
    },
    {
      "epoch": 0.2991773962804006,
      "grad_norm": 0.13713821410630597,
      "learning_rate": 0.00016435268010986476,
      "loss": 0.7178,
      "step": 3346
    },
    {
      "epoch": 0.2992668097281831,
      "grad_norm": 0.16276948675008612,
      "learning_rate": 0.0001643305106610798,
      "loss": 0.7504,
      "step": 3347
    },
    {
      "epoch": 0.29935622317596566,
      "grad_norm": 0.13956412047666977,
      "learning_rate": 0.00016430833581697254,
      "loss": 0.6749,
      "step": 3348
    },
    {
      "epoch": 0.29944563662374823,
      "grad_norm": 0.14527872054576071,
      "learning_rate": 0.00016428615557940288,
      "loss": 0.7073,
      "step": 3349
    },
    {
      "epoch": 0.29953505007153075,
      "grad_norm": 0.14058807460263484,
      "learning_rate": 0.000164263969950231,
      "loss": 0.7347,
      "step": 3350
    },
    {
      "epoch": 0.2996244635193133,
      "grad_norm": 0.13118893191654563,
      "learning_rate": 0.0001642417789313175,
      "loss": 0.6823,
      "step": 3351
    },
    {
      "epoch": 0.29971387696709584,
      "grad_norm": 0.1277803026861289,
      "learning_rate": 0.00016421958252452363,
      "loss": 0.669,
      "step": 3352
    },
    {
      "epoch": 0.2998032904148784,
      "grad_norm": 0.1647729996588922,
      "learning_rate": 0.00016419738073171093,
      "loss": 0.7078,
      "step": 3353
    },
    {
      "epoch": 0.2998927038626609,
      "grad_norm": 0.1434317843572234,
      "learning_rate": 0.00016417517355474145,
      "loss": 0.6951,
      "step": 3354
    },
    {
      "epoch": 0.2999821173104435,
      "grad_norm": 0.14088783320678075,
      "learning_rate": 0.00016415296099547765,
      "loss": 0.6988,
      "step": 3355
    },
    {
      "epoch": 0.300071530758226,
      "grad_norm": 0.16570061236116745,
      "learning_rate": 0.0001641307430557825,
      "loss": 0.7158,
      "step": 3356
    },
    {
      "epoch": 0.3001609442060086,
      "grad_norm": 0.15044393978636125,
      "learning_rate": 0.0001641085197375194,
      "loss": 0.6942,
      "step": 3357
    },
    {
      "epoch": 0.30025035765379116,
      "grad_norm": 0.16355547920698205,
      "learning_rate": 0.00016408629104255212,
      "loss": 0.7157,
      "step": 3358
    },
    {
      "epoch": 0.30033977110157367,
      "grad_norm": 0.1489458546440592,
      "learning_rate": 0.00016406405697274505,
      "loss": 0.6819,
      "step": 3359
    },
    {
      "epoch": 0.30042918454935624,
      "grad_norm": 0.12320727426488742,
      "learning_rate": 0.00016404181752996289,
      "loss": 0.6709,
      "step": 3360
    },
    {
      "epoch": 0.30051859799713876,
      "grad_norm": 0.1510001924192675,
      "learning_rate": 0.00016401957271607083,
      "loss": 0.6976,
      "step": 3361
    },
    {
      "epoch": 0.30060801144492133,
      "grad_norm": 0.16604685218904322,
      "learning_rate": 0.0001639973225329345,
      "loss": 0.725,
      "step": 3362
    },
    {
      "epoch": 0.30069742489270385,
      "grad_norm": 0.14942201336708122,
      "learning_rate": 0.00016397506698242003,
      "loss": 0.7014,
      "step": 3363
    },
    {
      "epoch": 0.3007868383404864,
      "grad_norm": 0.14190923721562484,
      "learning_rate": 0.00016395280606639395,
      "loss": 0.6476,
      "step": 3364
    },
    {
      "epoch": 0.30087625178826893,
      "grad_norm": 0.1351347661217687,
      "learning_rate": 0.00016393053978672328,
      "loss": 0.6603,
      "step": 3365
    },
    {
      "epoch": 0.3009656652360515,
      "grad_norm": 0.152747090464395,
      "learning_rate": 0.00016390826814527545,
      "loss": 0.7352,
      "step": 3366
    },
    {
      "epoch": 0.301055078683834,
      "grad_norm": 0.1438463578753031,
      "learning_rate": 0.00016388599114391833,
      "loss": 0.6861,
      "step": 3367
    },
    {
      "epoch": 0.3011444921316166,
      "grad_norm": 0.13368554647495165,
      "learning_rate": 0.0001638637087845203,
      "loss": 0.6554,
      "step": 3368
    },
    {
      "epoch": 0.30123390557939916,
      "grad_norm": 0.17550596688895875,
      "learning_rate": 0.00016384142106895015,
      "loss": 0.725,
      "step": 3369
    },
    {
      "epoch": 0.3013233190271817,
      "grad_norm": 0.15312298752322312,
      "learning_rate": 0.0001638191279990771,
      "loss": 0.6942,
      "step": 3370
    },
    {
      "epoch": 0.30141273247496425,
      "grad_norm": 0.16225522194012781,
      "learning_rate": 0.00016379682957677087,
      "loss": 0.704,
      "step": 3371
    },
    {
      "epoch": 0.30150214592274677,
      "grad_norm": 0.14335571768005753,
      "learning_rate": 0.00016377452580390158,
      "loss": 0.6816,
      "step": 3372
    },
    {
      "epoch": 0.30159155937052934,
      "grad_norm": 0.14254220433945594,
      "learning_rate": 0.00016375221668233985,
      "loss": 0.7128,
      "step": 3373
    },
    {
      "epoch": 0.30168097281831185,
      "grad_norm": 0.15176037826794747,
      "learning_rate": 0.00016372990221395666,
      "loss": 0.6923,
      "step": 3374
    },
    {
      "epoch": 0.3017703862660944,
      "grad_norm": 0.1573831490631054,
      "learning_rate": 0.00016370758240062357,
      "loss": 0.7281,
      "step": 3375
    },
    {
      "epoch": 0.30185979971387694,
      "grad_norm": 0.1463742961776806,
      "learning_rate": 0.00016368525724421248,
      "loss": 0.6737,
      "step": 3376
    },
    {
      "epoch": 0.3019492131616595,
      "grad_norm": 0.14657015685874836,
      "learning_rate": 0.00016366292674659577,
      "loss": 0.696,
      "step": 3377
    },
    {
      "epoch": 0.3020386266094421,
      "grad_norm": 0.13402264067728864,
      "learning_rate": 0.0001636405909096463,
      "loss": 0.6768,
      "step": 3378
    },
    {
      "epoch": 0.3021280400572246,
      "grad_norm": 0.14571728470583847,
      "learning_rate": 0.0001636182497352373,
      "loss": 0.6619,
      "step": 3379
    },
    {
      "epoch": 0.30221745350500717,
      "grad_norm": 0.16196281010336436,
      "learning_rate": 0.00016359590322524253,
      "loss": 0.7369,
      "step": 3380
    },
    {
      "epoch": 0.3023068669527897,
      "grad_norm": 0.1629196417055734,
      "learning_rate": 0.0001635735513815362,
      "loss": 0.6776,
      "step": 3381
    },
    {
      "epoch": 0.30239628040057226,
      "grad_norm": 0.15460630705826311,
      "learning_rate": 0.00016355119420599282,
      "loss": 0.7041,
      "step": 3382
    },
    {
      "epoch": 0.3024856938483548,
      "grad_norm": 0.17835402663607142,
      "learning_rate": 0.00016352883170048758,
      "loss": 0.6989,
      "step": 3383
    },
    {
      "epoch": 0.30257510729613735,
      "grad_norm": 0.14459493514753474,
      "learning_rate": 0.00016350646386689593,
      "loss": 0.6412,
      "step": 3384
    },
    {
      "epoch": 0.30266452074391986,
      "grad_norm": 0.15170839249874704,
      "learning_rate": 0.0001634840907070939,
      "loss": 0.7084,
      "step": 3385
    },
    {
      "epoch": 0.30275393419170243,
      "grad_norm": 0.15784241707594387,
      "learning_rate": 0.0001634617122229578,
      "loss": 0.6876,
      "step": 3386
    },
    {
      "epoch": 0.302843347639485,
      "grad_norm": 0.1528394803020767,
      "learning_rate": 0.00016343932841636456,
      "loss": 0.7379,
      "step": 3387
    },
    {
      "epoch": 0.3029327610872675,
      "grad_norm": 0.15599310115167522,
      "learning_rate": 0.00016341693928919145,
      "loss": 0.6863,
      "step": 3388
    },
    {
      "epoch": 0.3030221745350501,
      "grad_norm": 0.15173311640207374,
      "learning_rate": 0.00016339454484331624,
      "loss": 0.6539,
      "step": 3389
    },
    {
      "epoch": 0.3031115879828326,
      "grad_norm": 0.1456870803514347,
      "learning_rate": 0.00016337214508061712,
      "loss": 0.6699,
      "step": 3390
    },
    {
      "epoch": 0.3032010014306152,
      "grad_norm": 0.13734083504772954,
      "learning_rate": 0.00016334974000297271,
      "loss": 0.6971,
      "step": 3391
    },
    {
      "epoch": 0.3032904148783977,
      "grad_norm": 0.12288460917454361,
      "learning_rate": 0.0001633273296122621,
      "loss": 0.6277,
      "step": 3392
    },
    {
      "epoch": 0.30337982832618027,
      "grad_norm": 0.1523379230014121,
      "learning_rate": 0.0001633049139103649,
      "loss": 0.6969,
      "step": 3393
    },
    {
      "epoch": 0.3034692417739628,
      "grad_norm": 0.13929066795796485,
      "learning_rate": 0.00016328249289916097,
      "loss": 0.6519,
      "step": 3394
    },
    {
      "epoch": 0.30355865522174535,
      "grad_norm": 0.16137034760228944,
      "learning_rate": 0.00016326006658053078,
      "loss": 0.693,
      "step": 3395
    },
    {
      "epoch": 0.30364806866952787,
      "grad_norm": 0.1461127640636511,
      "learning_rate": 0.00016323763495635523,
      "loss": 0.6692,
      "step": 3396
    },
    {
      "epoch": 0.30373748211731044,
      "grad_norm": 0.14905598368775066,
      "learning_rate": 0.00016321519802851557,
      "loss": 0.6798,
      "step": 3397
    },
    {
      "epoch": 0.303826895565093,
      "grad_norm": 0.2048345022811784,
      "learning_rate": 0.00016319275579889365,
      "loss": 0.4049,
      "step": 3398
    },
    {
      "epoch": 0.30391630901287553,
      "grad_norm": 0.1531276798947542,
      "learning_rate": 0.0001631703082693716,
      "loss": 0.6519,
      "step": 3399
    },
    {
      "epoch": 0.3040057224606581,
      "grad_norm": 0.16164115052705982,
      "learning_rate": 0.00016314785544183208,
      "loss": 0.7257,
      "step": 3400
    },
    {
      "epoch": 0.3040951359084406,
      "grad_norm": 0.1441676123156321,
      "learning_rate": 0.00016312539731815816,
      "loss": 0.6558,
      "step": 3401
    },
    {
      "epoch": 0.3041845493562232,
      "grad_norm": 0.1503476030876294,
      "learning_rate": 0.00016310293390023344,
      "loss": 0.6468,
      "step": 3402
    },
    {
      "epoch": 0.3042739628040057,
      "grad_norm": 0.1369539538987275,
      "learning_rate": 0.00016308046518994184,
      "loss": 0.6752,
      "step": 3403
    },
    {
      "epoch": 0.3043633762517883,
      "grad_norm": 0.1645956392460097,
      "learning_rate": 0.00016305799118916783,
      "loss": 0.7231,
      "step": 3404
    },
    {
      "epoch": 0.3044527896995708,
      "grad_norm": 0.1666808239029531,
      "learning_rate": 0.00016303551189979625,
      "loss": 0.7359,
      "step": 3405
    },
    {
      "epoch": 0.30454220314735336,
      "grad_norm": 0.13959485101276128,
      "learning_rate": 0.0001630130273237124,
      "loss": 0.7128,
      "step": 3406
    },
    {
      "epoch": 0.30463161659513593,
      "grad_norm": 0.1880899482508615,
      "learning_rate": 0.00016299053746280206,
      "loss": 0.4353,
      "step": 3407
    },
    {
      "epoch": 0.30472103004291845,
      "grad_norm": 0.15811805436528728,
      "learning_rate": 0.00016296804231895142,
      "loss": 0.7105,
      "step": 3408
    },
    {
      "epoch": 0.304810443490701,
      "grad_norm": 0.14757033024995286,
      "learning_rate": 0.00016294554189404708,
      "loss": 0.6819,
      "step": 3409
    },
    {
      "epoch": 0.30489985693848354,
      "grad_norm": 0.14557990424490724,
      "learning_rate": 0.00016292303618997619,
      "loss": 0.6664,
      "step": 3410
    },
    {
      "epoch": 0.3049892703862661,
      "grad_norm": 0.1489070216906672,
      "learning_rate": 0.00016290052520862624,
      "loss": 0.6929,
      "step": 3411
    },
    {
      "epoch": 0.3050786838340486,
      "grad_norm": 0.15827071777889504,
      "learning_rate": 0.00016287800895188522,
      "loss": 0.6811,
      "step": 3412
    },
    {
      "epoch": 0.3051680972818312,
      "grad_norm": 0.18680990010676568,
      "learning_rate": 0.0001628554874216415,
      "loss": 0.7355,
      "step": 3413
    },
    {
      "epoch": 0.3052575107296137,
      "grad_norm": 0.15669903854348508,
      "learning_rate": 0.00016283296061978398,
      "loss": 0.7038,
      "step": 3414
    },
    {
      "epoch": 0.3053469241773963,
      "grad_norm": 0.1721418915582675,
      "learning_rate": 0.00016281042854820194,
      "loss": 0.713,
      "step": 3415
    },
    {
      "epoch": 0.3054363376251788,
      "grad_norm": 0.16363149509886765,
      "learning_rate": 0.0001627878912087851,
      "loss": 0.724,
      "step": 3416
    },
    {
      "epoch": 0.30552575107296137,
      "grad_norm": 0.16385638778650782,
      "learning_rate": 0.00016276534860342368,
      "loss": 0.7415,
      "step": 3417
    },
    {
      "epoch": 0.30561516452074394,
      "grad_norm": 0.15256266164756174,
      "learning_rate": 0.00016274280073400824,
      "loss": 0.675,
      "step": 3418
    },
    {
      "epoch": 0.30570457796852646,
      "grad_norm": 0.14439819465723427,
      "learning_rate": 0.00016272024760242992,
      "loss": 0.669,
      "step": 3419
    },
    {
      "epoch": 0.30579399141630903,
      "grad_norm": 0.16906711757063891,
      "learning_rate": 0.00016269768921058013,
      "loss": 0.7251,
      "step": 3420
    },
    {
      "epoch": 0.30588340486409155,
      "grad_norm": 0.13454887496953383,
      "learning_rate": 0.0001626751255603509,
      "loss": 0.6528,
      "step": 3421
    },
    {
      "epoch": 0.3059728183118741,
      "grad_norm": 0.13837472140517873,
      "learning_rate": 0.00016265255665363454,
      "loss": 0.6598,
      "step": 3422
    },
    {
      "epoch": 0.30606223175965663,
      "grad_norm": 0.14371912152935706,
      "learning_rate": 0.00016262998249232398,
      "loss": 0.7299,
      "step": 3423
    },
    {
      "epoch": 0.3061516452074392,
      "grad_norm": 0.14564629749972702,
      "learning_rate": 0.00016260740307831237,
      "loss": 0.6517,
      "step": 3424
    },
    {
      "epoch": 0.3062410586552217,
      "grad_norm": 0.16071239541714488,
      "learning_rate": 0.00016258481841349348,
      "loss": 0.6942,
      "step": 3425
    },
    {
      "epoch": 0.3063304721030043,
      "grad_norm": 0.1488290598437897,
      "learning_rate": 0.0001625622284997615,
      "loss": 0.7314,
      "step": 3426
    },
    {
      "epoch": 0.30641988555078686,
      "grad_norm": 0.15810067413034037,
      "learning_rate": 0.0001625396333390109,
      "loss": 0.6814,
      "step": 3427
    },
    {
      "epoch": 0.3065092989985694,
      "grad_norm": 0.14557059201414227,
      "learning_rate": 0.00016251703293313687,
      "loss": 0.7,
      "step": 3428
    },
    {
      "epoch": 0.30659871244635195,
      "grad_norm": 0.19573697905017415,
      "learning_rate": 0.00016249442728403474,
      "loss": 0.389,
      "step": 3429
    },
    {
      "epoch": 0.30668812589413447,
      "grad_norm": 0.15778984575219765,
      "learning_rate": 0.00016247181639360045,
      "loss": 0.6588,
      "step": 3430
    },
    {
      "epoch": 0.30677753934191704,
      "grad_norm": 0.17402331232974297,
      "learning_rate": 0.00016244920026373038,
      "loss": 0.7455,
      "step": 3431
    },
    {
      "epoch": 0.30686695278969955,
      "grad_norm": 0.16858298116952342,
      "learning_rate": 0.00016242657889632133,
      "loss": 0.6757,
      "step": 3432
    },
    {
      "epoch": 0.3069563662374821,
      "grad_norm": 0.15636466896115794,
      "learning_rate": 0.0001624039522932705,
      "loss": 0.6551,
      "step": 3433
    },
    {
      "epoch": 0.30704577968526464,
      "grad_norm": 0.17762770253626325,
      "learning_rate": 0.00016238132045647553,
      "loss": 0.7481,
      "step": 3434
    },
    {
      "epoch": 0.3071351931330472,
      "grad_norm": 0.1487205896698832,
      "learning_rate": 0.00016235868338783455,
      "loss": 0.6713,
      "step": 3435
    },
    {
      "epoch": 0.3072246065808298,
      "grad_norm": 0.12596226888252532,
      "learning_rate": 0.00016233604108924609,
      "loss": 0.6774,
      "step": 3436
    },
    {
      "epoch": 0.3073140200286123,
      "grad_norm": 0.15866890964884284,
      "learning_rate": 0.0001623133935626092,
      "loss": 0.6831,
      "step": 3437
    },
    {
      "epoch": 0.30740343347639487,
      "grad_norm": 0.13323476003357976,
      "learning_rate": 0.00016229074080982317,
      "loss": 0.6611,
      "step": 3438
    },
    {
      "epoch": 0.3074928469241774,
      "grad_norm": 0.12201652732315779,
      "learning_rate": 0.000162268082832788,
      "loss": 0.6739,
      "step": 3439
    },
    {
      "epoch": 0.30758226037195996,
      "grad_norm": 0.14353615092712327,
      "learning_rate": 0.00016224541963340391,
      "loss": 0.7078,
      "step": 3440
    },
    {
      "epoch": 0.3076716738197425,
      "grad_norm": 0.1315477102206241,
      "learning_rate": 0.00016222275121357163,
      "loss": 0.6898,
      "step": 3441
    },
    {
      "epoch": 0.30776108726752505,
      "grad_norm": 0.12874352538187914,
      "learning_rate": 0.00016220007757519238,
      "loss": 0.6707,
      "step": 3442
    },
    {
      "epoch": 0.30785050071530756,
      "grad_norm": 0.15410850896214878,
      "learning_rate": 0.00016217739872016772,
      "loss": 0.6812,
      "step": 3443
    },
    {
      "epoch": 0.30793991416309013,
      "grad_norm": 0.12513028426143138,
      "learning_rate": 0.00016215471465039975,
      "loss": 0.6716,
      "step": 3444
    },
    {
      "epoch": 0.30802932761087265,
      "grad_norm": 0.1358627518173616,
      "learning_rate": 0.00016213202536779087,
      "loss": 0.735,
      "step": 3445
    },
    {
      "epoch": 0.3081187410586552,
      "grad_norm": 0.14645172056939415,
      "learning_rate": 0.00016210933087424412,
      "loss": 0.7267,
      "step": 3446
    },
    {
      "epoch": 0.3082081545064378,
      "grad_norm": 0.1547417424410237,
      "learning_rate": 0.00016208663117166277,
      "loss": 0.7063,
      "step": 3447
    },
    {
      "epoch": 0.3082975679542203,
      "grad_norm": 0.15123410346873642,
      "learning_rate": 0.00016206392626195063,
      "loss": 0.7101,
      "step": 3448
    },
    {
      "epoch": 0.3083869814020029,
      "grad_norm": 0.1495129574580578,
      "learning_rate": 0.00016204121614701197,
      "loss": 0.7017,
      "step": 3449
    },
    {
      "epoch": 0.3084763948497854,
      "grad_norm": 0.13242305438772473,
      "learning_rate": 0.00016201850082875146,
      "loss": 0.6741,
      "step": 3450
    },
    {
      "epoch": 0.30856580829756797,
      "grad_norm": 0.12240703517283569,
      "learning_rate": 0.00016199578030907415,
      "loss": 0.689,
      "step": 3451
    },
    {
      "epoch": 0.3086552217453505,
      "grad_norm": 0.12486962021698876,
      "learning_rate": 0.0001619730545898856,
      "loss": 0.7015,
      "step": 3452
    },
    {
      "epoch": 0.30874463519313305,
      "grad_norm": 0.12511601432810932,
      "learning_rate": 0.00016195032367309183,
      "loss": 0.7119,
      "step": 3453
    },
    {
      "epoch": 0.30883404864091557,
      "grad_norm": 0.15622134981614166,
      "learning_rate": 0.00016192758756059926,
      "loss": 0.7412,
      "step": 3454
    },
    {
      "epoch": 0.30892346208869814,
      "grad_norm": 0.15501576657784874,
      "learning_rate": 0.00016190484625431468,
      "loss": 0.6933,
      "step": 3455
    },
    {
      "epoch": 0.3090128755364807,
      "grad_norm": 0.16802853250305366,
      "learning_rate": 0.00016188209975614542,
      "loss": 0.7119,
      "step": 3456
    },
    {
      "epoch": 0.30910228898426323,
      "grad_norm": 0.17132767260184412,
      "learning_rate": 0.00016185934806799916,
      "loss": 0.7362,
      "step": 3457
    },
    {
      "epoch": 0.3091917024320458,
      "grad_norm": 0.14972546634202685,
      "learning_rate": 0.0001618365911917841,
      "loss": 0.7027,
      "step": 3458
    },
    {
      "epoch": 0.3092811158798283,
      "grad_norm": 0.14080987139968013,
      "learning_rate": 0.00016181382912940884,
      "loss": 0.6523,
      "step": 3459
    },
    {
      "epoch": 0.3093705293276109,
      "grad_norm": 0.13877285935766315,
      "learning_rate": 0.00016179106188278234,
      "loss": 0.7112,
      "step": 3460
    },
    {
      "epoch": 0.3094599427753934,
      "grad_norm": 0.13720797195479328,
      "learning_rate": 0.00016176828945381415,
      "loss": 0.6589,
      "step": 3461
    },
    {
      "epoch": 0.309549356223176,
      "grad_norm": 0.15917332619479815,
      "learning_rate": 0.00016174551184441408,
      "loss": 0.7016,
      "step": 3462
    },
    {
      "epoch": 0.3096387696709585,
      "grad_norm": 0.13654118637482598,
      "learning_rate": 0.00016172272905649253,
      "loss": 0.6711,
      "step": 3463
    },
    {
      "epoch": 0.30972818311874106,
      "grad_norm": 0.13937386917170885,
      "learning_rate": 0.00016169994109196023,
      "loss": 0.6461,
      "step": 3464
    },
    {
      "epoch": 0.3098175965665236,
      "grad_norm": 0.15418467525787757,
      "learning_rate": 0.00016167714795272837,
      "loss": 0.7378,
      "step": 3465
    },
    {
      "epoch": 0.30990701001430615,
      "grad_norm": 0.15249639716515245,
      "learning_rate": 0.00016165434964070862,
      "loss": 0.707,
      "step": 3466
    },
    {
      "epoch": 0.3099964234620887,
      "grad_norm": 0.1504290646177633,
      "learning_rate": 0.000161631546157813,
      "loss": 0.7094,
      "step": 3467
    },
    {
      "epoch": 0.31008583690987124,
      "grad_norm": 0.1542647324971191,
      "learning_rate": 0.00016160873750595405,
      "loss": 0.6594,
      "step": 3468
    },
    {
      "epoch": 0.3101752503576538,
      "grad_norm": 0.1494776063401177,
      "learning_rate": 0.00016158592368704472,
      "loss": 0.6857,
      "step": 3469
    },
    {
      "epoch": 0.3102646638054363,
      "grad_norm": 0.13447144443863246,
      "learning_rate": 0.00016156310470299832,
      "loss": 0.664,
      "step": 3470
    },
    {
      "epoch": 0.3103540772532189,
      "grad_norm": 0.15156491849135456,
      "learning_rate": 0.00016154028055572866,
      "loss": 0.7346,
      "step": 3471
    },
    {
      "epoch": 0.3104434907010014,
      "grad_norm": 0.17582160024443189,
      "learning_rate": 0.00016151745124715002,
      "loss": 0.7505,
      "step": 3472
    },
    {
      "epoch": 0.310532904148784,
      "grad_norm": 0.13715182318000785,
      "learning_rate": 0.000161494616779177,
      "loss": 0.648,
      "step": 3473
    },
    {
      "epoch": 0.3106223175965665,
      "grad_norm": 0.14114587019421304,
      "learning_rate": 0.00016147177715372476,
      "loss": 0.6727,
      "step": 3474
    },
    {
      "epoch": 0.31071173104434907,
      "grad_norm": 0.1486359217505858,
      "learning_rate": 0.00016144893237270887,
      "loss": 0.6712,
      "step": 3475
    },
    {
      "epoch": 0.31080114449213164,
      "grad_norm": 0.1450693796303138,
      "learning_rate": 0.00016142608243804513,
      "loss": 0.6915,
      "step": 3476
    },
    {
      "epoch": 0.31089055793991416,
      "grad_norm": 0.17286989406580588,
      "learning_rate": 0.0001614032273516501,
      "loss": 0.7276,
      "step": 3477
    },
    {
      "epoch": 0.31097997138769673,
      "grad_norm": 0.13924747566011594,
      "learning_rate": 0.00016138036711544054,
      "loss": 0.6645,
      "step": 3478
    },
    {
      "epoch": 0.31106938483547925,
      "grad_norm": 0.14932343070909765,
      "learning_rate": 0.0001613575017313337,
      "loss": 0.6547,
      "step": 3479
    },
    {
      "epoch": 0.3111587982832618,
      "grad_norm": 0.16213771750244316,
      "learning_rate": 0.00016133463120124731,
      "loss": 0.7316,
      "step": 3480
    },
    {
      "epoch": 0.31124821173104433,
      "grad_norm": 0.1440921467428029,
      "learning_rate": 0.00016131175552709946,
      "loss": 0.6832,
      "step": 3481
    },
    {
      "epoch": 0.3113376251788269,
      "grad_norm": 0.13711528593587943,
      "learning_rate": 0.00016128887471080874,
      "loss": 0.7186,
      "step": 3482
    },
    {
      "epoch": 0.3114270386266094,
      "grad_norm": 0.13908685472593532,
      "learning_rate": 0.00016126598875429408,
      "loss": 0.6786,
      "step": 3483
    },
    {
      "epoch": 0.311516452074392,
      "grad_norm": 0.1251818225796448,
      "learning_rate": 0.00016124309765947498,
      "loss": 0.6533,
      "step": 3484
    },
    {
      "epoch": 0.3116058655221745,
      "grad_norm": 0.13197888007297287,
      "learning_rate": 0.00016122020142827123,
      "loss": 0.6597,
      "step": 3485
    },
    {
      "epoch": 0.3116952789699571,
      "grad_norm": 0.15414608272507027,
      "learning_rate": 0.0001611973000626031,
      "loss": 0.6912,
      "step": 3486
    },
    {
      "epoch": 0.31178469241773965,
      "grad_norm": 0.15294344278690208,
      "learning_rate": 0.00016117439356439132,
      "loss": 0.7285,
      "step": 3487
    },
    {
      "epoch": 0.31187410586552217,
      "grad_norm": 0.14049774135527057,
      "learning_rate": 0.00016115148193555706,
      "loss": 0.6576,
      "step": 3488
    },
    {
      "epoch": 0.31196351931330474,
      "grad_norm": 0.15634858656085387,
      "learning_rate": 0.00016112856517802183,
      "loss": 0.7012,
      "step": 3489
    },
    {
      "epoch": 0.31205293276108725,
      "grad_norm": 0.1295235631208032,
      "learning_rate": 0.0001611056432937077,
      "loss": 0.6969,
      "step": 3490
    },
    {
      "epoch": 0.3121423462088698,
      "grad_norm": 0.1595277719153615,
      "learning_rate": 0.00016108271628453703,
      "loss": 0.6939,
      "step": 3491
    },
    {
      "epoch": 0.31223175965665234,
      "grad_norm": 0.13867798006919244,
      "learning_rate": 0.00016105978415243276,
      "loss": 0.6684,
      "step": 3492
    },
    {
      "epoch": 0.3123211731044349,
      "grad_norm": 0.13454430245128707,
      "learning_rate": 0.00016103684689931807,
      "loss": 0.6534,
      "step": 3493
    },
    {
      "epoch": 0.31241058655221743,
      "grad_norm": 0.1893217897373247,
      "learning_rate": 0.0001610139045271168,
      "loss": 0.3714,
      "step": 3494
    },
    {
      "epoch": 0.3125,
      "grad_norm": 0.13862216803272176,
      "learning_rate": 0.00016099095703775302,
      "loss": 0.6753,
      "step": 3495
    },
    {
      "epoch": 0.31258941344778257,
      "grad_norm": 0.13369555690519078,
      "learning_rate": 0.00016096800443315132,
      "loss": 0.656,
      "step": 3496
    },
    {
      "epoch": 0.3126788268955651,
      "grad_norm": 0.15336682360088175,
      "learning_rate": 0.00016094504671523673,
      "loss": 0.6736,
      "step": 3497
    },
    {
      "epoch": 0.31276824034334766,
      "grad_norm": 0.13498815600866446,
      "learning_rate": 0.00016092208388593469,
      "loss": 0.6961,
      "step": 3498
    },
    {
      "epoch": 0.3128576537911302,
      "grad_norm": 0.15486575528362723,
      "learning_rate": 0.00016089911594717102,
      "loss": 0.6849,
      "step": 3499
    },
    {
      "epoch": 0.31294706723891275,
      "grad_norm": 0.14150365567768816,
      "learning_rate": 0.00016087614290087208,
      "loss": 0.7103,
      "step": 3500
    },
    {
      "epoch": 0.31303648068669526,
      "grad_norm": 0.1449461407675692,
      "learning_rate": 0.00016085316474896452,
      "loss": 0.6887,
      "step": 3501
    },
    {
      "epoch": 0.31312589413447783,
      "grad_norm": 0.14822434663981465,
      "learning_rate": 0.00016083018149337558,
      "loss": 0.6882,
      "step": 3502
    },
    {
      "epoch": 0.31321530758226035,
      "grad_norm": 0.1435320037270502,
      "learning_rate": 0.0001608071931360327,
      "loss": 0.6728,
      "step": 3503
    },
    {
      "epoch": 0.3133047210300429,
      "grad_norm": 0.14322858384556572,
      "learning_rate": 0.00016078419967886402,
      "loss": 0.6604,
      "step": 3504
    },
    {
      "epoch": 0.3133941344778255,
      "grad_norm": 0.15036996670520356,
      "learning_rate": 0.00016076120112379792,
      "loss": 0.6927,
      "step": 3505
    },
    {
      "epoch": 0.313483547925608,
      "grad_norm": 0.13403687674153872,
      "learning_rate": 0.00016073819747276327,
      "loss": 0.7064,
      "step": 3506
    },
    {
      "epoch": 0.3135729613733906,
      "grad_norm": 0.13755088039709634,
      "learning_rate": 0.0001607151887276893,
      "loss": 0.6646,
      "step": 3507
    },
    {
      "epoch": 0.3136623748211731,
      "grad_norm": 0.13155808993754597,
      "learning_rate": 0.00016069217489050584,
      "loss": 0.6538,
      "step": 3508
    },
    {
      "epoch": 0.31375178826895567,
      "grad_norm": 0.15389266798728318,
      "learning_rate": 0.00016066915596314293,
      "loss": 0.6883,
      "step": 3509
    },
    {
      "epoch": 0.3138412017167382,
      "grad_norm": 0.14345045238052564,
      "learning_rate": 0.00016064613194753118,
      "loss": 0.6777,
      "step": 3510
    },
    {
      "epoch": 0.31393061516452075,
      "grad_norm": 0.1630826881288888,
      "learning_rate": 0.0001606231028456016,
      "loss": 0.7077,
      "step": 3511
    },
    {
      "epoch": 0.31402002861230327,
      "grad_norm": 0.13819336971701576,
      "learning_rate": 0.0001606000686592856,
      "loss": 0.6451,
      "step": 3512
    },
    {
      "epoch": 0.31410944206008584,
      "grad_norm": 0.1565888502800002,
      "learning_rate": 0.00016057702939051502,
      "loss": 0.7363,
      "step": 3513
    },
    {
      "epoch": 0.31419885550786836,
      "grad_norm": 0.18462000747438276,
      "learning_rate": 0.00016055398504122214,
      "loss": 0.3813,
      "step": 3514
    },
    {
      "epoch": 0.31428826895565093,
      "grad_norm": 0.15160023350001545,
      "learning_rate": 0.00016053093561333966,
      "loss": 0.7195,
      "step": 3515
    },
    {
      "epoch": 0.3143776824034335,
      "grad_norm": 0.15890518251424715,
      "learning_rate": 0.00016050788110880072,
      "loss": 0.6682,
      "step": 3516
    },
    {
      "epoch": 0.314467095851216,
      "grad_norm": 0.1717367724359548,
      "learning_rate": 0.00016048482152953889,
      "loss": 0.7003,
      "step": 3517
    },
    {
      "epoch": 0.3145565092989986,
      "grad_norm": 0.1453116625954848,
      "learning_rate": 0.0001604617568774881,
      "loss": 0.6494,
      "step": 3518
    },
    {
      "epoch": 0.3146459227467811,
      "grad_norm": 0.1420608698441871,
      "learning_rate": 0.0001604386871545828,
      "loss": 0.6857,
      "step": 3519
    },
    {
      "epoch": 0.3147353361945637,
      "grad_norm": 0.1307115779818909,
      "learning_rate": 0.00016041561236275777,
      "loss": 0.6742,
      "step": 3520
    },
    {
      "epoch": 0.3148247496423462,
      "grad_norm": 0.15192496114498663,
      "learning_rate": 0.00016039253250394833,
      "loss": 0.6876,
      "step": 3521
    },
    {
      "epoch": 0.31491416309012876,
      "grad_norm": 0.15652105011223347,
      "learning_rate": 0.0001603694475800901,
      "loss": 0.664,
      "step": 3522
    },
    {
      "epoch": 0.3150035765379113,
      "grad_norm": 0.1526560475535705,
      "learning_rate": 0.00016034635759311922,
      "loss": 0.6758,
      "step": 3523
    },
    {
      "epoch": 0.31509298998569385,
      "grad_norm": 0.1512496954669721,
      "learning_rate": 0.00016032326254497218,
      "loss": 0.6364,
      "step": 3524
    },
    {
      "epoch": 0.3151824034334764,
      "grad_norm": 0.15927883262020284,
      "learning_rate": 0.000160300162437586,
      "loss": 0.6877,
      "step": 3525
    },
    {
      "epoch": 0.31527181688125894,
      "grad_norm": 0.14975464682989334,
      "learning_rate": 0.00016027705727289802,
      "loss": 0.7101,
      "step": 3526
    },
    {
      "epoch": 0.3153612303290415,
      "grad_norm": 0.1914941457332599,
      "learning_rate": 0.00016025394705284602,
      "loss": 0.4091,
      "step": 3527
    },
    {
      "epoch": 0.315450643776824,
      "grad_norm": 0.1632523603745659,
      "learning_rate": 0.00016023083177936823,
      "loss": 0.6677,
      "step": 3528
    },
    {
      "epoch": 0.3155400572246066,
      "grad_norm": 0.14857571908485992,
      "learning_rate": 0.00016020771145440336,
      "loss": 0.692,
      "step": 3529
    },
    {
      "epoch": 0.3156294706723891,
      "grad_norm": 0.12243043328543643,
      "learning_rate": 0.00016018458607989044,
      "loss": 0.6853,
      "step": 3530
    },
    {
      "epoch": 0.3157188841201717,
      "grad_norm": 0.12286296734161459,
      "learning_rate": 0.00016016145565776895,
      "loss": 0.6666,
      "step": 3531
    },
    {
      "epoch": 0.3158082975679542,
      "grad_norm": 0.14431162809153233,
      "learning_rate": 0.00016013832018997882,
      "loss": 0.6775,
      "step": 3532
    },
    {
      "epoch": 0.31589771101573677,
      "grad_norm": 0.14892723193761556,
      "learning_rate": 0.00016011517967846043,
      "loss": 0.6836,
      "step": 3533
    },
    {
      "epoch": 0.3159871244635193,
      "grad_norm": 0.16253325349060585,
      "learning_rate": 0.00016009203412515455,
      "loss": 0.6751,
      "step": 3534
    },
    {
      "epoch": 0.31607653791130186,
      "grad_norm": 0.14087935903127005,
      "learning_rate": 0.00016006888353200228,
      "loss": 0.7016,
      "step": 3535
    },
    {
      "epoch": 0.31616595135908443,
      "grad_norm": 0.15537447170103913,
      "learning_rate": 0.00016004572790094535,
      "loss": 0.674,
      "step": 3536
    },
    {
      "epoch": 0.31625536480686695,
      "grad_norm": 0.1487266911680862,
      "learning_rate": 0.0001600225672339257,
      "loss": 0.681,
      "step": 3537
    },
    {
      "epoch": 0.3163447782546495,
      "grad_norm": 0.1637060252679842,
      "learning_rate": 0.00015999940153288582,
      "loss": 0.7391,
      "step": 3538
    },
    {
      "epoch": 0.31643419170243203,
      "grad_norm": 0.15814952467889068,
      "learning_rate": 0.00015997623079976863,
      "loss": 0.6448,
      "step": 3539
    },
    {
      "epoch": 0.3165236051502146,
      "grad_norm": 0.14843592222887111,
      "learning_rate": 0.00015995305503651737,
      "loss": 0.6846,
      "step": 3540
    },
    {
      "epoch": 0.3166130185979971,
      "grad_norm": 0.1512070752153583,
      "learning_rate": 0.00015992987424507578,
      "loss": 0.657,
      "step": 3541
    },
    {
      "epoch": 0.3167024320457797,
      "grad_norm": 0.13306001822707533,
      "learning_rate": 0.000159906688427388,
      "loss": 0.6664,
      "step": 3542
    },
    {
      "epoch": 0.3167918454935622,
      "grad_norm": 0.1529472560944139,
      "learning_rate": 0.00015988349758539868,
      "loss": 0.6723,
      "step": 3543
    },
    {
      "epoch": 0.3168812589413448,
      "grad_norm": 0.14071354994733645,
      "learning_rate": 0.00015986030172105266,
      "loss": 0.6872,
      "step": 3544
    },
    {
      "epoch": 0.31697067238912735,
      "grad_norm": 0.15097698951996513,
      "learning_rate": 0.00015983710083629547,
      "loss": 0.6834,
      "step": 3545
    },
    {
      "epoch": 0.31706008583690987,
      "grad_norm": 0.1493071600817909,
      "learning_rate": 0.00015981389493307288,
      "loss": 0.6671,
      "step": 3546
    },
    {
      "epoch": 0.31714949928469244,
      "grad_norm": 0.1481631962984222,
      "learning_rate": 0.0001597906840133312,
      "loss": 0.7244,
      "step": 3547
    },
    {
      "epoch": 0.31723891273247495,
      "grad_norm": 0.16002096908446595,
      "learning_rate": 0.000159767468079017,
      "loss": 0.6931,
      "step": 3548
    },
    {
      "epoch": 0.3173283261802575,
      "grad_norm": 0.16018246203195752,
      "learning_rate": 0.00015974424713207746,
      "loss": 0.7033,
      "step": 3549
    },
    {
      "epoch": 0.31741773962804004,
      "grad_norm": 0.14521124697823018,
      "learning_rate": 0.0001597210211744601,
      "loss": 0.6656,
      "step": 3550
    },
    {
      "epoch": 0.3175071530758226,
      "grad_norm": 0.15466931738271253,
      "learning_rate": 0.0001596977902081128,
      "loss": 0.7093,
      "step": 3551
    },
    {
      "epoch": 0.31759656652360513,
      "grad_norm": 0.12646745711595392,
      "learning_rate": 0.00015967455423498387,
      "loss": 0.6642,
      "step": 3552
    },
    {
      "epoch": 0.3176859799713877,
      "grad_norm": 0.13776339163419904,
      "learning_rate": 0.00015965131325702223,
      "loss": 0.667,
      "step": 3553
    },
    {
      "epoch": 0.3177753934191702,
      "grad_norm": 0.15462152324488732,
      "learning_rate": 0.00015962806727617694,
      "loss": 0.6807,
      "step": 3554
    },
    {
      "epoch": 0.3178648068669528,
      "grad_norm": 0.14803060784945535,
      "learning_rate": 0.00015960481629439768,
      "loss": 0.7083,
      "step": 3555
    },
    {
      "epoch": 0.31795422031473536,
      "grad_norm": 0.1634642260714093,
      "learning_rate": 0.00015958156031363444,
      "loss": 0.6972,
      "step": 3556
    },
    {
      "epoch": 0.3180436337625179,
      "grad_norm": 0.14663220237966812,
      "learning_rate": 0.0001595582993358377,
      "loss": 0.6719,
      "step": 3557
    },
    {
      "epoch": 0.31813304721030045,
      "grad_norm": 0.1282510290972909,
      "learning_rate": 0.00015953503336295835,
      "loss": 0.6938,
      "step": 3558
    },
    {
      "epoch": 0.31822246065808296,
      "grad_norm": 0.14765336832885353,
      "learning_rate": 0.00015951176239694764,
      "loss": 0.6516,
      "step": 3559
    },
    {
      "epoch": 0.31831187410586553,
      "grad_norm": 0.13792430611165152,
      "learning_rate": 0.00015948848643975726,
      "loss": 0.6858,
      "step": 3560
    },
    {
      "epoch": 0.31840128755364805,
      "grad_norm": 0.13959271594273137,
      "learning_rate": 0.00015946520549333938,
      "loss": 0.6082,
      "step": 3561
    },
    {
      "epoch": 0.3184907010014306,
      "grad_norm": 0.16884321920631928,
      "learning_rate": 0.00015944191955964655,
      "loss": 0.6803,
      "step": 3562
    },
    {
      "epoch": 0.31858011444921314,
      "grad_norm": 0.15128351845962515,
      "learning_rate": 0.0001594186286406317,
      "loss": 0.7024,
      "step": 3563
    },
    {
      "epoch": 0.3186695278969957,
      "grad_norm": 0.13436416335882712,
      "learning_rate": 0.00015939533273824822,
      "loss": 0.6629,
      "step": 3564
    },
    {
      "epoch": 0.3187589413447783,
      "grad_norm": 0.15453530542004132,
      "learning_rate": 0.00015937203185444992,
      "loss": 0.7168,
      "step": 3565
    },
    {
      "epoch": 0.3188483547925608,
      "grad_norm": 0.19396280899669163,
      "learning_rate": 0.000159348725991191,
      "loss": 0.3981,
      "step": 3566
    },
    {
      "epoch": 0.31893776824034337,
      "grad_norm": 0.14021694648687694,
      "learning_rate": 0.00015932541515042615,
      "loss": 0.694,
      "step": 3567
    },
    {
      "epoch": 0.3190271816881259,
      "grad_norm": 0.14338293572215458,
      "learning_rate": 0.00015930209933411036,
      "loss": 0.7038,
      "step": 3568
    },
    {
      "epoch": 0.31911659513590845,
      "grad_norm": 0.146149378541778,
      "learning_rate": 0.00015927877854419908,
      "loss": 0.6643,
      "step": 3569
    },
    {
      "epoch": 0.31920600858369097,
      "grad_norm": 0.15796038654444192,
      "learning_rate": 0.00015925545278264828,
      "loss": 0.6979,
      "step": 3570
    },
    {
      "epoch": 0.31929542203147354,
      "grad_norm": 0.16610627534571903,
      "learning_rate": 0.00015923212205141418,
      "loss": 0.704,
      "step": 3571
    },
    {
      "epoch": 0.31938483547925606,
      "grad_norm": 0.14965255680834694,
      "learning_rate": 0.00015920878635245357,
      "loss": 0.6916,
      "step": 3572
    },
    {
      "epoch": 0.31947424892703863,
      "grad_norm": 0.1473432160224945,
      "learning_rate": 0.00015918544568772354,
      "loss": 0.6896,
      "step": 3573
    },
    {
      "epoch": 0.3195636623748212,
      "grad_norm": 0.14236032427587322,
      "learning_rate": 0.00015916210005918164,
      "loss": 0.6622,
      "step": 3574
    },
    {
      "epoch": 0.3196530758226037,
      "grad_norm": 0.14757600701723506,
      "learning_rate": 0.00015913874946878588,
      "loss": 0.6445,
      "step": 3575
    },
    {
      "epoch": 0.3197424892703863,
      "grad_norm": 0.14637586574443978,
      "learning_rate": 0.00015911539391849462,
      "loss": 0.7015,
      "step": 3576
    },
    {
      "epoch": 0.3198319027181688,
      "grad_norm": 0.16005570512565784,
      "learning_rate": 0.00015909203341026666,
      "loss": 0.7003,
      "step": 3577
    },
    {
      "epoch": 0.3199213161659514,
      "grad_norm": 0.1486062407544593,
      "learning_rate": 0.00015906866794606126,
      "loss": 0.7167,
      "step": 3578
    },
    {
      "epoch": 0.3200107296137339,
      "grad_norm": 0.1369674441725178,
      "learning_rate": 0.00015904529752783794,
      "loss": 0.6726,
      "step": 3579
    },
    {
      "epoch": 0.32010014306151646,
      "grad_norm": 0.13733853592583561,
      "learning_rate": 0.00015902192215755688,
      "loss": 0.6779,
      "step": 3580
    },
    {
      "epoch": 0.320189556509299,
      "grad_norm": 0.1444762813276095,
      "learning_rate": 0.00015899854183717852,
      "loss": 0.6338,
      "step": 3581
    },
    {
      "epoch": 0.32027896995708155,
      "grad_norm": 0.14751924028903002,
      "learning_rate": 0.00015897515656866363,
      "loss": 0.6596,
      "step": 3582
    },
    {
      "epoch": 0.32036838340486407,
      "grad_norm": 0.17724071063541255,
      "learning_rate": 0.00015895176635397364,
      "loss": 0.7361,
      "step": 3583
    },
    {
      "epoch": 0.32045779685264664,
      "grad_norm": 0.1406408682581235,
      "learning_rate": 0.00015892837119507014,
      "loss": 0.6595,
      "step": 3584
    },
    {
      "epoch": 0.3205472103004292,
      "grad_norm": 0.16302122581846362,
      "learning_rate": 0.0001589049710939154,
      "loss": 0.6927,
      "step": 3585
    },
    {
      "epoch": 0.3206366237482117,
      "grad_norm": 0.14626856781810385,
      "learning_rate": 0.0001588815660524718,
      "loss": 0.6956,
      "step": 3586
    },
    {
      "epoch": 0.3207260371959943,
      "grad_norm": 0.15848550627303226,
      "learning_rate": 0.0001588581560727024,
      "loss": 0.7317,
      "step": 3587
    },
    {
      "epoch": 0.3208154506437768,
      "grad_norm": 0.22100381879167005,
      "learning_rate": 0.00015883474115657056,
      "loss": 0.3752,
      "step": 3588
    },
    {
      "epoch": 0.3209048640915594,
      "grad_norm": 0.14350525552690369,
      "learning_rate": 0.00015881132130603998,
      "loss": 0.7012,
      "step": 3589
    },
    {
      "epoch": 0.3209942775393419,
      "grad_norm": 0.1652253382869622,
      "learning_rate": 0.00015878789652307496,
      "loss": 0.71,
      "step": 3590
    },
    {
      "epoch": 0.32108369098712447,
      "grad_norm": 0.1385911238700958,
      "learning_rate": 0.00015876446680964,
      "loss": 0.6665,
      "step": 3591
    },
    {
      "epoch": 0.321173104434907,
      "grad_norm": 0.1401506848211704,
      "learning_rate": 0.00015874103216770023,
      "loss": 0.6739,
      "step": 3592
    },
    {
      "epoch": 0.32126251788268956,
      "grad_norm": 0.14717624471363408,
      "learning_rate": 0.00015871759259922097,
      "loss": 0.7022,
      "step": 3593
    },
    {
      "epoch": 0.32135193133047213,
      "grad_norm": 0.14715806820657962,
      "learning_rate": 0.0001586941481061682,
      "loss": 0.6954,
      "step": 3594
    },
    {
      "epoch": 0.32144134477825465,
      "grad_norm": 0.15352835566954315,
      "learning_rate": 0.0001586706986905081,
      "loss": 0.6735,
      "step": 3595
    },
    {
      "epoch": 0.3215307582260372,
      "grad_norm": 0.13923281398853837,
      "learning_rate": 0.00015864724435420732,
      "loss": 0.7246,
      "step": 3596
    },
    {
      "epoch": 0.32162017167381973,
      "grad_norm": 0.16151130072275358,
      "learning_rate": 0.000158623785099233,
      "loss": 0.6771,
      "step": 3597
    },
    {
      "epoch": 0.3217095851216023,
      "grad_norm": 0.15236365902915402,
      "learning_rate": 0.0001586003209275526,
      "loss": 0.6886,
      "step": 3598
    },
    {
      "epoch": 0.3217989985693848,
      "grad_norm": 0.16268838987191778,
      "learning_rate": 0.00015857685184113412,
      "loss": 0.7065,
      "step": 3599
    },
    {
      "epoch": 0.3218884120171674,
      "grad_norm": 0.1481258361584457,
      "learning_rate": 0.00015855337784194577,
      "loss": 0.7045,
      "step": 3600
    },
    {
      "epoch": 0.3219778254649499,
      "grad_norm": 0.1519618016581749,
      "learning_rate": 0.00015852989893195635,
      "loss": 0.7268,
      "step": 3601
    },
    {
      "epoch": 0.3220672389127325,
      "grad_norm": 0.16277120648727664,
      "learning_rate": 0.00015850641511313496,
      "loss": 0.7096,
      "step": 3602
    },
    {
      "epoch": 0.322156652360515,
      "grad_norm": 0.16110954762011845,
      "learning_rate": 0.00015848292638745125,
      "loss": 0.7093,
      "step": 3603
    },
    {
      "epoch": 0.32224606580829757,
      "grad_norm": 0.14254461167158966,
      "learning_rate": 0.0001584594327568751,
      "loss": 0.678,
      "step": 3604
    },
    {
      "epoch": 0.32233547925608014,
      "grad_norm": 0.1851571384658795,
      "learning_rate": 0.00015843593422337695,
      "loss": 0.7426,
      "step": 3605
    },
    {
      "epoch": 0.32242489270386265,
      "grad_norm": 0.13707360754022618,
      "learning_rate": 0.00015841243078892756,
      "loss": 0.6651,
      "step": 3606
    },
    {
      "epoch": 0.3225143061516452,
      "grad_norm": 0.157239624260074,
      "learning_rate": 0.0001583889224554981,
      "loss": 0.6894,
      "step": 3607
    },
    {
      "epoch": 0.32260371959942774,
      "grad_norm": 0.15800228433609675,
      "learning_rate": 0.0001583654092250603,
      "loss": 0.7013,
      "step": 3608
    },
    {
      "epoch": 0.3226931330472103,
      "grad_norm": 0.14883342055072354,
      "learning_rate": 0.00015834189109958607,
      "loss": 0.6819,
      "step": 3609
    },
    {
      "epoch": 0.32278254649499283,
      "grad_norm": 0.1405960077015638,
      "learning_rate": 0.00015831836808104788,
      "loss": 0.7024,
      "step": 3610
    },
    {
      "epoch": 0.3228719599427754,
      "grad_norm": 0.1323027516972473,
      "learning_rate": 0.0001582948401714186,
      "loss": 0.6883,
      "step": 3611
    },
    {
      "epoch": 0.3229613733905579,
      "grad_norm": 0.14177444276066797,
      "learning_rate": 0.0001582713073726715,
      "loss": 0.6842,
      "step": 3612
    },
    {
      "epoch": 0.3230507868383405,
      "grad_norm": 0.14949674416506248,
      "learning_rate": 0.00015824776968678024,
      "loss": 0.6592,
      "step": 3613
    },
    {
      "epoch": 0.32314020028612306,
      "grad_norm": 0.15494351487860886,
      "learning_rate": 0.00015822422711571883,
      "loss": 0.6997,
      "step": 3614
    },
    {
      "epoch": 0.3232296137339056,
      "grad_norm": 0.13516141995829978,
      "learning_rate": 0.00015820067966146185,
      "loss": 0.6624,
      "step": 3615
    },
    {
      "epoch": 0.32331902718168815,
      "grad_norm": 0.1305960769262559,
      "learning_rate": 0.00015817712732598413,
      "loss": 0.6609,
      "step": 3616
    },
    {
      "epoch": 0.32340844062947066,
      "grad_norm": 0.14220132795173857,
      "learning_rate": 0.00015815357011126103,
      "loss": 0.6962,
      "step": 3617
    },
    {
      "epoch": 0.32349785407725323,
      "grad_norm": 0.1521051082649202,
      "learning_rate": 0.0001581300080192682,
      "loss": 0.6908,
      "step": 3618
    },
    {
      "epoch": 0.32358726752503575,
      "grad_norm": 0.13585601238744346,
      "learning_rate": 0.00015810644105198184,
      "loss": 0.6797,
      "step": 3619
    },
    {
      "epoch": 0.3236766809728183,
      "grad_norm": 0.14873086481349213,
      "learning_rate": 0.0001580828692113784,
      "loss": 0.6792,
      "step": 3620
    },
    {
      "epoch": 0.32376609442060084,
      "grad_norm": 0.15585750420089828,
      "learning_rate": 0.0001580592924994349,
      "loss": 0.684,
      "step": 3621
    },
    {
      "epoch": 0.3238555078683834,
      "grad_norm": 0.14931488249102726,
      "learning_rate": 0.00015803571091812865,
      "loss": 0.6868,
      "step": 3622
    },
    {
      "epoch": 0.323944921316166,
      "grad_norm": 0.1486849951083825,
      "learning_rate": 0.0001580121244694374,
      "loss": 0.7161,
      "step": 3623
    },
    {
      "epoch": 0.3240343347639485,
      "grad_norm": 0.15854351440319525,
      "learning_rate": 0.00015798853315533931,
      "loss": 0.7025,
      "step": 3624
    },
    {
      "epoch": 0.32412374821173107,
      "grad_norm": 0.145222889481304,
      "learning_rate": 0.00015796493697781304,
      "loss": 0.6374,
      "step": 3625
    },
    {
      "epoch": 0.3242131616595136,
      "grad_norm": 0.1463427377462643,
      "learning_rate": 0.0001579413359388375,
      "loss": 0.6576,
      "step": 3626
    },
    {
      "epoch": 0.32430257510729615,
      "grad_norm": 0.1624903304268763,
      "learning_rate": 0.00015791773004039206,
      "loss": 0.6791,
      "step": 3627
    },
    {
      "epoch": 0.32439198855507867,
      "grad_norm": 0.16265341560333638,
      "learning_rate": 0.00015789411928445653,
      "loss": 0.7065,
      "step": 3628
    },
    {
      "epoch": 0.32448140200286124,
      "grad_norm": 0.15820559496485007,
      "learning_rate": 0.00015787050367301118,
      "loss": 0.6693,
      "step": 3629
    },
    {
      "epoch": 0.32457081545064376,
      "grad_norm": 0.15038765692330422,
      "learning_rate": 0.00015784688320803655,
      "loss": 0.6975,
      "step": 3630
    },
    {
      "epoch": 0.32466022889842633,
      "grad_norm": 0.14607957745256567,
      "learning_rate": 0.00015782325789151367,
      "loss": 0.7213,
      "step": 3631
    },
    {
      "epoch": 0.32474964234620884,
      "grad_norm": 0.1382596662106252,
      "learning_rate": 0.00015779962772542402,
      "loss": 0.6712,
      "step": 3632
    },
    {
      "epoch": 0.3248390557939914,
      "grad_norm": 0.15209678265700857,
      "learning_rate": 0.0001577759927117494,
      "loss": 0.6878,
      "step": 3633
    },
    {
      "epoch": 0.324928469241774,
      "grad_norm": 0.1370163500320507,
      "learning_rate": 0.00015775235285247203,
      "loss": 0.6638,
      "step": 3634
    },
    {
      "epoch": 0.3250178826895565,
      "grad_norm": 0.144641336235448,
      "learning_rate": 0.00015772870814957453,
      "loss": 0.6626,
      "step": 3635
    },
    {
      "epoch": 0.3251072961373391,
      "grad_norm": 0.14961796783822479,
      "learning_rate": 0.00015770505860504005,
      "loss": 0.6879,
      "step": 3636
    },
    {
      "epoch": 0.3251967095851216,
      "grad_norm": 0.14465844874985248,
      "learning_rate": 0.000157681404220852,
      "loss": 0.6582,
      "step": 3637
    },
    {
      "epoch": 0.32528612303290416,
      "grad_norm": 0.1375850842478536,
      "learning_rate": 0.00015765774499899423,
      "loss": 0.6668,
      "step": 3638
    },
    {
      "epoch": 0.3253755364806867,
      "grad_norm": 0.1487291874595583,
      "learning_rate": 0.00015763408094145103,
      "loss": 0.7406,
      "step": 3639
    },
    {
      "epoch": 0.32546494992846925,
      "grad_norm": 0.16016277264652123,
      "learning_rate": 0.00015761041205020703,
      "loss": 0.7261,
      "step": 3640
    },
    {
      "epoch": 0.32555436337625177,
      "grad_norm": 0.13659939873273497,
      "learning_rate": 0.00015758673832724738,
      "loss": 0.6633,
      "step": 3641
    },
    {
      "epoch": 0.32564377682403434,
      "grad_norm": 0.1377668672151151,
      "learning_rate": 0.00015756305977455753,
      "loss": 0.6927,
      "step": 3642
    },
    {
      "epoch": 0.3257331902718169,
      "grad_norm": 0.14304382078524377,
      "learning_rate": 0.00015753937639412336,
      "loss": 0.6661,
      "step": 3643
    },
    {
      "epoch": 0.3258226037195994,
      "grad_norm": 0.16875525471592076,
      "learning_rate": 0.00015751568818793117,
      "loss": 0.7189,
      "step": 3644
    },
    {
      "epoch": 0.325912017167382,
      "grad_norm": 0.1494759618389669,
      "learning_rate": 0.0001574919951579677,
      "loss": 0.7231,
      "step": 3645
    },
    {
      "epoch": 0.3260014306151645,
      "grad_norm": 0.1373791999013063,
      "learning_rate": 0.00015746829730622,
      "loss": 0.7119,
      "step": 3646
    },
    {
      "epoch": 0.3260908440629471,
      "grad_norm": 0.14921325008938488,
      "learning_rate": 0.00015744459463467564,
      "loss": 0.7218,
      "step": 3647
    },
    {
      "epoch": 0.3261802575107296,
      "grad_norm": 0.14690495335124112,
      "learning_rate": 0.00015742088714532247,
      "loss": 0.7026,
      "step": 3648
    },
    {
      "epoch": 0.32626967095851217,
      "grad_norm": 0.14221486023032603,
      "learning_rate": 0.00015739717484014888,
      "loss": 0.7018,
      "step": 3649
    },
    {
      "epoch": 0.3263590844062947,
      "grad_norm": 0.14860839182492927,
      "learning_rate": 0.00015737345772114355,
      "loss": 0.6972,
      "step": 3650
    },
    {
      "epoch": 0.32644849785407726,
      "grad_norm": 0.13523936898074598,
      "learning_rate": 0.0001573497357902956,
      "loss": 0.6834,
      "step": 3651
    },
    {
      "epoch": 0.3265379113018598,
      "grad_norm": 0.16100063864192246,
      "learning_rate": 0.00015732600904959455,
      "loss": 0.6566,
      "step": 3652
    },
    {
      "epoch": 0.32662732474964234,
      "grad_norm": 0.13997698320882523,
      "learning_rate": 0.00015730227750103038,
      "loss": 0.686,
      "step": 3653
    },
    {
      "epoch": 0.3267167381974249,
      "grad_norm": 0.16633656031747027,
      "learning_rate": 0.0001572785411465934,
      "loss": 0.7593,
      "step": 3654
    },
    {
      "epoch": 0.32680615164520743,
      "grad_norm": 0.15364738555892163,
      "learning_rate": 0.00015725479998827434,
      "loss": 0.6865,
      "step": 3655
    },
    {
      "epoch": 0.32689556509299,
      "grad_norm": 0.13959943593285448,
      "learning_rate": 0.00015723105402806436,
      "loss": 0.7168,
      "step": 3656
    },
    {
      "epoch": 0.3269849785407725,
      "grad_norm": 0.15268732677558883,
      "learning_rate": 0.000157207303267955,
      "loss": 0.6795,
      "step": 3657
    },
    {
      "epoch": 0.3270743919885551,
      "grad_norm": 0.15736735890468087,
      "learning_rate": 0.00015718354770993817,
      "loss": 0.7593,
      "step": 3658
    },
    {
      "epoch": 0.3271638054363376,
      "grad_norm": 0.15788039855048985,
      "learning_rate": 0.00015715978735600627,
      "loss": 0.7163,
      "step": 3659
    },
    {
      "epoch": 0.3272532188841202,
      "grad_norm": 0.138066107158712,
      "learning_rate": 0.00015713602220815203,
      "loss": 0.6695,
      "step": 3660
    },
    {
      "epoch": 0.3273426323319027,
      "grad_norm": 0.15806137186441538,
      "learning_rate": 0.00015711225226836865,
      "loss": 0.6661,
      "step": 3661
    },
    {
      "epoch": 0.32743204577968527,
      "grad_norm": 0.16085222719907624,
      "learning_rate": 0.00015708847753864963,
      "loss": 0.6773,
      "step": 3662
    },
    {
      "epoch": 0.32752145922746784,
      "grad_norm": 0.13931486129097548,
      "learning_rate": 0.0001570646980209889,
      "loss": 0.6735,
      "step": 3663
    },
    {
      "epoch": 0.32761087267525035,
      "grad_norm": 0.15379338522143415,
      "learning_rate": 0.0001570409137173809,
      "loss": 0.7046,
      "step": 3664
    },
    {
      "epoch": 0.3277002861230329,
      "grad_norm": 0.134099549183172,
      "learning_rate": 0.00015701712462982037,
      "loss": 0.6506,
      "step": 3665
    },
    {
      "epoch": 0.32778969957081544,
      "grad_norm": 0.14972606936791483,
      "learning_rate": 0.0001569933307603024,
      "loss": 0.6847,
      "step": 3666
    },
    {
      "epoch": 0.327879113018598,
      "grad_norm": 0.15496827619849668,
      "learning_rate": 0.00015696953211082268,
      "loss": 0.6875,
      "step": 3667
    },
    {
      "epoch": 0.3279685264663805,
      "grad_norm": 0.14990768569548213,
      "learning_rate": 0.00015694572868337706,
      "loss": 0.6724,
      "step": 3668
    },
    {
      "epoch": 0.3280579399141631,
      "grad_norm": 0.16744883636900568,
      "learning_rate": 0.00015692192047996194,
      "loss": 0.6652,
      "step": 3669
    },
    {
      "epoch": 0.3281473533619456,
      "grad_norm": 0.14442070186705525,
      "learning_rate": 0.00015689810750257413,
      "loss": 0.6794,
      "step": 3670
    },
    {
      "epoch": 0.3282367668097282,
      "grad_norm": 0.16362138200849877,
      "learning_rate": 0.00015687428975321078,
      "loss": 0.7075,
      "step": 3671
    },
    {
      "epoch": 0.3283261802575107,
      "grad_norm": 0.13781215809597988,
      "learning_rate": 0.00015685046723386937,
      "loss": 0.6941,
      "step": 3672
    },
    {
      "epoch": 0.3284155937052933,
      "grad_norm": 0.16018783904568726,
      "learning_rate": 0.00015682663994654795,
      "loss": 0.6971,
      "step": 3673
    },
    {
      "epoch": 0.32850500715307585,
      "grad_norm": 0.12426237271474216,
      "learning_rate": 0.0001568028078932449,
      "loss": 0.6315,
      "step": 3674
    },
    {
      "epoch": 0.32859442060085836,
      "grad_norm": 0.17220567349552535,
      "learning_rate": 0.00015677897107595892,
      "loss": 0.6884,
      "step": 3675
    },
    {
      "epoch": 0.32868383404864093,
      "grad_norm": 0.14547888876592793,
      "learning_rate": 0.0001567551294966892,
      "loss": 0.667,
      "step": 3676
    },
    {
      "epoch": 0.32877324749642345,
      "grad_norm": 0.1685512919649906,
      "learning_rate": 0.00015673128315743534,
      "loss": 0.7071,
      "step": 3677
    },
    {
      "epoch": 0.328862660944206,
      "grad_norm": 0.14337404247346927,
      "learning_rate": 0.00015670743206019723,
      "loss": 0.6694,
      "step": 3678
    },
    {
      "epoch": 0.32895207439198854,
      "grad_norm": 0.15556705150632588,
      "learning_rate": 0.00015668357620697533,
      "loss": 0.6823,
      "step": 3679
    },
    {
      "epoch": 0.3290414878397711,
      "grad_norm": 0.14564942033284684,
      "learning_rate": 0.00015665971559977035,
      "loss": 0.6785,
      "step": 3680
    },
    {
      "epoch": 0.3291309012875536,
      "grad_norm": 0.15541114529754152,
      "learning_rate": 0.00015663585024058342,
      "loss": 0.6758,
      "step": 3681
    },
    {
      "epoch": 0.3292203147353362,
      "grad_norm": 0.1623027439469434,
      "learning_rate": 0.00015661198013141613,
      "loss": 0.7269,
      "step": 3682
    },
    {
      "epoch": 0.32930972818311877,
      "grad_norm": 0.14757895744624558,
      "learning_rate": 0.00015658810527427046,
      "loss": 0.7224,
      "step": 3683
    },
    {
      "epoch": 0.3293991416309013,
      "grad_norm": 0.15416609623735206,
      "learning_rate": 0.00015656422567114872,
      "loss": 0.712,
      "step": 3684
    },
    {
      "epoch": 0.32948855507868385,
      "grad_norm": 0.140119302863469,
      "learning_rate": 0.0001565403413240537,
      "loss": 0.6768,
      "step": 3685
    },
    {
      "epoch": 0.32957796852646637,
      "grad_norm": 0.19372217104296544,
      "learning_rate": 0.00015651645223498854,
      "loss": 0.4078,
      "step": 3686
    },
    {
      "epoch": 0.32966738197424894,
      "grad_norm": 0.16707775366348526,
      "learning_rate": 0.00015649255840595675,
      "loss": 0.7098,
      "step": 3687
    },
    {
      "epoch": 0.32975679542203146,
      "grad_norm": 0.17435054865307445,
      "learning_rate": 0.00015646865983896238,
      "loss": 0.3857,
      "step": 3688
    },
    {
      "epoch": 0.32984620886981403,
      "grad_norm": 0.17518412565091881,
      "learning_rate": 0.00015644475653600964,
      "loss": 0.716,
      "step": 3689
    },
    {
      "epoch": 0.32993562231759654,
      "grad_norm": 0.15747673648553745,
      "learning_rate": 0.00015642084849910336,
      "loss": 0.692,
      "step": 3690
    },
    {
      "epoch": 0.3300250357653791,
      "grad_norm": 0.14640261626891257,
      "learning_rate": 0.00015639693573024865,
      "loss": 0.6674,
      "step": 3691
    },
    {
      "epoch": 0.3301144492131617,
      "grad_norm": 0.16677352495409795,
      "learning_rate": 0.0001563730182314511,
      "loss": 0.7103,
      "step": 3692
    },
    {
      "epoch": 0.3302038626609442,
      "grad_norm": 0.14170137726945578,
      "learning_rate": 0.0001563490960047165,
      "loss": 0.6789,
      "step": 3693
    },
    {
      "epoch": 0.3302932761087268,
      "grad_norm": 0.15255916725658517,
      "learning_rate": 0.00015632516905205135,
      "loss": 0.6842,
      "step": 3694
    },
    {
      "epoch": 0.3303826895565093,
      "grad_norm": 0.15154987513730317,
      "learning_rate": 0.00015630123737546224,
      "loss": 0.688,
      "step": 3695
    },
    {
      "epoch": 0.33047210300429186,
      "grad_norm": 0.15724467155529598,
      "learning_rate": 0.00015627730097695638,
      "loss": 0.691,
      "step": 3696
    },
    {
      "epoch": 0.3305615164520744,
      "grad_norm": 0.14298689651846117,
      "learning_rate": 0.00015625335985854126,
      "loss": 0.6604,
      "step": 3697
    },
    {
      "epoch": 0.33065092989985695,
      "grad_norm": 0.143916463165982,
      "learning_rate": 0.00015622941402222479,
      "loss": 0.6618,
      "step": 3698
    },
    {
      "epoch": 0.33074034334763946,
      "grad_norm": 0.15508104701058092,
      "learning_rate": 0.00015620546347001524,
      "loss": 0.6853,
      "step": 3699
    },
    {
      "epoch": 0.33082975679542204,
      "grad_norm": 0.1652999790281651,
      "learning_rate": 0.00015618150820392136,
      "loss": 0.6639,
      "step": 3700
    },
    {
      "epoch": 0.33091917024320455,
      "grad_norm": 0.1284468257409543,
      "learning_rate": 0.00015615754822595224,
      "loss": 0.6148,
      "step": 3701
    },
    {
      "epoch": 0.3310085836909871,
      "grad_norm": 0.14655780401307889,
      "learning_rate": 0.00015613358353811738,
      "loss": 0.7119,
      "step": 3702
    },
    {
      "epoch": 0.3310979971387697,
      "grad_norm": 0.15136354037617036,
      "learning_rate": 0.00015610961414242664,
      "loss": 0.6875,
      "step": 3703
    },
    {
      "epoch": 0.3311874105865522,
      "grad_norm": 0.1794151545192315,
      "learning_rate": 0.00015608564004089033,
      "loss": 0.6774,
      "step": 3704
    },
    {
      "epoch": 0.3312768240343348,
      "grad_norm": 0.16277808364840993,
      "learning_rate": 0.00015606166123551912,
      "loss": 0.6966,
      "step": 3705
    },
    {
      "epoch": 0.3313662374821173,
      "grad_norm": 0.1270010354930184,
      "learning_rate": 0.00015603767772832413,
      "loss": 0.6525,
      "step": 3706
    },
    {
      "epoch": 0.33145565092989987,
      "grad_norm": 0.14129552615060484,
      "learning_rate": 0.0001560136895213167,
      "loss": 0.6558,
      "step": 3707
    },
    {
      "epoch": 0.3315450643776824,
      "grad_norm": 0.16782057532215752,
      "learning_rate": 0.00015598969661650888,
      "loss": 0.736,
      "step": 3708
    },
    {
      "epoch": 0.33163447782546496,
      "grad_norm": 0.1379278742389458,
      "learning_rate": 0.00015596569901591277,
      "loss": 0.6503,
      "step": 3709
    },
    {
      "epoch": 0.3317238912732475,
      "grad_norm": 0.13617093985620926,
      "learning_rate": 0.00015594169672154107,
      "loss": 0.6142,
      "step": 3710
    },
    {
      "epoch": 0.33181330472103004,
      "grad_norm": 0.13585098636904297,
      "learning_rate": 0.00015591768973540683,
      "loss": 0.6542,
      "step": 3711
    },
    {
      "epoch": 0.3319027181688126,
      "grad_norm": 0.1310975651858622,
      "learning_rate": 0.00015589367805952348,
      "loss": 0.6393,
      "step": 3712
    },
    {
      "epoch": 0.33199213161659513,
      "grad_norm": 0.15346793771787756,
      "learning_rate": 0.00015586966169590488,
      "loss": 0.6872,
      "step": 3713
    },
    {
      "epoch": 0.3320815450643777,
      "grad_norm": 0.14770876074094239,
      "learning_rate": 0.0001558456406465652,
      "loss": 0.6792,
      "step": 3714
    },
    {
      "epoch": 0.3321709585121602,
      "grad_norm": 0.3196152036435191,
      "learning_rate": 0.00015582161491351908,
      "loss": 0.4016,
      "step": 3715
    },
    {
      "epoch": 0.3322603719599428,
      "grad_norm": 0.14941346428401525,
      "learning_rate": 0.00015579758449878157,
      "loss": 0.6892,
      "step": 3716
    },
    {
      "epoch": 0.3323497854077253,
      "grad_norm": 0.14664818246567018,
      "learning_rate": 0.000155773549404368,
      "loss": 0.7029,
      "step": 3717
    },
    {
      "epoch": 0.3324391988555079,
      "grad_norm": 0.13376235108331083,
      "learning_rate": 0.00015574950963229419,
      "loss": 0.6803,
      "step": 3718
    },
    {
      "epoch": 0.3325286123032904,
      "grad_norm": 0.15837307888038615,
      "learning_rate": 0.00015572546518457636,
      "loss": 0.7302,
      "step": 3719
    },
    {
      "epoch": 0.33261802575107297,
      "grad_norm": 0.15814466972498448,
      "learning_rate": 0.00015570141606323105,
      "loss": 0.6872,
      "step": 3720
    },
    {
      "epoch": 0.3327074391988555,
      "grad_norm": 0.1580066731544759,
      "learning_rate": 0.00015567736227027525,
      "loss": 0.7292,
      "step": 3721
    },
    {
      "epoch": 0.33279685264663805,
      "grad_norm": 0.13067623487983915,
      "learning_rate": 0.00015565330380772633,
      "loss": 0.6468,
      "step": 3722
    },
    {
      "epoch": 0.3328862660944206,
      "grad_norm": 0.16417394966907198,
      "learning_rate": 0.00015562924067760202,
      "loss": 0.7147,
      "step": 3723
    },
    {
      "epoch": 0.33297567954220314,
      "grad_norm": 0.16428922061298554,
      "learning_rate": 0.00015560517288192046,
      "loss": 0.674,
      "step": 3724
    },
    {
      "epoch": 0.3330650929899857,
      "grad_norm": 0.13838293464812867,
      "learning_rate": 0.00015558110042270023,
      "loss": 0.638,
      "step": 3725
    },
    {
      "epoch": 0.3331545064377682,
      "grad_norm": 0.13753879190296325,
      "learning_rate": 0.00015555702330196023,
      "loss": 0.6607,
      "step": 3726
    },
    {
      "epoch": 0.3332439198855508,
      "grad_norm": 0.1335620728511058,
      "learning_rate": 0.00015553294152171977,
      "loss": 0.6748,
      "step": 3727
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 0.1414089516378772,
      "learning_rate": 0.00015550885508399856,
      "loss": 0.7006,
      "step": 3728
    },
    {
      "epoch": 0.3334227467811159,
      "grad_norm": 0.14082451982923933,
      "learning_rate": 0.00015548476399081674,
      "loss": 0.6581,
      "step": 3729
    },
    {
      "epoch": 0.3335121602288984,
      "grad_norm": 0.15921344860748063,
      "learning_rate": 0.0001554606682441948,
      "loss": 0.6856,
      "step": 3730
    },
    {
      "epoch": 0.333601573676681,
      "grad_norm": 0.15247439964944143,
      "learning_rate": 0.00015543656784615354,
      "loss": 0.6988,
      "step": 3731
    },
    {
      "epoch": 0.33369098712446355,
      "grad_norm": 0.36975978865393083,
      "learning_rate": 0.00015541246279871432,
      "loss": 0.4358,
      "step": 3732
    },
    {
      "epoch": 0.33378040057224606,
      "grad_norm": 0.17473741725420022,
      "learning_rate": 0.00015538835310389875,
      "loss": 0.7299,
      "step": 3733
    },
    {
      "epoch": 0.33386981402002863,
      "grad_norm": 0.23413931428726925,
      "learning_rate": 0.00015536423876372888,
      "loss": 0.4062,
      "step": 3734
    },
    {
      "epoch": 0.33395922746781115,
      "grad_norm": 0.1608683641018746,
      "learning_rate": 0.00015534011978022717,
      "loss": 0.6879,
      "step": 3735
    },
    {
      "epoch": 0.3340486409155937,
      "grad_norm": 0.12464872417757437,
      "learning_rate": 0.00015531599615541648,
      "loss": 0.6226,
      "step": 3736
    },
    {
      "epoch": 0.33413805436337624,
      "grad_norm": 0.1586555740936065,
      "learning_rate": 0.00015529186789131996,
      "loss": 0.6549,
      "step": 3737
    },
    {
      "epoch": 0.3342274678111588,
      "grad_norm": 0.1726513712234149,
      "learning_rate": 0.0001552677349899613,
      "loss": 0.7054,
      "step": 3738
    },
    {
      "epoch": 0.3343168812589413,
      "grad_norm": 0.17445603757993708,
      "learning_rate": 0.0001552435974533644,
      "loss": 0.7308,
      "step": 3739
    },
    {
      "epoch": 0.3344062947067239,
      "grad_norm": 0.1545512666749799,
      "learning_rate": 0.00015521945528355376,
      "loss": 0.662,
      "step": 3740
    },
    {
      "epoch": 0.3344957081545064,
      "grad_norm": 0.3026289243716081,
      "learning_rate": 0.00015519530848255407,
      "loss": 0.422,
      "step": 3741
    },
    {
      "epoch": 0.334585121602289,
      "grad_norm": 0.14422405704260058,
      "learning_rate": 0.00015517115705239047,
      "loss": 0.6501,
      "step": 3742
    },
    {
      "epoch": 0.33467453505007155,
      "grad_norm": 0.1400025277296751,
      "learning_rate": 0.0001551470009950886,
      "loss": 0.6348,
      "step": 3743
    },
    {
      "epoch": 0.33476394849785407,
      "grad_norm": 0.14771457164896432,
      "learning_rate": 0.00015512284031267437,
      "loss": 0.7165,
      "step": 3744
    },
    {
      "epoch": 0.33485336194563664,
      "grad_norm": 0.13731305344592118,
      "learning_rate": 0.00015509867500717407,
      "loss": 0.6749,
      "step": 3745
    },
    {
      "epoch": 0.33494277539341916,
      "grad_norm": 0.12560580356514955,
      "learning_rate": 0.00015507450508061443,
      "loss": 0.6759,
      "step": 3746
    },
    {
      "epoch": 0.33503218884120173,
      "grad_norm": 0.13541833289621144,
      "learning_rate": 0.0001550503305350226,
      "loss": 0.7291,
      "step": 3747
    },
    {
      "epoch": 0.33512160228898424,
      "grad_norm": 0.15251305379326746,
      "learning_rate": 0.000155026151372426,
      "loss": 0.6929,
      "step": 3748
    },
    {
      "epoch": 0.3352110157367668,
      "grad_norm": 0.17129071366752624,
      "learning_rate": 0.00015500196759485254,
      "loss": 0.7058,
      "step": 3749
    },
    {
      "epoch": 0.33530042918454933,
      "grad_norm": 0.14214329741215803,
      "learning_rate": 0.0001549777792043305,
      "loss": 0.7112,
      "step": 3750
    },
    {
      "epoch": 0.3353898426323319,
      "grad_norm": 0.1550886187386279,
      "learning_rate": 0.0001549535862028885,
      "loss": 0.6937,
      "step": 3751
    },
    {
      "epoch": 0.3354792560801145,
      "grad_norm": 0.16167727561719944,
      "learning_rate": 0.0001549293885925556,
      "loss": 0.733,
      "step": 3752
    },
    {
      "epoch": 0.335568669527897,
      "grad_norm": 0.14047909523604973,
      "learning_rate": 0.0001549051863753612,
      "loss": 0.6761,
      "step": 3753
    },
    {
      "epoch": 0.33565808297567956,
      "grad_norm": 0.11510835805451607,
      "learning_rate": 0.00015488097955333515,
      "loss": 0.6287,
      "step": 3754
    },
    {
      "epoch": 0.3357474964234621,
      "grad_norm": 0.14253608088842182,
      "learning_rate": 0.00015485676812850761,
      "loss": 0.6571,
      "step": 3755
    },
    {
      "epoch": 0.33583690987124465,
      "grad_norm": 0.14879030624839665,
      "learning_rate": 0.0001548325521029092,
      "loss": 0.6796,
      "step": 3756
    },
    {
      "epoch": 0.33592632331902716,
      "grad_norm": 0.1696288625779196,
      "learning_rate": 0.00015480833147857087,
      "loss": 0.677,
      "step": 3757
    },
    {
      "epoch": 0.33601573676680974,
      "grad_norm": 0.16552240870343932,
      "learning_rate": 0.00015478410625752393,
      "loss": 0.6834,
      "step": 3758
    },
    {
      "epoch": 0.33610515021459225,
      "grad_norm": 0.14360438158274486,
      "learning_rate": 0.00015475987644180016,
      "loss": 0.6611,
      "step": 3759
    },
    {
      "epoch": 0.3361945636623748,
      "grad_norm": 0.15780771002805696,
      "learning_rate": 0.00015473564203343174,
      "loss": 0.7048,
      "step": 3760
    },
    {
      "epoch": 0.3362839771101574,
      "grad_norm": 0.15658035663110792,
      "learning_rate": 0.00015471140303445109,
      "loss": 0.718,
      "step": 3761
    },
    {
      "epoch": 0.3363733905579399,
      "grad_norm": 0.16024244103197657,
      "learning_rate": 0.00015468715944689113,
      "loss": 0.7125,
      "step": 3762
    },
    {
      "epoch": 0.3364628040057225,
      "grad_norm": 0.12240540607821307,
      "learning_rate": 0.0001546629112727852,
      "loss": 0.6356,
      "step": 3763
    },
    {
      "epoch": 0.336552217453505,
      "grad_norm": 0.13584408889184937,
      "learning_rate": 0.00015463865851416685,
      "loss": 0.6846,
      "step": 3764
    },
    {
      "epoch": 0.33664163090128757,
      "grad_norm": 0.14776347904426942,
      "learning_rate": 0.00015461440117307026,
      "loss": 0.6961,
      "step": 3765
    },
    {
      "epoch": 0.3367310443490701,
      "grad_norm": 0.1503213040975581,
      "learning_rate": 0.00015459013925152976,
      "loss": 0.7246,
      "step": 3766
    },
    {
      "epoch": 0.33682045779685266,
      "grad_norm": 0.14226971753612858,
      "learning_rate": 0.00015456587275158024,
      "loss": 0.6648,
      "step": 3767
    },
    {
      "epoch": 0.3369098712446352,
      "grad_norm": 0.15338329134564196,
      "learning_rate": 0.00015454160167525685,
      "loss": 0.7044,
      "step": 3768
    },
    {
      "epoch": 0.33699928469241774,
      "grad_norm": 0.15946592101307047,
      "learning_rate": 0.00015451732602459522,
      "loss": 0.7254,
      "step": 3769
    },
    {
      "epoch": 0.33708869814020026,
      "grad_norm": 0.15171957425799282,
      "learning_rate": 0.00015449304580163125,
      "loss": 0.6904,
      "step": 3770
    },
    {
      "epoch": 0.33717811158798283,
      "grad_norm": 0.14687038335631555,
      "learning_rate": 0.00015446876100840137,
      "loss": 0.6873,
      "step": 3771
    },
    {
      "epoch": 0.3372675250357654,
      "grad_norm": 0.15218514368719221,
      "learning_rate": 0.0001544444716469423,
      "loss": 0.6697,
      "step": 3772
    },
    {
      "epoch": 0.3373569384835479,
      "grad_norm": 0.14796652875489352,
      "learning_rate": 0.0001544201777192911,
      "loss": 0.6795,
      "step": 3773
    },
    {
      "epoch": 0.3374463519313305,
      "grad_norm": 0.12600206908178982,
      "learning_rate": 0.00015439587922748537,
      "loss": 0.645,
      "step": 3774
    },
    {
      "epoch": 0.337535765379113,
      "grad_norm": 0.13457409877685877,
      "learning_rate": 0.00015437157617356292,
      "loss": 0.6749,
      "step": 3775
    },
    {
      "epoch": 0.3376251788268956,
      "grad_norm": 0.13230976543744527,
      "learning_rate": 0.00015434726855956206,
      "loss": 0.6526,
      "step": 3776
    },
    {
      "epoch": 0.3377145922746781,
      "grad_norm": 0.13559917135301128,
      "learning_rate": 0.0001543229563875214,
      "loss": 0.6732,
      "step": 3777
    },
    {
      "epoch": 0.33780400572246067,
      "grad_norm": 0.1438928094019633,
      "learning_rate": 0.00015429863965947996,
      "loss": 0.6525,
      "step": 3778
    },
    {
      "epoch": 0.3378934191702432,
      "grad_norm": 0.1432818633980158,
      "learning_rate": 0.00015427431837747725,
      "loss": 0.6917,
      "step": 3779
    },
    {
      "epoch": 0.33798283261802575,
      "grad_norm": 0.19760026685947202,
      "learning_rate": 0.00015424999254355296,
      "loss": 0.3415,
      "step": 3780
    },
    {
      "epoch": 0.3380722460658083,
      "grad_norm": 0.16025905814955224,
      "learning_rate": 0.00015422566215974733,
      "loss": 0.6833,
      "step": 3781
    },
    {
      "epoch": 0.33816165951359084,
      "grad_norm": 0.1606837083117054,
      "learning_rate": 0.00015420132722810092,
      "loss": 0.7245,
      "step": 3782
    },
    {
      "epoch": 0.3382510729613734,
      "grad_norm": 0.17267905908113876,
      "learning_rate": 0.00015417698775065466,
      "loss": 0.7336,
      "step": 3783
    },
    {
      "epoch": 0.3383404864091559,
      "grad_norm": 0.17879073315975752,
      "learning_rate": 0.00015415264372944983,
      "loss": 0.6895,
      "step": 3784
    },
    {
      "epoch": 0.3384298998569385,
      "grad_norm": 0.14144489370624397,
      "learning_rate": 0.00015412829516652817,
      "loss": 0.6844,
      "step": 3785
    },
    {
      "epoch": 0.338519313304721,
      "grad_norm": 0.14862942156497966,
      "learning_rate": 0.00015410394206393177,
      "loss": 0.709,
      "step": 3786
    },
    {
      "epoch": 0.3386087267525036,
      "grad_norm": 0.1425722113503091,
      "learning_rate": 0.00015407958442370312,
      "loss": 0.6644,
      "step": 3787
    },
    {
      "epoch": 0.3386981402002861,
      "grad_norm": 0.13792901076255074,
      "learning_rate": 0.000154055222247885,
      "loss": 0.6722,
      "step": 3788
    },
    {
      "epoch": 0.3387875536480687,
      "grad_norm": 0.17978672002806959,
      "learning_rate": 0.00015403085553852068,
      "loss": 0.6587,
      "step": 3789
    },
    {
      "epoch": 0.3388769670958512,
      "grad_norm": 0.13595672079618806,
      "learning_rate": 0.00015400648429765375,
      "loss": 0.6409,
      "step": 3790
    },
    {
      "epoch": 0.33896638054363376,
      "grad_norm": 0.16551512118095876,
      "learning_rate": 0.00015398210852732825,
      "loss": 0.682,
      "step": 3791
    },
    {
      "epoch": 0.33905579399141633,
      "grad_norm": 0.1506486592479118,
      "learning_rate": 0.00015395772822958845,
      "loss": 0.6939,
      "step": 3792
    },
    {
      "epoch": 0.33914520743919885,
      "grad_norm": 0.20253901436816873,
      "learning_rate": 0.00015393334340647917,
      "loss": 0.3681,
      "step": 3793
    },
    {
      "epoch": 0.3392346208869814,
      "grad_norm": 0.15330766601938786,
      "learning_rate": 0.00015390895406004553,
      "loss": 0.6973,
      "step": 3794
    },
    {
      "epoch": 0.33932403433476394,
      "grad_norm": 0.14321622880592846,
      "learning_rate": 0.00015388456019233302,
      "loss": 0.658,
      "step": 3795
    },
    {
      "epoch": 0.3394134477825465,
      "grad_norm": 0.14177631150182232,
      "learning_rate": 0.0001538601618053875,
      "loss": 0.6847,
      "step": 3796
    },
    {
      "epoch": 0.339502861230329,
      "grad_norm": 0.14770409959030426,
      "learning_rate": 0.00015383575890125527,
      "loss": 0.6935,
      "step": 3797
    },
    {
      "epoch": 0.3395922746781116,
      "grad_norm": 0.13527212694877272,
      "learning_rate": 0.00015381135148198293,
      "loss": 0.7029,
      "step": 3798
    },
    {
      "epoch": 0.3396816881258941,
      "grad_norm": 0.1434242577750698,
      "learning_rate": 0.00015378693954961754,
      "loss": 0.6645,
      "step": 3799
    },
    {
      "epoch": 0.3397711015736767,
      "grad_norm": 0.15437662198986932,
      "learning_rate": 0.0001537625231062065,
      "loss": 0.6682,
      "step": 3800
    },
    {
      "epoch": 0.33986051502145925,
      "grad_norm": 0.15058253760498072,
      "learning_rate": 0.00015373810215379757,
      "loss": 0.7086,
      "step": 3801
    },
    {
      "epoch": 0.33994992846924177,
      "grad_norm": 0.13208856901177013,
      "learning_rate": 0.0001537136766944389,
      "loss": 0.6636,
      "step": 3802
    },
    {
      "epoch": 0.34003934191702434,
      "grad_norm": 0.14759547487048366,
      "learning_rate": 0.00015368924673017905,
      "loss": 0.6478,
      "step": 3803
    },
    {
      "epoch": 0.34012875536480686,
      "grad_norm": 0.14879768332486162,
      "learning_rate": 0.00015366481226306692,
      "loss": 0.6719,
      "step": 3804
    },
    {
      "epoch": 0.3402181688125894,
      "grad_norm": 0.15091205011067838,
      "learning_rate": 0.00015364037329515182,
      "loss": 0.7016,
      "step": 3805
    },
    {
      "epoch": 0.34030758226037194,
      "grad_norm": 0.15523776381663046,
      "learning_rate": 0.00015361592982848335,
      "loss": 0.7044,
      "step": 3806
    },
    {
      "epoch": 0.3403969957081545,
      "grad_norm": 0.15211823779352773,
      "learning_rate": 0.00015359148186511163,
      "loss": 0.7204,
      "step": 3807
    },
    {
      "epoch": 0.34048640915593703,
      "grad_norm": 0.1540437010059594,
      "learning_rate": 0.000153567029407087,
      "loss": 0.7123,
      "step": 3808
    },
    {
      "epoch": 0.3405758226037196,
      "grad_norm": 0.17796131268892912,
      "learning_rate": 0.00015354257245646036,
      "loss": 0.3794,
      "step": 3809
    },
    {
      "epoch": 0.3406652360515021,
      "grad_norm": 0.14964033590357323,
      "learning_rate": 0.0001535181110152828,
      "loss": 0.687,
      "step": 3810
    },
    {
      "epoch": 0.3407546494992847,
      "grad_norm": 0.14069266412089987,
      "learning_rate": 0.00015349364508560588,
      "loss": 0.6897,
      "step": 3811
    },
    {
      "epoch": 0.34084406294706726,
      "grad_norm": 0.14243653808053497,
      "learning_rate": 0.00015346917466948161,
      "loss": 0.6724,
      "step": 3812
    },
    {
      "epoch": 0.3409334763948498,
      "grad_norm": 0.15369399657991148,
      "learning_rate": 0.0001534446997689622,
      "loss": 0.6963,
      "step": 3813
    },
    {
      "epoch": 0.34102288984263235,
      "grad_norm": 0.14116328953230972,
      "learning_rate": 0.00015342022038610038,
      "loss": 0.6919,
      "step": 3814
    },
    {
      "epoch": 0.34111230329041486,
      "grad_norm": 0.16778800223820883,
      "learning_rate": 0.00015339573652294917,
      "loss": 0.7296,
      "step": 3815
    },
    {
      "epoch": 0.34120171673819744,
      "grad_norm": 0.1328059257569814,
      "learning_rate": 0.00015337124818156205,
      "loss": 0.6591,
      "step": 3816
    },
    {
      "epoch": 0.34129113018597995,
      "grad_norm": 0.1429636071760728,
      "learning_rate": 0.00015334675536399277,
      "loss": 0.6947,
      "step": 3817
    },
    {
      "epoch": 0.3413805436337625,
      "grad_norm": 0.15205035505105266,
      "learning_rate": 0.00015332225807229556,
      "loss": 0.7542,
      "step": 3818
    },
    {
      "epoch": 0.34146995708154504,
      "grad_norm": 0.14915042749422372,
      "learning_rate": 0.00015329775630852497,
      "loss": 0.7056,
      "step": 3819
    },
    {
      "epoch": 0.3415593705293276,
      "grad_norm": 0.15188693794894165,
      "learning_rate": 0.00015327325007473592,
      "loss": 0.7031,
      "step": 3820
    },
    {
      "epoch": 0.3416487839771102,
      "grad_norm": 0.1401509518021406,
      "learning_rate": 0.00015324873937298374,
      "loss": 0.6565,
      "step": 3821
    },
    {
      "epoch": 0.3417381974248927,
      "grad_norm": 0.14965132156317146,
      "learning_rate": 0.00015322422420532407,
      "loss": 0.7051,
      "step": 3822
    },
    {
      "epoch": 0.34182761087267527,
      "grad_norm": 0.15046759658839426,
      "learning_rate": 0.000153199704573813,
      "loss": 0.6895,
      "step": 3823
    },
    {
      "epoch": 0.3419170243204578,
      "grad_norm": 0.13975261817104073,
      "learning_rate": 0.00015317518048050697,
      "loss": 0.6926,
      "step": 3824
    },
    {
      "epoch": 0.34200643776824036,
      "grad_norm": 0.14533120130738897,
      "learning_rate": 0.00015315065192746276,
      "loss": 0.6675,
      "step": 3825
    },
    {
      "epoch": 0.3420958512160229,
      "grad_norm": 0.1339981454312732,
      "learning_rate": 0.00015312611891673752,
      "loss": 0.6424,
      "step": 3826
    },
    {
      "epoch": 0.34218526466380544,
      "grad_norm": 0.14803741209092658,
      "learning_rate": 0.00015310158145038892,
      "loss": 0.659,
      "step": 3827
    },
    {
      "epoch": 0.34227467811158796,
      "grad_norm": 0.13488190578858505,
      "learning_rate": 0.0001530770395304748,
      "loss": 0.6627,
      "step": 3828
    },
    {
      "epoch": 0.34236409155937053,
      "grad_norm": 0.16890016462636193,
      "learning_rate": 0.00015305249315905348,
      "loss": 0.7028,
      "step": 3829
    },
    {
      "epoch": 0.3424535050071531,
      "grad_norm": 0.18474034618101248,
      "learning_rate": 0.0001530279423381836,
      "loss": 0.3947,
      "step": 3830
    },
    {
      "epoch": 0.3425429184549356,
      "grad_norm": 0.1445705635784896,
      "learning_rate": 0.00015300338706992426,
      "loss": 0.6969,
      "step": 3831
    },
    {
      "epoch": 0.3426323319027182,
      "grad_norm": 0.15742563132954912,
      "learning_rate": 0.00015297882735633485,
      "loss": 0.6915,
      "step": 3832
    },
    {
      "epoch": 0.3427217453505007,
      "grad_norm": 0.15724102780271226,
      "learning_rate": 0.00015295426319947514,
      "loss": 0.6785,
      "step": 3833
    },
    {
      "epoch": 0.3428111587982833,
      "grad_norm": 0.1285121032457185,
      "learning_rate": 0.0001529296946014054,
      "loss": 0.647,
      "step": 3834
    },
    {
      "epoch": 0.3429005722460658,
      "grad_norm": 0.13679755965832963,
      "learning_rate": 0.00015290512156418602,
      "loss": 0.6529,
      "step": 3835
    },
    {
      "epoch": 0.34298998569384836,
      "grad_norm": 0.14504768155633155,
      "learning_rate": 0.000152880544089878,
      "loss": 0.6805,
      "step": 3836
    },
    {
      "epoch": 0.3430793991416309,
      "grad_norm": 0.15689099430909662,
      "learning_rate": 0.00015285596218054265,
      "loss": 0.6988,
      "step": 3837
    },
    {
      "epoch": 0.34316881258941345,
      "grad_norm": 0.14924780317520642,
      "learning_rate": 0.00015283137583824158,
      "loss": 0.6945,
      "step": 3838
    },
    {
      "epoch": 0.34325822603719597,
      "grad_norm": 0.1377401076928638,
      "learning_rate": 0.0001528067850650368,
      "loss": 0.6655,
      "step": 3839
    },
    {
      "epoch": 0.34334763948497854,
      "grad_norm": 0.1364220811386974,
      "learning_rate": 0.00015278218986299074,
      "loss": 0.648,
      "step": 3840
    },
    {
      "epoch": 0.3434370529327611,
      "grad_norm": 0.14647075710860824,
      "learning_rate": 0.00015275759023416618,
      "loss": 0.697,
      "step": 3841
    },
    {
      "epoch": 0.3435264663805436,
      "grad_norm": 0.14699861168327,
      "learning_rate": 0.00015273298618062624,
      "loss": 0.6998,
      "step": 3842
    },
    {
      "epoch": 0.3436158798283262,
      "grad_norm": 0.14679548592955627,
      "learning_rate": 0.00015270837770443437,
      "loss": 0.6706,
      "step": 3843
    },
    {
      "epoch": 0.3437052932761087,
      "grad_norm": 0.1425751866966338,
      "learning_rate": 0.0001526837648076546,
      "loss": 0.7039,
      "step": 3844
    },
    {
      "epoch": 0.3437947067238913,
      "grad_norm": 0.14856827978415654,
      "learning_rate": 0.00015265914749235107,
      "loss": 0.678,
      "step": 3845
    },
    {
      "epoch": 0.3438841201716738,
      "grad_norm": 0.15516233615290792,
      "learning_rate": 0.00015263452576058843,
      "loss": 0.667,
      "step": 3846
    },
    {
      "epoch": 0.3439735336194564,
      "grad_norm": 0.1327871513026604,
      "learning_rate": 0.0001526098996144317,
      "loss": 0.6342,
      "step": 3847
    },
    {
      "epoch": 0.3440629470672389,
      "grad_norm": 0.1962097691418371,
      "learning_rate": 0.0001525852690559462,
      "loss": 0.4063,
      "step": 3848
    },
    {
      "epoch": 0.34415236051502146,
      "grad_norm": 0.153732719449623,
      "learning_rate": 0.00015256063408719772,
      "loss": 0.6795,
      "step": 3849
    },
    {
      "epoch": 0.34424177396280403,
      "grad_norm": 0.1743395690587232,
      "learning_rate": 0.0001525359947102523,
      "loss": 0.6801,
      "step": 3850
    },
    {
      "epoch": 0.34433118741058655,
      "grad_norm": 0.15267515223354497,
      "learning_rate": 0.00015251135092717648,
      "loss": 0.667,
      "step": 3851
    },
    {
      "epoch": 0.3444206008583691,
      "grad_norm": 0.15256677003951147,
      "learning_rate": 0.00015248670274003708,
      "loss": 0.7158,
      "step": 3852
    },
    {
      "epoch": 0.34451001430615164,
      "grad_norm": 0.14341501540387258,
      "learning_rate": 0.00015246205015090127,
      "loss": 0.6812,
      "step": 3853
    },
    {
      "epoch": 0.3445994277539342,
      "grad_norm": 0.14703913793008946,
      "learning_rate": 0.0001524373931618367,
      "loss": 0.6866,
      "step": 3854
    },
    {
      "epoch": 0.3446888412017167,
      "grad_norm": 0.15037196681242243,
      "learning_rate": 0.0001524127317749113,
      "loss": 0.6907,
      "step": 3855
    },
    {
      "epoch": 0.3447782546494993,
      "grad_norm": 0.14504338103553488,
      "learning_rate": 0.00015238806599219336,
      "loss": 0.6833,
      "step": 3856
    },
    {
      "epoch": 0.3448676680972818,
      "grad_norm": 0.1612892394499813,
      "learning_rate": 0.0001523633958157516,
      "loss": 0.7344,
      "step": 3857
    },
    {
      "epoch": 0.3449570815450644,
      "grad_norm": 0.1375005764431461,
      "learning_rate": 0.00015233872124765512,
      "loss": 0.6906,
      "step": 3858
    },
    {
      "epoch": 0.3450464949928469,
      "grad_norm": 0.14386900638344802,
      "learning_rate": 0.00015231404228997325,
      "loss": 0.7334,
      "step": 3859
    },
    {
      "epoch": 0.34513590844062947,
      "grad_norm": 0.1431299231691696,
      "learning_rate": 0.00015228935894477582,
      "loss": 0.6529,
      "step": 3860
    },
    {
      "epoch": 0.34522532188841204,
      "grad_norm": 0.15122782551551614,
      "learning_rate": 0.00015226467121413304,
      "loss": 0.6818,
      "step": 3861
    },
    {
      "epoch": 0.34531473533619456,
      "grad_norm": 0.14533386755626407,
      "learning_rate": 0.0001522399791001154,
      "loss": 0.705,
      "step": 3862
    },
    {
      "epoch": 0.3454041487839771,
      "grad_norm": 0.1312208620388203,
      "learning_rate": 0.00015221528260479377,
      "loss": 0.6067,
      "step": 3863
    },
    {
      "epoch": 0.34549356223175964,
      "grad_norm": 0.14295949209094155,
      "learning_rate": 0.0001521905817302395,
      "loss": 0.6881,
      "step": 3864
    },
    {
      "epoch": 0.3455829756795422,
      "grad_norm": 0.14847872320924824,
      "learning_rate": 0.00015216587647852415,
      "loss": 0.672,
      "step": 3865
    },
    {
      "epoch": 0.34567238912732473,
      "grad_norm": 0.15370232282208451,
      "learning_rate": 0.0001521411668517197,
      "loss": 0.7142,
      "step": 3866
    },
    {
      "epoch": 0.3457618025751073,
      "grad_norm": 0.12487669447327862,
      "learning_rate": 0.00015211645285189858,
      "loss": 0.6972,
      "step": 3867
    },
    {
      "epoch": 0.3458512160228898,
      "grad_norm": 0.16524157777140402,
      "learning_rate": 0.00015209173448113355,
      "loss": 0.7284,
      "step": 3868
    },
    {
      "epoch": 0.3459406294706724,
      "grad_norm": 0.16808462920508732,
      "learning_rate": 0.0001520670117414976,
      "loss": 0.6767,
      "step": 3869
    },
    {
      "epoch": 0.34603004291845496,
      "grad_norm": 0.14545925750785266,
      "learning_rate": 0.00015204228463506424,
      "loss": 0.6861,
      "step": 3870
    },
    {
      "epoch": 0.3461194563662375,
      "grad_norm": 0.15470372682183267,
      "learning_rate": 0.00015201755316390737,
      "loss": 0.7348,
      "step": 3871
    },
    {
      "epoch": 0.34620886981402005,
      "grad_norm": 0.16400238765930375,
      "learning_rate": 0.00015199281733010116,
      "loss": 0.7164,
      "step": 3872
    },
    {
      "epoch": 0.34629828326180256,
      "grad_norm": 0.2025562145999025,
      "learning_rate": 0.0001519680771357201,
      "loss": 0.3944,
      "step": 3873
    },
    {
      "epoch": 0.34638769670958514,
      "grad_norm": 0.14616711695035048,
      "learning_rate": 0.00015194333258283918,
      "loss": 0.7319,
      "step": 3874
    },
    {
      "epoch": 0.34647711015736765,
      "grad_norm": 0.15357456387711382,
      "learning_rate": 0.00015191858367353368,
      "loss": 0.6714,
      "step": 3875
    },
    {
      "epoch": 0.3465665236051502,
      "grad_norm": 0.13593512031359642,
      "learning_rate": 0.0001518938304098793,
      "loss": 0.6545,
      "step": 3876
    },
    {
      "epoch": 0.34665593705293274,
      "grad_norm": 0.1523517685339463,
      "learning_rate": 0.00015186907279395202,
      "loss": 0.6916,
      "step": 3877
    },
    {
      "epoch": 0.3467453505007153,
      "grad_norm": 0.14890443601577022,
      "learning_rate": 0.00015184431082782823,
      "loss": 0.702,
      "step": 3878
    },
    {
      "epoch": 0.3468347639484979,
      "grad_norm": 0.1396240595797069,
      "learning_rate": 0.00015181954451358473,
      "loss": 0.7114,
      "step": 3879
    },
    {
      "epoch": 0.3469241773962804,
      "grad_norm": 0.13521390667472666,
      "learning_rate": 0.0001517947738532986,
      "loss": 0.6536,
      "step": 3880
    },
    {
      "epoch": 0.34701359084406297,
      "grad_norm": 0.16025673913131394,
      "learning_rate": 0.00015176999884904734,
      "loss": 0.6869,
      "step": 3881
    },
    {
      "epoch": 0.3471030042918455,
      "grad_norm": 0.13821343392390004,
      "learning_rate": 0.0001517452195029088,
      "loss": 0.6865,
      "step": 3882
    },
    {
      "epoch": 0.34719241773962806,
      "grad_norm": 0.15920113728546706,
      "learning_rate": 0.00015172043581696118,
      "loss": 0.6824,
      "step": 3883
    },
    {
      "epoch": 0.3472818311874106,
      "grad_norm": 0.1406704945338132,
      "learning_rate": 0.0001516956477932831,
      "loss": 0.6575,
      "step": 3884
    },
    {
      "epoch": 0.34737124463519314,
      "grad_norm": 0.1436215701432842,
      "learning_rate": 0.00015167085543395348,
      "loss": 0.6582,
      "step": 3885
    },
    {
      "epoch": 0.34746065808297566,
      "grad_norm": 0.13354732159024596,
      "learning_rate": 0.00015164605874105156,
      "loss": 0.6536,
      "step": 3886
    },
    {
      "epoch": 0.34755007153075823,
      "grad_norm": 0.13652874296552905,
      "learning_rate": 0.0001516212577166571,
      "loss": 0.6784,
      "step": 3887
    },
    {
      "epoch": 0.34763948497854075,
      "grad_norm": 0.1329684045330412,
      "learning_rate": 0.0001515964523628501,
      "loss": 0.6431,
      "step": 3888
    },
    {
      "epoch": 0.3477288984263233,
      "grad_norm": 0.14507431033507592,
      "learning_rate": 0.00015157164268171097,
      "loss": 0.6749,
      "step": 3889
    },
    {
      "epoch": 0.3478183118741059,
      "grad_norm": 0.1626978607259272,
      "learning_rate": 0.0001515468286753204,
      "loss": 0.7503,
      "step": 3890
    },
    {
      "epoch": 0.3479077253218884,
      "grad_norm": 0.13554460886121486,
      "learning_rate": 0.0001515220103457596,
      "loss": 0.6759,
      "step": 3891
    },
    {
      "epoch": 0.347997138769671,
      "grad_norm": 0.13050561555182924,
      "learning_rate": 0.00015149718769511003,
      "loss": 0.6723,
      "step": 3892
    },
    {
      "epoch": 0.3480865522174535,
      "grad_norm": 0.13903687949643573,
      "learning_rate": 0.00015147236072545348,
      "loss": 0.6634,
      "step": 3893
    },
    {
      "epoch": 0.34817596566523606,
      "grad_norm": 0.14528896796413454,
      "learning_rate": 0.00015144752943887222,
      "loss": 0.667,
      "step": 3894
    },
    {
      "epoch": 0.3482653791130186,
      "grad_norm": 0.13767865135664292,
      "learning_rate": 0.0001514226938374488,
      "loss": 0.6617,
      "step": 3895
    },
    {
      "epoch": 0.34835479256080115,
      "grad_norm": 0.1348509993859346,
      "learning_rate": 0.00015139785392326616,
      "loss": 0.6953,
      "step": 3896
    },
    {
      "epoch": 0.34844420600858367,
      "grad_norm": 0.15976106579205704,
      "learning_rate": 0.00015137300969840758,
      "loss": 0.6978,
      "step": 3897
    },
    {
      "epoch": 0.34853361945636624,
      "grad_norm": 0.15289884676839094,
      "learning_rate": 0.0001513481611649567,
      "loss": 0.6829,
      "step": 3898
    },
    {
      "epoch": 0.3486230329041488,
      "grad_norm": 0.16242968916796555,
      "learning_rate": 0.00015132330832499756,
      "loss": 0.7041,
      "step": 3899
    },
    {
      "epoch": 0.3487124463519313,
      "grad_norm": 0.15001989192593404,
      "learning_rate": 0.00015129845118061453,
      "loss": 0.7042,
      "step": 3900
    },
    {
      "epoch": 0.3488018597997139,
      "grad_norm": 0.1537543960377121,
      "learning_rate": 0.00015127358973389236,
      "loss": 0.7016,
      "step": 3901
    },
    {
      "epoch": 0.3488912732474964,
      "grad_norm": 0.15074529797799546,
      "learning_rate": 0.00015124872398691617,
      "loss": 0.6882,
      "step": 3902
    },
    {
      "epoch": 0.348980686695279,
      "grad_norm": 0.16728485946463265,
      "learning_rate": 0.00015122385394177135,
      "loss": 0.7625,
      "step": 3903
    },
    {
      "epoch": 0.3490701001430615,
      "grad_norm": 0.1249570206320876,
      "learning_rate": 0.0001511989796005438,
      "loss": 0.6798,
      "step": 3904
    },
    {
      "epoch": 0.3491595135908441,
      "grad_norm": 0.14627306744948085,
      "learning_rate": 0.00015117410096531964,
      "loss": 0.6769,
      "step": 3905
    },
    {
      "epoch": 0.3492489270386266,
      "grad_norm": 0.13162982457095226,
      "learning_rate": 0.00015114921803818546,
      "loss": 0.6172,
      "step": 3906
    },
    {
      "epoch": 0.34933834048640916,
      "grad_norm": 0.17362032674786654,
      "learning_rate": 0.0001511243308212281,
      "loss": 0.685,
      "step": 3907
    },
    {
      "epoch": 0.3494277539341917,
      "grad_norm": 0.13692345910225928,
      "learning_rate": 0.00015109943931653486,
      "loss": 0.6415,
      "step": 3908
    },
    {
      "epoch": 0.34951716738197425,
      "grad_norm": 0.15847947354875408,
      "learning_rate": 0.00015107454352619336,
      "loss": 0.6987,
      "step": 3909
    },
    {
      "epoch": 0.3496065808297568,
      "grad_norm": 0.1608126903598285,
      "learning_rate": 0.00015104964345229158,
      "loss": 0.6757,
      "step": 3910
    },
    {
      "epoch": 0.34969599427753933,
      "grad_norm": 0.1524263421586583,
      "learning_rate": 0.00015102473909691785,
      "loss": 0.6649,
      "step": 3911
    },
    {
      "epoch": 0.3497854077253219,
      "grad_norm": 0.14850151802171396,
      "learning_rate": 0.0001509998304621609,
      "loss": 0.6675,
      "step": 3912
    },
    {
      "epoch": 0.3498748211731044,
      "grad_norm": 0.14849582385734314,
      "learning_rate": 0.00015097491755010974,
      "loss": 0.6586,
      "step": 3913
    },
    {
      "epoch": 0.349964234620887,
      "grad_norm": 0.16687293026002123,
      "learning_rate": 0.0001509500003628538,
      "loss": 0.6899,
      "step": 3914
    },
    {
      "epoch": 0.3500536480686695,
      "grad_norm": 0.16319122732003682,
      "learning_rate": 0.00015092507890248288,
      "loss": 0.6551,
      "step": 3915
    },
    {
      "epoch": 0.3501430615164521,
      "grad_norm": 0.15523283204476382,
      "learning_rate": 0.0001509001531710871,
      "loss": 0.6961,
      "step": 3916
    },
    {
      "epoch": 0.3502324749642346,
      "grad_norm": 0.14610727907985835,
      "learning_rate": 0.00015087522317075693,
      "loss": 0.6793,
      "step": 3917
    },
    {
      "epoch": 0.35032188841201717,
      "grad_norm": 0.14527123877765905,
      "learning_rate": 0.00015085028890358325,
      "loss": 0.66,
      "step": 3918
    },
    {
      "epoch": 0.35041130185979974,
      "grad_norm": 0.1411872905394371,
      "learning_rate": 0.00015082535037165724,
      "loss": 0.6296,
      "step": 3919
    },
    {
      "epoch": 0.35050071530758226,
      "grad_norm": 0.15718630224609362,
      "learning_rate": 0.00015080040757707046,
      "loss": 0.6922,
      "step": 3920
    },
    {
      "epoch": 0.3505901287553648,
      "grad_norm": 0.13086948390009165,
      "learning_rate": 0.0001507754605219149,
      "loss": 0.672,
      "step": 3921
    },
    {
      "epoch": 0.35067954220314734,
      "grad_norm": 0.12924416685331563,
      "learning_rate": 0.00015075050920828272,
      "loss": 0.6606,
      "step": 3922
    },
    {
      "epoch": 0.3507689556509299,
      "grad_norm": 0.16982494461214578,
      "learning_rate": 0.00015072555363826665,
      "loss": 0.7343,
      "step": 3923
    },
    {
      "epoch": 0.35085836909871243,
      "grad_norm": 0.14587431895065192,
      "learning_rate": 0.00015070059381395968,
      "loss": 0.6716,
      "step": 3924
    },
    {
      "epoch": 0.350947782546495,
      "grad_norm": 0.13861139911983125,
      "learning_rate": 0.0001506756297374551,
      "loss": 0.6765,
      "step": 3925
    },
    {
      "epoch": 0.3510371959942775,
      "grad_norm": 0.14966397371447343,
      "learning_rate": 0.00015065066141084667,
      "loss": 0.6718,
      "step": 3926
    },
    {
      "epoch": 0.3511266094420601,
      "grad_norm": 0.14486481157292083,
      "learning_rate": 0.00015062568883622844,
      "loss": 0.6887,
      "step": 3927
    },
    {
      "epoch": 0.3512160228898426,
      "grad_norm": 0.16838835016898435,
      "learning_rate": 0.00015060071201569486,
      "loss": 0.7081,
      "step": 3928
    },
    {
      "epoch": 0.3513054363376252,
      "grad_norm": 0.16662558139958533,
      "learning_rate": 0.00015057573095134062,
      "loss": 0.7441,
      "step": 3929
    },
    {
      "epoch": 0.35139484978540775,
      "grad_norm": 0.15755741563492748,
      "learning_rate": 0.00015055074564526095,
      "loss": 0.7089,
      "step": 3930
    },
    {
      "epoch": 0.35148426323319026,
      "grad_norm": 0.1449454152917181,
      "learning_rate": 0.00015052575609955125,
      "loss": 0.7175,
      "step": 3931
    },
    {
      "epoch": 0.35157367668097284,
      "grad_norm": 0.18284265807907485,
      "learning_rate": 0.00015050076231630744,
      "loss": 0.7505,
      "step": 3932
    },
    {
      "epoch": 0.35166309012875535,
      "grad_norm": 0.1744956899457033,
      "learning_rate": 0.00015047576429762566,
      "loss": 0.4397,
      "step": 3933
    },
    {
      "epoch": 0.3517525035765379,
      "grad_norm": 0.13593369252745968,
      "learning_rate": 0.0001504507620456025,
      "loss": 0.6562,
      "step": 3934
    },
    {
      "epoch": 0.35184191702432044,
      "grad_norm": 0.14265155883935582,
      "learning_rate": 0.00015042575556233488,
      "loss": 0.6667,
      "step": 3935
    },
    {
      "epoch": 0.351931330472103,
      "grad_norm": 0.15191207738228138,
      "learning_rate": 0.00015040074484992,
      "loss": 0.709,
      "step": 3936
    },
    {
      "epoch": 0.3520207439198855,
      "grad_norm": 0.14531524949569533,
      "learning_rate": 0.00015037572991045552,
      "loss": 0.706,
      "step": 3937
    },
    {
      "epoch": 0.3521101573676681,
      "grad_norm": 0.15536525712603363,
      "learning_rate": 0.00015035071074603944,
      "loss": 0.6785,
      "step": 3938
    },
    {
      "epoch": 0.35219957081545067,
      "grad_norm": 0.16716466475586175,
      "learning_rate": 0.00015032568735877003,
      "loss": 0.7474,
      "step": 3939
    },
    {
      "epoch": 0.3522889842632332,
      "grad_norm": 0.13274038590493611,
      "learning_rate": 0.000150300659750746,
      "loss": 0.6752,
      "step": 3940
    },
    {
      "epoch": 0.35237839771101576,
      "grad_norm": 0.15075069468402186,
      "learning_rate": 0.00015027562792406643,
      "loss": 0.6493,
      "step": 3941
    },
    {
      "epoch": 0.35246781115879827,
      "grad_norm": 0.14319975149687741,
      "learning_rate": 0.0001502505918808306,
      "loss": 0.6425,
      "step": 3942
    },
    {
      "epoch": 0.35255722460658084,
      "grad_norm": 0.14746445819977086,
      "learning_rate": 0.00015022555162313834,
      "loss": 0.6908,
      "step": 3943
    },
    {
      "epoch": 0.35264663805436336,
      "grad_norm": 0.14847739630245552,
      "learning_rate": 0.00015020050715308972,
      "loss": 0.6651,
      "step": 3944
    },
    {
      "epoch": 0.35273605150214593,
      "grad_norm": 0.1427810212193974,
      "learning_rate": 0.0001501754584727852,
      "loss": 0.6598,
      "step": 3945
    },
    {
      "epoch": 0.35282546494992845,
      "grad_norm": 0.16100738010155047,
      "learning_rate": 0.0001501504055843256,
      "loss": 0.735,
      "step": 3946
    },
    {
      "epoch": 0.352914878397711,
      "grad_norm": 0.1538210289308057,
      "learning_rate": 0.00015012534848981202,
      "loss": 0.7209,
      "step": 3947
    },
    {
      "epoch": 0.3530042918454936,
      "grad_norm": 0.16530809340343838,
      "learning_rate": 0.000150100287191346,
      "loss": 0.7493,
      "step": 3948
    },
    {
      "epoch": 0.3530937052932761,
      "grad_norm": 0.1815935465478366,
      "learning_rate": 0.00015007522169102941,
      "loss": 0.4043,
      "step": 3949
    },
    {
      "epoch": 0.3531831187410587,
      "grad_norm": 0.15942621363010565,
      "learning_rate": 0.00015005015199096443,
      "loss": 0.6919,
      "step": 3950
    },
    {
      "epoch": 0.3532725321888412,
      "grad_norm": 0.14737214120106837,
      "learning_rate": 0.00015002507809325365,
      "loss": 0.659,
      "step": 3951
    },
    {
      "epoch": 0.35336194563662376,
      "grad_norm": 0.13852306146021284,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.659,
      "step": 3952
    },
    {
      "epoch": 0.3534513590844063,
      "grad_norm": 0.16945059357722672,
      "learning_rate": 0.00014997491771330675,
      "loss": 0.7174,
      "step": 3953
    },
    {
      "epoch": 0.35354077253218885,
      "grad_norm": 0.171916793259839,
      "learning_rate": 0.0001499498312352775,
      "loss": 0.7266,
      "step": 3954
    },
    {
      "epoch": 0.35363018597997137,
      "grad_norm": 0.15818541934178512,
      "learning_rate": 0.0001499247405680162,
      "loss": 0.6949,
      "step": 3955
    },
    {
      "epoch": 0.35371959942775394,
      "grad_norm": 0.16367670990098496,
      "learning_rate": 0.00014989964571362723,
      "loss": 0.7355,
      "step": 3956
    },
    {
      "epoch": 0.35380901287553645,
      "grad_norm": 0.1364214247985001,
      "learning_rate": 0.0001498745466742152,
      "loss": 0.6477,
      "step": 3957
    },
    {
      "epoch": 0.353898426323319,
      "grad_norm": 0.14565742261211664,
      "learning_rate": 0.0001498494434518852,
      "loss": 0.6676,
      "step": 3958
    },
    {
      "epoch": 0.3539878397711016,
      "grad_norm": 0.14806586689862156,
      "learning_rate": 0.0001498243360487426,
      "loss": 0.69,
      "step": 3959
    },
    {
      "epoch": 0.3540772532188841,
      "grad_norm": 0.15039999571673537,
      "learning_rate": 0.00014979922446689306,
      "loss": 0.7082,
      "step": 3960
    },
    {
      "epoch": 0.3541666666666667,
      "grad_norm": 0.14887316859172361,
      "learning_rate": 0.00014977410870844273,
      "loss": 0.6825,
      "step": 3961
    },
    {
      "epoch": 0.3542560801144492,
      "grad_norm": 0.14132103295013346,
      "learning_rate": 0.00014974898877549806,
      "loss": 0.6075,
      "step": 3962
    },
    {
      "epoch": 0.3543454935622318,
      "grad_norm": 0.18640564977102891,
      "learning_rate": 0.0001497238646701657,
      "loss": 0.4086,
      "step": 3963
    },
    {
      "epoch": 0.3544349070100143,
      "grad_norm": 0.15241523835269202,
      "learning_rate": 0.00014969873639455292,
      "loss": 0.6944,
      "step": 3964
    },
    {
      "epoch": 0.35452432045779686,
      "grad_norm": 0.15584115129280465,
      "learning_rate": 0.00014967360395076713,
      "loss": 0.7171,
      "step": 3965
    },
    {
      "epoch": 0.3546137339055794,
      "grad_norm": 0.14236630117707536,
      "learning_rate": 0.00014964846734091616,
      "loss": 0.6679,
      "step": 3966
    },
    {
      "epoch": 0.35470314735336195,
      "grad_norm": 0.1456833442688178,
      "learning_rate": 0.00014962332656710817,
      "loss": 0.6527,
      "step": 3967
    },
    {
      "epoch": 0.3547925608011445,
      "grad_norm": 0.1339008939647742,
      "learning_rate": 0.00014959818163145174,
      "loss": 0.636,
      "step": 3968
    },
    {
      "epoch": 0.35488197424892703,
      "grad_norm": 0.13386926881847086,
      "learning_rate": 0.00014957303253605573,
      "loss": 0.6488,
      "step": 3969
    },
    {
      "epoch": 0.3549713876967096,
      "grad_norm": 0.14171557259809134,
      "learning_rate": 0.00014954787928302935,
      "loss": 0.6814,
      "step": 3970
    },
    {
      "epoch": 0.3550608011444921,
      "grad_norm": 0.14572798200605527,
      "learning_rate": 0.00014952272187448214,
      "loss": 0.6991,
      "step": 3971
    },
    {
      "epoch": 0.3551502145922747,
      "grad_norm": 0.152599095291391,
      "learning_rate": 0.00014949756031252406,
      "loss": 0.7231,
      "step": 3972
    },
    {
      "epoch": 0.3552396280400572,
      "grad_norm": 0.15181119317225458,
      "learning_rate": 0.0001494723945992654,
      "loss": 0.7144,
      "step": 3973
    },
    {
      "epoch": 0.3553290414878398,
      "grad_norm": 0.16242080040968143,
      "learning_rate": 0.00014944722473681673,
      "loss": 0.6883,
      "step": 3974
    },
    {
      "epoch": 0.3554184549356223,
      "grad_norm": 0.14542653782459725,
      "learning_rate": 0.00014942205072728903,
      "loss": 0.7029,
      "step": 3975
    },
    {
      "epoch": 0.35550786838340487,
      "grad_norm": 0.16178619805410108,
      "learning_rate": 0.00014939687257279363,
      "loss": 0.7384,
      "step": 3976
    },
    {
      "epoch": 0.3555972818311874,
      "grad_norm": 0.14430071961695334,
      "learning_rate": 0.0001493716902754422,
      "loss": 0.6697,
      "step": 3977
    },
    {
      "epoch": 0.35568669527896996,
      "grad_norm": 0.1374899517891578,
      "learning_rate": 0.00014934650383734672,
      "loss": 0.6437,
      "step": 3978
    },
    {
      "epoch": 0.3557761087267525,
      "grad_norm": 0.14631086605679086,
      "learning_rate": 0.00014932131326061957,
      "loss": 0.6742,
      "step": 3979
    },
    {
      "epoch": 0.35586552217453504,
      "grad_norm": 0.14481008343097476,
      "learning_rate": 0.00014929611854737343,
      "loss": 0.6856,
      "step": 3980
    },
    {
      "epoch": 0.3559549356223176,
      "grad_norm": 0.14025510390234516,
      "learning_rate": 0.00014927091969972134,
      "loss": 0.6581,
      "step": 3981
    },
    {
      "epoch": 0.35604434907010013,
      "grad_norm": 0.13290844103544244,
      "learning_rate": 0.00014924571671977676,
      "loss": 0.6733,
      "step": 3982
    },
    {
      "epoch": 0.3561337625178827,
      "grad_norm": 0.1301810892951476,
      "learning_rate": 0.0001492205096096534,
      "loss": 0.6613,
      "step": 3983
    },
    {
      "epoch": 0.3562231759656652,
      "grad_norm": 0.13503221505140234,
      "learning_rate": 0.00014919529837146528,
      "loss": 0.6975,
      "step": 3984
    },
    {
      "epoch": 0.3563125894134478,
      "grad_norm": 0.16061725816180783,
      "learning_rate": 0.00014917008300732696,
      "loss": 0.6949,
      "step": 3985
    },
    {
      "epoch": 0.3564020028612303,
      "grad_norm": 0.18291598831224778,
      "learning_rate": 0.00014914486351935312,
      "loss": 0.4356,
      "step": 3986
    },
    {
      "epoch": 0.3564914163090129,
      "grad_norm": 0.12377143505071751,
      "learning_rate": 0.00014911963990965897,
      "loss": 0.6416,
      "step": 3987
    },
    {
      "epoch": 0.35658082975679545,
      "grad_norm": 0.15780487338626012,
      "learning_rate": 0.00014909441218035992,
      "loss": 0.6878,
      "step": 3988
    },
    {
      "epoch": 0.35667024320457796,
      "grad_norm": 0.1682160025364165,
      "learning_rate": 0.0001490691803335718,
      "loss": 0.6816,
      "step": 3989
    },
    {
      "epoch": 0.35675965665236054,
      "grad_norm": 0.14152343911205026,
      "learning_rate": 0.0001490439443714108,
      "loss": 0.6855,
      "step": 3990
    },
    {
      "epoch": 0.35684907010014305,
      "grad_norm": 0.12709793317277654,
      "learning_rate": 0.0001490187042959934,
      "loss": 0.6425,
      "step": 3991
    },
    {
      "epoch": 0.3569384835479256,
      "grad_norm": 0.17281605291564933,
      "learning_rate": 0.0001489934601094365,
      "loss": 0.7608,
      "step": 3992
    },
    {
      "epoch": 0.35702789699570814,
      "grad_norm": 0.14376407054396437,
      "learning_rate": 0.00014896821181385725,
      "loss": 0.6622,
      "step": 3993
    },
    {
      "epoch": 0.3571173104434907,
      "grad_norm": 0.1707311424609591,
      "learning_rate": 0.0001489429594113732,
      "loss": 0.7192,
      "step": 3994
    },
    {
      "epoch": 0.3572067238912732,
      "grad_norm": 0.15497159959048848,
      "learning_rate": 0.00014891770290410228,
      "loss": 0.6482,
      "step": 3995
    },
    {
      "epoch": 0.3572961373390558,
      "grad_norm": 0.16115297873352852,
      "learning_rate": 0.0001488924422941627,
      "loss": 0.7048,
      "step": 3996
    },
    {
      "epoch": 0.3573855507868383,
      "grad_norm": 0.1619239936194811,
      "learning_rate": 0.000148867177583673,
      "loss": 0.6849,
      "step": 3997
    },
    {
      "epoch": 0.3574749642346209,
      "grad_norm": 0.15375237512077386,
      "learning_rate": 0.00014884190877475213,
      "loss": 0.7164,
      "step": 3998
    },
    {
      "epoch": 0.35756437768240346,
      "grad_norm": 0.15574188016762627,
      "learning_rate": 0.00014881663586951938,
      "loss": 0.7188,
      "step": 3999
    },
    {
      "epoch": 0.35765379113018597,
      "grad_norm": 0.14965691037869375,
      "learning_rate": 0.00014879135887009435,
      "loss": 0.6647,
      "step": 4000
    },
    {
      "epoch": 0.35774320457796854,
      "grad_norm": 0.1531033106131088,
      "learning_rate": 0.00014876607777859695,
      "loss": 0.6844,
      "step": 4001
    },
    {
      "epoch": 0.35783261802575106,
      "grad_norm": 0.14767666822002856,
      "learning_rate": 0.0001487407925971475,
      "loss": 0.7111,
      "step": 4002
    },
    {
      "epoch": 0.35792203147353363,
      "grad_norm": 0.1306829720281249,
      "learning_rate": 0.00014871550332786666,
      "loss": 0.631,
      "step": 4003
    },
    {
      "epoch": 0.35801144492131615,
      "grad_norm": 0.14800898013142605,
      "learning_rate": 0.00014869020997287536,
      "loss": 0.6723,
      "step": 4004
    },
    {
      "epoch": 0.3581008583690987,
      "grad_norm": 0.13502130424441974,
      "learning_rate": 0.00014866491253429497,
      "loss": 0.6704,
      "step": 4005
    },
    {
      "epoch": 0.35819027181688123,
      "grad_norm": 0.1401319173793906,
      "learning_rate": 0.00014863961101424712,
      "loss": 0.6735,
      "step": 4006
    },
    {
      "epoch": 0.3582796852646638,
      "grad_norm": 0.1615230805697385,
      "learning_rate": 0.00014861430541485387,
      "loss": 0.6986,
      "step": 4007
    },
    {
      "epoch": 0.3583690987124464,
      "grad_norm": 0.14848890660241088,
      "learning_rate": 0.00014858899573823753,
      "loss": 0.6855,
      "step": 4008
    },
    {
      "epoch": 0.3584585121602289,
      "grad_norm": 0.14213823322315972,
      "learning_rate": 0.00014856368198652077,
      "loss": 0.6904,
      "step": 4009
    },
    {
      "epoch": 0.35854792560801146,
      "grad_norm": 0.1466096925054767,
      "learning_rate": 0.00014853836416182668,
      "loss": 0.6489,
      "step": 4010
    },
    {
      "epoch": 0.358637339055794,
      "grad_norm": 0.15474178436960842,
      "learning_rate": 0.0001485130422662786,
      "loss": 0.6652,
      "step": 4011
    },
    {
      "epoch": 0.35872675250357655,
      "grad_norm": 0.14941391431810702,
      "learning_rate": 0.00014848771630200023,
      "loss": 0.6593,
      "step": 4012
    },
    {
      "epoch": 0.35881616595135907,
      "grad_norm": 0.1711278058468329,
      "learning_rate": 0.00014846238627111568,
      "loss": 0.6838,
      "step": 4013
    },
    {
      "epoch": 0.35890557939914164,
      "grad_norm": 0.1620010606932157,
      "learning_rate": 0.00014843705217574933,
      "loss": 0.7171,
      "step": 4014
    },
    {
      "epoch": 0.35899499284692415,
      "grad_norm": 0.13101880912162195,
      "learning_rate": 0.00014841171401802587,
      "loss": 0.6585,
      "step": 4015
    },
    {
      "epoch": 0.3590844062947067,
      "grad_norm": 0.14999213733892733,
      "learning_rate": 0.00014838637180007047,
      "loss": 0.6774,
      "step": 4016
    },
    {
      "epoch": 0.3591738197424893,
      "grad_norm": 0.1642911797969986,
      "learning_rate": 0.00014836102552400848,
      "loss": 0.7228,
      "step": 4017
    },
    {
      "epoch": 0.3592632331902718,
      "grad_norm": 0.13782543899102143,
      "learning_rate": 0.00014833567519196566,
      "loss": 0.7008,
      "step": 4018
    },
    {
      "epoch": 0.3593526466380544,
      "grad_norm": 0.143063381801345,
      "learning_rate": 0.00014831032080606817,
      "loss": 0.6609,
      "step": 4019
    },
    {
      "epoch": 0.3594420600858369,
      "grad_norm": 0.14403040805363138,
      "learning_rate": 0.00014828496236844242,
      "loss": 0.6785,
      "step": 4020
    },
    {
      "epoch": 0.3595314735336195,
      "grad_norm": 0.15620535700091104,
      "learning_rate": 0.00014825959988121515,
      "loss": 0.7004,
      "step": 4021
    },
    {
      "epoch": 0.359620886981402,
      "grad_norm": 0.14805263436681138,
      "learning_rate": 0.00014823423334651357,
      "loss": 0.6843,
      "step": 4022
    },
    {
      "epoch": 0.35971030042918456,
      "grad_norm": 0.13834433122972703,
      "learning_rate": 0.00014820886276646506,
      "loss": 0.6832,
      "step": 4023
    },
    {
      "epoch": 0.3597997138769671,
      "grad_norm": 0.15616675484614823,
      "learning_rate": 0.00014818348814319747,
      "loss": 0.6773,
      "step": 4024
    },
    {
      "epoch": 0.35988912732474965,
      "grad_norm": 0.1691628434891448,
      "learning_rate": 0.0001481581094788389,
      "loss": 0.7273,
      "step": 4025
    },
    {
      "epoch": 0.35997854077253216,
      "grad_norm": 0.1583353609685778,
      "learning_rate": 0.00014813272677551787,
      "loss": 0.6665,
      "step": 4026
    },
    {
      "epoch": 0.36006795422031473,
      "grad_norm": 0.17019613045479332,
      "learning_rate": 0.00014810734003536317,
      "loss": 0.7136,
      "step": 4027
    },
    {
      "epoch": 0.3601573676680973,
      "grad_norm": 0.17000865499346765,
      "learning_rate": 0.00014808194926050394,
      "loss": 0.6806,
      "step": 4028
    },
    {
      "epoch": 0.3602467811158798,
      "grad_norm": 0.1295040374558923,
      "learning_rate": 0.0001480565544530697,
      "loss": 0.6311,
      "step": 4029
    },
    {
      "epoch": 0.3603361945636624,
      "grad_norm": 0.1823473850070775,
      "learning_rate": 0.0001480311556151903,
      "loss": 0.3835,
      "step": 4030
    },
    {
      "epoch": 0.3604256080114449,
      "grad_norm": 0.15871615357159752,
      "learning_rate": 0.00014800575274899588,
      "loss": 0.6786,
      "step": 4031
    },
    {
      "epoch": 0.3605150214592275,
      "grad_norm": 0.1403123282327246,
      "learning_rate": 0.00014798034585661695,
      "loss": 0.6527,
      "step": 4032
    },
    {
      "epoch": 0.36060443490701,
      "grad_norm": 0.1584503347334472,
      "learning_rate": 0.00014795493494018433,
      "loss": 0.705,
      "step": 4033
    },
    {
      "epoch": 0.36069384835479257,
      "grad_norm": 0.16488472000296706,
      "learning_rate": 0.00014792952000182926,
      "loss": 0.7079,
      "step": 4034
    },
    {
      "epoch": 0.3607832618025751,
      "grad_norm": 0.12974693491032172,
      "learning_rate": 0.00014790410104368324,
      "loss": 0.6948,
      "step": 4035
    },
    {
      "epoch": 0.36087267525035766,
      "grad_norm": 0.1648769463167282,
      "learning_rate": 0.00014787867806787807,
      "loss": 0.7324,
      "step": 4036
    },
    {
      "epoch": 0.3609620886981402,
      "grad_norm": 0.13917704518969642,
      "learning_rate": 0.00014785325107654606,
      "loss": 0.6905,
      "step": 4037
    },
    {
      "epoch": 0.36105150214592274,
      "grad_norm": 0.14510147562251877,
      "learning_rate": 0.00014782782007181962,
      "loss": 0.6736,
      "step": 4038
    },
    {
      "epoch": 0.3611409155937053,
      "grad_norm": 0.15146792023743907,
      "learning_rate": 0.0001478023850558317,
      "loss": 0.703,
      "step": 4039
    },
    {
      "epoch": 0.36123032904148783,
      "grad_norm": 0.14178140866139274,
      "learning_rate": 0.00014777694603071548,
      "loss": 0.7159,
      "step": 4040
    },
    {
      "epoch": 0.3613197424892704,
      "grad_norm": 0.14467826857325597,
      "learning_rate": 0.0001477515029986045,
      "loss": 0.6973,
      "step": 4041
    },
    {
      "epoch": 0.3614091559370529,
      "grad_norm": 0.13140305529250537,
      "learning_rate": 0.00014772605596163261,
      "loss": 0.6564,
      "step": 4042
    },
    {
      "epoch": 0.3614985693848355,
      "grad_norm": 0.14181315340412312,
      "learning_rate": 0.00014770060492193406,
      "loss": 0.6508,
      "step": 4043
    },
    {
      "epoch": 0.361587982832618,
      "grad_norm": 0.1429812112150909,
      "learning_rate": 0.00014767514988164336,
      "loss": 0.6837,
      "step": 4044
    },
    {
      "epoch": 0.3616773962804006,
      "grad_norm": 0.13691243418809512,
      "learning_rate": 0.00014764969084289544,
      "loss": 0.6734,
      "step": 4045
    },
    {
      "epoch": 0.3617668097281831,
      "grad_norm": 0.14604424133574634,
      "learning_rate": 0.00014762422780782548,
      "loss": 0.6716,
      "step": 4046
    },
    {
      "epoch": 0.36185622317596566,
      "grad_norm": 0.14734431566247677,
      "learning_rate": 0.00014759876077856905,
      "loss": 0.6559,
      "step": 4047
    },
    {
      "epoch": 0.36194563662374823,
      "grad_norm": 0.15732932596463112,
      "learning_rate": 0.00014757328975726207,
      "loss": 0.6941,
      "step": 4048
    },
    {
      "epoch": 0.36203505007153075,
      "grad_norm": 0.17308147555037282,
      "learning_rate": 0.0001475478147460407,
      "loss": 0.6939,
      "step": 4049
    },
    {
      "epoch": 0.3621244635193133,
      "grad_norm": 0.15824570127199988,
      "learning_rate": 0.00014752233574704153,
      "loss": 0.6994,
      "step": 4050
    },
    {
      "epoch": 0.36221387696709584,
      "grad_norm": 0.15476349804225661,
      "learning_rate": 0.0001474968527624015,
      "loss": 0.6921,
      "step": 4051
    },
    {
      "epoch": 0.3623032904148784,
      "grad_norm": 0.17801511235807657,
      "learning_rate": 0.00014747136579425772,
      "loss": 0.7313,
      "step": 4052
    },
    {
      "epoch": 0.3623927038626609,
      "grad_norm": 0.13343535047989066,
      "learning_rate": 0.00014744587484474784,
      "loss": 0.6766,
      "step": 4053
    },
    {
      "epoch": 0.3624821173104435,
      "grad_norm": 0.14927900970210825,
      "learning_rate": 0.00014742037991600975,
      "loss": 0.6751,
      "step": 4054
    },
    {
      "epoch": 0.362571530758226,
      "grad_norm": 0.137085793294876,
      "learning_rate": 0.00014739488101018168,
      "loss": 0.6612,
      "step": 4055
    },
    {
      "epoch": 0.3626609442060086,
      "grad_norm": 0.16955315467132187,
      "learning_rate": 0.00014736937812940217,
      "loss": 0.7216,
      "step": 4056
    },
    {
      "epoch": 0.36275035765379116,
      "grad_norm": 0.16351229178174195,
      "learning_rate": 0.0001473438712758101,
      "loss": 0.7028,
      "step": 4057
    },
    {
      "epoch": 0.36283977110157367,
      "grad_norm": 0.13451255739814047,
      "learning_rate": 0.00014731836045154477,
      "loss": 0.6983,
      "step": 4058
    },
    {
      "epoch": 0.36292918454935624,
      "grad_norm": 0.14096977698421784,
      "learning_rate": 0.00014729284565874562,
      "loss": 0.6845,
      "step": 4059
    },
    {
      "epoch": 0.36301859799713876,
      "grad_norm": 0.16398312086036834,
      "learning_rate": 0.0001472673268995527,
      "loss": 0.7145,
      "step": 4060
    },
    {
      "epoch": 0.36310801144492133,
      "grad_norm": 0.1283558547743054,
      "learning_rate": 0.0001472418041761061,
      "loss": 0.6387,
      "step": 4061
    },
    {
      "epoch": 0.36319742489270385,
      "grad_norm": 0.1528826411555671,
      "learning_rate": 0.00014721627749054647,
      "loss": 0.6703,
      "step": 4062
    },
    {
      "epoch": 0.3632868383404864,
      "grad_norm": 0.14474543701651277,
      "learning_rate": 0.00014719074684501468,
      "loss": 0.7257,
      "step": 4063
    },
    {
      "epoch": 0.36337625178826893,
      "grad_norm": 0.148478326448288,
      "learning_rate": 0.00014716521224165192,
      "loss": 0.7118,
      "step": 4064
    },
    {
      "epoch": 0.3634656652360515,
      "grad_norm": 0.15389748051461513,
      "learning_rate": 0.0001471396736825998,
      "loss": 0.7085,
      "step": 4065
    },
    {
      "epoch": 0.363555078683834,
      "grad_norm": 0.14745869745498455,
      "learning_rate": 0.00014711413117000013,
      "loss": 0.6798,
      "step": 4066
    },
    {
      "epoch": 0.3636444921316166,
      "grad_norm": 0.15798714892973265,
      "learning_rate": 0.0001470885847059952,
      "loss": 0.6857,
      "step": 4067
    },
    {
      "epoch": 0.36373390557939916,
      "grad_norm": 0.14923246356801895,
      "learning_rate": 0.00014706303429272755,
      "loss": 0.6911,
      "step": 4068
    },
    {
      "epoch": 0.3638233190271817,
      "grad_norm": 0.14982103423685225,
      "learning_rate": 0.00014703747993234003,
      "loss": 0.6752,
      "step": 4069
    },
    {
      "epoch": 0.36391273247496425,
      "grad_norm": 0.127912539713144,
      "learning_rate": 0.00014701192162697591,
      "loss": 0.6297,
      "step": 4070
    },
    {
      "epoch": 0.36400214592274677,
      "grad_norm": 0.1464110982687707,
      "learning_rate": 0.00014698635937877868,
      "loss": 0.7036,
      "step": 4071
    },
    {
      "epoch": 0.36409155937052934,
      "grad_norm": 0.1538040109374768,
      "learning_rate": 0.0001469607931898922,
      "loss": 0.6616,
      "step": 4072
    },
    {
      "epoch": 0.36418097281831185,
      "grad_norm": 0.2097682001852825,
      "learning_rate": 0.00014693522306246076,
      "loss": 0.7004,
      "step": 4073
    },
    {
      "epoch": 0.3642703862660944,
      "grad_norm": 0.1609473146149468,
      "learning_rate": 0.00014690964899862882,
      "loss": 0.7128,
      "step": 4074
    },
    {
      "epoch": 0.36435979971387694,
      "grad_norm": 0.15318376576691817,
      "learning_rate": 0.0001468840710005413,
      "loss": 0.6842,
      "step": 4075
    },
    {
      "epoch": 0.3644492131616595,
      "grad_norm": 0.15063246001694802,
      "learning_rate": 0.00014685848907034331,
      "loss": 0.6808,
      "step": 4076
    },
    {
      "epoch": 0.3645386266094421,
      "grad_norm": 0.13703314698017732,
      "learning_rate": 0.00014683290321018048,
      "loss": 0.6896,
      "step": 4077
    },
    {
      "epoch": 0.3646280400572246,
      "grad_norm": 0.12892798468221725,
      "learning_rate": 0.0001468073134221986,
      "loss": 0.6759,
      "step": 4078
    },
    {
      "epoch": 0.36471745350500717,
      "grad_norm": 0.1570931233840327,
      "learning_rate": 0.0001467817197085439,
      "loss": 0.7069,
      "step": 4079
    },
    {
      "epoch": 0.3648068669527897,
      "grad_norm": 0.14660589852794448,
      "learning_rate": 0.0001467561220713628,
      "loss": 0.6928,
      "step": 4080
    },
    {
      "epoch": 0.36489628040057226,
      "grad_norm": 0.13367519569496683,
      "learning_rate": 0.00014673052051280227,
      "loss": 0.6605,
      "step": 4081
    },
    {
      "epoch": 0.3649856938483548,
      "grad_norm": 0.13253927694637174,
      "learning_rate": 0.0001467049150350094,
      "loss": 0.6855,
      "step": 4082
    },
    {
      "epoch": 0.36507510729613735,
      "grad_norm": 0.1441703839804374,
      "learning_rate": 0.00014667930564013173,
      "loss": 0.6566,
      "step": 4083
    },
    {
      "epoch": 0.36516452074391986,
      "grad_norm": 0.14084214658247357,
      "learning_rate": 0.00014665369233031705,
      "loss": 0.6597,
      "step": 4084
    },
    {
      "epoch": 0.36525393419170243,
      "grad_norm": 0.15611312799924526,
      "learning_rate": 0.00014662807510771355,
      "loss": 0.6819,
      "step": 4085
    },
    {
      "epoch": 0.365343347639485,
      "grad_norm": 0.15832346583077958,
      "learning_rate": 0.0001466024539744697,
      "loss": 0.708,
      "step": 4086
    },
    {
      "epoch": 0.3654327610872675,
      "grad_norm": 0.14306700019901608,
      "learning_rate": 0.0001465768289327343,
      "loss": 0.6773,
      "step": 4087
    },
    {
      "epoch": 0.3655221745350501,
      "grad_norm": 0.14785405847952954,
      "learning_rate": 0.00014655119998465652,
      "loss": 0.6595,
      "step": 4088
    },
    {
      "epoch": 0.3656115879828326,
      "grad_norm": 0.15445636047919903,
      "learning_rate": 0.00014652556713238578,
      "loss": 0.6987,
      "step": 4089
    },
    {
      "epoch": 0.3657010014306152,
      "grad_norm": 0.1689315163712691,
      "learning_rate": 0.000146499930378072,
      "loss": 0.7043,
      "step": 4090
    },
    {
      "epoch": 0.3657904148783977,
      "grad_norm": 0.15489845657558282,
      "learning_rate": 0.00014647428972386513,
      "loss": 0.6548,
      "step": 4091
    },
    {
      "epoch": 0.36587982832618027,
      "grad_norm": 0.15809961271425696,
      "learning_rate": 0.00014644864517191576,
      "loss": 0.7244,
      "step": 4092
    },
    {
      "epoch": 0.3659692417739628,
      "grad_norm": 0.1684531463942279,
      "learning_rate": 0.00014642299672437461,
      "loss": 0.7093,
      "step": 4093
    },
    {
      "epoch": 0.36605865522174535,
      "grad_norm": 0.19102241648193433,
      "learning_rate": 0.00014639734438339278,
      "loss": 0.3749,
      "step": 4094
    },
    {
      "epoch": 0.36614806866952787,
      "grad_norm": 0.16601896122891957,
      "learning_rate": 0.0001463716881511217,
      "loss": 0.7246,
      "step": 4095
    },
    {
      "epoch": 0.36623748211731044,
      "grad_norm": 0.1635109939896879,
      "learning_rate": 0.00014634602802971312,
      "loss": 0.6976,
      "step": 4096
    },
    {
      "epoch": 0.366326895565093,
      "grad_norm": 0.1410882376073432,
      "learning_rate": 0.0001463203640213192,
      "loss": 0.6578,
      "step": 4097
    },
    {
      "epoch": 0.36641630901287553,
      "grad_norm": 0.14239670453907832,
      "learning_rate": 0.00014629469612809223,
      "loss": 0.6907,
      "step": 4098
    },
    {
      "epoch": 0.3665057224606581,
      "grad_norm": 0.13317823989350475,
      "learning_rate": 0.00014626902435218504,
      "loss": 0.6838,
      "step": 4099
    },
    {
      "epoch": 0.3665951359084406,
      "grad_norm": 0.15912855034470683,
      "learning_rate": 0.00014624334869575066,
      "loss": 0.7255,
      "step": 4100
    },
    {
      "epoch": 0.3666845493562232,
      "grad_norm": 0.16489185246274055,
      "learning_rate": 0.00014621766916094248,
      "loss": 0.6777,
      "step": 4101
    },
    {
      "epoch": 0.3667739628040057,
      "grad_norm": 0.15287131549749783,
      "learning_rate": 0.00014619198574991417,
      "loss": 0.6703,
      "step": 4102
    },
    {
      "epoch": 0.3668633762517883,
      "grad_norm": 0.13523034664500774,
      "learning_rate": 0.00014616629846481982,
      "loss": 0.6327,
      "step": 4103
    },
    {
      "epoch": 0.3669527896995708,
      "grad_norm": 0.15849422095746316,
      "learning_rate": 0.00014614060730781377,
      "loss": 0.6698,
      "step": 4104
    },
    {
      "epoch": 0.36704220314735336,
      "grad_norm": 0.14171436020765807,
      "learning_rate": 0.0001461149122810507,
      "loss": 0.6274,
      "step": 4105
    },
    {
      "epoch": 0.36713161659513593,
      "grad_norm": 0.14663065300893524,
      "learning_rate": 0.00014608921338668562,
      "loss": 0.6956,
      "step": 4106
    },
    {
      "epoch": 0.36722103004291845,
      "grad_norm": 0.15915647741662883,
      "learning_rate": 0.00014606351062687391,
      "loss": 0.6879,
      "step": 4107
    },
    {
      "epoch": 0.367310443490701,
      "grad_norm": 0.14567913597733737,
      "learning_rate": 0.00014603780400377118,
      "loss": 0.6777,
      "step": 4108
    },
    {
      "epoch": 0.36739985693848354,
      "grad_norm": 0.15430825895044858,
      "learning_rate": 0.00014601209351953345,
      "loss": 0.7039,
      "step": 4109
    },
    {
      "epoch": 0.3674892703862661,
      "grad_norm": 0.12631040484161826,
      "learning_rate": 0.00014598637917631697,
      "loss": 0.6435,
      "step": 4110
    },
    {
      "epoch": 0.3675786838340486,
      "grad_norm": 0.15263872708396925,
      "learning_rate": 0.00014596066097627842,
      "loss": 0.6923,
      "step": 4111
    },
    {
      "epoch": 0.3676680972818312,
      "grad_norm": 0.13652118188308113,
      "learning_rate": 0.00014593493892157473,
      "loss": 0.6669,
      "step": 4112
    },
    {
      "epoch": 0.3677575107296137,
      "grad_norm": 0.16094700658370062,
      "learning_rate": 0.00014590921301436318,
      "loss": 0.7416,
      "step": 4113
    },
    {
      "epoch": 0.3678469241773963,
      "grad_norm": 0.1446436641539222,
      "learning_rate": 0.0001458834832568014,
      "loss": 0.6889,
      "step": 4114
    },
    {
      "epoch": 0.3679363376251788,
      "grad_norm": 0.1356042720264935,
      "learning_rate": 0.00014585774965104732,
      "loss": 0.6511,
      "step": 4115
    },
    {
      "epoch": 0.36802575107296137,
      "grad_norm": 0.14247077661269464,
      "learning_rate": 0.00014583201219925908,
      "loss": 0.7021,
      "step": 4116
    },
    {
      "epoch": 0.36811516452074394,
      "grad_norm": 0.15884718017928537,
      "learning_rate": 0.0001458062709035954,
      "loss": 0.7135,
      "step": 4117
    },
    {
      "epoch": 0.36820457796852646,
      "grad_norm": 0.15457041609028072,
      "learning_rate": 0.00014578052576621507,
      "loss": 0.726,
      "step": 4118
    },
    {
      "epoch": 0.36829399141630903,
      "grad_norm": 0.1380198131169513,
      "learning_rate": 0.00014575477678927732,
      "loss": 0.7006,
      "step": 4119
    },
    {
      "epoch": 0.36838340486409155,
      "grad_norm": 0.15219325396004613,
      "learning_rate": 0.00014572902397494173,
      "loss": 0.6834,
      "step": 4120
    },
    {
      "epoch": 0.3684728183118741,
      "grad_norm": 0.14476643187762703,
      "learning_rate": 0.0001457032673253681,
      "loss": 0.7115,
      "step": 4121
    },
    {
      "epoch": 0.36856223175965663,
      "grad_norm": 0.15606937081484767,
      "learning_rate": 0.00014567750684271665,
      "loss": 0.6631,
      "step": 4122
    },
    {
      "epoch": 0.3686516452074392,
      "grad_norm": 0.13009469677091745,
      "learning_rate": 0.00014565174252914785,
      "loss": 0.6564,
      "step": 4123
    },
    {
      "epoch": 0.3687410586552217,
      "grad_norm": 0.14790246264377863,
      "learning_rate": 0.00014562597438682256,
      "loss": 0.6506,
      "step": 4124
    },
    {
      "epoch": 0.3688304721030043,
      "grad_norm": 0.13724520574989868,
      "learning_rate": 0.0001456002024179019,
      "loss": 0.6962,
      "step": 4125
    },
    {
      "epoch": 0.36891988555078686,
      "grad_norm": 0.1614632662402212,
      "learning_rate": 0.0001455744266245473,
      "loss": 0.6674,
      "step": 4126
    },
    {
      "epoch": 0.3690092989985694,
      "grad_norm": 0.15966935014888578,
      "learning_rate": 0.0001455486470089206,
      "loss": 0.6883,
      "step": 4127
    },
    {
      "epoch": 0.36909871244635195,
      "grad_norm": 0.14651088138332596,
      "learning_rate": 0.0001455228635731839,
      "loss": 0.6491,
      "step": 4128
    },
    {
      "epoch": 0.36918812589413447,
      "grad_norm": 0.1441941625467563,
      "learning_rate": 0.00014549707631949957,
      "loss": 0.6628,
      "step": 4129
    },
    {
      "epoch": 0.36927753934191704,
      "grad_norm": 0.14768992133566877,
      "learning_rate": 0.00014547128525003045,
      "loss": 0.6704,
      "step": 4130
    },
    {
      "epoch": 0.36936695278969955,
      "grad_norm": 0.17031433604515248,
      "learning_rate": 0.0001454454903669395,
      "loss": 0.7238,
      "step": 4131
    },
    {
      "epoch": 0.3694563662374821,
      "grad_norm": 0.177798090313897,
      "learning_rate": 0.0001454196916723902,
      "loss": 0.68,
      "step": 4132
    },
    {
      "epoch": 0.36954577968526464,
      "grad_norm": 0.14960129248516757,
      "learning_rate": 0.00014539388916854617,
      "loss": 0.6329,
      "step": 4133
    },
    {
      "epoch": 0.3696351931330472,
      "grad_norm": 0.151312036199025,
      "learning_rate": 0.00014536808285757152,
      "loss": 0.6753,
      "step": 4134
    },
    {
      "epoch": 0.3697246065808298,
      "grad_norm": 0.1504776753760919,
      "learning_rate": 0.00014534227274163051,
      "loss": 0.6574,
      "step": 4135
    },
    {
      "epoch": 0.3698140200286123,
      "grad_norm": 0.13652331549513613,
      "learning_rate": 0.00014531645882288788,
      "loss": 0.7127,
      "step": 4136
    },
    {
      "epoch": 0.36990343347639487,
      "grad_norm": 0.1362332167889646,
      "learning_rate": 0.00014529064110350856,
      "loss": 0.6727,
      "step": 4137
    },
    {
      "epoch": 0.3699928469241774,
      "grad_norm": 0.1471425808730563,
      "learning_rate": 0.00014526481958565787,
      "loss": 0.7127,
      "step": 4138
    },
    {
      "epoch": 0.37008226037195996,
      "grad_norm": 0.1350609837413488,
      "learning_rate": 0.00014523899427150143,
      "loss": 0.6713,
      "step": 4139
    },
    {
      "epoch": 0.3701716738197425,
      "grad_norm": 0.15316892160883508,
      "learning_rate": 0.00014521316516320515,
      "loss": 0.6944,
      "step": 4140
    },
    {
      "epoch": 0.37026108726752505,
      "grad_norm": 0.15251463285302555,
      "learning_rate": 0.00014518733226293534,
      "loss": 0.6935,
      "step": 4141
    },
    {
      "epoch": 0.37035050071530756,
      "grad_norm": 0.14358383776794817,
      "learning_rate": 0.00014516149557285856,
      "loss": 0.6734,
      "step": 4142
    },
    {
      "epoch": 0.37043991416309013,
      "grad_norm": 0.13593789997298525,
      "learning_rate": 0.00014513565509514167,
      "loss": 0.6626,
      "step": 4143
    },
    {
      "epoch": 0.37052932761087265,
      "grad_norm": 0.13830816186509667,
      "learning_rate": 0.00014510981083195188,
      "loss": 0.6613,
      "step": 4144
    },
    {
      "epoch": 0.3706187410586552,
      "grad_norm": 0.24420277129874954,
      "learning_rate": 0.00014508396278545678,
      "loss": 0.3956,
      "step": 4145
    },
    {
      "epoch": 0.3707081545064378,
      "grad_norm": 0.16685164667223937,
      "learning_rate": 0.0001450581109578241,
      "loss": 0.7234,
      "step": 4146
    },
    {
      "epoch": 0.3707975679542203,
      "grad_norm": 0.15085515300212363,
      "learning_rate": 0.00014503225535122212,
      "loss": 0.7011,
      "step": 4147
    },
    {
      "epoch": 0.3708869814020029,
      "grad_norm": 0.13208582067068178,
      "learning_rate": 0.00014500639596781926,
      "loss": 0.6439,
      "step": 4148
    },
    {
      "epoch": 0.3709763948497854,
      "grad_norm": 0.17089487626496438,
      "learning_rate": 0.00014498053280978434,
      "loss": 0.6895,
      "step": 4149
    },
    {
      "epoch": 0.37106580829756797,
      "grad_norm": 0.15006540293669737,
      "learning_rate": 0.00014495466587928642,
      "loss": 0.6966,
      "step": 4150
    },
    {
      "epoch": 0.3711552217453505,
      "grad_norm": 0.13554269509468,
      "learning_rate": 0.00014492879517849497,
      "loss": 0.6653,
      "step": 4151
    },
    {
      "epoch": 0.37124463519313305,
      "grad_norm": 0.16429409887754112,
      "learning_rate": 0.0001449029207095798,
      "loss": 0.6977,
      "step": 4152
    },
    {
      "epoch": 0.37133404864091557,
      "grad_norm": 0.1402793317400411,
      "learning_rate": 0.00014487704247471078,
      "loss": 0.6985,
      "step": 4153
    },
    {
      "epoch": 0.37142346208869814,
      "grad_norm": 0.15118119275890457,
      "learning_rate": 0.00014485116047605848,
      "loss": 0.6893,
      "step": 4154
    },
    {
      "epoch": 0.3715128755364807,
      "grad_norm": 0.15325916631082237,
      "learning_rate": 0.00014482527471579353,
      "loss": 0.6202,
      "step": 4155
    },
    {
      "epoch": 0.37160228898426323,
      "grad_norm": 0.15471962018852192,
      "learning_rate": 0.00014479938519608687,
      "loss": 0.6855,
      "step": 4156
    },
    {
      "epoch": 0.3716917024320458,
      "grad_norm": 0.1429999188795476,
      "learning_rate": 0.0001447734919191099,
      "loss": 0.678,
      "step": 4157
    },
    {
      "epoch": 0.3717811158798283,
      "grad_norm": 0.1496172155746428,
      "learning_rate": 0.00014474759488703425,
      "loss": 0.6882,
      "step": 4158
    },
    {
      "epoch": 0.3718705293276109,
      "grad_norm": 0.13555071482784453,
      "learning_rate": 0.00014472169410203187,
      "loss": 0.6979,
      "step": 4159
    },
    {
      "epoch": 0.3719599427753934,
      "grad_norm": 0.14045549537675184,
      "learning_rate": 0.00014469578956627496,
      "loss": 0.661,
      "step": 4160
    },
    {
      "epoch": 0.372049356223176,
      "grad_norm": 0.12934765119006877,
      "learning_rate": 0.0001446698812819362,
      "loss": 0.6425,
      "step": 4161
    },
    {
      "epoch": 0.3721387696709585,
      "grad_norm": 0.1378363050929252,
      "learning_rate": 0.00014464396925118847,
      "loss": 0.6701,
      "step": 4162
    },
    {
      "epoch": 0.37222818311874106,
      "grad_norm": 0.13374987505459762,
      "learning_rate": 0.00014461805347620489,
      "loss": 0.6372,
      "step": 4163
    },
    {
      "epoch": 0.3723175965665236,
      "grad_norm": 0.1474210606057262,
      "learning_rate": 0.00014459213395915906,
      "loss": 0.6932,
      "step": 4164
    },
    {
      "epoch": 0.37240701001430615,
      "grad_norm": 0.14781463762508476,
      "learning_rate": 0.00014456621070222484,
      "loss": 0.6885,
      "step": 4165
    },
    {
      "epoch": 0.3724964234620887,
      "grad_norm": 0.16208657439850663,
      "learning_rate": 0.00014454028370757636,
      "loss": 0.6832,
      "step": 4166
    },
    {
      "epoch": 0.37258583690987124,
      "grad_norm": 0.1379055308217268,
      "learning_rate": 0.00014451435297738806,
      "loss": 0.6778,
      "step": 4167
    },
    {
      "epoch": 0.3726752503576538,
      "grad_norm": 0.14187716038588324,
      "learning_rate": 0.00014448841851383472,
      "loss": 0.6972,
      "step": 4168
    },
    {
      "epoch": 0.3727646638054363,
      "grad_norm": 0.13280134532132862,
      "learning_rate": 0.00014446248031909148,
      "loss": 0.6714,
      "step": 4169
    },
    {
      "epoch": 0.3728540772532189,
      "grad_norm": 0.15055797293151132,
      "learning_rate": 0.0001444365383953337,
      "loss": 0.6803,
      "step": 4170
    },
    {
      "epoch": 0.3729434907010014,
      "grad_norm": 0.17275634335644796,
      "learning_rate": 0.00014441059274473706,
      "loss": 0.7056,
      "step": 4171
    },
    {
      "epoch": 0.373032904148784,
      "grad_norm": 0.16168670351410364,
      "learning_rate": 0.00014438464336947773,
      "loss": 0.7441,
      "step": 4172
    },
    {
      "epoch": 0.3731223175965665,
      "grad_norm": 0.18143894862527654,
      "learning_rate": 0.0001443586902717319,
      "loss": 0.6998,
      "step": 4173
    },
    {
      "epoch": 0.37321173104434907,
      "grad_norm": 0.15448627233505494,
      "learning_rate": 0.0001443327334536763,
      "loss": 0.7,
      "step": 4174
    },
    {
      "epoch": 0.37330114449213164,
      "grad_norm": 0.14850379862345084,
      "learning_rate": 0.00014430677291748788,
      "loss": 0.6772,
      "step": 4175
    },
    {
      "epoch": 0.37339055793991416,
      "grad_norm": 0.12362324019246163,
      "learning_rate": 0.00014428080866534396,
      "loss": 0.6484,
      "step": 4176
    },
    {
      "epoch": 0.37347997138769673,
      "grad_norm": 0.13383973466773322,
      "learning_rate": 0.00014425484069942207,
      "loss": 0.6147,
      "step": 4177
    },
    {
      "epoch": 0.37356938483547925,
      "grad_norm": 0.1411934239646667,
      "learning_rate": 0.00014422886902190014,
      "loss": 0.6656,
      "step": 4178
    },
    {
      "epoch": 0.3736587982832618,
      "grad_norm": 0.14638339116747603,
      "learning_rate": 0.00014420289363495638,
      "loss": 0.684,
      "step": 4179
    },
    {
      "epoch": 0.37374821173104433,
      "grad_norm": 0.15979502160397022,
      "learning_rate": 0.00014417691454076932,
      "loss": 0.6718,
      "step": 4180
    },
    {
      "epoch": 0.3738376251788269,
      "grad_norm": 0.13069828609324616,
      "learning_rate": 0.00014415093174151777,
      "loss": 0.6512,
      "step": 4181
    },
    {
      "epoch": 0.3739270386266094,
      "grad_norm": 0.17509346625806202,
      "learning_rate": 0.0001441249452393809,
      "loss": 0.6823,
      "step": 4182
    },
    {
      "epoch": 0.374016452074392,
      "grad_norm": 0.15674614975663537,
      "learning_rate": 0.0001440989550365382,
      "loss": 0.6012,
      "step": 4183
    },
    {
      "epoch": 0.3741058655221745,
      "grad_norm": 0.1345426964880119,
      "learning_rate": 0.00014407296113516934,
      "loss": 0.6565,
      "step": 4184
    },
    {
      "epoch": 0.3741952789699571,
      "grad_norm": 0.19444571836544702,
      "learning_rate": 0.00014404696353745452,
      "loss": 0.4007,
      "step": 4185
    },
    {
      "epoch": 0.37428469241773965,
      "grad_norm": 0.17525287782297722,
      "learning_rate": 0.000144020962245574,
      "loss": 0.6674,
      "step": 4186
    },
    {
      "epoch": 0.37437410586552217,
      "grad_norm": 0.13877681609008552,
      "learning_rate": 0.00014399495726170858,
      "loss": 0.66,
      "step": 4187
    },
    {
      "epoch": 0.37446351931330474,
      "grad_norm": 0.13559087426424746,
      "learning_rate": 0.0001439689485880392,
      "loss": 0.6327,
      "step": 4188
    },
    {
      "epoch": 0.37455293276108725,
      "grad_norm": 0.14200510932516133,
      "learning_rate": 0.00014394293622674724,
      "loss": 0.6696,
      "step": 4189
    },
    {
      "epoch": 0.3746423462088698,
      "grad_norm": 0.14702555215774138,
      "learning_rate": 0.00014391692018001425,
      "loss": 0.6926,
      "step": 4190
    },
    {
      "epoch": 0.37473175965665234,
      "grad_norm": 0.181601019425506,
      "learning_rate": 0.00014389090045002225,
      "loss": 0.6597,
      "step": 4191
    },
    {
      "epoch": 0.3748211731044349,
      "grad_norm": 0.15297122952304992,
      "learning_rate": 0.0001438648770389534,
      "loss": 0.6547,
      "step": 4192
    },
    {
      "epoch": 0.37491058655221743,
      "grad_norm": 0.15761559962616198,
      "learning_rate": 0.0001438388499489903,
      "loss": 0.6941,
      "step": 4193
    },
    {
      "epoch": 0.375,
      "grad_norm": 0.14428916462329153,
      "learning_rate": 0.00014381281918231578,
      "loss": 0.6872,
      "step": 4194
    },
    {
      "epoch": 0.37508941344778257,
      "grad_norm": 0.15224754366873944,
      "learning_rate": 0.00014378678474111304,
      "loss": 0.6614,
      "step": 4195
    },
    {
      "epoch": 0.3751788268955651,
      "grad_norm": 0.1457769849043496,
      "learning_rate": 0.00014376074662756557,
      "loss": 0.6449,
      "step": 4196
    },
    {
      "epoch": 0.37526824034334766,
      "grad_norm": 0.14632494157096604,
      "learning_rate": 0.0001437347048438571,
      "loss": 0.6942,
      "step": 4197
    },
    {
      "epoch": 0.3753576537911302,
      "grad_norm": 0.14318987331585786,
      "learning_rate": 0.00014370865939217176,
      "loss": 0.7166,
      "step": 4198
    },
    {
      "epoch": 0.37544706723891275,
      "grad_norm": 0.1542712052970897,
      "learning_rate": 0.00014368261027469394,
      "loss": 0.6949,
      "step": 4199
    },
    {
      "epoch": 0.37553648068669526,
      "grad_norm": 0.14117120559991947,
      "learning_rate": 0.00014365655749360833,
      "loss": 0.7052,
      "step": 4200
    },
    {
      "epoch": 0.37562589413447783,
      "grad_norm": 0.12476973911810627,
      "learning_rate": 0.0001436305010511,
      "loss": 0.6675,
      "step": 4201
    },
    {
      "epoch": 0.37571530758226035,
      "grad_norm": 0.1475901022906067,
      "learning_rate": 0.00014360444094935424,
      "loss": 0.6959,
      "step": 4202
    },
    {
      "epoch": 0.3758047210300429,
      "grad_norm": 0.1425078129555123,
      "learning_rate": 0.00014357837719055667,
      "loss": 0.6901,
      "step": 4203
    },
    {
      "epoch": 0.3758941344778255,
      "grad_norm": 0.136089751083534,
      "learning_rate": 0.00014355230977689323,
      "loss": 0.6762,
      "step": 4204
    },
    {
      "epoch": 0.375983547925608,
      "grad_norm": 0.1574507799777863,
      "learning_rate": 0.00014352623871055018,
      "loss": 0.6697,
      "step": 4205
    },
    {
      "epoch": 0.3760729613733906,
      "grad_norm": 0.17482180144728104,
      "learning_rate": 0.00014350016399371405,
      "loss": 0.7067,
      "step": 4206
    },
    {
      "epoch": 0.3761623748211731,
      "grad_norm": 0.13799470586667678,
      "learning_rate": 0.00014347408562857169,
      "loss": 0.6512,
      "step": 4207
    },
    {
      "epoch": 0.37625178826895567,
      "grad_norm": 0.15650033614110123,
      "learning_rate": 0.00014344800361731027,
      "loss": 0.7197,
      "step": 4208
    },
    {
      "epoch": 0.3763412017167382,
      "grad_norm": 0.1509595585950969,
      "learning_rate": 0.00014342191796211726,
      "loss": 0.6955,
      "step": 4209
    },
    {
      "epoch": 0.37643061516452075,
      "grad_norm": 0.16376010650169678,
      "learning_rate": 0.00014339582866518044,
      "loss": 0.717,
      "step": 4210
    },
    {
      "epoch": 0.37652002861230327,
      "grad_norm": 0.18131972143789507,
      "learning_rate": 0.00014336973572868787,
      "loss": 0.707,
      "step": 4211
    },
    {
      "epoch": 0.37660944206008584,
      "grad_norm": 0.16723536048224763,
      "learning_rate": 0.00014334363915482795,
      "loss": 0.7394,
      "step": 4212
    },
    {
      "epoch": 0.37669885550786836,
      "grad_norm": 0.15113682294225397,
      "learning_rate": 0.00014331753894578937,
      "loss": 0.6714,
      "step": 4213
    },
    {
      "epoch": 0.37678826895565093,
      "grad_norm": 0.14487571413978384,
      "learning_rate": 0.00014329143510376108,
      "loss": 0.6814,
      "step": 4214
    },
    {
      "epoch": 0.3768776824034335,
      "grad_norm": 0.13887811035971404,
      "learning_rate": 0.00014326532763093245,
      "loss": 0.6727,
      "step": 4215
    },
    {
      "epoch": 0.376967095851216,
      "grad_norm": 0.15368989600371333,
      "learning_rate": 0.00014323921652949301,
      "loss": 0.6779,
      "step": 4216
    },
    {
      "epoch": 0.3770565092989986,
      "grad_norm": 0.12517501171404685,
      "learning_rate": 0.00014321310180163272,
      "loss": 0.6595,
      "step": 4217
    },
    {
      "epoch": 0.3771459227467811,
      "grad_norm": 0.14394548111338334,
      "learning_rate": 0.00014318698344954175,
      "loss": 0.6639,
      "step": 4218
    },
    {
      "epoch": 0.3772353361945637,
      "grad_norm": 0.14196922918720228,
      "learning_rate": 0.00014316086147541065,
      "loss": 0.682,
      "step": 4219
    },
    {
      "epoch": 0.3773247496423462,
      "grad_norm": 0.14246275181413132,
      "learning_rate": 0.00014313473588143026,
      "loss": 0.6951,
      "step": 4220
    },
    {
      "epoch": 0.37741416309012876,
      "grad_norm": 0.14855169281844618,
      "learning_rate": 0.0001431086066697916,
      "loss": 0.6607,
      "step": 4221
    },
    {
      "epoch": 0.3775035765379113,
      "grad_norm": 0.1469168823311638,
      "learning_rate": 0.0001430824738426862,
      "loss": 0.6446,
      "step": 4222
    },
    {
      "epoch": 0.37759298998569385,
      "grad_norm": 0.14927373093325239,
      "learning_rate": 0.00014305633740230574,
      "loss": 0.6406,
      "step": 4223
    },
    {
      "epoch": 0.3776824034334764,
      "grad_norm": 0.1597512616910592,
      "learning_rate": 0.00014303019735084226,
      "loss": 0.663,
      "step": 4224
    },
    {
      "epoch": 0.37777181688125894,
      "grad_norm": 0.14588928738011583,
      "learning_rate": 0.00014300405369048808,
      "loss": 0.6829,
      "step": 4225
    },
    {
      "epoch": 0.3778612303290415,
      "grad_norm": 0.1329570811356824,
      "learning_rate": 0.00014297790642343587,
      "loss": 0.6509,
      "step": 4226
    },
    {
      "epoch": 0.377950643776824,
      "grad_norm": 0.16414726502681443,
      "learning_rate": 0.00014295175555187854,
      "loss": 0.6758,
      "step": 4227
    },
    {
      "epoch": 0.3780400572246066,
      "grad_norm": 0.165351331074881,
      "learning_rate": 0.00014292560107800935,
      "loss": 0.6949,
      "step": 4228
    },
    {
      "epoch": 0.3781294706723891,
      "grad_norm": 0.1417736646813673,
      "learning_rate": 0.00014289944300402186,
      "loss": 0.6588,
      "step": 4229
    },
    {
      "epoch": 0.3782188841201717,
      "grad_norm": 0.14723304091938885,
      "learning_rate": 0.00014287328133210986,
      "loss": 0.6827,
      "step": 4230
    },
    {
      "epoch": 0.3783082975679542,
      "grad_norm": 0.16754339952200967,
      "learning_rate": 0.00014284711606446754,
      "loss": 0.6943,
      "step": 4231
    },
    {
      "epoch": 0.37839771101573677,
      "grad_norm": 0.1524615996632398,
      "learning_rate": 0.00014282094720328937,
      "loss": 0.675,
      "step": 4232
    },
    {
      "epoch": 0.3784871244635193,
      "grad_norm": 0.15202249555897349,
      "learning_rate": 0.00014279477475077006,
      "loss": 0.7044,
      "step": 4233
    },
    {
      "epoch": 0.37857653791130186,
      "grad_norm": 0.13088231908948286,
      "learning_rate": 0.00014276859870910463,
      "loss": 0.6481,
      "step": 4234
    },
    {
      "epoch": 0.37866595135908443,
      "grad_norm": 0.11859924157897271,
      "learning_rate": 0.00014274241908048856,
      "loss": 0.659,
      "step": 4235
    },
    {
      "epoch": 0.37875536480686695,
      "grad_norm": 0.14919759701753155,
      "learning_rate": 0.00014271623586711738,
      "loss": 0.6471,
      "step": 4236
    },
    {
      "epoch": 0.3788447782546495,
      "grad_norm": 0.1617228464005841,
      "learning_rate": 0.00014269004907118706,
      "loss": 0.6821,
      "step": 4237
    },
    {
      "epoch": 0.37893419170243203,
      "grad_norm": 0.14594099316861567,
      "learning_rate": 0.0001426638586948939,
      "loss": 0.6885,
      "step": 4238
    },
    {
      "epoch": 0.3790236051502146,
      "grad_norm": 0.1595890120592308,
      "learning_rate": 0.00014263766474043445,
      "loss": 0.6374,
      "step": 4239
    },
    {
      "epoch": 0.3791130185979971,
      "grad_norm": 0.180817190130555,
      "learning_rate": 0.00014261146721000553,
      "loss": 0.7095,
      "step": 4240
    },
    {
      "epoch": 0.3792024320457797,
      "grad_norm": 0.12193644925987389,
      "learning_rate": 0.00014258526610580433,
      "loss": 0.6385,
      "step": 4241
    },
    {
      "epoch": 0.3792918454935622,
      "grad_norm": 0.16472882453876203,
      "learning_rate": 0.0001425590614300283,
      "loss": 0.7252,
      "step": 4242
    },
    {
      "epoch": 0.3793812589413448,
      "grad_norm": 0.1572665407924379,
      "learning_rate": 0.0001425328531848752,
      "loss": 0.6562,
      "step": 4243
    },
    {
      "epoch": 0.37947067238912735,
      "grad_norm": 0.13492550518430224,
      "learning_rate": 0.00014250664137254303,
      "loss": 0.6651,
      "step": 4244
    },
    {
      "epoch": 0.37956008583690987,
      "grad_norm": 0.160379369061855,
      "learning_rate": 0.0001424804259952302,
      "loss": 0.6814,
      "step": 4245
    },
    {
      "epoch": 0.37964949928469244,
      "grad_norm": 0.1706162750397615,
      "learning_rate": 0.00014245420705513535,
      "loss": 0.7444,
      "step": 4246
    },
    {
      "epoch": 0.37973891273247495,
      "grad_norm": 0.185892405762252,
      "learning_rate": 0.0001424279845544574,
      "loss": 0.7348,
      "step": 4247
    },
    {
      "epoch": 0.3798283261802575,
      "grad_norm": 0.19084202640265333,
      "learning_rate": 0.00014240175849539565,
      "loss": 0.4038,
      "step": 4248
    },
    {
      "epoch": 0.37991773962804004,
      "grad_norm": 0.1506009506455417,
      "learning_rate": 0.00014237552888014961,
      "loss": 0.6839,
      "step": 4249
    },
    {
      "epoch": 0.3800071530758226,
      "grad_norm": 0.15060573483373582,
      "learning_rate": 0.00014234929571091916,
      "loss": 0.6706,
      "step": 4250
    },
    {
      "epoch": 0.38009656652360513,
      "grad_norm": 0.14517968799682593,
      "learning_rate": 0.0001423230589899044,
      "loss": 0.7134,
      "step": 4251
    },
    {
      "epoch": 0.3801859799713877,
      "grad_norm": 0.14994347755187856,
      "learning_rate": 0.00014229681871930582,
      "loss": 0.6822,
      "step": 4252
    },
    {
      "epoch": 0.3802753934191702,
      "grad_norm": 0.13999621228550674,
      "learning_rate": 0.00014227057490132414,
      "loss": 0.6248,
      "step": 4253
    },
    {
      "epoch": 0.3803648068669528,
      "grad_norm": 0.14659841020059855,
      "learning_rate": 0.00014224432753816036,
      "loss": 0.6878,
      "step": 4254
    },
    {
      "epoch": 0.38045422031473536,
      "grad_norm": 0.15274872769345577,
      "learning_rate": 0.00014221807663201586,
      "loss": 0.6807,
      "step": 4255
    },
    {
      "epoch": 0.3805436337625179,
      "grad_norm": 0.16603593563938565,
      "learning_rate": 0.0001421918221850923,
      "loss": 0.7385,
      "step": 4256
    },
    {
      "epoch": 0.38063304721030045,
      "grad_norm": 0.15182938379131142,
      "learning_rate": 0.0001421655641995915,
      "loss": 0.6784,
      "step": 4257
    },
    {
      "epoch": 0.38072246065808296,
      "grad_norm": 0.14976832835542275,
      "learning_rate": 0.0001421393026777158,
      "loss": 0.7044,
      "step": 4258
    },
    {
      "epoch": 0.38081187410586553,
      "grad_norm": 0.15309725805427027,
      "learning_rate": 0.00014211303762166766,
      "loss": 0.6608,
      "step": 4259
    },
    {
      "epoch": 0.38090128755364805,
      "grad_norm": 0.2013928739365977,
      "learning_rate": 0.00014208676903364992,
      "loss": 0.3841,
      "step": 4260
    },
    {
      "epoch": 0.3809907010014306,
      "grad_norm": 0.14084808938221507,
      "learning_rate": 0.00014206049691586564,
      "loss": 0.6581,
      "step": 4261
    },
    {
      "epoch": 0.38108011444921314,
      "grad_norm": 0.16350436355696443,
      "learning_rate": 0.00014203422127051835,
      "loss": 0.6999,
      "step": 4262
    },
    {
      "epoch": 0.3811695278969957,
      "grad_norm": 0.15021603232827319,
      "learning_rate": 0.00014200794209981167,
      "loss": 0.6698,
      "step": 4263
    },
    {
      "epoch": 0.3812589413447783,
      "grad_norm": 0.14501732046878568,
      "learning_rate": 0.0001419816594059496,
      "loss": 0.6626,
      "step": 4264
    },
    {
      "epoch": 0.3813483547925608,
      "grad_norm": 0.14812391363306784,
      "learning_rate": 0.00014195537319113647,
      "loss": 0.6528,
      "step": 4265
    },
    {
      "epoch": 0.38143776824034337,
      "grad_norm": 0.15364792814810185,
      "learning_rate": 0.00014192908345757687,
      "loss": 0.6606,
      "step": 4266
    },
    {
      "epoch": 0.3815271816881259,
      "grad_norm": 0.15604638635588752,
      "learning_rate": 0.0001419027902074757,
      "loss": 0.6849,
      "step": 4267
    },
    {
      "epoch": 0.38161659513590845,
      "grad_norm": 0.13551208729648792,
      "learning_rate": 0.000141876493443038,
      "loss": 0.6627,
      "step": 4268
    },
    {
      "epoch": 0.38170600858369097,
      "grad_norm": 0.14966335137731365,
      "learning_rate": 0.0001418501931664695,
      "loss": 0.6745,
      "step": 4269
    },
    {
      "epoch": 0.38179542203147354,
      "grad_norm": 0.15439501906207254,
      "learning_rate": 0.0001418238893799758,
      "loss": 0.6609,
      "step": 4270
    },
    {
      "epoch": 0.38188483547925606,
      "grad_norm": 0.15374803158907369,
      "learning_rate": 0.00014179758208576298,
      "loss": 0.7072,
      "step": 4271
    },
    {
      "epoch": 0.38197424892703863,
      "grad_norm": 0.16176197782608884,
      "learning_rate": 0.00014177127128603745,
      "loss": 0.72,
      "step": 4272
    },
    {
      "epoch": 0.3820636623748212,
      "grad_norm": 0.13890139355339878,
      "learning_rate": 0.00014174495698300588,
      "loss": 0.6534,
      "step": 4273
    },
    {
      "epoch": 0.3821530758226037,
      "grad_norm": 0.16431087418342236,
      "learning_rate": 0.00014171863917887513,
      "loss": 0.6914,
      "step": 4274
    },
    {
      "epoch": 0.3822424892703863,
      "grad_norm": 0.21248112219233886,
      "learning_rate": 0.0001416923178758525,
      "loss": 0.3908,
      "step": 4275
    },
    {
      "epoch": 0.3823319027181688,
      "grad_norm": 0.16361915896099843,
      "learning_rate": 0.00014166599307614556,
      "loss": 0.7421,
      "step": 4276
    },
    {
      "epoch": 0.3824213161659514,
      "grad_norm": 0.15654582576406603,
      "learning_rate": 0.00014163966478196208,
      "loss": 0.7032,
      "step": 4277
    },
    {
      "epoch": 0.3825107296137339,
      "grad_norm": 0.1260500138362603,
      "learning_rate": 0.0001416133329955102,
      "loss": 0.6483,
      "step": 4278
    },
    {
      "epoch": 0.38260014306151646,
      "grad_norm": 0.14101892638827196,
      "learning_rate": 0.00014158699771899832,
      "loss": 0.6619,
      "step": 4279
    },
    {
      "epoch": 0.382689556509299,
      "grad_norm": 0.16963838221582647,
      "learning_rate": 0.0001415606589546352,
      "loss": 0.4172,
      "step": 4280
    },
    {
      "epoch": 0.38277896995708155,
      "grad_norm": 0.15469477582990568,
      "learning_rate": 0.0001415343167046298,
      "loss": 0.6902,
      "step": 4281
    },
    {
      "epoch": 0.38286838340486407,
      "grad_norm": 0.1424944267937974,
      "learning_rate": 0.0001415079709711914,
      "loss": 0.6788,
      "step": 4282
    },
    {
      "epoch": 0.38295779685264664,
      "grad_norm": 0.13750476884975937,
      "learning_rate": 0.0001414816217565296,
      "loss": 0.6578,
      "step": 4283
    },
    {
      "epoch": 0.3830472103004292,
      "grad_norm": 0.16529175707879545,
      "learning_rate": 0.00014145526906285432,
      "loss": 0.6754,
      "step": 4284
    },
    {
      "epoch": 0.3831366237482117,
      "grad_norm": 0.1545007200468146,
      "learning_rate": 0.00014142891289237563,
      "loss": 0.6756,
      "step": 4285
    },
    {
      "epoch": 0.3832260371959943,
      "grad_norm": 0.13912563023383787,
      "learning_rate": 0.0001414025532473041,
      "loss": 0.6727,
      "step": 4286
    },
    {
      "epoch": 0.3833154506437768,
      "grad_norm": 0.12794320254174155,
      "learning_rate": 0.00014137619012985042,
      "loss": 0.6464,
      "step": 4287
    },
    {
      "epoch": 0.3834048640915594,
      "grad_norm": 0.14779238173649678,
      "learning_rate": 0.00014134982354222563,
      "loss": 0.628,
      "step": 4288
    },
    {
      "epoch": 0.3834942775393419,
      "grad_norm": 0.13953133019050165,
      "learning_rate": 0.00014132345348664106,
      "loss": 0.6667,
      "step": 4289
    },
    {
      "epoch": 0.38358369098712447,
      "grad_norm": 0.15465728240932158,
      "learning_rate": 0.00014129707996530838,
      "loss": 0.694,
      "step": 4290
    },
    {
      "epoch": 0.383673104434907,
      "grad_norm": 0.1602653783276704,
      "learning_rate": 0.00014127070298043947,
      "loss": 0.7001,
      "step": 4291
    },
    {
      "epoch": 0.38376251788268956,
      "grad_norm": 0.15550918932552452,
      "learning_rate": 0.00014124432253424655,
      "loss": 0.699,
      "step": 4292
    },
    {
      "epoch": 0.38385193133047213,
      "grad_norm": 0.12862820349603665,
      "learning_rate": 0.0001412179386289421,
      "loss": 0.6527,
      "step": 4293
    },
    {
      "epoch": 0.38394134477825465,
      "grad_norm": 0.15211043923385234,
      "learning_rate": 0.00014119155126673895,
      "loss": 0.6536,
      "step": 4294
    },
    {
      "epoch": 0.3840307582260372,
      "grad_norm": 0.15133581469250498,
      "learning_rate": 0.0001411651604498501,
      "loss": 0.6671,
      "step": 4295
    },
    {
      "epoch": 0.38412017167381973,
      "grad_norm": 0.16766552360645523,
      "learning_rate": 0.00014113876618048897,
      "loss": 0.662,
      "step": 4296
    },
    {
      "epoch": 0.3842095851216023,
      "grad_norm": 0.1428186134158983,
      "learning_rate": 0.00014111236846086922,
      "loss": 0.6801,
      "step": 4297
    },
    {
      "epoch": 0.3842989985693848,
      "grad_norm": 0.16073014937103125,
      "learning_rate": 0.00014108596729320473,
      "loss": 0.7085,
      "step": 4298
    },
    {
      "epoch": 0.3843884120171674,
      "grad_norm": 0.14395342886846588,
      "learning_rate": 0.0001410595626797098,
      "loss": 0.6663,
      "step": 4299
    },
    {
      "epoch": 0.3844778254649499,
      "grad_norm": 0.16323819088521907,
      "learning_rate": 0.00014103315462259898,
      "loss": 0.7064,
      "step": 4300
    },
    {
      "epoch": 0.3845672389127325,
      "grad_norm": 0.15859493115145465,
      "learning_rate": 0.000141006743124087,
      "loss": 0.6999,
      "step": 4301
    },
    {
      "epoch": 0.384656652360515,
      "grad_norm": 0.14444285713212887,
      "learning_rate": 0.000140980328186389,
      "loss": 0.6425,
      "step": 4302
    },
    {
      "epoch": 0.38474606580829757,
      "grad_norm": 0.1498986449469241,
      "learning_rate": 0.00014095390981172038,
      "loss": 0.7026,
      "step": 4303
    },
    {
      "epoch": 0.38483547925608014,
      "grad_norm": 0.15195093137406254,
      "learning_rate": 0.00014092748800229683,
      "loss": 0.6756,
      "step": 4304
    },
    {
      "epoch": 0.38492489270386265,
      "grad_norm": 0.17381686213778655,
      "learning_rate": 0.00014090106276033423,
      "loss": 0.6818,
      "step": 4305
    },
    {
      "epoch": 0.3850143061516452,
      "grad_norm": 0.15850376668251062,
      "learning_rate": 0.00014087463408804892,
      "loss": 0.6987,
      "step": 4306
    },
    {
      "epoch": 0.38510371959942774,
      "grad_norm": 0.14159898459931303,
      "learning_rate": 0.00014084820198765743,
      "loss": 0.6685,
      "step": 4307
    },
    {
      "epoch": 0.3851931330472103,
      "grad_norm": 0.19818718138548438,
      "learning_rate": 0.00014082176646137653,
      "loss": 0.3501,
      "step": 4308
    },
    {
      "epoch": 0.38528254649499283,
      "grad_norm": 0.19925682961894814,
      "learning_rate": 0.0001407953275114234,
      "loss": 0.3993,
      "step": 4309
    },
    {
      "epoch": 0.3853719599427754,
      "grad_norm": 0.12986912007230111,
      "learning_rate": 0.00014076888514001542,
      "loss": 0.6485,
      "step": 4310
    },
    {
      "epoch": 0.3854613733905579,
      "grad_norm": 0.1455250402392873,
      "learning_rate": 0.0001407424393493703,
      "loss": 0.6816,
      "step": 4311
    },
    {
      "epoch": 0.3855507868383405,
      "grad_norm": 0.16001975235991292,
      "learning_rate": 0.00014071599014170598,
      "loss": 0.6452,
      "step": 4312
    },
    {
      "epoch": 0.38564020028612306,
      "grad_norm": 0.1514066002689504,
      "learning_rate": 0.0001406895375192407,
      "loss": 0.6897,
      "step": 4313
    },
    {
      "epoch": 0.3857296137339056,
      "grad_norm": 0.1235707762078476,
      "learning_rate": 0.0001406630814841931,
      "loss": 0.6264,
      "step": 4314
    },
    {
      "epoch": 0.38581902718168815,
      "grad_norm": 0.1419927909385811,
      "learning_rate": 0.00014063662203878195,
      "loss": 0.6789,
      "step": 4315
    },
    {
      "epoch": 0.38590844062947066,
      "grad_norm": 0.14155479151664993,
      "learning_rate": 0.00014061015918522639,
      "loss": 0.6582,
      "step": 4316
    },
    {
      "epoch": 0.38599785407725323,
      "grad_norm": 0.14933418833001763,
      "learning_rate": 0.0001405836929257458,
      "loss": 0.6547,
      "step": 4317
    },
    {
      "epoch": 0.38608726752503575,
      "grad_norm": 0.12903366938968525,
      "learning_rate": 0.00014055722326255992,
      "loss": 0.6867,
      "step": 4318
    },
    {
      "epoch": 0.3861766809728183,
      "grad_norm": 0.13903955336920246,
      "learning_rate": 0.0001405307501978887,
      "loss": 0.6629,
      "step": 4319
    },
    {
      "epoch": 0.38626609442060084,
      "grad_norm": 0.1615556430156133,
      "learning_rate": 0.0001405042737339524,
      "loss": 0.7094,
      "step": 4320
    },
    {
      "epoch": 0.3863555078683834,
      "grad_norm": 0.14884769064105569,
      "learning_rate": 0.0001404777938729716,
      "loss": 0.6499,
      "step": 4321
    },
    {
      "epoch": 0.386444921316166,
      "grad_norm": 0.13651571550967076,
      "learning_rate": 0.00014045131061716712,
      "loss": 0.6345,
      "step": 4322
    },
    {
      "epoch": 0.3865343347639485,
      "grad_norm": 0.15874565099496946,
      "learning_rate": 0.00014042482396876005,
      "loss": 0.6648,
      "step": 4323
    },
    {
      "epoch": 0.38662374821173107,
      "grad_norm": 0.17747017944330956,
      "learning_rate": 0.0001403983339299718,
      "loss": 0.6908,
      "step": 4324
    },
    {
      "epoch": 0.3867131616595136,
      "grad_norm": 0.15548448860661335,
      "learning_rate": 0.0001403718405030241,
      "loss": 0.6608,
      "step": 4325
    },
    {
      "epoch": 0.38680257510729615,
      "grad_norm": 0.14256295049788353,
      "learning_rate": 0.00014034534369013887,
      "loss": 0.6776,
      "step": 4326
    },
    {
      "epoch": 0.38689198855507867,
      "grad_norm": 0.1476950176321808,
      "learning_rate": 0.0001403188434935384,
      "loss": 0.6607,
      "step": 4327
    },
    {
      "epoch": 0.38698140200286124,
      "grad_norm": 0.1537334242856437,
      "learning_rate": 0.00014029233991544527,
      "loss": 0.6686,
      "step": 4328
    },
    {
      "epoch": 0.38707081545064376,
      "grad_norm": 0.15685956716747954,
      "learning_rate": 0.0001402658329580822,
      "loss": 0.701,
      "step": 4329
    },
    {
      "epoch": 0.38716022889842633,
      "grad_norm": 0.15018220232300172,
      "learning_rate": 0.0001402393226236723,
      "loss": 0.6908,
      "step": 4330
    },
    {
      "epoch": 0.38724964234620884,
      "grad_norm": 0.16479940929541967,
      "learning_rate": 0.00014021280891443909,
      "loss": 0.722,
      "step": 4331
    },
    {
      "epoch": 0.3873390557939914,
      "grad_norm": 0.1298915036364152,
      "learning_rate": 0.0001401862918326061,
      "loss": 0.6281,
      "step": 4332
    },
    {
      "epoch": 0.387428469241774,
      "grad_norm": 0.150919911672271,
      "learning_rate": 0.0001401597713803974,
      "loss": 0.646,
      "step": 4333
    },
    {
      "epoch": 0.3875178826895565,
      "grad_norm": 0.14756846650452413,
      "learning_rate": 0.00014013324756003716,
      "loss": 0.6809,
      "step": 4334
    },
    {
      "epoch": 0.3876072961373391,
      "grad_norm": 0.17067997304686758,
      "learning_rate": 0.0001401067203737499,
      "loss": 0.7411,
      "step": 4335
    },
    {
      "epoch": 0.3876967095851216,
      "grad_norm": 0.13796068232747705,
      "learning_rate": 0.00014008018982376044,
      "loss": 0.7045,
      "step": 4336
    },
    {
      "epoch": 0.38778612303290416,
      "grad_norm": 0.14201460568289792,
      "learning_rate": 0.0001400536559122939,
      "loss": 0.636,
      "step": 4337
    },
    {
      "epoch": 0.3878755364806867,
      "grad_norm": 0.1284624545812376,
      "learning_rate": 0.00014002711864157557,
      "loss": 0.6601,
      "step": 4338
    },
    {
      "epoch": 0.38796494992846925,
      "grad_norm": 0.14111945648343316,
      "learning_rate": 0.00014000057801383115,
      "loss": 0.6996,
      "step": 4339
    },
    {
      "epoch": 0.38805436337625177,
      "grad_norm": 0.1536813964896739,
      "learning_rate": 0.0001399740340312866,
      "loss": 0.7291,
      "step": 4340
    },
    {
      "epoch": 0.38814377682403434,
      "grad_norm": 0.1566568930267915,
      "learning_rate": 0.00013994748669616803,
      "loss": 0.675,
      "step": 4341
    },
    {
      "epoch": 0.3882331902718169,
      "grad_norm": 0.15166748904958116,
      "learning_rate": 0.00013992093601070203,
      "loss": 0.6915,
      "step": 4342
    },
    {
      "epoch": 0.3883226037195994,
      "grad_norm": 0.13819825182577658,
      "learning_rate": 0.00013989438197711533,
      "loss": 0.6189,
      "step": 4343
    },
    {
      "epoch": 0.388412017167382,
      "grad_norm": 0.128090091328655,
      "learning_rate": 0.000139867824597635,
      "loss": 0.6812,
      "step": 4344
    },
    {
      "epoch": 0.3885014306151645,
      "grad_norm": 0.1404920079851099,
      "learning_rate": 0.00013984126387448837,
      "loss": 0.6438,
      "step": 4345
    },
    {
      "epoch": 0.3885908440629471,
      "grad_norm": 0.15722263927893051,
      "learning_rate": 0.00013981469980990302,
      "loss": 0.6786,
      "step": 4346
    },
    {
      "epoch": 0.3886802575107296,
      "grad_norm": 0.13901929179733913,
      "learning_rate": 0.0001397881324061069,
      "loss": 0.6726,
      "step": 4347
    },
    {
      "epoch": 0.38876967095851217,
      "grad_norm": 0.15313393477291176,
      "learning_rate": 0.0001397615616653282,
      "loss": 0.6503,
      "step": 4348
    },
    {
      "epoch": 0.3888590844062947,
      "grad_norm": 0.16130527269147477,
      "learning_rate": 0.00013973498758979532,
      "loss": 0.7351,
      "step": 4349
    },
    {
      "epoch": 0.38894849785407726,
      "grad_norm": 0.16979933957443577,
      "learning_rate": 0.00013970841018173702,
      "loss": 0.6844,
      "step": 4350
    },
    {
      "epoch": 0.3890379113018598,
      "grad_norm": 0.16494570977006234,
      "learning_rate": 0.0001396818294433823,
      "loss": 0.7046,
      "step": 4351
    },
    {
      "epoch": 0.38912732474964234,
      "grad_norm": 0.14770113831229975,
      "learning_rate": 0.00013965524537696048,
      "loss": 0.6849,
      "step": 4352
    },
    {
      "epoch": 0.3892167381974249,
      "grad_norm": 0.16369566812984077,
      "learning_rate": 0.00013962865798470113,
      "loss": 0.7051,
      "step": 4353
    },
    {
      "epoch": 0.38930615164520743,
      "grad_norm": 0.15078205234491068,
      "learning_rate": 0.00013960206726883407,
      "loss": 0.6574,
      "step": 4354
    },
    {
      "epoch": 0.38939556509299,
      "grad_norm": 0.17027859133883702,
      "learning_rate": 0.00013957547323158949,
      "loss": 0.6963,
      "step": 4355
    },
    {
      "epoch": 0.3894849785407725,
      "grad_norm": 0.14784343914151699,
      "learning_rate": 0.00013954887587519773,
      "loss": 0.652,
      "step": 4356
    },
    {
      "epoch": 0.3895743919885551,
      "grad_norm": 0.1511542149025232,
      "learning_rate": 0.00013952227520188957,
      "loss": 0.6737,
      "step": 4357
    },
    {
      "epoch": 0.3896638054363376,
      "grad_norm": 0.1456573943580501,
      "learning_rate": 0.00013949567121389586,
      "loss": 0.6839,
      "step": 4358
    },
    {
      "epoch": 0.3897532188841202,
      "grad_norm": 0.16947776394967554,
      "learning_rate": 0.00013946906391344791,
      "loss": 0.6725,
      "step": 4359
    },
    {
      "epoch": 0.3898426323319027,
      "grad_norm": 0.13284617787059375,
      "learning_rate": 0.00013944245330277724,
      "loss": 0.6373,
      "step": 4360
    },
    {
      "epoch": 0.38993204577968527,
      "grad_norm": 0.16023077487382986,
      "learning_rate": 0.00013941583938411567,
      "loss": 0.6999,
      "step": 4361
    },
    {
      "epoch": 0.39002145922746784,
      "grad_norm": 0.15492834893392732,
      "learning_rate": 0.00013938922215969523,
      "loss": 0.6849,
      "step": 4362
    },
    {
      "epoch": 0.39011087267525035,
      "grad_norm": 0.14381710673117504,
      "learning_rate": 0.00013936260163174832,
      "loss": 0.6614,
      "step": 4363
    },
    {
      "epoch": 0.3902002861230329,
      "grad_norm": 0.3406148356064853,
      "learning_rate": 0.00013933597780250753,
      "loss": 0.6657,
      "step": 4364
    },
    {
      "epoch": 0.39028969957081544,
      "grad_norm": 0.13854328129241797,
      "learning_rate": 0.0001393093506742058,
      "loss": 0.6692,
      "step": 4365
    },
    {
      "epoch": 0.390379113018598,
      "grad_norm": 0.14455474161133625,
      "learning_rate": 0.0001392827202490763,
      "loss": 0.6892,
      "step": 4366
    },
    {
      "epoch": 0.3904685264663805,
      "grad_norm": 0.1590365398541699,
      "learning_rate": 0.00013925608652935249,
      "loss": 0.6605,
      "step": 4367
    },
    {
      "epoch": 0.3905579399141631,
      "grad_norm": 0.17003321480004344,
      "learning_rate": 0.0001392294495172681,
      "loss": 0.7263,
      "step": 4368
    },
    {
      "epoch": 0.3906473533619456,
      "grad_norm": 0.16045334626305113,
      "learning_rate": 0.00013920280921505716,
      "loss": 0.7074,
      "step": 4369
    },
    {
      "epoch": 0.3907367668097282,
      "grad_norm": 0.1766305239758068,
      "learning_rate": 0.00013917616562495396,
      "loss": 0.6718,
      "step": 4370
    },
    {
      "epoch": 0.3908261802575107,
      "grad_norm": 0.14954727901701234,
      "learning_rate": 0.00013914951874919308,
      "loss": 0.6774,
      "step": 4371
    },
    {
      "epoch": 0.3909155937052933,
      "grad_norm": 0.13186573346694286,
      "learning_rate": 0.00013912286859000934,
      "loss": 0.6377,
      "step": 4372
    },
    {
      "epoch": 0.39100500715307585,
      "grad_norm": 0.1615057831486864,
      "learning_rate": 0.00013909621514963784,
      "loss": 0.6866,
      "step": 4373
    },
    {
      "epoch": 0.39109442060085836,
      "grad_norm": 0.20404244503212862,
      "learning_rate": 0.00013906955843031403,
      "loss": 0.3985,
      "step": 4374
    },
    {
      "epoch": 0.39118383404864093,
      "grad_norm": 0.14367336526827296,
      "learning_rate": 0.00013904289843427348,
      "loss": 0.7049,
      "step": 4375
    },
    {
      "epoch": 0.39127324749642345,
      "grad_norm": 0.13141923191550492,
      "learning_rate": 0.00013901623516375219,
      "loss": 0.6811,
      "step": 4376
    },
    {
      "epoch": 0.391362660944206,
      "grad_norm": 0.1658479685263351,
      "learning_rate": 0.00013898956862098643,
      "loss": 0.6553,
      "step": 4377
    },
    {
      "epoch": 0.39145207439198854,
      "grad_norm": 0.14911757360565833,
      "learning_rate": 0.00013896289880821263,
      "loss": 0.6551,
      "step": 4378
    },
    {
      "epoch": 0.3915414878397711,
      "grad_norm": 0.14186756255703123,
      "learning_rate": 0.0001389362257276675,
      "loss": 0.667,
      "step": 4379
    },
    {
      "epoch": 0.3916309012875536,
      "grad_norm": 0.16452599800005796,
      "learning_rate": 0.00013890954938158823,
      "loss": 0.3768,
      "step": 4380
    },
    {
      "epoch": 0.3917203147353362,
      "grad_norm": 0.15433039746894936,
      "learning_rate": 0.000138882869772212,
      "loss": 0.6842,
      "step": 4381
    },
    {
      "epoch": 0.39180972818311877,
      "grad_norm": 0.20553699670675826,
      "learning_rate": 0.00013885618690177642,
      "loss": 0.3845,
      "step": 4382
    },
    {
      "epoch": 0.3918991416309013,
      "grad_norm": 0.14660395003130872,
      "learning_rate": 0.0001388295007725194,
      "loss": 0.6519,
      "step": 4383
    },
    {
      "epoch": 0.39198855507868385,
      "grad_norm": 0.16300543640951293,
      "learning_rate": 0.00013880281138667905,
      "loss": 0.707,
      "step": 4384
    },
    {
      "epoch": 0.39207796852646637,
      "grad_norm": 0.17668481186326432,
      "learning_rate": 0.00013877611874649375,
      "loss": 0.6966,
      "step": 4385
    },
    {
      "epoch": 0.39216738197424894,
      "grad_norm": 0.1496875239455015,
      "learning_rate": 0.0001387494228542022,
      "loss": 0.683,
      "step": 4386
    },
    {
      "epoch": 0.39225679542203146,
      "grad_norm": 0.14329232658552526,
      "learning_rate": 0.00013872272371204337,
      "loss": 0.6721,
      "step": 4387
    },
    {
      "epoch": 0.39234620886981403,
      "grad_norm": 0.1504369376802061,
      "learning_rate": 0.00013869602132225646,
      "loss": 0.6866,
      "step": 4388
    },
    {
      "epoch": 0.39243562231759654,
      "grad_norm": 0.14794969725310503,
      "learning_rate": 0.00013866931568708098,
      "loss": 0.6562,
      "step": 4389
    },
    {
      "epoch": 0.3925250357653791,
      "grad_norm": 0.14288538563886322,
      "learning_rate": 0.00013864260680875666,
      "loss": 0.6587,
      "step": 4390
    },
    {
      "epoch": 0.3926144492131617,
      "grad_norm": 0.15644917842087072,
      "learning_rate": 0.00013861589468952364,
      "loss": 0.6671,
      "step": 4391
    },
    {
      "epoch": 0.3927038626609442,
      "grad_norm": 0.14201804868781337,
      "learning_rate": 0.0001385891793316221,
      "loss": 0.6867,
      "step": 4392
    },
    {
      "epoch": 0.3927932761087268,
      "grad_norm": 0.15455107407353544,
      "learning_rate": 0.0001385624607372927,
      "loss": 0.7007,
      "step": 4393
    },
    {
      "epoch": 0.3928826895565093,
      "grad_norm": 0.1600817852017047,
      "learning_rate": 0.00013853573890877633,
      "loss": 0.6506,
      "step": 4394
    },
    {
      "epoch": 0.39297210300429186,
      "grad_norm": 0.13239809817520326,
      "learning_rate": 0.0001385090138483141,
      "loss": 0.7076,
      "step": 4395
    },
    {
      "epoch": 0.3930615164520744,
      "grad_norm": 0.16021416650117887,
      "learning_rate": 0.0001384822855581473,
      "loss": 0.6732,
      "step": 4396
    },
    {
      "epoch": 0.39315092989985695,
      "grad_norm": 0.15330450951038901,
      "learning_rate": 0.00013845555404051776,
      "loss": 0.6443,
      "step": 4397
    },
    {
      "epoch": 0.39324034334763946,
      "grad_norm": 0.14119609827780694,
      "learning_rate": 0.00013842881929766732,
      "loss": 0.6725,
      "step": 4398
    },
    {
      "epoch": 0.39332975679542204,
      "grad_norm": 0.1408781519083915,
      "learning_rate": 0.00013840208133183822,
      "loss": 0.6521,
      "step": 4399
    },
    {
      "epoch": 0.39341917024320455,
      "grad_norm": 0.1375348769736457,
      "learning_rate": 0.0001383753401452729,
      "loss": 0.6622,
      "step": 4400
    },
    {
      "epoch": 0.3935085836909871,
      "grad_norm": 0.14559101583304168,
      "learning_rate": 0.00013834859574021418,
      "loss": 0.6869,
      "step": 4401
    },
    {
      "epoch": 0.3935979971387697,
      "grad_norm": 0.1670034442090514,
      "learning_rate": 0.00013832184811890508,
      "loss": 0.7119,
      "step": 4402
    },
    {
      "epoch": 0.3936874105865522,
      "grad_norm": 0.1793857558518939,
      "learning_rate": 0.0001382950972835888,
      "loss": 0.6982,
      "step": 4403
    },
    {
      "epoch": 0.3937768240343348,
      "grad_norm": 0.15454404156294882,
      "learning_rate": 0.000138268343236509,
      "loss": 0.6475,
      "step": 4404
    },
    {
      "epoch": 0.3938662374821173,
      "grad_norm": 0.16350615917684205,
      "learning_rate": 0.00013824158597990947,
      "loss": 0.6907,
      "step": 4405
    },
    {
      "epoch": 0.39395565092989987,
      "grad_norm": 0.13032956879911003,
      "learning_rate": 0.00013821482551603425,
      "loss": 0.5977,
      "step": 4406
    },
    {
      "epoch": 0.3940450643776824,
      "grad_norm": 0.16549975418569515,
      "learning_rate": 0.00013818806184712781,
      "loss": 0.7446,
      "step": 4407
    },
    {
      "epoch": 0.39413447782546496,
      "grad_norm": 0.14082906406689003,
      "learning_rate": 0.00013816129497543476,
      "loss": 0.6485,
      "step": 4408
    },
    {
      "epoch": 0.3942238912732475,
      "grad_norm": 0.15068119154136428,
      "learning_rate": 0.00013813452490319997,
      "loss": 0.7218,
      "step": 4409
    },
    {
      "epoch": 0.39431330472103004,
      "grad_norm": 0.15257716044172132,
      "learning_rate": 0.0001381077516326686,
      "loss": 0.6682,
      "step": 4410
    },
    {
      "epoch": 0.3944027181688126,
      "grad_norm": 0.14175569766895618,
      "learning_rate": 0.00013808097516608618,
      "loss": 0.6439,
      "step": 4411
    },
    {
      "epoch": 0.39449213161659513,
      "grad_norm": 0.15549496946157818,
      "learning_rate": 0.00013805419550569833,
      "loss": 0.7029,
      "step": 4412
    },
    {
      "epoch": 0.3945815450643777,
      "grad_norm": 0.13353569106499344,
      "learning_rate": 0.00013802741265375105,
      "loss": 0.6576,
      "step": 4413
    },
    {
      "epoch": 0.3946709585121602,
      "grad_norm": 0.1626112088574666,
      "learning_rate": 0.00013800062661249062,
      "loss": 0.7444,
      "step": 4414
    },
    {
      "epoch": 0.3947603719599428,
      "grad_norm": 0.15851936021935803,
      "learning_rate": 0.00013797383738416353,
      "loss": 0.6964,
      "step": 4415
    },
    {
      "epoch": 0.3948497854077253,
      "grad_norm": 0.134270632476707,
      "learning_rate": 0.00013794704497101655,
      "loss": 0.6658,
      "step": 4416
    },
    {
      "epoch": 0.3949391988555079,
      "grad_norm": 0.1368858173668906,
      "learning_rate": 0.00013792024937529673,
      "loss": 0.6201,
      "step": 4417
    },
    {
      "epoch": 0.3950286123032904,
      "grad_norm": 0.16541859306121984,
      "learning_rate": 0.0001378934505992514,
      "loss": 0.7155,
      "step": 4418
    },
    {
      "epoch": 0.39511802575107297,
      "grad_norm": 0.14902919865411585,
      "learning_rate": 0.00013786664864512814,
      "loss": 0.7087,
      "step": 4419
    },
    {
      "epoch": 0.3952074391988555,
      "grad_norm": 0.16250504894598397,
      "learning_rate": 0.00013783984351517476,
      "loss": 0.7375,
      "step": 4420
    },
    {
      "epoch": 0.39529685264663805,
      "grad_norm": 0.15674057562021854,
      "learning_rate": 0.00013781303521163943,
      "loss": 0.6991,
      "step": 4421
    },
    {
      "epoch": 0.3953862660944206,
      "grad_norm": 0.14355747736797628,
      "learning_rate": 0.0001377862237367705,
      "loss": 0.6924,
      "step": 4422
    },
    {
      "epoch": 0.39547567954220314,
      "grad_norm": 0.15209850316703444,
      "learning_rate": 0.0001377594090928166,
      "loss": 0.7012,
      "step": 4423
    },
    {
      "epoch": 0.3955650929899857,
      "grad_norm": 0.14151016948882722,
      "learning_rate": 0.00013773259128202668,
      "loss": 0.6988,
      "step": 4424
    },
    {
      "epoch": 0.3956545064377682,
      "grad_norm": 0.14481728334687458,
      "learning_rate": 0.0001377057703066499,
      "loss": 0.6745,
      "step": 4425
    },
    {
      "epoch": 0.3957439198855508,
      "grad_norm": 0.14911183061739622,
      "learning_rate": 0.0001376789461689357,
      "loss": 0.67,
      "step": 4426
    },
    {
      "epoch": 0.3958333333333333,
      "grad_norm": 0.16663707466109212,
      "learning_rate": 0.0001376521188711338,
      "loss": 0.6977,
      "step": 4427
    },
    {
      "epoch": 0.3959227467811159,
      "grad_norm": 0.1326708719582936,
      "learning_rate": 0.00013762528841549415,
      "loss": 0.6626,
      "step": 4428
    },
    {
      "epoch": 0.3960121602288984,
      "grad_norm": 0.11566828691596007,
      "learning_rate": 0.000137598454804267,
      "loss": 0.5997,
      "step": 4429
    },
    {
      "epoch": 0.396101573676681,
      "grad_norm": 0.15779237926104128,
      "learning_rate": 0.00013757161803970282,
      "loss": 0.6702,
      "step": 4430
    },
    {
      "epoch": 0.39619098712446355,
      "grad_norm": 0.16610771304423705,
      "learning_rate": 0.00013754477812405247,
      "loss": 0.6725,
      "step": 4431
    },
    {
      "epoch": 0.39628040057224606,
      "grad_norm": 0.15872888079023964,
      "learning_rate": 0.0001375179350595669,
      "loss": 0.6696,
      "step": 4432
    },
    {
      "epoch": 0.39636981402002863,
      "grad_norm": 0.15872174385855464,
      "learning_rate": 0.0001374910888484974,
      "loss": 0.687,
      "step": 4433
    },
    {
      "epoch": 0.39645922746781115,
      "grad_norm": 0.12808156236746107,
      "learning_rate": 0.00013746423949309556,
      "loss": 0.6612,
      "step": 4434
    },
    {
      "epoch": 0.3965486409155937,
      "grad_norm": 0.17227043128394362,
      "learning_rate": 0.00013743738699561323,
      "loss": 0.7346,
      "step": 4435
    },
    {
      "epoch": 0.39663805436337624,
      "grad_norm": 0.1592201600042711,
      "learning_rate": 0.00013741053135830246,
      "loss": 0.7003,
      "step": 4436
    },
    {
      "epoch": 0.3967274678111588,
      "grad_norm": 0.1535964053494208,
      "learning_rate": 0.00013738367258341557,
      "loss": 0.6686,
      "step": 4437
    },
    {
      "epoch": 0.3968168812589413,
      "grad_norm": 0.15173707159702554,
      "learning_rate": 0.00013735681067320526,
      "loss": 0.6513,
      "step": 4438
    },
    {
      "epoch": 0.3969062947067239,
      "grad_norm": 0.16452110009649132,
      "learning_rate": 0.00013732994562992433,
      "loss": 0.675,
      "step": 4439
    },
    {
      "epoch": 0.3969957081545064,
      "grad_norm": 0.12464181858859169,
      "learning_rate": 0.00013730307745582593,
      "loss": 0.6157,
      "step": 4440
    },
    {
      "epoch": 0.397085121602289,
      "grad_norm": 0.14452453803856152,
      "learning_rate": 0.0001372762061531635,
      "loss": 0.6685,
      "step": 4441
    },
    {
      "epoch": 0.39717453505007155,
      "grad_norm": 0.1685362231810944,
      "learning_rate": 0.00013724933172419066,
      "loss": 0.7059,
      "step": 4442
    },
    {
      "epoch": 0.39726394849785407,
      "grad_norm": 0.1527667887758125,
      "learning_rate": 0.00013722245417116134,
      "loss": 0.6706,
      "step": 4443
    },
    {
      "epoch": 0.39735336194563664,
      "grad_norm": 0.15311466356177322,
      "learning_rate": 0.00013719557349632977,
      "loss": 0.6946,
      "step": 4444
    },
    {
      "epoch": 0.39744277539341916,
      "grad_norm": 0.15650359265414301,
      "learning_rate": 0.00013716868970195038,
      "loss": 0.6866,
      "step": 4445
    },
    {
      "epoch": 0.39753218884120173,
      "grad_norm": 0.14952879203015076,
      "learning_rate": 0.00013714180279027785,
      "loss": 0.6637,
      "step": 4446
    },
    {
      "epoch": 0.39762160228898424,
      "grad_norm": 0.1651457416963843,
      "learning_rate": 0.00013711491276356718,
      "loss": 0.7371,
      "step": 4447
    },
    {
      "epoch": 0.3977110157367668,
      "grad_norm": 0.1441594335240259,
      "learning_rate": 0.0001370880196240736,
      "loss": 0.6536,
      "step": 4448
    },
    {
      "epoch": 0.39780042918454933,
      "grad_norm": 0.145431663290835,
      "learning_rate": 0.00013706112337405263,
      "loss": 0.6665,
      "step": 4449
    },
    {
      "epoch": 0.3978898426323319,
      "grad_norm": 0.16692460932416253,
      "learning_rate": 0.00013703422401575995,
      "loss": 0.677,
      "step": 4450
    },
    {
      "epoch": 0.3979792560801145,
      "grad_norm": 0.15582908604147913,
      "learning_rate": 0.00013700732155145167,
      "loss": 0.7008,
      "step": 4451
    },
    {
      "epoch": 0.398068669527897,
      "grad_norm": 0.1586213633059991,
      "learning_rate": 0.00013698041598338403,
      "loss": 0.7218,
      "step": 4452
    },
    {
      "epoch": 0.39815808297567956,
      "grad_norm": 0.14143342648575594,
      "learning_rate": 0.0001369535073138135,
      "loss": 0.6699,
      "step": 4453
    },
    {
      "epoch": 0.3982474964234621,
      "grad_norm": 0.15126677017085102,
      "learning_rate": 0.00013692659554499702,
      "loss": 0.6814,
      "step": 4454
    },
    {
      "epoch": 0.39833690987124465,
      "grad_norm": 0.14939442865381486,
      "learning_rate": 0.0001368996806791915,
      "loss": 0.6648,
      "step": 4455
    },
    {
      "epoch": 0.39842632331902716,
      "grad_norm": 0.2221465072616742,
      "learning_rate": 0.0001368727627186544,
      "loss": 0.3935,
      "step": 4456
    },
    {
      "epoch": 0.39851573676680974,
      "grad_norm": 0.1469091379886844,
      "learning_rate": 0.00013684584166564315,
      "loss": 0.6819,
      "step": 4457
    },
    {
      "epoch": 0.39860515021459225,
      "grad_norm": 0.14813653740414873,
      "learning_rate": 0.0001368189175224157,
      "loss": 0.6907,
      "step": 4458
    },
    {
      "epoch": 0.3986945636623748,
      "grad_norm": 0.14986208104262688,
      "learning_rate": 0.00013679199029123008,
      "loss": 0.7037,
      "step": 4459
    },
    {
      "epoch": 0.3987839771101574,
      "grad_norm": 0.15970466469689265,
      "learning_rate": 0.00013676505997434467,
      "loss": 0.7065,
      "step": 4460
    },
    {
      "epoch": 0.3988733905579399,
      "grad_norm": 0.15384997360688704,
      "learning_rate": 0.0001367381265740181,
      "loss": 0.6683,
      "step": 4461
    },
    {
      "epoch": 0.3989628040057225,
      "grad_norm": 0.1479053029689128,
      "learning_rate": 0.00013671119009250922,
      "loss": 0.6554,
      "step": 4462
    },
    {
      "epoch": 0.399052217453505,
      "grad_norm": 0.1519417100177547,
      "learning_rate": 0.00013668425053207713,
      "loss": 0.6794,
      "step": 4463
    },
    {
      "epoch": 0.39914163090128757,
      "grad_norm": 0.16664682821128693,
      "learning_rate": 0.0001366573078949813,
      "loss": 0.701,
      "step": 4464
    },
    {
      "epoch": 0.3992310443490701,
      "grad_norm": 0.15371338904493823,
      "learning_rate": 0.00013663036218348128,
      "loss": 0.6557,
      "step": 4465
    },
    {
      "epoch": 0.39932045779685266,
      "grad_norm": 0.14212663496050099,
      "learning_rate": 0.00013660341339983707,
      "loss": 0.6643,
      "step": 4466
    },
    {
      "epoch": 0.3994098712446352,
      "grad_norm": 0.14087320785594762,
      "learning_rate": 0.00013657646154630876,
      "loss": 0.6632,
      "step": 4467
    },
    {
      "epoch": 0.39949928469241774,
      "grad_norm": 0.15374143937621118,
      "learning_rate": 0.00013654950662515678,
      "loss": 0.6359,
      "step": 4468
    },
    {
      "epoch": 0.39958869814020026,
      "grad_norm": 0.1514048201607894,
      "learning_rate": 0.00013652254863864185,
      "loss": 0.6608,
      "step": 4469
    },
    {
      "epoch": 0.39967811158798283,
      "grad_norm": 0.16082551850264284,
      "learning_rate": 0.00013649558758902484,
      "loss": 0.6814,
      "step": 4470
    },
    {
      "epoch": 0.3997675250357654,
      "grad_norm": 0.13874235795618944,
      "learning_rate": 0.000136468623478567,
      "loss": 0.6914,
      "step": 4471
    },
    {
      "epoch": 0.3998569384835479,
      "grad_norm": 0.14001005357974872,
      "learning_rate": 0.00013644165630952973,
      "loss": 0.637,
      "step": 4472
    },
    {
      "epoch": 0.3999463519313305,
      "grad_norm": 0.1634483349059244,
      "learning_rate": 0.00013641468608417478,
      "loss": 0.6996,
      "step": 4473
    },
    {
      "epoch": 0.400035765379113,
      "grad_norm": 0.1885973130027765,
      "learning_rate": 0.00013638771280476405,
      "loss": 0.4072,
      "step": 4474
    },
    {
      "epoch": 0.4001251788268956,
      "grad_norm": 0.1496881158050757,
      "learning_rate": 0.00013636073647355982,
      "loss": 0.6405,
      "step": 4475
    },
    {
      "epoch": 0.4002145922746781,
      "grad_norm": 0.14932053690546163,
      "learning_rate": 0.00013633375709282453,
      "loss": 0.6872,
      "step": 4476
    },
    {
      "epoch": 0.40030400572246067,
      "grad_norm": 0.13922555645905269,
      "learning_rate": 0.00013630677466482092,
      "loss": 0.6882,
      "step": 4477
    },
    {
      "epoch": 0.4003934191702432,
      "grad_norm": 0.1739636128893044,
      "learning_rate": 0.00013627978919181197,
      "loss": 0.7094,
      "step": 4478
    },
    {
      "epoch": 0.40048283261802575,
      "grad_norm": 0.13222889232358004,
      "learning_rate": 0.0001362528006760609,
      "loss": 0.666,
      "step": 4479
    },
    {
      "epoch": 0.4005722460658083,
      "grad_norm": 0.1647955970544327,
      "learning_rate": 0.0001362258091198312,
      "loss": 0.697,
      "step": 4480
    },
    {
      "epoch": 0.40066165951359084,
      "grad_norm": 0.157541769489766,
      "learning_rate": 0.0001361988145253867,
      "loss": 0.7015,
      "step": 4481
    },
    {
      "epoch": 0.4007510729613734,
      "grad_norm": 0.1457402464190857,
      "learning_rate": 0.00013617181689499128,
      "loss": 0.6594,
      "step": 4482
    },
    {
      "epoch": 0.4008404864091559,
      "grad_norm": 0.16952533506636308,
      "learning_rate": 0.00013614481623090932,
      "loss": 0.7066,
      "step": 4483
    },
    {
      "epoch": 0.4009298998569385,
      "grad_norm": 0.12443795478317113,
      "learning_rate": 0.00013611781253540522,
      "loss": 0.6217,
      "step": 4484
    },
    {
      "epoch": 0.401019313304721,
      "grad_norm": 0.15470660363322977,
      "learning_rate": 0.00013609080581074382,
      "loss": 0.6828,
      "step": 4485
    },
    {
      "epoch": 0.4011087267525036,
      "grad_norm": 0.14950407449360995,
      "learning_rate": 0.00013606379605919013,
      "loss": 0.663,
      "step": 4486
    },
    {
      "epoch": 0.4011981402002861,
      "grad_norm": 0.15090675722045652,
      "learning_rate": 0.00013603678328300939,
      "loss": 0.674,
      "step": 4487
    },
    {
      "epoch": 0.4012875536480687,
      "grad_norm": 0.14798746189005696,
      "learning_rate": 0.0001360097674844672,
      "loss": 0.6472,
      "step": 4488
    },
    {
      "epoch": 0.4013769670958512,
      "grad_norm": 0.1567887214450416,
      "learning_rate": 0.0001359827486658293,
      "loss": 0.6754,
      "step": 4489
    },
    {
      "epoch": 0.40146638054363376,
      "grad_norm": 0.15827566554333905,
      "learning_rate": 0.00013595572682936172,
      "loss": 0.7097,
      "step": 4490
    },
    {
      "epoch": 0.40155579399141633,
      "grad_norm": 0.1428728822994251,
      "learning_rate": 0.00013592870197733073,
      "loss": 0.6793,
      "step": 4491
    },
    {
      "epoch": 0.40164520743919885,
      "grad_norm": 0.1868978535871941,
      "learning_rate": 0.0001359016741120029,
      "loss": 0.3654,
      "step": 4492
    },
    {
      "epoch": 0.4017346208869814,
      "grad_norm": 0.1440407053705785,
      "learning_rate": 0.00013587464323564503,
      "loss": 0.6568,
      "step": 4493
    },
    {
      "epoch": 0.40182403433476394,
      "grad_norm": 0.14945205262015962,
      "learning_rate": 0.00013584760935052417,
      "loss": 0.6796,
      "step": 4494
    },
    {
      "epoch": 0.4019134477825465,
      "grad_norm": 0.13827981998912683,
      "learning_rate": 0.00013582057245890757,
      "loss": 0.6646,
      "step": 4495
    },
    {
      "epoch": 0.402002861230329,
      "grad_norm": 0.15928319039300196,
      "learning_rate": 0.00013579353256306287,
      "loss": 0.6799,
      "step": 4496
    },
    {
      "epoch": 0.4020922746781116,
      "grad_norm": 0.1384514411970101,
      "learning_rate": 0.00013576648966525778,
      "loss": 0.6705,
      "step": 4497
    },
    {
      "epoch": 0.4021816881258941,
      "grad_norm": 0.1432484821395315,
      "learning_rate": 0.00013573944376776042,
      "loss": 0.6573,
      "step": 4498
    },
    {
      "epoch": 0.4022711015736767,
      "grad_norm": 0.14086082435020944,
      "learning_rate": 0.00013571239487283906,
      "loss": 0.6669,
      "step": 4499
    },
    {
      "epoch": 0.40236051502145925,
      "grad_norm": 0.14866364707826782,
      "learning_rate": 0.00013568534298276228,
      "loss": 0.6609,
      "step": 4500
    },
    {
      "epoch": 0.40244992846924177,
      "grad_norm": 0.14758744988406547,
      "learning_rate": 0.00013565828809979885,
      "loss": 0.6569,
      "step": 4501
    },
    {
      "epoch": 0.40253934191702434,
      "grad_norm": 0.16390052579656797,
      "learning_rate": 0.0001356312302262179,
      "loss": 0.6845,
      "step": 4502
    },
    {
      "epoch": 0.40262875536480686,
      "grad_norm": 0.14122536954224618,
      "learning_rate": 0.0001356041693642887,
      "loss": 0.682,
      "step": 4503
    },
    {
      "epoch": 0.4027181688125894,
      "grad_norm": 0.1449195760961454,
      "learning_rate": 0.0001355771055162808,
      "loss": 0.6867,
      "step": 4504
    },
    {
      "epoch": 0.40280758226037194,
      "grad_norm": 0.15088408105641993,
      "learning_rate": 0.00013555003868446404,
      "loss": 0.6417,
      "step": 4505
    },
    {
      "epoch": 0.4028969957081545,
      "grad_norm": 0.13787461677363247,
      "learning_rate": 0.00013552296887110846,
      "loss": 0.6381,
      "step": 4506
    },
    {
      "epoch": 0.40298640915593703,
      "grad_norm": 0.13932441163339324,
      "learning_rate": 0.00013549589607848438,
      "loss": 0.6986,
      "step": 4507
    },
    {
      "epoch": 0.4030758226037196,
      "grad_norm": 0.15692110784496108,
      "learning_rate": 0.00013546882030886237,
      "loss": 0.6592,
      "step": 4508
    },
    {
      "epoch": 0.4031652360515021,
      "grad_norm": 0.1769103743763648,
      "learning_rate": 0.00013544174156451323,
      "loss": 0.7264,
      "step": 4509
    },
    {
      "epoch": 0.4032546494992847,
      "grad_norm": 0.15750089503523004,
      "learning_rate": 0.00013541465984770804,
      "loss": 0.7311,
      "step": 4510
    },
    {
      "epoch": 0.40334406294706726,
      "grad_norm": 0.1456599108704451,
      "learning_rate": 0.00013538757516071807,
      "loss": 0.6776,
      "step": 4511
    },
    {
      "epoch": 0.4034334763948498,
      "grad_norm": 0.15665023905521017,
      "learning_rate": 0.00013536048750581494,
      "loss": 0.6899,
      "step": 4512
    },
    {
      "epoch": 0.40352288984263235,
      "grad_norm": 0.1731799317819575,
      "learning_rate": 0.0001353333968852704,
      "loss": 0.6901,
      "step": 4513
    },
    {
      "epoch": 0.40361230329041486,
      "grad_norm": 0.15428213567211482,
      "learning_rate": 0.00013530630330135655,
      "loss": 0.6386,
      "step": 4514
    },
    {
      "epoch": 0.40370171673819744,
      "grad_norm": 0.14622646534286374,
      "learning_rate": 0.0001352792067563457,
      "loss": 0.6676,
      "step": 4515
    },
    {
      "epoch": 0.40379113018597995,
      "grad_norm": 0.15616391511844832,
      "learning_rate": 0.00013525210725251035,
      "loss": 0.7035,
      "step": 4516
    },
    {
      "epoch": 0.4038805436337625,
      "grad_norm": 0.15355669411930173,
      "learning_rate": 0.00013522500479212337,
      "loss": 0.6585,
      "step": 4517
    },
    {
      "epoch": 0.40396995708154504,
      "grad_norm": 0.13931648501521557,
      "learning_rate": 0.00013519789937745775,
      "loss": 0.618,
      "step": 4518
    },
    {
      "epoch": 0.4040593705293276,
      "grad_norm": 0.16873693150710878,
      "learning_rate": 0.00013517079101078684,
      "loss": 0.7028,
      "step": 4519
    },
    {
      "epoch": 0.4041487839771102,
      "grad_norm": 0.14619952314901616,
      "learning_rate": 0.00013514367969438414,
      "loss": 0.6564,
      "step": 4520
    },
    {
      "epoch": 0.4042381974248927,
      "grad_norm": 0.16055761567003946,
      "learning_rate": 0.0001351165654305235,
      "loss": 0.7089,
      "step": 4521
    },
    {
      "epoch": 0.40432761087267527,
      "grad_norm": 0.1554600793720178,
      "learning_rate": 0.00013508944822147891,
      "loss": 0.6905,
      "step": 4522
    },
    {
      "epoch": 0.4044170243204578,
      "grad_norm": 0.13962504724545405,
      "learning_rate": 0.00013506232806952467,
      "loss": 0.6673,
      "step": 4523
    },
    {
      "epoch": 0.40450643776824036,
      "grad_norm": 0.1520119084519682,
      "learning_rate": 0.0001350352049769353,
      "loss": 0.7061,
      "step": 4524
    },
    {
      "epoch": 0.4045958512160229,
      "grad_norm": 0.1602393165418052,
      "learning_rate": 0.00013500807894598565,
      "loss": 0.6935,
      "step": 4525
    },
    {
      "epoch": 0.40468526466380544,
      "grad_norm": 0.13050313819691062,
      "learning_rate": 0.00013498094997895069,
      "loss": 0.6455,
      "step": 4526
    },
    {
      "epoch": 0.40477467811158796,
      "grad_norm": 0.14281456020161024,
      "learning_rate": 0.00013495381807810569,
      "loss": 0.7053,
      "step": 4527
    },
    {
      "epoch": 0.40486409155937053,
      "grad_norm": 0.17846252088796938,
      "learning_rate": 0.00013492668324572614,
      "loss": 0.7162,
      "step": 4528
    },
    {
      "epoch": 0.4049535050071531,
      "grad_norm": 0.13640524928568445,
      "learning_rate": 0.0001348995454840879,
      "loss": 0.6561,
      "step": 4529
    },
    {
      "epoch": 0.4050429184549356,
      "grad_norm": 0.16813513478808972,
      "learning_rate": 0.00013487240479546691,
      "loss": 0.7205,
      "step": 4530
    },
    {
      "epoch": 0.4051323319027182,
      "grad_norm": 0.14606121280882428,
      "learning_rate": 0.00013484526118213942,
      "loss": 0.6645,
      "step": 4531
    },
    {
      "epoch": 0.4052217453505007,
      "grad_norm": 0.16332708629499795,
      "learning_rate": 0.000134818114646382,
      "loss": 0.6836,
      "step": 4532
    },
    {
      "epoch": 0.4053111587982833,
      "grad_norm": 0.14907349797030361,
      "learning_rate": 0.00013479096519047136,
      "loss": 0.6773,
      "step": 4533
    },
    {
      "epoch": 0.4054005722460658,
      "grad_norm": 0.15660686588986142,
      "learning_rate": 0.00013476381281668447,
      "loss": 0.7067,
      "step": 4534
    },
    {
      "epoch": 0.40548998569384836,
      "grad_norm": 0.15261811197553884,
      "learning_rate": 0.00013473665752729855,
      "loss": 0.6758,
      "step": 4535
    },
    {
      "epoch": 0.4055793991416309,
      "grad_norm": 0.17338258852221214,
      "learning_rate": 0.00013470949932459117,
      "loss": 0.6897,
      "step": 4536
    },
    {
      "epoch": 0.40566881258941345,
      "grad_norm": 0.15095515528404244,
      "learning_rate": 0.00013468233821083996,
      "loss": 0.7076,
      "step": 4537
    },
    {
      "epoch": 0.40575822603719597,
      "grad_norm": 0.150571416716395,
      "learning_rate": 0.0001346551741883229,
      "loss": 0.6972,
      "step": 4538
    },
    {
      "epoch": 0.40584763948497854,
      "grad_norm": 0.14528653770972644,
      "learning_rate": 0.0001346280072593183,
      "loss": 0.7074,
      "step": 4539
    },
    {
      "epoch": 0.4059370529327611,
      "grad_norm": 0.14648033240579966,
      "learning_rate": 0.00013460083742610455,
      "loss": 0.6398,
      "step": 4540
    },
    {
      "epoch": 0.4060264663805436,
      "grad_norm": 0.1740623040290435,
      "learning_rate": 0.00013457366469096029,
      "loss": 0.7131,
      "step": 4541
    },
    {
      "epoch": 0.4061158798283262,
      "grad_norm": 0.1715336737448949,
      "learning_rate": 0.00013454648905616458,
      "loss": 0.7217,
      "step": 4542
    },
    {
      "epoch": 0.4062052932761087,
      "grad_norm": 0.14108091940515227,
      "learning_rate": 0.00013451931052399656,
      "loss": 0.6754,
      "step": 4543
    },
    {
      "epoch": 0.4062947067238913,
      "grad_norm": 0.13088577175093466,
      "learning_rate": 0.00013449212909673563,
      "loss": 0.6442,
      "step": 4544
    },
    {
      "epoch": 0.4063841201716738,
      "grad_norm": 0.13745464640391866,
      "learning_rate": 0.00013446494477666146,
      "loss": 0.6645,
      "step": 4545
    },
    {
      "epoch": 0.4064735336194564,
      "grad_norm": 0.1430187104529894,
      "learning_rate": 0.00013443775756605405,
      "loss": 0.6484,
      "step": 4546
    },
    {
      "epoch": 0.4065629470672389,
      "grad_norm": 0.1358759578255798,
      "learning_rate": 0.0001344105674671935,
      "loss": 0.6367,
      "step": 4547
    },
    {
      "epoch": 0.40665236051502146,
      "grad_norm": 0.1470580782398473,
      "learning_rate": 0.00013438337448236015,
      "loss": 0.6924,
      "step": 4548
    },
    {
      "epoch": 0.40674177396280403,
      "grad_norm": 0.14579013859797058,
      "learning_rate": 0.0001343561786138348,
      "loss": 0.6472,
      "step": 4549
    },
    {
      "epoch": 0.40683118741058655,
      "grad_norm": 0.13681618006649818,
      "learning_rate": 0.00013432897986389818,
      "loss": 0.683,
      "step": 4550
    },
    {
      "epoch": 0.4069206008583691,
      "grad_norm": 0.17299066184074402,
      "learning_rate": 0.00013430177823483148,
      "loss": 0.6909,
      "step": 4551
    },
    {
      "epoch": 0.40701001430615164,
      "grad_norm": 0.15986114084617722,
      "learning_rate": 0.00013427457372891608,
      "loss": 0.6619,
      "step": 4552
    },
    {
      "epoch": 0.4070994277539342,
      "grad_norm": 0.15543971076725194,
      "learning_rate": 0.00013424736634843357,
      "loss": 0.6916,
      "step": 4553
    },
    {
      "epoch": 0.4071888412017167,
      "grad_norm": 0.13815517093331528,
      "learning_rate": 0.0001342201560956658,
      "loss": 0.673,
      "step": 4554
    },
    {
      "epoch": 0.4072782546494993,
      "grad_norm": 0.1430320334158288,
      "learning_rate": 0.00013419294297289486,
      "loss": 0.6906,
      "step": 4555
    },
    {
      "epoch": 0.4073676680972818,
      "grad_norm": 0.14894638522748166,
      "learning_rate": 0.00013416572698240312,
      "loss": 0.6984,
      "step": 4556
    },
    {
      "epoch": 0.4074570815450644,
      "grad_norm": 0.15936546965106038,
      "learning_rate": 0.00013413850812647312,
      "loss": 0.651,
      "step": 4557
    },
    {
      "epoch": 0.4075464949928469,
      "grad_norm": 0.14602141368112884,
      "learning_rate": 0.00013411128640738762,
      "loss": 0.6789,
      "step": 4558
    },
    {
      "epoch": 0.40763590844062947,
      "grad_norm": 0.1642849488983738,
      "learning_rate": 0.00013408406182742976,
      "loss": 0.698,
      "step": 4559
    },
    {
      "epoch": 0.40772532188841204,
      "grad_norm": 0.13731516229428753,
      "learning_rate": 0.00013405683438888282,
      "loss": 0.6976,
      "step": 4560
    },
    {
      "epoch": 0.40781473533619456,
      "grad_norm": 0.14441295782474656,
      "learning_rate": 0.00013402960409403028,
      "loss": 0.6739,
      "step": 4561
    },
    {
      "epoch": 0.4079041487839771,
      "grad_norm": 0.13329717115314882,
      "learning_rate": 0.00013400237094515592,
      "loss": 0.629,
      "step": 4562
    },
    {
      "epoch": 0.40799356223175964,
      "grad_norm": 0.15533992278062503,
      "learning_rate": 0.0001339751349445438,
      "loss": 0.6899,
      "step": 4563
    },
    {
      "epoch": 0.4080829756795422,
      "grad_norm": 0.1669365190381189,
      "learning_rate": 0.00013394789609447817,
      "loss": 0.7044,
      "step": 4564
    },
    {
      "epoch": 0.40817238912732473,
      "grad_norm": 0.16832287574870453,
      "learning_rate": 0.00013392065439724344,
      "loss": 0.6952,
      "step": 4565
    },
    {
      "epoch": 0.4082618025751073,
      "grad_norm": 0.14751127836239342,
      "learning_rate": 0.00013389340985512442,
      "loss": 0.7295,
      "step": 4566
    },
    {
      "epoch": 0.4083512160228898,
      "grad_norm": 0.14449975620156325,
      "learning_rate": 0.00013386616247040606,
      "loss": 0.6636,
      "step": 4567
    },
    {
      "epoch": 0.4084406294706724,
      "grad_norm": 0.15786414896915243,
      "learning_rate": 0.00013383891224537354,
      "loss": 0.6418,
      "step": 4568
    },
    {
      "epoch": 0.40853004291845496,
      "grad_norm": 0.17987341345121574,
      "learning_rate": 0.0001338116591823123,
      "loss": 0.7205,
      "step": 4569
    },
    {
      "epoch": 0.4086194563662375,
      "grad_norm": 0.15326878722903137,
      "learning_rate": 0.0001337844032835081,
      "loss": 0.6745,
      "step": 4570
    },
    {
      "epoch": 0.40870886981402005,
      "grad_norm": 0.15493725626727883,
      "learning_rate": 0.0001337571445512467,
      "loss": 0.6398,
      "step": 4571
    },
    {
      "epoch": 0.40879828326180256,
      "grad_norm": 0.14809513994428694,
      "learning_rate": 0.00013372988298781442,
      "loss": 0.6588,
      "step": 4572
    },
    {
      "epoch": 0.40888769670958514,
      "grad_norm": 0.1554039126983817,
      "learning_rate": 0.00013370261859549758,
      "loss": 0.7217,
      "step": 4573
    },
    {
      "epoch": 0.40897711015736765,
      "grad_norm": 0.17313277888218834,
      "learning_rate": 0.00013367535137658282,
      "loss": 0.6577,
      "step": 4574
    },
    {
      "epoch": 0.4090665236051502,
      "grad_norm": 0.15551898255940827,
      "learning_rate": 0.00013364808133335703,
      "loss": 0.6924,
      "step": 4575
    },
    {
      "epoch": 0.40915593705293274,
      "grad_norm": 0.1558953225531468,
      "learning_rate": 0.00013362080846810725,
      "loss": 0.6682,
      "step": 4576
    },
    {
      "epoch": 0.4092453505007153,
      "grad_norm": 0.15369112751532454,
      "learning_rate": 0.0001335935327831209,
      "loss": 0.7019,
      "step": 4577
    },
    {
      "epoch": 0.4093347639484979,
      "grad_norm": 0.1647637245135871,
      "learning_rate": 0.0001335662542806855,
      "loss": 0.6965,
      "step": 4578
    },
    {
      "epoch": 0.4094241773962804,
      "grad_norm": 0.13293892968044832,
      "learning_rate": 0.00013353897296308892,
      "loss": 0.6889,
      "step": 4579
    },
    {
      "epoch": 0.40951359084406297,
      "grad_norm": 0.15406554659367633,
      "learning_rate": 0.00013351168883261915,
      "loss": 0.6889,
      "step": 4580
    },
    {
      "epoch": 0.4096030042918455,
      "grad_norm": 0.13624555347394324,
      "learning_rate": 0.00013348440189156455,
      "loss": 0.6323,
      "step": 4581
    },
    {
      "epoch": 0.40969241773962806,
      "grad_norm": 0.1467334937646596,
      "learning_rate": 0.00013345711214221359,
      "loss": 0.704,
      "step": 4582
    },
    {
      "epoch": 0.4097818311874106,
      "grad_norm": 0.143807728546793,
      "learning_rate": 0.00013342981958685502,
      "loss": 0.6354,
      "step": 4583
    },
    {
      "epoch": 0.40987124463519314,
      "grad_norm": 0.15433631165603806,
      "learning_rate": 0.00013340252422777788,
      "loss": 0.7014,
      "step": 4584
    },
    {
      "epoch": 0.40996065808297566,
      "grad_norm": 0.15883390470967695,
      "learning_rate": 0.00013337522606727132,
      "loss": 0.7138,
      "step": 4585
    },
    {
      "epoch": 0.41005007153075823,
      "grad_norm": 0.1582877606619693,
      "learning_rate": 0.00013334792510762491,
      "loss": 0.6905,
      "step": 4586
    },
    {
      "epoch": 0.41013948497854075,
      "grad_norm": 0.14870955779993028,
      "learning_rate": 0.0001333206213511283,
      "loss": 0.6781,
      "step": 4587
    },
    {
      "epoch": 0.4102288984263233,
      "grad_norm": 0.14417589402391373,
      "learning_rate": 0.00013329331480007139,
      "loss": 0.6431,
      "step": 4588
    },
    {
      "epoch": 0.4103183118741059,
      "grad_norm": 0.15235905280077428,
      "learning_rate": 0.0001332660054567444,
      "loss": 0.6821,
      "step": 4589
    },
    {
      "epoch": 0.4104077253218884,
      "grad_norm": 0.13316512527144642,
      "learning_rate": 0.00013323869332343768,
      "loss": 0.6514,
      "step": 4590
    },
    {
      "epoch": 0.410497138769671,
      "grad_norm": 0.1467998971824828,
      "learning_rate": 0.00013321137840244192,
      "loss": 0.6499,
      "step": 4591
    },
    {
      "epoch": 0.4105865522174535,
      "grad_norm": 0.127740048917835,
      "learning_rate": 0.00013318406069604794,
      "loss": 0.6382,
      "step": 4592
    },
    {
      "epoch": 0.41067596566523606,
      "grad_norm": 0.1355200069996211,
      "learning_rate": 0.00013315674020654688,
      "loss": 0.6576,
      "step": 4593
    },
    {
      "epoch": 0.4107653791130186,
      "grad_norm": 0.14434204081929103,
      "learning_rate": 0.00013312941693623004,
      "loss": 0.7011,
      "step": 4594
    },
    {
      "epoch": 0.41085479256080115,
      "grad_norm": 0.1775375682916789,
      "learning_rate": 0.00013310209088738902,
      "loss": 0.7012,
      "step": 4595
    },
    {
      "epoch": 0.41094420600858367,
      "grad_norm": 0.16684076390243519,
      "learning_rate": 0.00013307476206231563,
      "loss": 0.69,
      "step": 4596
    },
    {
      "epoch": 0.41103361945636624,
      "grad_norm": 0.17289558329336935,
      "learning_rate": 0.0001330474304633019,
      "loss": 0.6643,
      "step": 4597
    },
    {
      "epoch": 0.4111230329041488,
      "grad_norm": 0.16075068676531387,
      "learning_rate": 0.00013302009609264005,
      "loss": 0.7079,
      "step": 4598
    },
    {
      "epoch": 0.4112124463519313,
      "grad_norm": 0.1578199673710377,
      "learning_rate": 0.00013299275895262266,
      "loss": 0.7198,
      "step": 4599
    },
    {
      "epoch": 0.4113018597997139,
      "grad_norm": 0.15352889533990566,
      "learning_rate": 0.00013296541904554238,
      "loss": 0.6963,
      "step": 4600
    },
    {
      "epoch": 0.4113912732474964,
      "grad_norm": 0.1519204933908073,
      "learning_rate": 0.00013293807637369226,
      "loss": 0.7032,
      "step": 4601
    },
    {
      "epoch": 0.411480686695279,
      "grad_norm": 0.17875041783408538,
      "learning_rate": 0.00013291073093936543,
      "loss": 0.735,
      "step": 4602
    },
    {
      "epoch": 0.4115701001430615,
      "grad_norm": 0.15946639997132428,
      "learning_rate": 0.00013288338274485532,
      "loss": 0.7197,
      "step": 4603
    },
    {
      "epoch": 0.4116595135908441,
      "grad_norm": 0.14994794066737488,
      "learning_rate": 0.00013285603179245565,
      "loss": 0.6522,
      "step": 4604
    },
    {
      "epoch": 0.4117489270386266,
      "grad_norm": 0.1539409829257156,
      "learning_rate": 0.00013282867808446025,
      "loss": 0.6853,
      "step": 4605
    },
    {
      "epoch": 0.41183834048640916,
      "grad_norm": 0.16072349862019233,
      "learning_rate": 0.0001328013216231633,
      "loss": 0.6891,
      "step": 4606
    },
    {
      "epoch": 0.4119277539341917,
      "grad_norm": 0.15150624235192062,
      "learning_rate": 0.00013277396241085908,
      "loss": 0.6549,
      "step": 4607
    },
    {
      "epoch": 0.41201716738197425,
      "grad_norm": 0.15285655286172592,
      "learning_rate": 0.00013274660044984224,
      "loss": 0.6587,
      "step": 4608
    },
    {
      "epoch": 0.4121065808297568,
      "grad_norm": 0.14624321431883666,
      "learning_rate": 0.00013271923574240756,
      "loss": 0.634,
      "step": 4609
    },
    {
      "epoch": 0.41219599427753933,
      "grad_norm": 0.1440297170097666,
      "learning_rate": 0.00013269186829085008,
      "loss": 0.6746,
      "step": 4610
    },
    {
      "epoch": 0.4122854077253219,
      "grad_norm": 0.16027481842924785,
      "learning_rate": 0.0001326644980974651,
      "loss": 0.6969,
      "step": 4611
    },
    {
      "epoch": 0.4123748211731044,
      "grad_norm": 0.18014795625395935,
      "learning_rate": 0.00013263712516454812,
      "loss": 0.6824,
      "step": 4612
    },
    {
      "epoch": 0.412464234620887,
      "grad_norm": 0.13366820369828647,
      "learning_rate": 0.00013260974949439485,
      "loss": 0.6688,
      "step": 4613
    },
    {
      "epoch": 0.4125536480686695,
      "grad_norm": 0.14202641391457776,
      "learning_rate": 0.00013258237108930128,
      "loss": 0.6755,
      "step": 4614
    },
    {
      "epoch": 0.4126430615164521,
      "grad_norm": 0.17199870213538854,
      "learning_rate": 0.0001325549899515636,
      "loss": 0.6738,
      "step": 4615
    },
    {
      "epoch": 0.4127324749642346,
      "grad_norm": 0.13036572276251046,
      "learning_rate": 0.00013252760608347826,
      "loss": 0.6574,
      "step": 4616
    },
    {
      "epoch": 0.41282188841201717,
      "grad_norm": 0.14930306512821084,
      "learning_rate": 0.00013250021948734184,
      "loss": 0.684,
      "step": 4617
    },
    {
      "epoch": 0.41291130185979974,
      "grad_norm": 0.16265548429125312,
      "learning_rate": 0.00013247283016545126,
      "loss": 0.6908,
      "step": 4618
    },
    {
      "epoch": 0.41300071530758226,
      "grad_norm": 0.1798806812448399,
      "learning_rate": 0.00013244543812010364,
      "loss": 0.7042,
      "step": 4619
    },
    {
      "epoch": 0.4130901287553648,
      "grad_norm": 0.15042546743177332,
      "learning_rate": 0.00013241804335359633,
      "loss": 0.6635,
      "step": 4620
    },
    {
      "epoch": 0.41317954220314734,
      "grad_norm": 0.2050806034723225,
      "learning_rate": 0.00013239064586822685,
      "loss": 0.4006,
      "step": 4621
    },
    {
      "epoch": 0.4132689556509299,
      "grad_norm": 0.11975775618271857,
      "learning_rate": 0.000132363245666293,
      "loss": 0.6052,
      "step": 4622
    },
    {
      "epoch": 0.41335836909871243,
      "grad_norm": 0.15229084032317855,
      "learning_rate": 0.00013233584275009288,
      "loss": 0.6879,
      "step": 4623
    },
    {
      "epoch": 0.413447782546495,
      "grad_norm": 0.16275852952671693,
      "learning_rate": 0.00013230843712192463,
      "loss": 0.6558,
      "step": 4624
    },
    {
      "epoch": 0.4135371959942775,
      "grad_norm": 0.17767165233743407,
      "learning_rate": 0.0001322810287840868,
      "loss": 0.7429,
      "step": 4625
    },
    {
      "epoch": 0.4136266094420601,
      "grad_norm": 0.139825279367149,
      "learning_rate": 0.00013225361773887804,
      "loss": 0.6435,
      "step": 4626
    },
    {
      "epoch": 0.4137160228898426,
      "grad_norm": 0.14191725309605963,
      "learning_rate": 0.00013222620398859738,
      "loss": 0.6702,
      "step": 4627
    },
    {
      "epoch": 0.4138054363376252,
      "grad_norm": 0.13396911421249352,
      "learning_rate": 0.00013219878753554384,
      "loss": 0.6493,
      "step": 4628
    },
    {
      "epoch": 0.41389484978540775,
      "grad_norm": 0.14585312808543072,
      "learning_rate": 0.0001321713683820169,
      "loss": 0.671,
      "step": 4629
    },
    {
      "epoch": 0.41398426323319026,
      "grad_norm": 0.1487636150423228,
      "learning_rate": 0.00013214394653031616,
      "loss": 0.6542,
      "step": 4630
    },
    {
      "epoch": 0.41407367668097284,
      "grad_norm": 0.1609301588366272,
      "learning_rate": 0.00013211652198274145,
      "loss": 0.6747,
      "step": 4631
    },
    {
      "epoch": 0.41416309012875535,
      "grad_norm": 0.16113255391425071,
      "learning_rate": 0.0001320890947415928,
      "loss": 0.6857,
      "step": 4632
    },
    {
      "epoch": 0.4142525035765379,
      "grad_norm": 0.1490085478176476,
      "learning_rate": 0.00013206166480917055,
      "loss": 0.6883,
      "step": 4633
    },
    {
      "epoch": 0.41434191702432044,
      "grad_norm": 0.148085279066328,
      "learning_rate": 0.0001320342321877752,
      "loss": 0.6693,
      "step": 4634
    },
    {
      "epoch": 0.414431330472103,
      "grad_norm": 0.1497068248844614,
      "learning_rate": 0.00013200679687970748,
      "loss": 0.6825,
      "step": 4635
    },
    {
      "epoch": 0.4145207439198855,
      "grad_norm": 0.17009741507014992,
      "learning_rate": 0.00013197935888726832,
      "loss": 0.7026,
      "step": 4636
    },
    {
      "epoch": 0.4146101573676681,
      "grad_norm": 0.13758515123796367,
      "learning_rate": 0.000131951918212759,
      "loss": 0.6502,
      "step": 4637
    },
    {
      "epoch": 0.41469957081545067,
      "grad_norm": 0.14071003009953179,
      "learning_rate": 0.00013192447485848088,
      "loss": 0.662,
      "step": 4638
    },
    {
      "epoch": 0.4147889842632332,
      "grad_norm": 0.15852113180155797,
      "learning_rate": 0.00013189702882673556,
      "loss": 0.6723,
      "step": 4639
    },
    {
      "epoch": 0.41487839771101576,
      "grad_norm": 0.16910557448643154,
      "learning_rate": 0.00013186958011982502,
      "loss": 0.6889,
      "step": 4640
    },
    {
      "epoch": 0.41496781115879827,
      "grad_norm": 0.22075216042094092,
      "learning_rate": 0.00013184212874005124,
      "loss": 0.3775,
      "step": 4641
    },
    {
      "epoch": 0.41505722460658084,
      "grad_norm": 0.15116436580507092,
      "learning_rate": 0.0001318146746897166,
      "loss": 0.7109,
      "step": 4642
    },
    {
      "epoch": 0.41514663805436336,
      "grad_norm": 0.13711586901525674,
      "learning_rate": 0.00013178721797112362,
      "loss": 0.6899,
      "step": 4643
    },
    {
      "epoch": 0.41523605150214593,
      "grad_norm": 0.11676582762432886,
      "learning_rate": 0.00013175975858657505,
      "loss": 0.6071,
      "step": 4644
    },
    {
      "epoch": 0.41532546494992845,
      "grad_norm": 0.1555779305310931,
      "learning_rate": 0.00013173229653837387,
      "loss": 0.6743,
      "step": 4645
    },
    {
      "epoch": 0.415414878397711,
      "grad_norm": 0.1513197989857233,
      "learning_rate": 0.0001317048318288233,
      "loss": 0.6799,
      "step": 4646
    },
    {
      "epoch": 0.4155042918454936,
      "grad_norm": 0.15727105769979005,
      "learning_rate": 0.0001316773644602268,
      "loss": 0.6898,
      "step": 4647
    },
    {
      "epoch": 0.4155937052932761,
      "grad_norm": 0.1327403478084461,
      "learning_rate": 0.00013164989443488798,
      "loss": 0.631,
      "step": 4648
    },
    {
      "epoch": 0.4156831187410587,
      "grad_norm": 0.12482174241699047,
      "learning_rate": 0.00013162242175511076,
      "loss": 0.6432,
      "step": 4649
    },
    {
      "epoch": 0.4157725321888412,
      "grad_norm": 0.1369324249041584,
      "learning_rate": 0.0001315949464231992,
      "loss": 0.7122,
      "step": 4650
    },
    {
      "epoch": 0.41586194563662376,
      "grad_norm": 0.16398422197707502,
      "learning_rate": 0.00013156746844145766,
      "loss": 0.7206,
      "step": 4651
    },
    {
      "epoch": 0.4159513590844063,
      "grad_norm": 0.13831290915217875,
      "learning_rate": 0.00013153998781219062,
      "loss": 0.6846,
      "step": 4652
    },
    {
      "epoch": 0.41604077253218885,
      "grad_norm": 0.15500058739107087,
      "learning_rate": 0.00013151250453770293,
      "loss": 0.6578,
      "step": 4653
    },
    {
      "epoch": 0.41613018597997137,
      "grad_norm": 0.15156297714422098,
      "learning_rate": 0.00013148501862029954,
      "loss": 0.6568,
      "step": 4654
    },
    {
      "epoch": 0.41621959942775394,
      "grad_norm": 0.1673907535569878,
      "learning_rate": 0.00013145753006228565,
      "loss": 0.7225,
      "step": 4655
    },
    {
      "epoch": 0.41630901287553645,
      "grad_norm": 0.1746620472285733,
      "learning_rate": 0.00013143003886596669,
      "loss": 0.7238,
      "step": 4656
    },
    {
      "epoch": 0.416398426323319,
      "grad_norm": 0.13462370175826094,
      "learning_rate": 0.00013140254503364837,
      "loss": 0.6894,
      "step": 4657
    },
    {
      "epoch": 0.4164878397711016,
      "grad_norm": 0.14266774485068584,
      "learning_rate": 0.00013137504856763652,
      "loss": 0.6866,
      "step": 4658
    },
    {
      "epoch": 0.4165772532188841,
      "grad_norm": 0.14211294413502848,
      "learning_rate": 0.0001313475494702372,
      "loss": 0.6564,
      "step": 4659
    },
    {
      "epoch": 0.4166666666666667,
      "grad_norm": 0.15747007315761297,
      "learning_rate": 0.0001313200477437568,
      "loss": 0.6812,
      "step": 4660
    },
    {
      "epoch": 0.4167560801144492,
      "grad_norm": 0.16135277113178845,
      "learning_rate": 0.00013129254339050181,
      "loss": 0.6697,
      "step": 4661
    },
    {
      "epoch": 0.4168454935622318,
      "grad_norm": 0.15915190091099132,
      "learning_rate": 0.00013126503641277897,
      "loss": 0.6571,
      "step": 4662
    },
    {
      "epoch": 0.4169349070100143,
      "grad_norm": 0.15135847861272198,
      "learning_rate": 0.00013123752681289529,
      "loss": 0.6908,
      "step": 4663
    },
    {
      "epoch": 0.41702432045779686,
      "grad_norm": 0.1497614218205096,
      "learning_rate": 0.000131210014593158,
      "loss": 0.645,
      "step": 4664
    },
    {
      "epoch": 0.4171137339055794,
      "grad_norm": 0.13853888535382497,
      "learning_rate": 0.00013118249975587447,
      "loss": 0.656,
      "step": 4665
    },
    {
      "epoch": 0.41720314735336195,
      "grad_norm": 0.15580843353981352,
      "learning_rate": 0.0001311549823033523,
      "loss": 0.6892,
      "step": 4666
    },
    {
      "epoch": 0.4172925608011445,
      "grad_norm": 0.15551047717227165,
      "learning_rate": 0.0001311274622378994,
      "loss": 0.676,
      "step": 4667
    },
    {
      "epoch": 0.41738197424892703,
      "grad_norm": 0.1533267483624362,
      "learning_rate": 0.0001310999395618239,
      "loss": 0.6853,
      "step": 4668
    },
    {
      "epoch": 0.4174713876967096,
      "grad_norm": 0.15650004303874387,
      "learning_rate": 0.00013107241427743398,
      "loss": 0.6903,
      "step": 4669
    },
    {
      "epoch": 0.4175608011444921,
      "grad_norm": 0.17193838412131612,
      "learning_rate": 0.0001310448863870382,
      "loss": 0.7529,
      "step": 4670
    },
    {
      "epoch": 0.4176502145922747,
      "grad_norm": 0.142393046992722,
      "learning_rate": 0.0001310173558929453,
      "loss": 0.6586,
      "step": 4671
    },
    {
      "epoch": 0.4177396280400572,
      "grad_norm": 0.14310224647137637,
      "learning_rate": 0.00013098982279746422,
      "loss": 0.684,
      "step": 4672
    },
    {
      "epoch": 0.4178290414878398,
      "grad_norm": 0.15164905625444527,
      "learning_rate": 0.0001309622871029041,
      "loss": 0.663,
      "step": 4673
    },
    {
      "epoch": 0.4179184549356223,
      "grad_norm": 0.14803220691989774,
      "learning_rate": 0.00013093474881157438,
      "loss": 0.6666,
      "step": 4674
    },
    {
      "epoch": 0.41800786838340487,
      "grad_norm": 0.14801428077747483,
      "learning_rate": 0.00013090720792578465,
      "loss": 0.6885,
      "step": 4675
    },
    {
      "epoch": 0.4180972818311874,
      "grad_norm": 0.13692844986379146,
      "learning_rate": 0.00013087966444784468,
      "loss": 0.6303,
      "step": 4676
    },
    {
      "epoch": 0.41818669527896996,
      "grad_norm": 0.15811876466995767,
      "learning_rate": 0.00013085211838006458,
      "loss": 0.6451,
      "step": 4677
    },
    {
      "epoch": 0.4182761087267525,
      "grad_norm": 0.14416762096771338,
      "learning_rate": 0.00013082456972475458,
      "loss": 0.6786,
      "step": 4678
    },
    {
      "epoch": 0.41836552217453504,
      "grad_norm": 0.14931530074422517,
      "learning_rate": 0.0001307970184842251,
      "loss": 0.6723,
      "step": 4679
    },
    {
      "epoch": 0.4184549356223176,
      "grad_norm": 0.14155798001724954,
      "learning_rate": 0.0001307694646607869,
      "loss": 0.6892,
      "step": 4680
    },
    {
      "epoch": 0.41854434907010013,
      "grad_norm": 0.15539348711806303,
      "learning_rate": 0.00013074190825675087,
      "loss": 0.6424,
      "step": 4681
    },
    {
      "epoch": 0.4186337625178827,
      "grad_norm": 0.16800662455174958,
      "learning_rate": 0.00013071434927442813,
      "loss": 0.7096,
      "step": 4682
    },
    {
      "epoch": 0.4187231759656652,
      "grad_norm": 0.14901810966562667,
      "learning_rate": 0.00013068678771612996,
      "loss": 0.6759,
      "step": 4683
    },
    {
      "epoch": 0.4188125894134478,
      "grad_norm": 0.15922960007200512,
      "learning_rate": 0.00013065922358416798,
      "loss": 0.737,
      "step": 4684
    },
    {
      "epoch": 0.4189020028612303,
      "grad_norm": 0.1522722601461904,
      "learning_rate": 0.00013063165688085397,
      "loss": 0.684,
      "step": 4685
    },
    {
      "epoch": 0.4189914163090129,
      "grad_norm": 0.14104268468511208,
      "learning_rate": 0.00013060408760849987,
      "loss": 0.6859,
      "step": 4686
    },
    {
      "epoch": 0.41908082975679545,
      "grad_norm": 0.1374969552786062,
      "learning_rate": 0.00013057651576941793,
      "loss": 0.6714,
      "step": 4687
    },
    {
      "epoch": 0.41917024320457796,
      "grad_norm": 0.16021498834368156,
      "learning_rate": 0.00013054894136592052,
      "loss": 0.6923,
      "step": 4688
    },
    {
      "epoch": 0.41925965665236054,
      "grad_norm": 0.14585687084465834,
      "learning_rate": 0.00013052136440032028,
      "loss": 0.6313,
      "step": 4689
    },
    {
      "epoch": 0.41934907010014305,
      "grad_norm": 0.1492533773503135,
      "learning_rate": 0.00013049378487493008,
      "loss": 0.7307,
      "step": 4690
    },
    {
      "epoch": 0.4194384835479256,
      "grad_norm": 0.15878966987388546,
      "learning_rate": 0.00013046620279206296,
      "loss": 0.6812,
      "step": 4691
    },
    {
      "epoch": 0.41952789699570814,
      "grad_norm": 0.15220579839057596,
      "learning_rate": 0.00013043861815403225,
      "loss": 0.663,
      "step": 4692
    },
    {
      "epoch": 0.4196173104434907,
      "grad_norm": 0.16068305853438936,
      "learning_rate": 0.0001304110309631513,
      "loss": 0.6954,
      "step": 4693
    },
    {
      "epoch": 0.4197067238912732,
      "grad_norm": 0.15916611712339188,
      "learning_rate": 0.000130383441221734,
      "loss": 0.7104,
      "step": 4694
    },
    {
      "epoch": 0.4197961373390558,
      "grad_norm": 0.20017705126017002,
      "learning_rate": 0.00013035584893209416,
      "loss": 0.3791,
      "step": 4695
    },
    {
      "epoch": 0.4198855507868383,
      "grad_norm": 0.14352960212134228,
      "learning_rate": 0.00013032825409654592,
      "loss": 0.6556,
      "step": 4696
    },
    {
      "epoch": 0.4199749642346209,
      "grad_norm": 0.15409443530792574,
      "learning_rate": 0.00013030065671740363,
      "loss": 0.684,
      "step": 4697
    },
    {
      "epoch": 0.42006437768240346,
      "grad_norm": 0.132785960199592,
      "learning_rate": 0.00013027305679698186,
      "loss": 0.6761,
      "step": 4698
    },
    {
      "epoch": 0.42015379113018597,
      "grad_norm": 0.16978776384065572,
      "learning_rate": 0.00013024545433759538,
      "loss": 0.7371,
      "step": 4699
    },
    {
      "epoch": 0.42024320457796854,
      "grad_norm": 0.16966080267860922,
      "learning_rate": 0.00013021784934155915,
      "loss": 0.712,
      "step": 4700
    },
    {
      "epoch": 0.42033261802575106,
      "grad_norm": 0.14680579096121732,
      "learning_rate": 0.00013019024181118845,
      "loss": 0.6839,
      "step": 4701
    },
    {
      "epoch": 0.42042203147353363,
      "grad_norm": 0.15483598495651457,
      "learning_rate": 0.00013016263174879858,
      "loss": 0.7039,
      "step": 4702
    },
    {
      "epoch": 0.42051144492131615,
      "grad_norm": 0.1711599080791358,
      "learning_rate": 0.00013013501915670522,
      "loss": 0.6854,
      "step": 4703
    },
    {
      "epoch": 0.4206008583690987,
      "grad_norm": 0.1320518977145096,
      "learning_rate": 0.0001301074040372242,
      "loss": 0.6536,
      "step": 4704
    },
    {
      "epoch": 0.42069027181688123,
      "grad_norm": 0.15709508300629915,
      "learning_rate": 0.0001300797863926716,
      "loss": 0.6731,
      "step": 4705
    },
    {
      "epoch": 0.4207796852646638,
      "grad_norm": 0.12061969448969997,
      "learning_rate": 0.00013005216622536355,
      "loss": 0.6215,
      "step": 4706
    },
    {
      "epoch": 0.4208690987124464,
      "grad_norm": 0.15422724212004593,
      "learning_rate": 0.00013002454353761665,
      "loss": 0.7244,
      "step": 4707
    },
    {
      "epoch": 0.4209585121602289,
      "grad_norm": 0.15512743786525,
      "learning_rate": 0.0001299969183317476,
      "loss": 0.6743,
      "step": 4708
    },
    {
      "epoch": 0.42104792560801146,
      "grad_norm": 0.14087029707063398,
      "learning_rate": 0.0001299692906100732,
      "loss": 0.669,
      "step": 4709
    },
    {
      "epoch": 0.421137339055794,
      "grad_norm": 0.1586330603707174,
      "learning_rate": 0.00012994166037491058,
      "loss": 0.6955,
      "step": 4710
    },
    {
      "epoch": 0.42122675250357655,
      "grad_norm": 0.2042729074628395,
      "learning_rate": 0.00012991402762857707,
      "loss": 0.4187,
      "step": 4711
    },
    {
      "epoch": 0.42131616595135907,
      "grad_norm": 0.18373312909124961,
      "learning_rate": 0.00012988639237339022,
      "loss": 0.684,
      "step": 4712
    },
    {
      "epoch": 0.42140557939914164,
      "grad_norm": 0.14806518545238168,
      "learning_rate": 0.0001298587546116677,
      "loss": 0.6642,
      "step": 4713
    },
    {
      "epoch": 0.42149499284692415,
      "grad_norm": 0.1475896784199986,
      "learning_rate": 0.00012983111434572748,
      "loss": 0.623,
      "step": 4714
    },
    {
      "epoch": 0.4215844062947067,
      "grad_norm": 0.19253752391787873,
      "learning_rate": 0.00012980347157788777,
      "loss": 0.3641,
      "step": 4715
    },
    {
      "epoch": 0.4216738197424893,
      "grad_norm": 0.17805924189717895,
      "learning_rate": 0.00012977582631046685,
      "loss": 0.6779,
      "step": 4716
    },
    {
      "epoch": 0.4217632331902718,
      "grad_norm": 0.1595904339095883,
      "learning_rate": 0.0001297481785457834,
      "loss": 0.6753,
      "step": 4717
    },
    {
      "epoch": 0.4218526466380544,
      "grad_norm": 0.13812418542090868,
      "learning_rate": 0.00012972052828615606,
      "loss": 0.6238,
      "step": 4718
    },
    {
      "epoch": 0.4219420600858369,
      "grad_norm": 0.16175791683405355,
      "learning_rate": 0.00012969287553390397,
      "loss": 0.7028,
      "step": 4719
    },
    {
      "epoch": 0.4220314735336195,
      "grad_norm": 0.15919406027288927,
      "learning_rate": 0.00012966522029134623,
      "loss": 0.674,
      "step": 4720
    },
    {
      "epoch": 0.422120886981402,
      "grad_norm": 0.1571130965295656,
      "learning_rate": 0.0001296375625608023,
      "loss": 0.7133,
      "step": 4721
    },
    {
      "epoch": 0.42221030042918456,
      "grad_norm": 0.13450258843529858,
      "learning_rate": 0.0001296099023445918,
      "loss": 0.6645,
      "step": 4722
    },
    {
      "epoch": 0.4222997138769671,
      "grad_norm": 0.1662938325470628,
      "learning_rate": 0.00012958223964503452,
      "loss": 0.7065,
      "step": 4723
    },
    {
      "epoch": 0.42238912732474965,
      "grad_norm": 0.15549852962761784,
      "learning_rate": 0.00012955457446445055,
      "loss": 0.6433,
      "step": 4724
    },
    {
      "epoch": 0.42247854077253216,
      "grad_norm": 0.13015185820898684,
      "learning_rate": 0.00012952690680516016,
      "loss": 0.6636,
      "step": 4725
    },
    {
      "epoch": 0.42256795422031473,
      "grad_norm": 0.15319495850202616,
      "learning_rate": 0.0001294992366694837,
      "loss": 0.6915,
      "step": 4726
    },
    {
      "epoch": 0.4226573676680973,
      "grad_norm": 0.1528961357792002,
      "learning_rate": 0.00012947156405974187,
      "loss": 0.6632,
      "step": 4727
    },
    {
      "epoch": 0.4227467811158798,
      "grad_norm": 0.17586915658542235,
      "learning_rate": 0.0001294438889782556,
      "loss": 0.679,
      "step": 4728
    },
    {
      "epoch": 0.4228361945636624,
      "grad_norm": 0.13839456278629397,
      "learning_rate": 0.00012941621142734594,
      "loss": 0.6441,
      "step": 4729
    },
    {
      "epoch": 0.4229256080114449,
      "grad_norm": 0.13890814838579407,
      "learning_rate": 0.00012938853140933407,
      "loss": 0.6598,
      "step": 4730
    },
    {
      "epoch": 0.4230150214592275,
      "grad_norm": 0.14278515056224586,
      "learning_rate": 0.0001293608489265416,
      "loss": 0.6698,
      "step": 4731
    },
    {
      "epoch": 0.42310443490701,
      "grad_norm": 0.12402182763101786,
      "learning_rate": 0.00012933316398129022,
      "loss": 0.6262,
      "step": 4732
    },
    {
      "epoch": 0.42319384835479257,
      "grad_norm": 0.17078796615234362,
      "learning_rate": 0.00012930547657590179,
      "loss": 0.7012,
      "step": 4733
    },
    {
      "epoch": 0.4232832618025751,
      "grad_norm": 0.14538212645507842,
      "learning_rate": 0.00012927778671269842,
      "loss": 0.6557,
      "step": 4734
    },
    {
      "epoch": 0.42337267525035766,
      "grad_norm": 0.23299422550851007,
      "learning_rate": 0.00012925009439400243,
      "loss": 0.394,
      "step": 4735
    },
    {
      "epoch": 0.4234620886981402,
      "grad_norm": 0.1672900262285799,
      "learning_rate": 0.00012922239962213637,
      "loss": 0.7103,
      "step": 4736
    },
    {
      "epoch": 0.42355150214592274,
      "grad_norm": 0.14096400360928535,
      "learning_rate": 0.00012919470239942292,
      "loss": 0.6481,
      "step": 4737
    },
    {
      "epoch": 0.4236409155937053,
      "grad_norm": 0.14847430957260102,
      "learning_rate": 0.00012916700272818505,
      "loss": 0.6508,
      "step": 4738
    },
    {
      "epoch": 0.42373032904148783,
      "grad_norm": 0.12587367490187715,
      "learning_rate": 0.00012913930061074592,
      "loss": 0.6396,
      "step": 4739
    },
    {
      "epoch": 0.4238197424892704,
      "grad_norm": 0.14406962030339876,
      "learning_rate": 0.00012911159604942879,
      "loss": 0.6629,
      "step": 4740
    },
    {
      "epoch": 0.4239091559370529,
      "grad_norm": 0.17168025855426103,
      "learning_rate": 0.0001290838890465573,
      "loss": 0.7481,
      "step": 4741
    },
    {
      "epoch": 0.4239985693848355,
      "grad_norm": 0.15282383236558442,
      "learning_rate": 0.00012905617960445512,
      "loss": 0.7048,
      "step": 4742
    },
    {
      "epoch": 0.424087982832618,
      "grad_norm": 0.15593576096897596,
      "learning_rate": 0.00012902846772544624,
      "loss": 0.686,
      "step": 4743
    },
    {
      "epoch": 0.4241773962804006,
      "grad_norm": 0.1325107435252399,
      "learning_rate": 0.00012900075341185487,
      "loss": 0.6692,
      "step": 4744
    },
    {
      "epoch": 0.4242668097281831,
      "grad_norm": 0.15580832401882394,
      "learning_rate": 0.0001289730366660053,
      "loss": 0.6812,
      "step": 4745
    },
    {
      "epoch": 0.42435622317596566,
      "grad_norm": 0.1320918320233276,
      "learning_rate": 0.00012894531749022217,
      "loss": 0.6559,
      "step": 4746
    },
    {
      "epoch": 0.42444563662374823,
      "grad_norm": 0.127003059499043,
      "learning_rate": 0.00012891759588683018,
      "loss": 0.626,
      "step": 4747
    },
    {
      "epoch": 0.42453505007153075,
      "grad_norm": 0.13070929060560973,
      "learning_rate": 0.0001288898718581544,
      "loss": 0.6413,
      "step": 4748
    },
    {
      "epoch": 0.4246244635193133,
      "grad_norm": 0.1435820658681544,
      "learning_rate": 0.0001288621454065199,
      "loss": 0.6594,
      "step": 4749
    },
    {
      "epoch": 0.42471387696709584,
      "grad_norm": 0.1683795170326741,
      "learning_rate": 0.00012883441653425214,
      "loss": 0.6864,
      "step": 4750
    },
    {
      "epoch": 0.4248032904148784,
      "grad_norm": 0.15673683572309122,
      "learning_rate": 0.00012880668524367672,
      "loss": 0.6606,
      "step": 4751
    },
    {
      "epoch": 0.4248927038626609,
      "grad_norm": 0.16914991494990378,
      "learning_rate": 0.00012877895153711935,
      "loss": 0.7199,
      "step": 4752
    },
    {
      "epoch": 0.4249821173104435,
      "grad_norm": 0.147242598821626,
      "learning_rate": 0.0001287512154169061,
      "loss": 0.6835,
      "step": 4753
    },
    {
      "epoch": 0.425071530758226,
      "grad_norm": 0.16760006751573436,
      "learning_rate": 0.00012872347688536312,
      "loss": 0.7129,
      "step": 4754
    },
    {
      "epoch": 0.4251609442060086,
      "grad_norm": 0.1980464003545554,
      "learning_rate": 0.00012869573594481685,
      "loss": 0.4084,
      "step": 4755
    },
    {
      "epoch": 0.42525035765379116,
      "grad_norm": 0.14372228841466989,
      "learning_rate": 0.00012866799259759386,
      "loss": 0.6138,
      "step": 4756
    },
    {
      "epoch": 0.42533977110157367,
      "grad_norm": 0.14273132086052787,
      "learning_rate": 0.0001286402468460209,
      "loss": 0.6521,
      "step": 4757
    },
    {
      "epoch": 0.42542918454935624,
      "grad_norm": 0.148595046522236,
      "learning_rate": 0.0001286124986924251,
      "loss": 0.6587,
      "step": 4758
    },
    {
      "epoch": 0.42551859799713876,
      "grad_norm": 0.15540672180898998,
      "learning_rate": 0.00012858474813913352,
      "loss": 0.7029,
      "step": 4759
    },
    {
      "epoch": 0.42560801144492133,
      "grad_norm": 0.16662059974003543,
      "learning_rate": 0.00012855699518847367,
      "loss": 0.7096,
      "step": 4760
    },
    {
      "epoch": 0.42569742489270385,
      "grad_norm": 0.1572954583086285,
      "learning_rate": 0.00012852923984277314,
      "loss": 0.6482,
      "step": 4761
    },
    {
      "epoch": 0.4257868383404864,
      "grad_norm": 0.15847498533611196,
      "learning_rate": 0.0001285014821043597,
      "loss": 0.7109,
      "step": 4762
    },
    {
      "epoch": 0.42587625178826893,
      "grad_norm": 0.16352886499348757,
      "learning_rate": 0.00012847372197556138,
      "loss": 0.6722,
      "step": 4763
    },
    {
      "epoch": 0.4259656652360515,
      "grad_norm": 0.16056768896660908,
      "learning_rate": 0.00012844595945870637,
      "loss": 0.6205,
      "step": 4764
    },
    {
      "epoch": 0.426055078683834,
      "grad_norm": 0.1626561976966984,
      "learning_rate": 0.00012841819455612313,
      "loss": 0.6824,
      "step": 4765
    },
    {
      "epoch": 0.4261444921316166,
      "grad_norm": 0.13936313161399969,
      "learning_rate": 0.0001283904272701402,
      "loss": 0.6749,
      "step": 4766
    },
    {
      "epoch": 0.42623390557939916,
      "grad_norm": 0.14631058750026385,
      "learning_rate": 0.0001283626576030864,
      "loss": 0.6599,
      "step": 4767
    },
    {
      "epoch": 0.4263233190271817,
      "grad_norm": 0.16106614295244726,
      "learning_rate": 0.0001283348855572908,
      "loss": 0.6748,
      "step": 4768
    },
    {
      "epoch": 0.42641273247496425,
      "grad_norm": 0.1356692166965415,
      "learning_rate": 0.00012830711113508256,
      "loss": 0.6482,
      "step": 4769
    },
    {
      "epoch": 0.42650214592274677,
      "grad_norm": 0.16133907184231067,
      "learning_rate": 0.0001282793343387911,
      "loss": 0.6445,
      "step": 4770
    },
    {
      "epoch": 0.42659155937052934,
      "grad_norm": 0.18316617611152425,
      "learning_rate": 0.000128251555170746,
      "loss": 0.3953,
      "step": 4771
    },
    {
      "epoch": 0.42668097281831185,
      "grad_norm": 0.16603418307230655,
      "learning_rate": 0.00012822377363327713,
      "loss": 0.6997,
      "step": 4772
    },
    {
      "epoch": 0.4267703862660944,
      "grad_norm": 0.14544525256353996,
      "learning_rate": 0.00012819598972871443,
      "loss": 0.6501,
      "step": 4773
    },
    {
      "epoch": 0.42685979971387694,
      "grad_norm": 0.16249896820292056,
      "learning_rate": 0.0001281682034593881,
      "loss": 0.6923,
      "step": 4774
    },
    {
      "epoch": 0.4269492131616595,
      "grad_norm": 0.14294149387537142,
      "learning_rate": 0.0001281404148276286,
      "loss": 0.6611,
      "step": 4775
    },
    {
      "epoch": 0.4270386266094421,
      "grad_norm": 0.14865938748689186,
      "learning_rate": 0.00012811262383576646,
      "loss": 0.6606,
      "step": 4776
    },
    {
      "epoch": 0.4271280400572246,
      "grad_norm": 0.16408887277804868,
      "learning_rate": 0.00012808483048613252,
      "loss": 0.6505,
      "step": 4777
    },
    {
      "epoch": 0.42721745350500717,
      "grad_norm": 0.1807467399963989,
      "learning_rate": 0.00012805703478105778,
      "loss": 0.7284,
      "step": 4778
    },
    {
      "epoch": 0.4273068669527897,
      "grad_norm": 0.14599979389045703,
      "learning_rate": 0.00012802923672287342,
      "loss": 0.6673,
      "step": 4779
    },
    {
      "epoch": 0.42739628040057226,
      "grad_norm": 0.14767805924484897,
      "learning_rate": 0.00012800143631391082,
      "loss": 0.6937,
      "step": 4780
    },
    {
      "epoch": 0.4274856938483548,
      "grad_norm": 0.15151504147995648,
      "learning_rate": 0.00012797363355650154,
      "loss": 0.6322,
      "step": 4781
    },
    {
      "epoch": 0.42757510729613735,
      "grad_norm": 0.15260106231374854,
      "learning_rate": 0.00012794582845297744,
      "loss": 0.6883,
      "step": 4782
    },
    {
      "epoch": 0.42766452074391986,
      "grad_norm": 0.1541064230019208,
      "learning_rate": 0.00012791802100567043,
      "loss": 0.6711,
      "step": 4783
    },
    {
      "epoch": 0.42775393419170243,
      "grad_norm": 0.14997125257594734,
      "learning_rate": 0.00012789021121691274,
      "loss": 0.6794,
      "step": 4784
    },
    {
      "epoch": 0.427843347639485,
      "grad_norm": 0.15025021935923238,
      "learning_rate": 0.0001278623990890367,
      "loss": 0.6894,
      "step": 4785
    },
    {
      "epoch": 0.4279327610872675,
      "grad_norm": 0.14073085980599587,
      "learning_rate": 0.0001278345846243749,
      "loss": 0.6802,
      "step": 4786
    },
    {
      "epoch": 0.4280221745350501,
      "grad_norm": 0.13479134938432424,
      "learning_rate": 0.00012780676782526014,
      "loss": 0.6522,
      "step": 4787
    },
    {
      "epoch": 0.4281115879828326,
      "grad_norm": 0.14005064337377507,
      "learning_rate": 0.0001277789486940253,
      "loss": 0.6466,
      "step": 4788
    },
    {
      "epoch": 0.4282010014306152,
      "grad_norm": 0.13280267705343254,
      "learning_rate": 0.0001277511272330036,
      "loss": 0.6732,
      "step": 4789
    },
    {
      "epoch": 0.4282904148783977,
      "grad_norm": 0.19002063617178677,
      "learning_rate": 0.00012772330344452834,
      "loss": 0.4051,
      "step": 4790
    },
    {
      "epoch": 0.42837982832618027,
      "grad_norm": 0.15625407557828594,
      "learning_rate": 0.00012769547733093312,
      "loss": 0.6806,
      "step": 4791
    },
    {
      "epoch": 0.4284692417739628,
      "grad_norm": 0.14244995657599419,
      "learning_rate": 0.0001276676488945517,
      "loss": 0.6346,
      "step": 4792
    },
    {
      "epoch": 0.42855865522174535,
      "grad_norm": 0.140619290410716,
      "learning_rate": 0.00012763981813771795,
      "loss": 0.6497,
      "step": 4793
    },
    {
      "epoch": 0.42864806866952787,
      "grad_norm": 0.13789275516702365,
      "learning_rate": 0.00012761198506276603,
      "loss": 0.6211,
      "step": 4794
    },
    {
      "epoch": 0.42873748211731044,
      "grad_norm": 0.19056558432196413,
      "learning_rate": 0.00012758414967203028,
      "loss": 0.7239,
      "step": 4795
    },
    {
      "epoch": 0.428826895565093,
      "grad_norm": 0.14743437249856614,
      "learning_rate": 0.00012755631196784522,
      "loss": 0.6301,
      "step": 4796
    },
    {
      "epoch": 0.42891630901287553,
      "grad_norm": 0.1482272478822071,
      "learning_rate": 0.00012752847195254553,
      "loss": 0.6567,
      "step": 4797
    },
    {
      "epoch": 0.4290057224606581,
      "grad_norm": 0.15088190153986597,
      "learning_rate": 0.00012750062962846613,
      "loss": 0.6457,
      "step": 4798
    },
    {
      "epoch": 0.4290951359084406,
      "grad_norm": 0.16001482664707384,
      "learning_rate": 0.0001274727849979422,
      "loss": 0.6917,
      "step": 4799
    },
    {
      "epoch": 0.4291845493562232,
      "grad_norm": 0.13190756298467338,
      "learning_rate": 0.0001274449380633089,
      "loss": 0.6468,
      "step": 4800
    },
    {
      "epoch": 0.4292739628040057,
      "grad_norm": 0.1471831260124128,
      "learning_rate": 0.0001274170888269018,
      "loss": 0.6863,
      "step": 4801
    },
    {
      "epoch": 0.4293633762517883,
      "grad_norm": 0.152958126207275,
      "learning_rate": 0.00012738923729105662,
      "loss": 0.6878,
      "step": 4802
    },
    {
      "epoch": 0.4294527896995708,
      "grad_norm": 0.1451372659855809,
      "learning_rate": 0.00012736138345810917,
      "loss": 0.6957,
      "step": 4803
    },
    {
      "epoch": 0.42954220314735336,
      "grad_norm": 0.13037454815093297,
      "learning_rate": 0.0001273335273303955,
      "loss": 0.6512,
      "step": 4804
    },
    {
      "epoch": 0.42963161659513593,
      "grad_norm": 0.13917647145258658,
      "learning_rate": 0.00012730566891025195,
      "loss": 0.663,
      "step": 4805
    },
    {
      "epoch": 0.42972103004291845,
      "grad_norm": 0.15611741925728978,
      "learning_rate": 0.0001272778082000149,
      "loss": 0.6864,
      "step": 4806
    },
    {
      "epoch": 0.429810443490701,
      "grad_norm": 0.13326302517832442,
      "learning_rate": 0.000127249945202021,
      "loss": 0.6505,
      "step": 4807
    },
    {
      "epoch": 0.42989985693848354,
      "grad_norm": 0.16770047878550182,
      "learning_rate": 0.00012722207991860713,
      "loss": 0.6888,
      "step": 4808
    },
    {
      "epoch": 0.4299892703862661,
      "grad_norm": 0.1438970889652241,
      "learning_rate": 0.0001271942123521103,
      "loss": 0.6821,
      "step": 4809
    },
    {
      "epoch": 0.4300786838340486,
      "grad_norm": 0.14899157550036943,
      "learning_rate": 0.0001271663425048677,
      "loss": 0.6768,
      "step": 4810
    },
    {
      "epoch": 0.4301680972818312,
      "grad_norm": 0.19320319887199172,
      "learning_rate": 0.00012713847037921678,
      "loss": 0.3689,
      "step": 4811
    },
    {
      "epoch": 0.4302575107296137,
      "grad_norm": 0.18485166415606777,
      "learning_rate": 0.00012711059597749513,
      "loss": 0.7286,
      "step": 4812
    },
    {
      "epoch": 0.4303469241773963,
      "grad_norm": 0.16470093499954785,
      "learning_rate": 0.00012708271930204052,
      "loss": 0.6818,
      "step": 4813
    },
    {
      "epoch": 0.4304363376251788,
      "grad_norm": 0.16093385981211258,
      "learning_rate": 0.00012705484035519096,
      "loss": 0.6986,
      "step": 4814
    },
    {
      "epoch": 0.43052575107296137,
      "grad_norm": 0.14535593322232726,
      "learning_rate": 0.0001270269591392846,
      "loss": 0.6793,
      "step": 4815
    },
    {
      "epoch": 0.43061516452074394,
      "grad_norm": 0.15542092590606438,
      "learning_rate": 0.00012699907565665982,
      "loss": 0.6635,
      "step": 4816
    },
    {
      "epoch": 0.43070457796852646,
      "grad_norm": 0.11243167964668253,
      "learning_rate": 0.0001269711899096552,
      "loss": 0.6149,
      "step": 4817
    },
    {
      "epoch": 0.43079399141630903,
      "grad_norm": 0.14176325436550583,
      "learning_rate": 0.0001269433019006094,
      "loss": 0.6794,
      "step": 4818
    },
    {
      "epoch": 0.43088340486409155,
      "grad_norm": 0.12991546211509658,
      "learning_rate": 0.00012691541163186148,
      "loss": 0.6332,
      "step": 4819
    },
    {
      "epoch": 0.4309728183118741,
      "grad_norm": 0.12951532053871548,
      "learning_rate": 0.00012688751910575044,
      "loss": 0.6518,
      "step": 4820
    },
    {
      "epoch": 0.43106223175965663,
      "grad_norm": 0.14089549515200672,
      "learning_rate": 0.00012685962432461563,
      "loss": 0.68,
      "step": 4821
    },
    {
      "epoch": 0.4311516452074392,
      "grad_norm": 0.14910742205431368,
      "learning_rate": 0.00012683172729079662,
      "loss": 0.6541,
      "step": 4822
    },
    {
      "epoch": 0.4312410586552217,
      "grad_norm": 0.16168866502017942,
      "learning_rate": 0.00012680382800663302,
      "loss": 0.7059,
      "step": 4823
    },
    {
      "epoch": 0.4313304721030043,
      "grad_norm": 0.15847319072803476,
      "learning_rate": 0.00012677592647446472,
      "loss": 0.6996,
      "step": 4824
    },
    {
      "epoch": 0.43141988555078686,
      "grad_norm": 0.14728624092671386,
      "learning_rate": 0.0001267480226966318,
      "loss": 0.6377,
      "step": 4825
    },
    {
      "epoch": 0.4315092989985694,
      "grad_norm": 0.17469159210479152,
      "learning_rate": 0.00012672011667547457,
      "loss": 0.683,
      "step": 4826
    },
    {
      "epoch": 0.43159871244635195,
      "grad_norm": 0.1635578713892383,
      "learning_rate": 0.0001266922084133334,
      "loss": 0.687,
      "step": 4827
    },
    {
      "epoch": 0.43168812589413447,
      "grad_norm": 0.17198796356743684,
      "learning_rate": 0.00012666429791254892,
      "loss": 0.6949,
      "step": 4828
    },
    {
      "epoch": 0.43177753934191704,
      "grad_norm": 0.14048171026938222,
      "learning_rate": 0.000126636385175462,
      "loss": 0.6443,
      "step": 4829
    },
    {
      "epoch": 0.43186695278969955,
      "grad_norm": 0.1472660495612772,
      "learning_rate": 0.00012660847020441363,
      "loss": 0.6458,
      "step": 4830
    },
    {
      "epoch": 0.4319563662374821,
      "grad_norm": 0.15424242687461812,
      "learning_rate": 0.00012658055300174498,
      "loss": 0.67,
      "step": 4831
    },
    {
      "epoch": 0.43204577968526464,
      "grad_norm": 0.16300962604511576,
      "learning_rate": 0.00012655263356979747,
      "loss": 0.7028,
      "step": 4832
    },
    {
      "epoch": 0.4321351931330472,
      "grad_norm": 0.16551344136910262,
      "learning_rate": 0.00012652471191091266,
      "loss": 0.7003,
      "step": 4833
    },
    {
      "epoch": 0.4322246065808298,
      "grad_norm": 0.14214652575294173,
      "learning_rate": 0.00012649678802743227,
      "loss": 0.6603,
      "step": 4834
    },
    {
      "epoch": 0.4323140200286123,
      "grad_norm": 0.1362415769500522,
      "learning_rate": 0.00012646886192169826,
      "loss": 0.6813,
      "step": 4835
    },
    {
      "epoch": 0.43240343347639487,
      "grad_norm": 0.15477223989115238,
      "learning_rate": 0.00012644093359605278,
      "loss": 0.6896,
      "step": 4836
    },
    {
      "epoch": 0.4324928469241774,
      "grad_norm": 0.13973884495499447,
      "learning_rate": 0.00012641300305283814,
      "loss": 0.6477,
      "step": 4837
    },
    {
      "epoch": 0.43258226037195996,
      "grad_norm": 0.14945752052750236,
      "learning_rate": 0.00012638507029439684,
      "loss": 0.6828,
      "step": 4838
    },
    {
      "epoch": 0.4326716738197425,
      "grad_norm": 0.14833137163013355,
      "learning_rate": 0.00012635713532307152,
      "loss": 0.6152,
      "step": 4839
    },
    {
      "epoch": 0.43276108726752505,
      "grad_norm": 0.17162623297826948,
      "learning_rate": 0.00012632919814120513,
      "loss": 0.6971,
      "step": 4840
    },
    {
      "epoch": 0.43285050071530756,
      "grad_norm": 0.15228903166952187,
      "learning_rate": 0.00012630125875114068,
      "loss": 0.6849,
      "step": 4841
    },
    {
      "epoch": 0.43293991416309013,
      "grad_norm": 0.16321293434329998,
      "learning_rate": 0.00012627331715522143,
      "loss": 0.6786,
      "step": 4842
    },
    {
      "epoch": 0.43302932761087265,
      "grad_norm": 0.12928974935665738,
      "learning_rate": 0.0001262453733557908,
      "loss": 0.6568,
      "step": 4843
    },
    {
      "epoch": 0.4331187410586552,
      "grad_norm": 0.15422795668043476,
      "learning_rate": 0.00012621742735519239,
      "loss": 0.6757,
      "step": 4844
    },
    {
      "epoch": 0.4332081545064378,
      "grad_norm": 0.1622599075008989,
      "learning_rate": 0.00012618947915577,
      "loss": 0.6864,
      "step": 4845
    },
    {
      "epoch": 0.4332975679542203,
      "grad_norm": 0.16362308836258552,
      "learning_rate": 0.0001261615287598676,
      "loss": 0.6825,
      "step": 4846
    },
    {
      "epoch": 0.4333869814020029,
      "grad_norm": 0.16948890195176788,
      "learning_rate": 0.00012613357616982946,
      "loss": 0.6881,
      "step": 4847
    },
    {
      "epoch": 0.4334763948497854,
      "grad_norm": 0.17452328079989105,
      "learning_rate": 0.00012610562138799978,
      "loss": 0.7125,
      "step": 4848
    },
    {
      "epoch": 0.43356580829756797,
      "grad_norm": 0.1298734394797595,
      "learning_rate": 0.00012607766441672318,
      "loss": 0.6625,
      "step": 4849
    },
    {
      "epoch": 0.4336552217453505,
      "grad_norm": 0.15217782276508102,
      "learning_rate": 0.00012604970525834436,
      "loss": 0.6912,
      "step": 4850
    },
    {
      "epoch": 0.43374463519313305,
      "grad_norm": 0.14840143134922104,
      "learning_rate": 0.00012602174391520822,
      "loss": 0.7029,
      "step": 4851
    },
    {
      "epoch": 0.43383404864091557,
      "grad_norm": 0.13502447637821577,
      "learning_rate": 0.00012599378038965985,
      "loss": 0.6656,
      "step": 4852
    },
    {
      "epoch": 0.43392346208869814,
      "grad_norm": 0.1585331231892872,
      "learning_rate": 0.00012596581468404453,
      "loss": 0.7091,
      "step": 4853
    },
    {
      "epoch": 0.4340128755364807,
      "grad_norm": 0.19288195267997632,
      "learning_rate": 0.00012593784680070766,
      "loss": 0.3745,
      "step": 4854
    },
    {
      "epoch": 0.43410228898426323,
      "grad_norm": 0.16738001922206852,
      "learning_rate": 0.0001259098767419949,
      "loss": 0.6976,
      "step": 4855
    },
    {
      "epoch": 0.4341917024320458,
      "grad_norm": 0.151310812886223,
      "learning_rate": 0.00012588190451025207,
      "loss": 0.689,
      "step": 4856
    },
    {
      "epoch": 0.4342811158798283,
      "grad_norm": 0.14874414402992442,
      "learning_rate": 0.0001258539301078252,
      "loss": 0.6566,
      "step": 4857
    },
    {
      "epoch": 0.4343705293276109,
      "grad_norm": 0.15790813493260808,
      "learning_rate": 0.0001258259535370604,
      "loss": 0.6913,
      "step": 4858
    },
    {
      "epoch": 0.4344599427753934,
      "grad_norm": 0.13365772931929848,
      "learning_rate": 0.00012579797480030406,
      "loss": 0.6356,
      "step": 4859
    },
    {
      "epoch": 0.434549356223176,
      "grad_norm": 0.17589110401953847,
      "learning_rate": 0.00012576999389990278,
      "loss": 0.703,
      "step": 4860
    },
    {
      "epoch": 0.4346387696709585,
      "grad_norm": 0.1520196741480728,
      "learning_rate": 0.0001257420108382032,
      "loss": 0.6976,
      "step": 4861
    },
    {
      "epoch": 0.43472818311874106,
      "grad_norm": 0.13732828255235324,
      "learning_rate": 0.00012571402561755227,
      "loss": 0.6281,
      "step": 4862
    },
    {
      "epoch": 0.4348175965665236,
      "grad_norm": 0.14729493613523462,
      "learning_rate": 0.00012568603824029707,
      "loss": 0.672,
      "step": 4863
    },
    {
      "epoch": 0.43490701001430615,
      "grad_norm": 0.13155964640659853,
      "learning_rate": 0.00012565804870878484,
      "loss": 0.6827,
      "step": 4864
    },
    {
      "epoch": 0.4349964234620887,
      "grad_norm": 0.16699904693134304,
      "learning_rate": 0.00012563005702536306,
      "loss": 0.7127,
      "step": 4865
    },
    {
      "epoch": 0.43508583690987124,
      "grad_norm": 0.14402316835373205,
      "learning_rate": 0.00012560206319237936,
      "loss": 0.6901,
      "step": 4866
    },
    {
      "epoch": 0.4351752503576538,
      "grad_norm": 0.15978585639342413,
      "learning_rate": 0.00012557406721218155,
      "loss": 0.7184,
      "step": 4867
    },
    {
      "epoch": 0.4352646638054363,
      "grad_norm": 0.15138111334735518,
      "learning_rate": 0.00012554606908711757,
      "loss": 0.7344,
      "step": 4868
    },
    {
      "epoch": 0.4353540772532189,
      "grad_norm": 0.14766748689819947,
      "learning_rate": 0.00012551806881953566,
      "loss": 0.6755,
      "step": 4869
    },
    {
      "epoch": 0.4354434907010014,
      "grad_norm": 0.1361457958201,
      "learning_rate": 0.0001254900664117841,
      "loss": 0.673,
      "step": 4870
    },
    {
      "epoch": 0.435532904148784,
      "grad_norm": 0.1636255204727663,
      "learning_rate": 0.0001254620618662115,
      "loss": 0.7303,
      "step": 4871
    },
    {
      "epoch": 0.4356223175965665,
      "grad_norm": 0.13733413423903013,
      "learning_rate": 0.0001254340551851665,
      "loss": 0.6764,
      "step": 4872
    },
    {
      "epoch": 0.43571173104434907,
      "grad_norm": 0.15849913907330426,
      "learning_rate": 0.000125406046370998,
      "loss": 0.6747,
      "step": 4873
    },
    {
      "epoch": 0.43580114449213164,
      "grad_norm": 0.15573608498524055,
      "learning_rate": 0.00012537803542605512,
      "loss": 0.6899,
      "step": 4874
    },
    {
      "epoch": 0.43589055793991416,
      "grad_norm": 0.14657250413922923,
      "learning_rate": 0.00012535002235268701,
      "loss": 0.7087,
      "step": 4875
    },
    {
      "epoch": 0.43597997138769673,
      "grad_norm": 0.16916993597490995,
      "learning_rate": 0.00012532200715324317,
      "loss": 0.6834,
      "step": 4876
    },
    {
      "epoch": 0.43606938483547925,
      "grad_norm": 0.16167795250992822,
      "learning_rate": 0.0001252939898300731,
      "loss": 0.6261,
      "step": 4877
    },
    {
      "epoch": 0.4361587982832618,
      "grad_norm": 0.1521942166333568,
      "learning_rate": 0.0001252659703855267,
      "loss": 0.6801,
      "step": 4878
    },
    {
      "epoch": 0.43624821173104433,
      "grad_norm": 0.16060440190986108,
      "learning_rate": 0.00012523794882195391,
      "loss": 0.7024,
      "step": 4879
    },
    {
      "epoch": 0.4363376251788269,
      "grad_norm": 0.16148810939814273,
      "learning_rate": 0.0001252099251417048,
      "loss": 0.6723,
      "step": 4880
    },
    {
      "epoch": 0.4364270386266094,
      "grad_norm": 0.1374268911110505,
      "learning_rate": 0.00012518189934712973,
      "loss": 0.646,
      "step": 4881
    },
    {
      "epoch": 0.436516452074392,
      "grad_norm": 0.1362388475745117,
      "learning_rate": 0.00012515387144057915,
      "loss": 0.6659,
      "step": 4882
    },
    {
      "epoch": 0.4366058655221745,
      "grad_norm": 0.1541412899237504,
      "learning_rate": 0.00012512584142440378,
      "loss": 0.6622,
      "step": 4883
    },
    {
      "epoch": 0.4366952789699571,
      "grad_norm": 0.1454546605383998,
      "learning_rate": 0.00012509780930095442,
      "loss": 0.6691,
      "step": 4884
    },
    {
      "epoch": 0.43678469241773965,
      "grad_norm": 0.14814979159079605,
      "learning_rate": 0.00012506977507258208,
      "loss": 0.6531,
      "step": 4885
    },
    {
      "epoch": 0.43687410586552217,
      "grad_norm": 0.1408384688982319,
      "learning_rate": 0.000125041738741638,
      "loss": 0.6642,
      "step": 4886
    },
    {
      "epoch": 0.43696351931330474,
      "grad_norm": 0.141282077385524,
      "learning_rate": 0.00012501370031047356,
      "loss": 0.7038,
      "step": 4887
    },
    {
      "epoch": 0.43705293276108725,
      "grad_norm": 0.16043338188726491,
      "learning_rate": 0.00012498565978144027,
      "loss": 0.659,
      "step": 4888
    },
    {
      "epoch": 0.4371423462088698,
      "grad_norm": 0.12962595965602186,
      "learning_rate": 0.00012495761715688983,
      "loss": 0.6347,
      "step": 4889
    },
    {
      "epoch": 0.43723175965665234,
      "grad_norm": 0.160091397008883,
      "learning_rate": 0.00012492957243917424,
      "loss": 0.6658,
      "step": 4890
    },
    {
      "epoch": 0.4373211731044349,
      "grad_norm": 0.1447341287358262,
      "learning_rate": 0.00012490152563064544,
      "loss": 0.6793,
      "step": 4891
    },
    {
      "epoch": 0.43741058655221743,
      "grad_norm": 0.16708261386981066,
      "learning_rate": 0.00012487347673365582,
      "loss": 0.6749,
      "step": 4892
    },
    {
      "epoch": 0.4375,
      "grad_norm": 0.16287476158349662,
      "learning_rate": 0.00012484542575055775,
      "loss": 0.6778,
      "step": 4893
    },
    {
      "epoch": 0.43758941344778257,
      "grad_norm": 0.14959656123356843,
      "learning_rate": 0.0001248173726837038,
      "loss": 0.6574,
      "step": 4894
    },
    {
      "epoch": 0.4376788268955651,
      "grad_norm": 0.15640417604145912,
      "learning_rate": 0.00012478931753544676,
      "loss": 0.6721,
      "step": 4895
    },
    {
      "epoch": 0.43776824034334766,
      "grad_norm": 0.15791533504854732,
      "learning_rate": 0.00012476126030813963,
      "loss": 0.6694,
      "step": 4896
    },
    {
      "epoch": 0.4378576537911302,
      "grad_norm": 0.16598870882203962,
      "learning_rate": 0.0001247332010041355,
      "loss": 0.6776,
      "step": 4897
    },
    {
      "epoch": 0.43794706723891275,
      "grad_norm": 0.1619563187943942,
      "learning_rate": 0.00012470513962578768,
      "loss": 0.6985,
      "step": 4898
    },
    {
      "epoch": 0.43803648068669526,
      "grad_norm": 0.146393116228779,
      "learning_rate": 0.0001246770761754496,
      "loss": 0.6537,
      "step": 4899
    },
    {
      "epoch": 0.43812589413447783,
      "grad_norm": 0.15466362801302613,
      "learning_rate": 0.000124649010655475,
      "loss": 0.6772,
      "step": 4900
    },
    {
      "epoch": 0.43821530758226035,
      "grad_norm": 0.15729884063704783,
      "learning_rate": 0.00012462094306821758,
      "loss": 0.6501,
      "step": 4901
    },
    {
      "epoch": 0.4383047210300429,
      "grad_norm": 0.16372197655382106,
      "learning_rate": 0.0001245928734160314,
      "loss": 0.7324,
      "step": 4902
    },
    {
      "epoch": 0.4383941344778255,
      "grad_norm": 0.14108754555888556,
      "learning_rate": 0.00012456480170127069,
      "loss": 0.6815,
      "step": 4903
    },
    {
      "epoch": 0.438483547925608,
      "grad_norm": 0.16504525382978757,
      "learning_rate": 0.00012453672792628968,
      "loss": 0.6794,
      "step": 4904
    },
    {
      "epoch": 0.4385729613733906,
      "grad_norm": 0.16678414149917806,
      "learning_rate": 0.00012450865209344294,
      "loss": 0.6509,
      "step": 4905
    },
    {
      "epoch": 0.4386623748211731,
      "grad_norm": 0.15881909830318156,
      "learning_rate": 0.00012448057420508517,
      "loss": 0.697,
      "step": 4906
    },
    {
      "epoch": 0.43875178826895567,
      "grad_norm": 0.14201302462090878,
      "learning_rate": 0.0001244524942635712,
      "loss": 0.667,
      "step": 4907
    },
    {
      "epoch": 0.4388412017167382,
      "grad_norm": 0.15461082187471728,
      "learning_rate": 0.00012442441227125602,
      "loss": 0.6575,
      "step": 4908
    },
    {
      "epoch": 0.43893061516452075,
      "grad_norm": 0.171946069103897,
      "learning_rate": 0.00012439632823049493,
      "loss": 0.7096,
      "step": 4909
    },
    {
      "epoch": 0.43902002861230327,
      "grad_norm": 0.1301805342564213,
      "learning_rate": 0.00012436824214364324,
      "loss": 0.6532,
      "step": 4910
    },
    {
      "epoch": 0.43910944206008584,
      "grad_norm": 0.1695309222415832,
      "learning_rate": 0.00012434015401305653,
      "loss": 0.71,
      "step": 4911
    },
    {
      "epoch": 0.43919885550786836,
      "grad_norm": 0.16383943232472264,
      "learning_rate": 0.00012431206384109044,
      "loss": 0.6725,
      "step": 4912
    },
    {
      "epoch": 0.43928826895565093,
      "grad_norm": 0.18907044431609507,
      "learning_rate": 0.00012428397163010096,
      "loss": 0.7189,
      "step": 4913
    },
    {
      "epoch": 0.4393776824034335,
      "grad_norm": 0.1665130321408054,
      "learning_rate": 0.00012425587738244413,
      "loss": 0.674,
      "step": 4914
    },
    {
      "epoch": 0.439467095851216,
      "grad_norm": 0.1562971223285488,
      "learning_rate": 0.00012422778110047613,
      "loss": 0.6795,
      "step": 4915
    },
    {
      "epoch": 0.4395565092989986,
      "grad_norm": 0.16071304926122068,
      "learning_rate": 0.0001241996827865534,
      "loss": 0.6824,
      "step": 4916
    },
    {
      "epoch": 0.4396459227467811,
      "grad_norm": 0.14264701208923367,
      "learning_rate": 0.00012417158244303249,
      "loss": 0.6691,
      "step": 4917
    },
    {
      "epoch": 0.4397353361945637,
      "grad_norm": 0.15232030735269966,
      "learning_rate": 0.00012414348007227014,
      "loss": 0.6617,
      "step": 4918
    },
    {
      "epoch": 0.4398247496423462,
      "grad_norm": 0.17190519628752524,
      "learning_rate": 0.00012411537567662327,
      "loss": 0.6801,
      "step": 4919
    },
    {
      "epoch": 0.43991416309012876,
      "grad_norm": 0.1463881440502048,
      "learning_rate": 0.000124087269258449,
      "loss": 0.6643,
      "step": 4920
    },
    {
      "epoch": 0.4400035765379113,
      "grad_norm": 0.13633643046811306,
      "learning_rate": 0.00012405916082010456,
      "loss": 0.644,
      "step": 4921
    },
    {
      "epoch": 0.44009298998569385,
      "grad_norm": 0.15630966154401008,
      "learning_rate": 0.00012403105036394728,
      "loss": 0.6986,
      "step": 4922
    },
    {
      "epoch": 0.4401824034334764,
      "grad_norm": 0.1849590221066126,
      "learning_rate": 0.0001240029378923349,
      "loss": 0.3916,
      "step": 4923
    },
    {
      "epoch": 0.44027181688125894,
      "grad_norm": 0.15648168321960593,
      "learning_rate": 0.0001239748234076251,
      "loss": 0.7119,
      "step": 4924
    },
    {
      "epoch": 0.4403612303290415,
      "grad_norm": 0.1677709783078593,
      "learning_rate": 0.0001239467069121758,
      "loss": 0.6725,
      "step": 4925
    },
    {
      "epoch": 0.440450643776824,
      "grad_norm": 0.1581861416626547,
      "learning_rate": 0.0001239185884083451,
      "loss": 0.6613,
      "step": 4926
    },
    {
      "epoch": 0.4405400572246066,
      "grad_norm": 0.1594605656259672,
      "learning_rate": 0.00012389046789849128,
      "loss": 0.6895,
      "step": 4927
    },
    {
      "epoch": 0.4406294706723891,
      "grad_norm": 0.1484281359199618,
      "learning_rate": 0.00012386234538497282,
      "loss": 0.6414,
      "step": 4928
    },
    {
      "epoch": 0.4407188841201717,
      "grad_norm": 0.14775171774956963,
      "learning_rate": 0.00012383422087014817,
      "loss": 0.6575,
      "step": 4929
    },
    {
      "epoch": 0.4408082975679542,
      "grad_norm": 0.16024933237535324,
      "learning_rate": 0.00012380609435637627,
      "loss": 0.6283,
      "step": 4930
    },
    {
      "epoch": 0.44089771101573677,
      "grad_norm": 0.14612237835451208,
      "learning_rate": 0.000123777965846016,
      "loss": 0.6892,
      "step": 4931
    },
    {
      "epoch": 0.4409871244635193,
      "grad_norm": 0.16299797055860174,
      "learning_rate": 0.0001237498353414264,
      "loss": 0.7051,
      "step": 4932
    },
    {
      "epoch": 0.44107653791130186,
      "grad_norm": 0.16847737025075138,
      "learning_rate": 0.00012372170284496683,
      "loss": 0.7301,
      "step": 4933
    },
    {
      "epoch": 0.44116595135908443,
      "grad_norm": 0.15448327706395998,
      "learning_rate": 0.0001236935683589967,
      "loss": 0.6806,
      "step": 4934
    },
    {
      "epoch": 0.44125536480686695,
      "grad_norm": 0.16481470576274337,
      "learning_rate": 0.00012366543188587555,
      "loss": 0.6808,
      "step": 4935
    },
    {
      "epoch": 0.4413447782546495,
      "grad_norm": 0.16381694186989615,
      "learning_rate": 0.00012363729342796325,
      "loss": 0.7155,
      "step": 4936
    },
    {
      "epoch": 0.44143419170243203,
      "grad_norm": 0.14754583215539657,
      "learning_rate": 0.0001236091529876197,
      "loss": 0.6561,
      "step": 4937
    },
    {
      "epoch": 0.4415236051502146,
      "grad_norm": 0.16564482050794024,
      "learning_rate": 0.000123581010567205,
      "loss": 0.6989,
      "step": 4938
    },
    {
      "epoch": 0.4416130185979971,
      "grad_norm": 0.1306428128758209,
      "learning_rate": 0.00012355286616907939,
      "loss": 0.656,
      "step": 4939
    },
    {
      "epoch": 0.4417024320457797,
      "grad_norm": 0.14692997248953094,
      "learning_rate": 0.00012352471979560338,
      "loss": 0.6923,
      "step": 4940
    },
    {
      "epoch": 0.4417918454935622,
      "grad_norm": 0.15380274691657342,
      "learning_rate": 0.00012349657144913753,
      "loss": 0.6803,
      "step": 4941
    },
    {
      "epoch": 0.4418812589413448,
      "grad_norm": 0.15607744533427034,
      "learning_rate": 0.00012346842113204257,
      "loss": 0.7103,
      "step": 4942
    },
    {
      "epoch": 0.44197067238912735,
      "grad_norm": 0.14913306490846726,
      "learning_rate": 0.0001234402688466795,
      "loss": 0.6836,
      "step": 4943
    },
    {
      "epoch": 0.44206008583690987,
      "grad_norm": 0.16072213502508248,
      "learning_rate": 0.0001234121145954094,
      "loss": 0.6809,
      "step": 4944
    },
    {
      "epoch": 0.44214949928469244,
      "grad_norm": 0.1425570946130723,
      "learning_rate": 0.00012338395838059352,
      "loss": 0.6566,
      "step": 4945
    },
    {
      "epoch": 0.44223891273247495,
      "grad_norm": 0.13392349070275686,
      "learning_rate": 0.00012335580020459325,
      "loss": 0.6557,
      "step": 4946
    },
    {
      "epoch": 0.4423283261802575,
      "grad_norm": 0.15671558723475013,
      "learning_rate": 0.00012332764006977028,
      "loss": 0.6806,
      "step": 4947
    },
    {
      "epoch": 0.44241773962804004,
      "grad_norm": 0.16971578528203352,
      "learning_rate": 0.0001232994779784863,
      "loss": 0.6803,
      "step": 4948
    },
    {
      "epoch": 0.4425071530758226,
      "grad_norm": 0.1606954868955433,
      "learning_rate": 0.0001232713139331032,
      "loss": 0.6793,
      "step": 4949
    },
    {
      "epoch": 0.44259656652360513,
      "grad_norm": 0.15781368555023498,
      "learning_rate": 0.00012324314793598314,
      "loss": 0.683,
      "step": 4950
    },
    {
      "epoch": 0.4426859799713877,
      "grad_norm": 0.15204380165090903,
      "learning_rate": 0.00012321497998948834,
      "loss": 0.6822,
      "step": 4951
    },
    {
      "epoch": 0.4427753934191702,
      "grad_norm": 0.14132902998818234,
      "learning_rate": 0.00012318681009598116,
      "loss": 0.6548,
      "step": 4952
    },
    {
      "epoch": 0.4428648068669528,
      "grad_norm": 0.12371329466130632,
      "learning_rate": 0.00012315863825782425,
      "loss": 0.6444,
      "step": 4953
    },
    {
      "epoch": 0.44295422031473536,
      "grad_norm": 0.15398553514235438,
      "learning_rate": 0.00012313046447738035,
      "loss": 0.7023,
      "step": 4954
    },
    {
      "epoch": 0.4430436337625179,
      "grad_norm": 0.15742991265773376,
      "learning_rate": 0.0001231022887570123,
      "loss": 0.6772,
      "step": 4955
    },
    {
      "epoch": 0.44313304721030045,
      "grad_norm": 0.14946040525388687,
      "learning_rate": 0.00012307411109908315,
      "loss": 0.6835,
      "step": 4956
    },
    {
      "epoch": 0.44322246065808296,
      "grad_norm": 0.14105696892860445,
      "learning_rate": 0.00012304593150595623,
      "loss": 0.6747,
      "step": 4957
    },
    {
      "epoch": 0.44331187410586553,
      "grad_norm": 0.13894609787227502,
      "learning_rate": 0.00012301774997999483,
      "loss": 0.6676,
      "step": 4958
    },
    {
      "epoch": 0.44340128755364805,
      "grad_norm": 0.15120639915625195,
      "learning_rate": 0.00012298956652356257,
      "loss": 0.6828,
      "step": 4959
    },
    {
      "epoch": 0.4434907010014306,
      "grad_norm": 0.1635098560269106,
      "learning_rate": 0.00012296138113902308,
      "loss": 0.6948,
      "step": 4960
    },
    {
      "epoch": 0.44358011444921314,
      "grad_norm": 0.15757574028806637,
      "learning_rate": 0.00012293319382874037,
      "loss": 0.6764,
      "step": 4961
    },
    {
      "epoch": 0.4436695278969957,
      "grad_norm": 0.16187299496270335,
      "learning_rate": 0.0001229050045950783,
      "loss": 0.6716,
      "step": 4962
    },
    {
      "epoch": 0.4437589413447783,
      "grad_norm": 0.15841785143281806,
      "learning_rate": 0.00012287681344040117,
      "loss": 0.6964,
      "step": 4963
    },
    {
      "epoch": 0.4438483547925608,
      "grad_norm": 0.14803784967740394,
      "learning_rate": 0.00012284862036707339,
      "loss": 0.6678,
      "step": 4964
    },
    {
      "epoch": 0.44393776824034337,
      "grad_norm": 0.14382207246971712,
      "learning_rate": 0.00012282042537745938,
      "loss": 0.6473,
      "step": 4965
    },
    {
      "epoch": 0.4440271816881259,
      "grad_norm": 0.1611034327837252,
      "learning_rate": 0.00012279222847392385,
      "loss": 0.6642,
      "step": 4966
    },
    {
      "epoch": 0.44411659513590845,
      "grad_norm": 0.16391835447175968,
      "learning_rate": 0.0001227640296588316,
      "loss": 0.6632,
      "step": 4967
    },
    {
      "epoch": 0.44420600858369097,
      "grad_norm": 0.16698600227761826,
      "learning_rate": 0.00012273582893454775,
      "loss": 0.6884,
      "step": 4968
    },
    {
      "epoch": 0.44429542203147354,
      "grad_norm": 0.18257445433847988,
      "learning_rate": 0.00012270762630343734,
      "loss": 0.7136,
      "step": 4969
    },
    {
      "epoch": 0.44438483547925606,
      "grad_norm": 0.16506816344722142,
      "learning_rate": 0.00012267942176786575,
      "loss": 0.7109,
      "step": 4970
    },
    {
      "epoch": 0.44447424892703863,
      "grad_norm": 0.13707701158703292,
      "learning_rate": 0.00012265121533019843,
      "loss": 0.6427,
      "step": 4971
    },
    {
      "epoch": 0.4445636623748212,
      "grad_norm": 0.14130123552621252,
      "learning_rate": 0.00012262300699280104,
      "loss": 0.6405,
      "step": 4972
    },
    {
      "epoch": 0.4446530758226037,
      "grad_norm": 0.1659247114889449,
      "learning_rate": 0.00012259479675803939,
      "loss": 0.7017,
      "step": 4973
    },
    {
      "epoch": 0.4447424892703863,
      "grad_norm": 0.14959449522155607,
      "learning_rate": 0.00012256658462827941,
      "loss": 0.6703,
      "step": 4974
    },
    {
      "epoch": 0.4448319027181688,
      "grad_norm": 0.15426838083082542,
      "learning_rate": 0.00012253837060588723,
      "loss": 0.7009,
      "step": 4975
    },
    {
      "epoch": 0.4449213161659514,
      "grad_norm": 0.15338850214994432,
      "learning_rate": 0.00012251015469322916,
      "loss": 0.6857,
      "step": 4976
    },
    {
      "epoch": 0.4450107296137339,
      "grad_norm": 0.16119892495949867,
      "learning_rate": 0.00012248193689267157,
      "loss": 0.6898,
      "step": 4977
    },
    {
      "epoch": 0.44510014306151646,
      "grad_norm": 0.1520321612600389,
      "learning_rate": 0.00012245371720658109,
      "loss": 0.6781,
      "step": 4978
    },
    {
      "epoch": 0.445189556509299,
      "grad_norm": 0.13800231491701148,
      "learning_rate": 0.00012242549563732443,
      "loss": 0.6506,
      "step": 4979
    },
    {
      "epoch": 0.44527896995708155,
      "grad_norm": 0.17521685840383125,
      "learning_rate": 0.00012239727218726857,
      "loss": 0.3841,
      "step": 4980
    },
    {
      "epoch": 0.44536838340486407,
      "grad_norm": 0.16045819483660603,
      "learning_rate": 0.00012236904685878055,
      "loss": 0.6759,
      "step": 4981
    },
    {
      "epoch": 0.44545779685264664,
      "grad_norm": 0.15701495244742505,
      "learning_rate": 0.0001223408196542276,
      "loss": 0.6426,
      "step": 4982
    },
    {
      "epoch": 0.4455472103004292,
      "grad_norm": 0.15418829345162968,
      "learning_rate": 0.00012231259057597703,
      "loss": 0.696,
      "step": 4983
    },
    {
      "epoch": 0.4456366237482117,
      "grad_norm": 0.1585528434466023,
      "learning_rate": 0.00012228435962639646,
      "loss": 0.7114,
      "step": 4984
    },
    {
      "epoch": 0.4457260371959943,
      "grad_norm": 0.15991821162272724,
      "learning_rate": 0.00012225612680785358,
      "loss": 0.6833,
      "step": 4985
    },
    {
      "epoch": 0.4458154506437768,
      "grad_norm": 0.14335428845424106,
      "learning_rate": 0.0001222278921227162,
      "loss": 0.6791,
      "step": 4986
    },
    {
      "epoch": 0.4459048640915594,
      "grad_norm": 0.14587395667439426,
      "learning_rate": 0.00012219965557335236,
      "loss": 0.6732,
      "step": 4987
    },
    {
      "epoch": 0.4459942775393419,
      "grad_norm": 0.16317406709241133,
      "learning_rate": 0.00012217141716213022,
      "loss": 0.6886,
      "step": 4988
    },
    {
      "epoch": 0.44608369098712447,
      "grad_norm": 0.1996987269931736,
      "learning_rate": 0.0001221431768914181,
      "loss": 0.4328,
      "step": 4989
    },
    {
      "epoch": 0.446173104434907,
      "grad_norm": 0.17139324701302755,
      "learning_rate": 0.00012211493476358448,
      "loss": 0.7058,
      "step": 4990
    },
    {
      "epoch": 0.44626251788268956,
      "grad_norm": 0.1553235699924578,
      "learning_rate": 0.00012208669078099798,
      "loss": 0.7059,
      "step": 4991
    },
    {
      "epoch": 0.44635193133047213,
      "grad_norm": 0.14949349561248101,
      "learning_rate": 0.0001220584449460274,
      "loss": 0.6686,
      "step": 4992
    },
    {
      "epoch": 0.44644134477825465,
      "grad_norm": 0.15867400521632602,
      "learning_rate": 0.00012203019726104168,
      "loss": 0.6813,
      "step": 4993
    },
    {
      "epoch": 0.4465307582260372,
      "grad_norm": 0.13062638069550164,
      "learning_rate": 0.00012200194772840991,
      "loss": 0.6441,
      "step": 4994
    },
    {
      "epoch": 0.44662017167381973,
      "grad_norm": 0.17685753099911153,
      "learning_rate": 0.0001219736963505014,
      "loss": 0.6936,
      "step": 4995
    },
    {
      "epoch": 0.4467095851216023,
      "grad_norm": 0.15534346021649761,
      "learning_rate": 0.00012194544312968548,
      "loss": 0.6669,
      "step": 4996
    },
    {
      "epoch": 0.4467989985693848,
      "grad_norm": 0.16302108277755883,
      "learning_rate": 0.00012191718806833177,
      "loss": 0.6612,
      "step": 4997
    },
    {
      "epoch": 0.4468884120171674,
      "grad_norm": 0.156397331846615,
      "learning_rate": 0.00012188893116880993,
      "loss": 0.6335,
      "step": 4998
    },
    {
      "epoch": 0.4469778254649499,
      "grad_norm": 0.1706495508498542,
      "learning_rate": 0.00012186067243348991,
      "loss": 0.7261,
      "step": 4999
    },
    {
      "epoch": 0.4470672389127325,
      "grad_norm": 0.15122070643167515,
      "learning_rate": 0.00012183241186474166,
      "loss": 0.6785,
      "step": 5000
    },
    {
      "epoch": 0.447156652360515,
      "grad_norm": 0.18833783927622327,
      "learning_rate": 0.00012180414946493538,
      "loss": 0.7026,
      "step": 5001
    },
    {
      "epoch": 0.44724606580829757,
      "grad_norm": 0.12738271741229237,
      "learning_rate": 0.00012177588523644146,
      "loss": 0.667,
      "step": 5002
    },
    {
      "epoch": 0.44733547925608014,
      "grad_norm": 0.12954528837311063,
      "learning_rate": 0.00012174761918163034,
      "loss": 0.6499,
      "step": 5003
    },
    {
      "epoch": 0.44742489270386265,
      "grad_norm": 0.15052423845076376,
      "learning_rate": 0.00012171935130287265,
      "loss": 0.6569,
      "step": 5004
    },
    {
      "epoch": 0.4475143061516452,
      "grad_norm": 0.14960530641647984,
      "learning_rate": 0.00012169108160253919,
      "loss": 0.6561,
      "step": 5005
    },
    {
      "epoch": 0.44760371959942774,
      "grad_norm": 0.1577230597701611,
      "learning_rate": 0.00012166281008300094,
      "loss": 0.7079,
      "step": 5006
    },
    {
      "epoch": 0.4476931330472103,
      "grad_norm": 0.13489061872446895,
      "learning_rate": 0.00012163453674662892,
      "loss": 0.6725,
      "step": 5007
    },
    {
      "epoch": 0.44778254649499283,
      "grad_norm": 0.1425060275096881,
      "learning_rate": 0.00012160626159579447,
      "loss": 0.6511,
      "step": 5008
    },
    {
      "epoch": 0.4478719599427754,
      "grad_norm": 0.14504806083915567,
      "learning_rate": 0.00012157798463286894,
      "loss": 0.6678,
      "step": 5009
    },
    {
      "epoch": 0.4479613733905579,
      "grad_norm": 0.1405288083759847,
      "learning_rate": 0.00012154970586022389,
      "loss": 0.6821,
      "step": 5010
    },
    {
      "epoch": 0.4480507868383405,
      "grad_norm": 0.1421523392879838,
      "learning_rate": 0.00012152142528023107,
      "loss": 0.6516,
      "step": 5011
    },
    {
      "epoch": 0.44814020028612306,
      "grad_norm": 0.15112200458080544,
      "learning_rate": 0.00012149314289526228,
      "loss": 0.6608,
      "step": 5012
    },
    {
      "epoch": 0.4482296137339056,
      "grad_norm": 0.15135503973748957,
      "learning_rate": 0.00012146485870768954,
      "loss": 0.6351,
      "step": 5013
    },
    {
      "epoch": 0.44831902718168815,
      "grad_norm": 0.16841461887814482,
      "learning_rate": 0.00012143657271988505,
      "loss": 0.7023,
      "step": 5014
    },
    {
      "epoch": 0.44840844062947066,
      "grad_norm": 0.1531503986483392,
      "learning_rate": 0.00012140828493422107,
      "loss": 0.661,
      "step": 5015
    },
    {
      "epoch": 0.44849785407725323,
      "grad_norm": 0.14568982977119851,
      "learning_rate": 0.0001213799953530701,
      "loss": 0.6571,
      "step": 5016
    },
    {
      "epoch": 0.44858726752503575,
      "grad_norm": 0.15532924949567278,
      "learning_rate": 0.00012135170397880473,
      "loss": 0.6888,
      "step": 5017
    },
    {
      "epoch": 0.4486766809728183,
      "grad_norm": 0.18531149286791823,
      "learning_rate": 0.00012132341081379776,
      "loss": 0.4077,
      "step": 5018
    },
    {
      "epoch": 0.44876609442060084,
      "grad_norm": 0.17339316055179713,
      "learning_rate": 0.00012129511586042206,
      "loss": 0.7011,
      "step": 5019
    },
    {
      "epoch": 0.4488555078683834,
      "grad_norm": 0.17680490616605526,
      "learning_rate": 0.00012126681912105069,
      "loss": 0.3855,
      "step": 5020
    },
    {
      "epoch": 0.448944921316166,
      "grad_norm": 0.17524517301668688,
      "learning_rate": 0.00012123852059805691,
      "loss": 0.6749,
      "step": 5021
    },
    {
      "epoch": 0.4490343347639485,
      "grad_norm": 0.15181093999448725,
      "learning_rate": 0.00012121022029381406,
      "loss": 0.6518,
      "step": 5022
    },
    {
      "epoch": 0.44912374821173107,
      "grad_norm": 0.14932103706208164,
      "learning_rate": 0.00012118191821069565,
      "loss": 0.6433,
      "step": 5023
    },
    {
      "epoch": 0.4492131616595136,
      "grad_norm": 0.1290567123328978,
      "learning_rate": 0.00012115361435107531,
      "loss": 0.6541,
      "step": 5024
    },
    {
      "epoch": 0.44930257510729615,
      "grad_norm": 0.151010246931355,
      "learning_rate": 0.0001211253087173269,
      "loss": 0.6661,
      "step": 5025
    },
    {
      "epoch": 0.44939198855507867,
      "grad_norm": 0.15229032276711368,
      "learning_rate": 0.00012109700131182437,
      "loss": 0.6933,
      "step": 5026
    },
    {
      "epoch": 0.44948140200286124,
      "grad_norm": 0.14741475817271402,
      "learning_rate": 0.00012106869213694181,
      "loss": 0.6724,
      "step": 5027
    },
    {
      "epoch": 0.44957081545064376,
      "grad_norm": 0.1334325277288723,
      "learning_rate": 0.0001210403811950535,
      "loss": 0.6418,
      "step": 5028
    },
    {
      "epoch": 0.44966022889842633,
      "grad_norm": 0.16043411754438708,
      "learning_rate": 0.00012101206848853381,
      "loss": 0.67,
      "step": 5029
    },
    {
      "epoch": 0.44974964234620884,
      "grad_norm": 0.14379307433912322,
      "learning_rate": 0.00012098375401975731,
      "loss": 0.6737,
      "step": 5030
    },
    {
      "epoch": 0.4498390557939914,
      "grad_norm": 0.15522966983114453,
      "learning_rate": 0.00012095543779109873,
      "loss": 0.664,
      "step": 5031
    },
    {
      "epoch": 0.449928469241774,
      "grad_norm": 0.14153084644620723,
      "learning_rate": 0.00012092711980493285,
      "loss": 0.6495,
      "step": 5032
    },
    {
      "epoch": 0.4500178826895565,
      "grad_norm": 0.14815800432435966,
      "learning_rate": 0.00012089880006363475,
      "loss": 0.6365,
      "step": 5033
    },
    {
      "epoch": 0.4501072961373391,
      "grad_norm": 0.169604160375193,
      "learning_rate": 0.00012087047856957949,
      "loss": 0.7094,
      "step": 5034
    },
    {
      "epoch": 0.4501967095851216,
      "grad_norm": 0.19238025419435378,
      "learning_rate": 0.00012084215532514243,
      "loss": 0.7229,
      "step": 5035
    },
    {
      "epoch": 0.45028612303290416,
      "grad_norm": 0.16268316889779555,
      "learning_rate": 0.00012081383033269896,
      "loss": 0.6802,
      "step": 5036
    },
    {
      "epoch": 0.4503755364806867,
      "grad_norm": 0.13763890351994104,
      "learning_rate": 0.00012078550359462464,
      "loss": 0.6365,
      "step": 5037
    },
    {
      "epoch": 0.45046494992846925,
      "grad_norm": 0.1578185896949957,
      "learning_rate": 0.00012075717511329529,
      "loss": 0.6912,
      "step": 5038
    },
    {
      "epoch": 0.45055436337625177,
      "grad_norm": 0.15760983972170642,
      "learning_rate": 0.00012072884489108669,
      "loss": 0.6537,
      "step": 5039
    },
    {
      "epoch": 0.45064377682403434,
      "grad_norm": 0.14111744425924444,
      "learning_rate": 0.00012070051293037492,
      "loss": 0.6478,
      "step": 5040
    },
    {
      "epoch": 0.4507331902718169,
      "grad_norm": 0.15600500196700892,
      "learning_rate": 0.00012067217923353615,
      "loss": 0.6892,
      "step": 5041
    },
    {
      "epoch": 0.4508226037195994,
      "grad_norm": 0.1462934628043686,
      "learning_rate": 0.00012064384380294667,
      "loss": 0.6896,
      "step": 5042
    },
    {
      "epoch": 0.450912017167382,
      "grad_norm": 0.17011282955291734,
      "learning_rate": 0.00012061550664098293,
      "loss": 0.6536,
      "step": 5043
    },
    {
      "epoch": 0.4510014306151645,
      "grad_norm": 0.14467153093114224,
      "learning_rate": 0.00012058716775002152,
      "loss": 0.6521,
      "step": 5044
    },
    {
      "epoch": 0.4510908440629471,
      "grad_norm": 0.15032017432302056,
      "learning_rate": 0.00012055882713243926,
      "loss": 0.6516,
      "step": 5045
    },
    {
      "epoch": 0.4511802575107296,
      "grad_norm": 0.13089309538607602,
      "learning_rate": 0.00012053048479061298,
      "loss": 0.656,
      "step": 5046
    },
    {
      "epoch": 0.45126967095851217,
      "grad_norm": 0.1499793874762614,
      "learning_rate": 0.00012050214072691969,
      "loss": 0.6727,
      "step": 5047
    },
    {
      "epoch": 0.4513590844062947,
      "grad_norm": 0.14372592458221078,
      "learning_rate": 0.0001204737949437367,
      "loss": 0.7267,
      "step": 5048
    },
    {
      "epoch": 0.45144849785407726,
      "grad_norm": 0.13948476272434404,
      "learning_rate": 0.00012044544744344122,
      "loss": 0.6251,
      "step": 5049
    },
    {
      "epoch": 0.4515379113018598,
      "grad_norm": 0.13833877189441332,
      "learning_rate": 0.00012041709822841074,
      "loss": 0.6485,
      "step": 5050
    },
    {
      "epoch": 0.45162732474964234,
      "grad_norm": 0.1507118862317965,
      "learning_rate": 0.00012038874730102288,
      "loss": 0.6479,
      "step": 5051
    },
    {
      "epoch": 0.4517167381974249,
      "grad_norm": 0.16357772431902004,
      "learning_rate": 0.00012036039466365543,
      "loss": 0.7143,
      "step": 5052
    },
    {
      "epoch": 0.45180615164520743,
      "grad_norm": 0.13910547273259133,
      "learning_rate": 0.00012033204031868626,
      "loss": 0.6873,
      "step": 5053
    },
    {
      "epoch": 0.45189556509299,
      "grad_norm": 0.1533450587391136,
      "learning_rate": 0.0001203036842684934,
      "loss": 0.6837,
      "step": 5054
    },
    {
      "epoch": 0.4519849785407725,
      "grad_norm": 0.13078600672250337,
      "learning_rate": 0.00012027532651545512,
      "loss": 0.634,
      "step": 5055
    },
    {
      "epoch": 0.4520743919885551,
      "grad_norm": 0.1494228106930646,
      "learning_rate": 0.00012024696706194967,
      "loss": 0.6643,
      "step": 5056
    },
    {
      "epoch": 0.4521638054363376,
      "grad_norm": 0.1553732857077409,
      "learning_rate": 0.00012021860591035549,
      "loss": 0.6645,
      "step": 5057
    },
    {
      "epoch": 0.4522532188841202,
      "grad_norm": 0.16831323097191145,
      "learning_rate": 0.00012019024306305131,
      "loss": 0.6984,
      "step": 5058
    },
    {
      "epoch": 0.4523426323319027,
      "grad_norm": 0.14233054061617328,
      "learning_rate": 0.00012016187852241583,
      "loss": 0.6257,
      "step": 5059
    },
    {
      "epoch": 0.45243204577968527,
      "grad_norm": 0.1568553494897425,
      "learning_rate": 0.00012013351229082792,
      "loss": 0.7207,
      "step": 5060
    },
    {
      "epoch": 0.45252145922746784,
      "grad_norm": 0.16233807705125497,
      "learning_rate": 0.00012010514437066664,
      "loss": 0.7234,
      "step": 5061
    },
    {
      "epoch": 0.45261087267525035,
      "grad_norm": 0.14592430959525302,
      "learning_rate": 0.00012007677476431123,
      "loss": 0.6716,
      "step": 5062
    },
    {
      "epoch": 0.4527002861230329,
      "grad_norm": 0.1347877699930912,
      "learning_rate": 0.00012004840347414092,
      "loss": 0.6812,
      "step": 5063
    },
    {
      "epoch": 0.45278969957081544,
      "grad_norm": 0.14900890099380185,
      "learning_rate": 0.00012002003050253522,
      "loss": 0.6592,
      "step": 5064
    },
    {
      "epoch": 0.452879113018598,
      "grad_norm": 0.14560379144520139,
      "learning_rate": 0.0001199916558518738,
      "loss": 0.6816,
      "step": 5065
    },
    {
      "epoch": 0.4529685264663805,
      "grad_norm": 0.1534245500958793,
      "learning_rate": 0.00011996327952453629,
      "loss": 0.6632,
      "step": 5066
    },
    {
      "epoch": 0.4530579399141631,
      "grad_norm": 0.13956303781970378,
      "learning_rate": 0.00011993490152290266,
      "loss": 0.6616,
      "step": 5067
    },
    {
      "epoch": 0.4531473533619456,
      "grad_norm": 0.13734457448085363,
      "learning_rate": 0.00011990652184935289,
      "loss": 0.6565,
      "step": 5068
    },
    {
      "epoch": 0.4532367668097282,
      "grad_norm": 0.16960795904861573,
      "learning_rate": 0.00011987814050626722,
      "loss": 0.7174,
      "step": 5069
    },
    {
      "epoch": 0.4533261802575107,
      "grad_norm": 0.1508283891399053,
      "learning_rate": 0.00011984975749602588,
      "loss": 0.6507,
      "step": 5070
    },
    {
      "epoch": 0.4534155937052933,
      "grad_norm": 0.14283441846754039,
      "learning_rate": 0.00011982137282100934,
      "loss": 0.6594,
      "step": 5071
    },
    {
      "epoch": 0.45350500715307585,
      "grad_norm": 0.16063706968666358,
      "learning_rate": 0.00011979298648359823,
      "loss": 0.6806,
      "step": 5072
    },
    {
      "epoch": 0.45359442060085836,
      "grad_norm": 0.1714464633104631,
      "learning_rate": 0.00011976459848617323,
      "loss": 0.7287,
      "step": 5073
    },
    {
      "epoch": 0.45368383404864093,
      "grad_norm": 0.15040136548923583,
      "learning_rate": 0.00011973620883111521,
      "loss": 0.6621,
      "step": 5074
    },
    {
      "epoch": 0.45377324749642345,
      "grad_norm": 0.150323064169993,
      "learning_rate": 0.00011970781752080523,
      "loss": 0.6533,
      "step": 5075
    },
    {
      "epoch": 0.453862660944206,
      "grad_norm": 0.1388649639697715,
      "learning_rate": 0.00011967942455762437,
      "loss": 0.66,
      "step": 5076
    },
    {
      "epoch": 0.45395207439198854,
      "grad_norm": 0.19081818626230818,
      "learning_rate": 0.00011965102994395394,
      "loss": 0.7097,
      "step": 5077
    },
    {
      "epoch": 0.4540414878397711,
      "grad_norm": 0.15051082581841568,
      "learning_rate": 0.00011962263368217535,
      "loss": 0.6509,
      "step": 5078
    },
    {
      "epoch": 0.4541309012875536,
      "grad_norm": 0.15989806576510743,
      "learning_rate": 0.0001195942357746702,
      "loss": 0.6604,
      "step": 5079
    },
    {
      "epoch": 0.4542203147353362,
      "grad_norm": 0.15734500598709786,
      "learning_rate": 0.00011956583622382015,
      "loss": 0.7136,
      "step": 5080
    },
    {
      "epoch": 0.45430972818311877,
      "grad_norm": 0.13419840828620067,
      "learning_rate": 0.000119537435032007,
      "loss": 0.6438,
      "step": 5081
    },
    {
      "epoch": 0.4543991416309013,
      "grad_norm": 0.15098268574812226,
      "learning_rate": 0.00011950903220161285,
      "loss": 0.6748,
      "step": 5082
    },
    {
      "epoch": 0.45448855507868385,
      "grad_norm": 0.15305748444277956,
      "learning_rate": 0.00011948062773501969,
      "loss": 0.6354,
      "step": 5083
    },
    {
      "epoch": 0.45457796852646637,
      "grad_norm": 0.1486681887535822,
      "learning_rate": 0.00011945222163460979,
      "loss": 0.6786,
      "step": 5084
    },
    {
      "epoch": 0.45466738197424894,
      "grad_norm": 0.1630713534779179,
      "learning_rate": 0.00011942381390276556,
      "loss": 0.6688,
      "step": 5085
    },
    {
      "epoch": 0.45475679542203146,
      "grad_norm": 0.1361613089963617,
      "learning_rate": 0.00011939540454186954,
      "loss": 0.6641,
      "step": 5086
    },
    {
      "epoch": 0.45484620886981403,
      "grad_norm": 0.13937418663682818,
      "learning_rate": 0.00011936699355430436,
      "loss": 0.6437,
      "step": 5087
    },
    {
      "epoch": 0.45493562231759654,
      "grad_norm": 0.14545295519734558,
      "learning_rate": 0.00011933858094245281,
      "loss": 0.691,
      "step": 5088
    },
    {
      "epoch": 0.4550250357653791,
      "grad_norm": 0.1751780828143364,
      "learning_rate": 0.00011931016670869784,
      "loss": 0.7035,
      "step": 5089
    },
    {
      "epoch": 0.4551144492131617,
      "grad_norm": 0.19932719407895277,
      "learning_rate": 0.0001192817508554225,
      "loss": 0.6722,
      "step": 5090
    },
    {
      "epoch": 0.4552038626609442,
      "grad_norm": 0.15590214263990976,
      "learning_rate": 0.00011925333338500999,
      "loss": 0.6493,
      "step": 5091
    },
    {
      "epoch": 0.4552932761087268,
      "grad_norm": 0.14267219377497645,
      "learning_rate": 0.00011922491429984368,
      "loss": 0.6394,
      "step": 5092
    },
    {
      "epoch": 0.4553826895565093,
      "grad_norm": 0.13701537538002917,
      "learning_rate": 0.00011919649360230702,
      "loss": 0.6253,
      "step": 5093
    },
    {
      "epoch": 0.45547210300429186,
      "grad_norm": 0.1607751891007317,
      "learning_rate": 0.00011916807129478363,
      "loss": 0.6705,
      "step": 5094
    },
    {
      "epoch": 0.4555615164520744,
      "grad_norm": 0.13348190006361865,
      "learning_rate": 0.00011913964737965723,
      "loss": 0.6418,
      "step": 5095
    },
    {
      "epoch": 0.45565092989985695,
      "grad_norm": 0.16527578553109507,
      "learning_rate": 0.00011911122185931176,
      "loss": 0.6846,
      "step": 5096
    },
    {
      "epoch": 0.45574034334763946,
      "grad_norm": 0.1488777386761082,
      "learning_rate": 0.00011908279473613115,
      "loss": 0.6822,
      "step": 5097
    },
    {
      "epoch": 0.45582975679542204,
      "grad_norm": 0.14253232143216416,
      "learning_rate": 0.0001190543660124996,
      "loss": 0.6558,
      "step": 5098
    },
    {
      "epoch": 0.45591917024320455,
      "grad_norm": 0.1560066123433867,
      "learning_rate": 0.0001190259356908014,
      "loss": 0.6689,
      "step": 5099
    },
    {
      "epoch": 0.4560085836909871,
      "grad_norm": 0.13989833090918724,
      "learning_rate": 0.00011899750377342098,
      "loss": 0.6499,
      "step": 5100
    },
    {
      "epoch": 0.4560979971387697,
      "grad_norm": 0.1607405453269743,
      "learning_rate": 0.0001189690702627428,
      "loss": 0.7131,
      "step": 5101
    },
    {
      "epoch": 0.4561874105865522,
      "grad_norm": 0.1519196971425884,
      "learning_rate": 0.00011894063516115163,
      "loss": 0.7036,
      "step": 5102
    },
    {
      "epoch": 0.4562768240343348,
      "grad_norm": 0.13399028122888076,
      "learning_rate": 0.00011891219847103228,
      "loss": 0.6419,
      "step": 5103
    },
    {
      "epoch": 0.4563662374821173,
      "grad_norm": 0.1337331902418179,
      "learning_rate": 0.00011888376019476966,
      "loss": 0.6396,
      "step": 5104
    },
    {
      "epoch": 0.45645565092989987,
      "grad_norm": 0.18157083868662655,
      "learning_rate": 0.00011885532033474889,
      "loss": 0.7131,
      "step": 5105
    },
    {
      "epoch": 0.4565450643776824,
      "grad_norm": 0.14905382933950012,
      "learning_rate": 0.00011882687889335518,
      "loss": 0.6906,
      "step": 5106
    },
    {
      "epoch": 0.45663447782546496,
      "grad_norm": 0.1485498401573053,
      "learning_rate": 0.00011879843587297387,
      "loss": 0.662,
      "step": 5107
    },
    {
      "epoch": 0.4567238912732475,
      "grad_norm": 0.12741119522888197,
      "learning_rate": 0.00011876999127599042,
      "loss": 0.6388,
      "step": 5108
    },
    {
      "epoch": 0.45681330472103004,
      "grad_norm": 0.14265276262819013,
      "learning_rate": 0.00011874154510479052,
      "loss": 0.6113,
      "step": 5109
    },
    {
      "epoch": 0.4569027181688126,
      "grad_norm": 0.12709687237824133,
      "learning_rate": 0.00011871309736175984,
      "loss": 0.5952,
      "step": 5110
    },
    {
      "epoch": 0.45699213161659513,
      "grad_norm": 0.150221528961044,
      "learning_rate": 0.0001186846480492843,
      "loss": 0.6594,
      "step": 5111
    },
    {
      "epoch": 0.4570815450643777,
      "grad_norm": 0.1340228353773436,
      "learning_rate": 0.00011865619716974984,
      "loss": 0.6865,
      "step": 5112
    },
    {
      "epoch": 0.4571709585121602,
      "grad_norm": 0.14213473902181178,
      "learning_rate": 0.00011862774472554272,
      "loss": 0.6777,
      "step": 5113
    },
    {
      "epoch": 0.4572603719599428,
      "grad_norm": 0.13976190930396054,
      "learning_rate": 0.00011859929071904912,
      "loss": 0.6797,
      "step": 5114
    },
    {
      "epoch": 0.4573497854077253,
      "grad_norm": 0.14995961074972716,
      "learning_rate": 0.00011857083515265546,
      "loss": 0.6898,
      "step": 5115
    },
    {
      "epoch": 0.4574391988555079,
      "grad_norm": 0.1459804157684153,
      "learning_rate": 0.0001185423780287483,
      "loss": 0.6894,
      "step": 5116
    },
    {
      "epoch": 0.4575286123032904,
      "grad_norm": 0.1582447136841337,
      "learning_rate": 0.0001185139193497143,
      "loss": 0.7213,
      "step": 5117
    },
    {
      "epoch": 0.45761802575107297,
      "grad_norm": 0.1456033760096767,
      "learning_rate": 0.0001184854591179402,
      "loss": 0.6628,
      "step": 5118
    },
    {
      "epoch": 0.4577074391988555,
      "grad_norm": 0.13198100218237008,
      "learning_rate": 0.000118456997335813,
      "loss": 0.6524,
      "step": 5119
    },
    {
      "epoch": 0.45779685264663805,
      "grad_norm": 0.1456569079242771,
      "learning_rate": 0.00011842853400571971,
      "loss": 0.6843,
      "step": 5120
    },
    {
      "epoch": 0.4578862660944206,
      "grad_norm": 0.1587715964597877,
      "learning_rate": 0.00011840006913004753,
      "loss": 0.6656,
      "step": 5121
    },
    {
      "epoch": 0.45797567954220314,
      "grad_norm": 0.14756270398112334,
      "learning_rate": 0.00011837160271118377,
      "loss": 0.6681,
      "step": 5122
    },
    {
      "epoch": 0.4580650929899857,
      "grad_norm": 0.16945212248249905,
      "learning_rate": 0.00011834313475151591,
      "loss": 0.6505,
      "step": 5123
    },
    {
      "epoch": 0.4581545064377682,
      "grad_norm": 0.15009820861880682,
      "learning_rate": 0.00011831466525343146,
      "loss": 0.6577,
      "step": 5124
    },
    {
      "epoch": 0.4582439198855508,
      "grad_norm": 0.1342791107003628,
      "learning_rate": 0.00011828619421931817,
      "loss": 0.6792,
      "step": 5125
    },
    {
      "epoch": 0.4583333333333333,
      "grad_norm": 0.14902654822212327,
      "learning_rate": 0.00011825772165156384,
      "loss": 0.6516,
      "step": 5126
    },
    {
      "epoch": 0.4584227467811159,
      "grad_norm": 0.14850161392408637,
      "learning_rate": 0.00011822924755255647,
      "loss": 0.6478,
      "step": 5127
    },
    {
      "epoch": 0.4585121602288984,
      "grad_norm": 0.14701244052709217,
      "learning_rate": 0.00011820077192468409,
      "loss": 0.6719,
      "step": 5128
    },
    {
      "epoch": 0.458601573676681,
      "grad_norm": 0.15519399793808555,
      "learning_rate": 0.00011817229477033495,
      "loss": 0.6334,
      "step": 5129
    },
    {
      "epoch": 0.45869098712446355,
      "grad_norm": 0.15221029395682162,
      "learning_rate": 0.00011814381609189741,
      "loss": 0.6431,
      "step": 5130
    },
    {
      "epoch": 0.45878040057224606,
      "grad_norm": 0.2184089125491799,
      "learning_rate": 0.00011811533589175994,
      "loss": 0.401,
      "step": 5131
    },
    {
      "epoch": 0.45886981402002863,
      "grad_norm": 0.17011148596810033,
      "learning_rate": 0.00011808685417231111,
      "loss": 0.6853,
      "step": 5132
    },
    {
      "epoch": 0.45895922746781115,
      "grad_norm": 0.17310693772803826,
      "learning_rate": 0.00011805837093593965,
      "loss": 0.7064,
      "step": 5133
    },
    {
      "epoch": 0.4590486409155937,
      "grad_norm": 0.1618149599865905,
      "learning_rate": 0.00011802988618503447,
      "loss": 0.6688,
      "step": 5134
    },
    {
      "epoch": 0.45913805436337624,
      "grad_norm": 0.151672015208134,
      "learning_rate": 0.00011800139992198447,
      "loss": 0.6487,
      "step": 5135
    },
    {
      "epoch": 0.4592274678111588,
      "grad_norm": 0.15553932661655387,
      "learning_rate": 0.00011797291214917881,
      "loss": 0.6473,
      "step": 5136
    },
    {
      "epoch": 0.4593168812589413,
      "grad_norm": 0.17242802198848253,
      "learning_rate": 0.00011794442286900673,
      "loss": 0.3928,
      "step": 5137
    },
    {
      "epoch": 0.4594062947067239,
      "grad_norm": 0.16401912737260163,
      "learning_rate": 0.00011791593208385756,
      "loss": 0.647,
      "step": 5138
    },
    {
      "epoch": 0.4594957081545064,
      "grad_norm": 0.149706784585706,
      "learning_rate": 0.0001178874397961208,
      "loss": 0.6891,
      "step": 5139
    },
    {
      "epoch": 0.459585121602289,
      "grad_norm": 0.1462153385843868,
      "learning_rate": 0.00011785894600818608,
      "loss": 0.6624,
      "step": 5140
    },
    {
      "epoch": 0.45967453505007155,
      "grad_norm": 0.1374506964834282,
      "learning_rate": 0.0001178304507224431,
      "loss": 0.6151,
      "step": 5141
    },
    {
      "epoch": 0.45976394849785407,
      "grad_norm": 0.13800460127073358,
      "learning_rate": 0.0001178019539412818,
      "loss": 0.6368,
      "step": 5142
    },
    {
      "epoch": 0.45985336194563664,
      "grad_norm": 0.14040838305975342,
      "learning_rate": 0.00011777345566709206,
      "loss": 0.6443,
      "step": 5143
    },
    {
      "epoch": 0.45994277539341916,
      "grad_norm": 0.13318795490610547,
      "learning_rate": 0.00011774495590226411,
      "loss": 0.6655,
      "step": 5144
    },
    {
      "epoch": 0.46003218884120173,
      "grad_norm": 0.16490211848145997,
      "learning_rate": 0.00011771645464918813,
      "loss": 0.6621,
      "step": 5145
    },
    {
      "epoch": 0.46012160228898424,
      "grad_norm": 0.1425756548483059,
      "learning_rate": 0.00011768795191025445,
      "loss": 0.6738,
      "step": 5146
    },
    {
      "epoch": 0.4602110157367668,
      "grad_norm": 0.1599998909001487,
      "learning_rate": 0.00011765944768785366,
      "loss": 0.672,
      "step": 5147
    },
    {
      "epoch": 0.46030042918454933,
      "grad_norm": 0.14545552511792897,
      "learning_rate": 0.0001176309419843763,
      "loss": 0.6488,
      "step": 5148
    },
    {
      "epoch": 0.4603898426323319,
      "grad_norm": 0.18047251741355488,
      "learning_rate": 0.00011760243480221313,
      "loss": 0.6917,
      "step": 5149
    },
    {
      "epoch": 0.4604792560801145,
      "grad_norm": 0.16042798460131608,
      "learning_rate": 0.000117573926143755,
      "loss": 0.6829,
      "step": 5150
    },
    {
      "epoch": 0.460568669527897,
      "grad_norm": 0.14307171349532832,
      "learning_rate": 0.00011754541601139292,
      "loss": 0.6465,
      "step": 5151
    },
    {
      "epoch": 0.46065808297567956,
      "grad_norm": 0.1710606620165022,
      "learning_rate": 0.000117516904407518,
      "loss": 0.6684,
      "step": 5152
    },
    {
      "epoch": 0.4607474964234621,
      "grad_norm": 0.1479354677004218,
      "learning_rate": 0.00011748839133452143,
      "loss": 0.6673,
      "step": 5153
    },
    {
      "epoch": 0.46083690987124465,
      "grad_norm": 0.14688593806217748,
      "learning_rate": 0.00011745987679479462,
      "loss": 0.6475,
      "step": 5154
    },
    {
      "epoch": 0.46092632331902716,
      "grad_norm": 0.14997065916082802,
      "learning_rate": 0.00011743136079072903,
      "loss": 0.66,
      "step": 5155
    },
    {
      "epoch": 0.46101573676680974,
      "grad_norm": 0.16023453437690505,
      "learning_rate": 0.00011740284332471628,
      "loss": 0.6956,
      "step": 5156
    },
    {
      "epoch": 0.46110515021459225,
      "grad_norm": 0.1541338640814609,
      "learning_rate": 0.00011737432439914804,
      "loss": 0.6983,
      "step": 5157
    },
    {
      "epoch": 0.4611945636623748,
      "grad_norm": 0.15707417637883267,
      "learning_rate": 0.0001173458040164162,
      "loss": 0.6964,
      "step": 5158
    },
    {
      "epoch": 0.4612839771101574,
      "grad_norm": 0.1756210970964504,
      "learning_rate": 0.00011731728217891275,
      "loss": 0.6526,
      "step": 5159
    },
    {
      "epoch": 0.4613733905579399,
      "grad_norm": 0.18238098756952115,
      "learning_rate": 0.00011728875888902975,
      "loss": 0.7116,
      "step": 5160
    },
    {
      "epoch": 0.4614628040057225,
      "grad_norm": 0.16484928711156466,
      "learning_rate": 0.00011726023414915941,
      "loss": 0.6936,
      "step": 5161
    },
    {
      "epoch": 0.461552217453505,
      "grad_norm": 0.16636514101713046,
      "learning_rate": 0.00011723170796169409,
      "loss": 0.6275,
      "step": 5162
    },
    {
      "epoch": 0.46164163090128757,
      "grad_norm": 0.132658228725625,
      "learning_rate": 0.00011720318032902624,
      "loss": 0.674,
      "step": 5163
    },
    {
      "epoch": 0.4617310443490701,
      "grad_norm": 0.21034973708963997,
      "learning_rate": 0.0001171746512535484,
      "loss": 0.4249,
      "step": 5164
    },
    {
      "epoch": 0.46182045779685266,
      "grad_norm": 0.14552781584738675,
      "learning_rate": 0.00011714612073765332,
      "loss": 0.6338,
      "step": 5165
    },
    {
      "epoch": 0.4619098712446352,
      "grad_norm": 0.1435264346964644,
      "learning_rate": 0.00011711758878373383,
      "loss": 0.6865,
      "step": 5166
    },
    {
      "epoch": 0.46199928469241774,
      "grad_norm": 0.1467028677640329,
      "learning_rate": 0.0001170890553941828,
      "loss": 0.6774,
      "step": 5167
    },
    {
      "epoch": 0.46208869814020026,
      "grad_norm": 0.16985197982242817,
      "learning_rate": 0.00011706052057139335,
      "loss": 0.6869,
      "step": 5168
    },
    {
      "epoch": 0.46217811158798283,
      "grad_norm": 0.15601817745246474,
      "learning_rate": 0.00011703198431775863,
      "loss": 0.6689,
      "step": 5169
    },
    {
      "epoch": 0.4622675250357654,
      "grad_norm": 0.14489082327713387,
      "learning_rate": 0.00011700344663567197,
      "loss": 0.6942,
      "step": 5170
    },
    {
      "epoch": 0.4623569384835479,
      "grad_norm": 0.16642826391284193,
      "learning_rate": 0.00011697490752752678,
      "loss": 0.7211,
      "step": 5171
    },
    {
      "epoch": 0.4624463519313305,
      "grad_norm": 0.15457878108482584,
      "learning_rate": 0.00011694636699571657,
      "loss": 0.6867,
      "step": 5172
    },
    {
      "epoch": 0.462535765379113,
      "grad_norm": 0.15610316089263576,
      "learning_rate": 0.00011691782504263505,
      "loss": 0.6945,
      "step": 5173
    },
    {
      "epoch": 0.4626251788268956,
      "grad_norm": 0.12987839510630717,
      "learning_rate": 0.00011688928167067597,
      "loss": 0.6332,
      "step": 5174
    },
    {
      "epoch": 0.4627145922746781,
      "grad_norm": 0.13731160375110146,
      "learning_rate": 0.0001168607368822332,
      "loss": 0.6896,
      "step": 5175
    },
    {
      "epoch": 0.46280400572246067,
      "grad_norm": 0.11825359872656933,
      "learning_rate": 0.00011683219067970084,
      "loss": 0.6658,
      "step": 5176
    },
    {
      "epoch": 0.4628934191702432,
      "grad_norm": 0.161793773989266,
      "learning_rate": 0.00011680364306547298,
      "loss": 0.6573,
      "step": 5177
    },
    {
      "epoch": 0.46298283261802575,
      "grad_norm": 0.1444724609290293,
      "learning_rate": 0.00011677509404194382,
      "loss": 0.6599,
      "step": 5178
    },
    {
      "epoch": 0.4630722460658083,
      "grad_norm": 0.1715080851279958,
      "learning_rate": 0.0001167465436115078,
      "loss": 0.7214,
      "step": 5179
    },
    {
      "epoch": 0.46316165951359084,
      "grad_norm": 0.1490431577274715,
      "learning_rate": 0.00011671799177655942,
      "loss": 0.6824,
      "step": 5180
    },
    {
      "epoch": 0.4632510729613734,
      "grad_norm": 0.13569904784071252,
      "learning_rate": 0.00011668943853949323,
      "loss": 0.6687,
      "step": 5181
    },
    {
      "epoch": 0.4633404864091559,
      "grad_norm": 0.1508210151677476,
      "learning_rate": 0.000116660883902704,
      "loss": 0.6896,
      "step": 5182
    },
    {
      "epoch": 0.4634298998569385,
      "grad_norm": 0.13449404605773835,
      "learning_rate": 0.00011663232786858656,
      "loss": 0.6305,
      "step": 5183
    },
    {
      "epoch": 0.463519313304721,
      "grad_norm": 0.16897068294181317,
      "learning_rate": 0.00011660377043953588,
      "loss": 0.6753,
      "step": 5184
    },
    {
      "epoch": 0.4636087267525036,
      "grad_norm": 0.16975763737462585,
      "learning_rate": 0.000116575211617947,
      "loss": 0.7238,
      "step": 5185
    },
    {
      "epoch": 0.4636981402002861,
      "grad_norm": 0.15354098323627455,
      "learning_rate": 0.00011654665140621515,
      "loss": 0.6617,
      "step": 5186
    },
    {
      "epoch": 0.4637875536480687,
      "grad_norm": 0.14753531446199722,
      "learning_rate": 0.00011651808980673561,
      "loss": 0.6823,
      "step": 5187
    },
    {
      "epoch": 0.4638769670958512,
      "grad_norm": 0.1789138430373574,
      "learning_rate": 0.00011648952682190387,
      "loss": 0.7058,
      "step": 5188
    },
    {
      "epoch": 0.46396638054363376,
      "grad_norm": 0.15044395738741215,
      "learning_rate": 0.00011646096245411538,
      "loss": 0.6708,
      "step": 5189
    },
    {
      "epoch": 0.46405579399141633,
      "grad_norm": 0.1488215116065796,
      "learning_rate": 0.00011643239670576589,
      "loss": 0.648,
      "step": 5190
    },
    {
      "epoch": 0.46414520743919885,
      "grad_norm": 0.1385501624666876,
      "learning_rate": 0.00011640382957925111,
      "loss": 0.677,
      "step": 5191
    },
    {
      "epoch": 0.4642346208869814,
      "grad_norm": 0.15670758439147378,
      "learning_rate": 0.00011637526107696694,
      "loss": 0.6481,
      "step": 5192
    },
    {
      "epoch": 0.46432403433476394,
      "grad_norm": 0.16038594021342262,
      "learning_rate": 0.00011634669120130943,
      "loss": 0.6741,
      "step": 5193
    },
    {
      "epoch": 0.4644134477825465,
      "grad_norm": 0.16149849321934961,
      "learning_rate": 0.00011631811995467467,
      "loss": 0.6731,
      "step": 5194
    },
    {
      "epoch": 0.464502861230329,
      "grad_norm": 0.17096519885467484,
      "learning_rate": 0.0001162895473394589,
      "loss": 0.6963,
      "step": 5195
    },
    {
      "epoch": 0.4645922746781116,
      "grad_norm": 0.13764959971381166,
      "learning_rate": 0.00011626097335805843,
      "loss": 0.6157,
      "step": 5196
    },
    {
      "epoch": 0.4646816881258941,
      "grad_norm": 0.15131720252559477,
      "learning_rate": 0.00011623239801286981,
      "loss": 0.6267,
      "step": 5197
    },
    {
      "epoch": 0.4647711015736767,
      "grad_norm": 0.15431611376151674,
      "learning_rate": 0.00011620382130628952,
      "loss": 0.6882,
      "step": 5198
    },
    {
      "epoch": 0.46486051502145925,
      "grad_norm": 0.1532624221597536,
      "learning_rate": 0.00011617524324071433,
      "loss": 0.6632,
      "step": 5199
    },
    {
      "epoch": 0.46494992846924177,
      "grad_norm": 0.13327492363858415,
      "learning_rate": 0.00011614666381854107,
      "loss": 0.6439,
      "step": 5200
    },
    {
      "epoch": 0.46503934191702434,
      "grad_norm": 0.1218947916831637,
      "learning_rate": 0.00011611808304216658,
      "loss": 0.6497,
      "step": 5201
    },
    {
      "epoch": 0.46512875536480686,
      "grad_norm": 0.12358016918793101,
      "learning_rate": 0.0001160895009139879,
      "loss": 0.6356,
      "step": 5202
    },
    {
      "epoch": 0.4652181688125894,
      "grad_norm": 0.13347900061181292,
      "learning_rate": 0.00011606091743640224,
      "loss": 0.6354,
      "step": 5203
    },
    {
      "epoch": 0.46530758226037194,
      "grad_norm": 0.16870731998620694,
      "learning_rate": 0.00011603233261180683,
      "loss": 0.7076,
      "step": 5204
    },
    {
      "epoch": 0.4653969957081545,
      "grad_norm": 0.14899445129507058,
      "learning_rate": 0.00011600374644259906,
      "loss": 0.6483,
      "step": 5205
    },
    {
      "epoch": 0.46548640915593703,
      "grad_norm": 0.1578336775790434,
      "learning_rate": 0.00011597515893117637,
      "loss": 0.6526,
      "step": 5206
    },
    {
      "epoch": 0.4655758226037196,
      "grad_norm": 0.1626902821667608,
      "learning_rate": 0.00011594657007993644,
      "loss": 0.6472,
      "step": 5207
    },
    {
      "epoch": 0.4656652360515021,
      "grad_norm": 0.16097828176358803,
      "learning_rate": 0.0001159179798912769,
      "loss": 0.6704,
      "step": 5208
    },
    {
      "epoch": 0.4657546494992847,
      "grad_norm": 0.17811441993764798,
      "learning_rate": 0.0001158893883675956,
      "loss": 0.7092,
      "step": 5209
    },
    {
      "epoch": 0.46584406294706726,
      "grad_norm": 0.18276569558033315,
      "learning_rate": 0.00011586079551129053,
      "loss": 0.6831,
      "step": 5210
    },
    {
      "epoch": 0.4659334763948498,
      "grad_norm": 0.15786895236492515,
      "learning_rate": 0.00011583220132475966,
      "loss": 0.663,
      "step": 5211
    },
    {
      "epoch": 0.46602288984263235,
      "grad_norm": 0.1483039435320675,
      "learning_rate": 0.0001158036058104012,
      "loss": 0.6476,
      "step": 5212
    },
    {
      "epoch": 0.46611230329041486,
      "grad_norm": 0.1264613832989185,
      "learning_rate": 0.00011577500897061338,
      "loss": 0.6389,
      "step": 5213
    },
    {
      "epoch": 0.46620171673819744,
      "grad_norm": 0.14901446379764094,
      "learning_rate": 0.00011574641080779464,
      "loss": 0.6268,
      "step": 5214
    },
    {
      "epoch": 0.46629113018597995,
      "grad_norm": 0.1725445247386035,
      "learning_rate": 0.00011571781132434343,
      "loss": 0.6587,
      "step": 5215
    },
    {
      "epoch": 0.4663805436337625,
      "grad_norm": 0.16324946530903128,
      "learning_rate": 0.00011568921052265836,
      "loss": 0.6802,
      "step": 5216
    },
    {
      "epoch": 0.46646995708154504,
      "grad_norm": 0.16309418805065395,
      "learning_rate": 0.00011566060840513817,
      "loss": 0.6661,
      "step": 5217
    },
    {
      "epoch": 0.4665593705293276,
      "grad_norm": 0.14012779033590245,
      "learning_rate": 0.00011563200497418168,
      "loss": 0.6526,
      "step": 5218
    },
    {
      "epoch": 0.4666487839771102,
      "grad_norm": 0.16645968169812853,
      "learning_rate": 0.00011560340023218776,
      "loss": 0.6881,
      "step": 5219
    },
    {
      "epoch": 0.4667381974248927,
      "grad_norm": 0.15410104651524756,
      "learning_rate": 0.00011557479418155555,
      "loss": 0.6855,
      "step": 5220
    },
    {
      "epoch": 0.46682761087267527,
      "grad_norm": 0.17300791924297007,
      "learning_rate": 0.00011554618682468416,
      "loss": 0.6412,
      "step": 5221
    },
    {
      "epoch": 0.4669170243204578,
      "grad_norm": 0.16641691684105664,
      "learning_rate": 0.00011551757816397285,
      "loss": 0.6454,
      "step": 5222
    },
    {
      "epoch": 0.46700643776824036,
      "grad_norm": 0.13444550717864906,
      "learning_rate": 0.00011548896820182095,
      "loss": 0.6339,
      "step": 5223
    },
    {
      "epoch": 0.4670958512160229,
      "grad_norm": 0.15858425738478332,
      "learning_rate": 0.00011546035694062806,
      "loss": 0.6674,
      "step": 5224
    },
    {
      "epoch": 0.46718526466380544,
      "grad_norm": 0.14339343443275981,
      "learning_rate": 0.0001154317443827937,
      "loss": 0.6726,
      "step": 5225
    },
    {
      "epoch": 0.46727467811158796,
      "grad_norm": 0.18717328280125875,
      "learning_rate": 0.00011540313053071752,
      "loss": 0.7112,
      "step": 5226
    },
    {
      "epoch": 0.46736409155937053,
      "grad_norm": 0.154228694157154,
      "learning_rate": 0.00011537451538679944,
      "loss": 0.7027,
      "step": 5227
    },
    {
      "epoch": 0.4674535050071531,
      "grad_norm": 0.13248487850760576,
      "learning_rate": 0.00011534589895343933,
      "loss": 0.6672,
      "step": 5228
    },
    {
      "epoch": 0.4675429184549356,
      "grad_norm": 0.14691055824930196,
      "learning_rate": 0.00011531728123303715,
      "loss": 0.6436,
      "step": 5229
    },
    {
      "epoch": 0.4676323319027182,
      "grad_norm": 0.14553631586535984,
      "learning_rate": 0.00011528866222799313,
      "loss": 0.6753,
      "step": 5230
    },
    {
      "epoch": 0.4677217453505007,
      "grad_norm": 0.1714744444573861,
      "learning_rate": 0.00011526004194070748,
      "loss": 0.7145,
      "step": 5231
    },
    {
      "epoch": 0.4678111587982833,
      "grad_norm": 0.16850497144280102,
      "learning_rate": 0.0001152314203735805,
      "loss": 0.7027,
      "step": 5232
    },
    {
      "epoch": 0.4679005722460658,
      "grad_norm": 0.16076688164804454,
      "learning_rate": 0.00011520279752901273,
      "loss": 0.6991,
      "step": 5233
    },
    {
      "epoch": 0.46798998569384836,
      "grad_norm": 0.16030817408650452,
      "learning_rate": 0.00011517417340940468,
      "loss": 0.6628,
      "step": 5234
    },
    {
      "epoch": 0.4680793991416309,
      "grad_norm": 0.13528345622489837,
      "learning_rate": 0.00011514554801715704,
      "loss": 0.6479,
      "step": 5235
    },
    {
      "epoch": 0.46816881258941345,
      "grad_norm": 0.16041658156607727,
      "learning_rate": 0.00011511692135467054,
      "loss": 0.6438,
      "step": 5236
    },
    {
      "epoch": 0.46825822603719597,
      "grad_norm": 0.15182791064031823,
      "learning_rate": 0.00011508829342434615,
      "loss": 0.6895,
      "step": 5237
    },
    {
      "epoch": 0.46834763948497854,
      "grad_norm": 0.13333351003685182,
      "learning_rate": 0.00011505966422858481,
      "loss": 0.6586,
      "step": 5238
    },
    {
      "epoch": 0.4684370529327611,
      "grad_norm": 0.14889880373523962,
      "learning_rate": 0.00011503103376978759,
      "loss": 0.6523,
      "step": 5239
    },
    {
      "epoch": 0.4685264663805436,
      "grad_norm": 0.15231367378012706,
      "learning_rate": 0.00011500240205035573,
      "loss": 0.6829,
      "step": 5240
    },
    {
      "epoch": 0.4686158798283262,
      "grad_norm": 0.16648339448215474,
      "learning_rate": 0.00011497376907269053,
      "loss": 0.6934,
      "step": 5241
    },
    {
      "epoch": 0.4687052932761087,
      "grad_norm": 0.17697328220461397,
      "learning_rate": 0.00011494513483919342,
      "loss": 0.6668,
      "step": 5242
    },
    {
      "epoch": 0.4687947067238913,
      "grad_norm": 0.13436594839974927,
      "learning_rate": 0.00011491649935226584,
      "loss": 0.6377,
      "step": 5243
    },
    {
      "epoch": 0.4688841201716738,
      "grad_norm": 0.14100794548043113,
      "learning_rate": 0.00011488786261430954,
      "loss": 0.6717,
      "step": 5244
    },
    {
      "epoch": 0.4689735336194564,
      "grad_norm": 0.15549898927884276,
      "learning_rate": 0.00011485922462772616,
      "loss": 0.7055,
      "step": 5245
    },
    {
      "epoch": 0.4690629470672389,
      "grad_norm": 0.15633764032531086,
      "learning_rate": 0.00011483058539491756,
      "loss": 0.7027,
      "step": 5246
    },
    {
      "epoch": 0.46915236051502146,
      "grad_norm": 0.16601655604711463,
      "learning_rate": 0.00011480194491828567,
      "loss": 0.6935,
      "step": 5247
    },
    {
      "epoch": 0.46924177396280403,
      "grad_norm": 0.1402446654740984,
      "learning_rate": 0.00011477330320023255,
      "loss": 0.6613,
      "step": 5248
    },
    {
      "epoch": 0.46933118741058655,
      "grad_norm": 0.1728209523794144,
      "learning_rate": 0.00011474466024316029,
      "loss": 0.705,
      "step": 5249
    },
    {
      "epoch": 0.4694206008583691,
      "grad_norm": 0.15138536528197485,
      "learning_rate": 0.0001147160160494712,
      "loss": 0.701,
      "step": 5250
    },
    {
      "epoch": 0.46951001430615164,
      "grad_norm": 0.15371781550201438,
      "learning_rate": 0.00011468737062156765,
      "loss": 0.6282,
      "step": 5251
    },
    {
      "epoch": 0.4695994277539342,
      "grad_norm": 0.17192234414638188,
      "learning_rate": 0.00011465872396185204,
      "loss": 0.6942,
      "step": 5252
    },
    {
      "epoch": 0.4696888412017167,
      "grad_norm": 0.1722304436974437,
      "learning_rate": 0.00011463007607272695,
      "loss": 0.6856,
      "step": 5253
    },
    {
      "epoch": 0.4697782546494993,
      "grad_norm": 0.1638569770861929,
      "learning_rate": 0.00011460142695659503,
      "loss": 0.6841,
      "step": 5254
    },
    {
      "epoch": 0.4698676680972818,
      "grad_norm": 0.16427063168333947,
      "learning_rate": 0.00011457277661585912,
      "loss": 0.7489,
      "step": 5255
    },
    {
      "epoch": 0.4699570815450644,
      "grad_norm": 0.13089027046144114,
      "learning_rate": 0.000114544125052922,
      "loss": 0.6434,
      "step": 5256
    },
    {
      "epoch": 0.4700464949928469,
      "grad_norm": 0.14066274492675004,
      "learning_rate": 0.00011451547227018666,
      "loss": 0.6548,
      "step": 5257
    },
    {
      "epoch": 0.47013590844062947,
      "grad_norm": 0.16620403036070086,
      "learning_rate": 0.00011448681827005623,
      "loss": 0.6911,
      "step": 5258
    },
    {
      "epoch": 0.47022532188841204,
      "grad_norm": 0.1413918934054488,
      "learning_rate": 0.00011445816305493382,
      "loss": 0.6514,
      "step": 5259
    },
    {
      "epoch": 0.47031473533619456,
      "grad_norm": 0.16601868017146298,
      "learning_rate": 0.00011442950662722274,
      "loss": 0.6986,
      "step": 5260
    },
    {
      "epoch": 0.4704041487839771,
      "grad_norm": 0.19004172957426727,
      "learning_rate": 0.00011440084898932637,
      "loss": 0.3859,
      "step": 5261
    },
    {
      "epoch": 0.47049356223175964,
      "grad_norm": 0.16459289024426102,
      "learning_rate": 0.00011437219014364819,
      "loss": 0.6892,
      "step": 5262
    },
    {
      "epoch": 0.4705829756795422,
      "grad_norm": 0.15169907881668412,
      "learning_rate": 0.00011434353009259178,
      "loss": 0.6429,
      "step": 5263
    },
    {
      "epoch": 0.47067238912732473,
      "grad_norm": 0.17012236956992308,
      "learning_rate": 0.00011431486883856082,
      "loss": 0.6905,
      "step": 5264
    },
    {
      "epoch": 0.4707618025751073,
      "grad_norm": 0.14994836326173522,
      "learning_rate": 0.0001142862063839591,
      "loss": 0.6612,
      "step": 5265
    },
    {
      "epoch": 0.4708512160228898,
      "grad_norm": 0.16133561417407533,
      "learning_rate": 0.00011425754273119049,
      "loss": 0.7046,
      "step": 5266
    },
    {
      "epoch": 0.4709406294706724,
      "grad_norm": 0.14338702542126708,
      "learning_rate": 0.00011422887788265901,
      "loss": 0.6399,
      "step": 5267
    },
    {
      "epoch": 0.47103004291845496,
      "grad_norm": 0.14601839840769715,
      "learning_rate": 0.00011420021184076872,
      "loss": 0.6419,
      "step": 5268
    },
    {
      "epoch": 0.4711194563662375,
      "grad_norm": 0.15524866464260273,
      "learning_rate": 0.00011417154460792381,
      "loss": 0.6696,
      "step": 5269
    },
    {
      "epoch": 0.47120886981402005,
      "grad_norm": 0.15042955879593978,
      "learning_rate": 0.00011414287618652857,
      "loss": 0.6454,
      "step": 5270
    },
    {
      "epoch": 0.47129828326180256,
      "grad_norm": 0.1431206231320293,
      "learning_rate": 0.00011411420657898737,
      "loss": 0.6646,
      "step": 5271
    },
    {
      "epoch": 0.47138769670958514,
      "grad_norm": 0.17054164362310972,
      "learning_rate": 0.00011408553578770473,
      "loss": 0.7129,
      "step": 5272
    },
    {
      "epoch": 0.47147711015736765,
      "grad_norm": 0.16364918629395442,
      "learning_rate": 0.0001140568638150852,
      "loss": 0.6852,
      "step": 5273
    },
    {
      "epoch": 0.4715665236051502,
      "grad_norm": 0.15959009025684523,
      "learning_rate": 0.00011402819066353348,
      "loss": 0.6929,
      "step": 5274
    },
    {
      "epoch": 0.47165593705293274,
      "grad_norm": 0.1527093768519325,
      "learning_rate": 0.00011399951633545438,
      "loss": 0.6476,
      "step": 5275
    },
    {
      "epoch": 0.4717453505007153,
      "grad_norm": 0.15129529318181958,
      "learning_rate": 0.00011397084083325271,
      "loss": 0.6802,
      "step": 5276
    },
    {
      "epoch": 0.4718347639484979,
      "grad_norm": 0.13111491810020928,
      "learning_rate": 0.00011394216415933355,
      "loss": 0.6226,
      "step": 5277
    },
    {
      "epoch": 0.4719241773962804,
      "grad_norm": 0.15244518114190084,
      "learning_rate": 0.00011391348631610186,
      "loss": 0.6498,
      "step": 5278
    },
    {
      "epoch": 0.47201359084406297,
      "grad_norm": 0.16109092268786723,
      "learning_rate": 0.0001138848073059629,
      "loss": 0.6734,
      "step": 5279
    },
    {
      "epoch": 0.4721030042918455,
      "grad_norm": 0.12296098077386297,
      "learning_rate": 0.0001138561271313219,
      "loss": 0.6696,
      "step": 5280
    },
    {
      "epoch": 0.47219241773962806,
      "grad_norm": 0.1521158573856457,
      "learning_rate": 0.00011382744579458426,
      "loss": 0.6556,
      "step": 5281
    },
    {
      "epoch": 0.4722818311874106,
      "grad_norm": 0.1537417812268301,
      "learning_rate": 0.00011379876329815546,
      "loss": 0.6465,
      "step": 5282
    },
    {
      "epoch": 0.47237124463519314,
      "grad_norm": 0.14894406679171876,
      "learning_rate": 0.00011377007964444104,
      "loss": 0.6834,
      "step": 5283
    },
    {
      "epoch": 0.47246065808297566,
      "grad_norm": 0.18093923581766105,
      "learning_rate": 0.00011374139483584667,
      "loss": 0.719,
      "step": 5284
    },
    {
      "epoch": 0.47255007153075823,
      "grad_norm": 0.1460443849507039,
      "learning_rate": 0.00011371270887477809,
      "loss": 0.6621,
      "step": 5285
    },
    {
      "epoch": 0.47263948497854075,
      "grad_norm": 0.1421589729753596,
      "learning_rate": 0.00011368402176364121,
      "loss": 0.6439,
      "step": 5286
    },
    {
      "epoch": 0.4727288984263233,
      "grad_norm": 0.16526380880591884,
      "learning_rate": 0.00011365533350484194,
      "loss": 0.6716,
      "step": 5287
    },
    {
      "epoch": 0.4728183118741059,
      "grad_norm": 0.15100181311824948,
      "learning_rate": 0.00011362664410078632,
      "loss": 0.6582,
      "step": 5288
    },
    {
      "epoch": 0.4729077253218884,
      "grad_norm": 0.14968356915953226,
      "learning_rate": 0.00011359795355388054,
      "loss": 0.6932,
      "step": 5289
    },
    {
      "epoch": 0.472997138769671,
      "grad_norm": 0.14692032723045434,
      "learning_rate": 0.0001135692618665308,
      "loss": 0.6529,
      "step": 5290
    },
    {
      "epoch": 0.4730865522174535,
      "grad_norm": 0.14969790642361366,
      "learning_rate": 0.00011354056904114347,
      "loss": 0.644,
      "step": 5291
    },
    {
      "epoch": 0.47317596566523606,
      "grad_norm": 0.1470521421929107,
      "learning_rate": 0.00011351187508012496,
      "loss": 0.6641,
      "step": 5292
    },
    {
      "epoch": 0.4732653791130186,
      "grad_norm": 0.13543894391166256,
      "learning_rate": 0.0001134831799858818,
      "loss": 0.6594,
      "step": 5293
    },
    {
      "epoch": 0.47335479256080115,
      "grad_norm": 0.15452497646945132,
      "learning_rate": 0.00011345448376082064,
      "loss": 0.6903,
      "step": 5294
    },
    {
      "epoch": 0.47344420600858367,
      "grad_norm": 0.18498980954480254,
      "learning_rate": 0.00011342578640734816,
      "loss": 0.376,
      "step": 5295
    },
    {
      "epoch": 0.47353361945636624,
      "grad_norm": 0.16367689763321733,
      "learning_rate": 0.00011339708792787119,
      "loss": 0.6934,
      "step": 5296
    },
    {
      "epoch": 0.4736230329041488,
      "grad_norm": 0.15716164210211486,
      "learning_rate": 0.00011336838832479661,
      "loss": 0.6976,
      "step": 5297
    },
    {
      "epoch": 0.4737124463519313,
      "grad_norm": 0.1612341091528699,
      "learning_rate": 0.00011333968760053149,
      "loss": 0.6731,
      "step": 5298
    },
    {
      "epoch": 0.4738018597997139,
      "grad_norm": 0.15247739661630796,
      "learning_rate": 0.00011331098575748284,
      "loss": 0.6441,
      "step": 5299
    },
    {
      "epoch": 0.4738912732474964,
      "grad_norm": 0.15752277779510043,
      "learning_rate": 0.00011328228279805792,
      "loss": 0.6844,
      "step": 5300
    },
    {
      "epoch": 0.473980686695279,
      "grad_norm": 0.14698370903982366,
      "learning_rate": 0.00011325357872466398,
      "loss": 0.6848,
      "step": 5301
    },
    {
      "epoch": 0.4740701001430615,
      "grad_norm": 0.13197815427457377,
      "learning_rate": 0.00011322487353970838,
      "loss": 0.6657,
      "step": 5302
    },
    {
      "epoch": 0.4741595135908441,
      "grad_norm": 0.15810728664547083,
      "learning_rate": 0.00011319616724559866,
      "loss": 0.6823,
      "step": 5303
    },
    {
      "epoch": 0.4742489270386266,
      "grad_norm": 0.14659846942840293,
      "learning_rate": 0.00011316745984474226,
      "loss": 0.6655,
      "step": 5304
    },
    {
      "epoch": 0.47433834048640916,
      "grad_norm": 0.14115221881417564,
      "learning_rate": 0.00011313875133954695,
      "loss": 0.6287,
      "step": 5305
    },
    {
      "epoch": 0.4744277539341917,
      "grad_norm": 0.15108783547656163,
      "learning_rate": 0.00011311004173242041,
      "loss": 0.6852,
      "step": 5306
    },
    {
      "epoch": 0.47451716738197425,
      "grad_norm": 0.1197757698587747,
      "learning_rate": 0.0001130813310257705,
      "loss": 0.626,
      "step": 5307
    },
    {
      "epoch": 0.4746065808297568,
      "grad_norm": 0.14106887336572604,
      "learning_rate": 0.00011305261922200519,
      "loss": 0.6771,
      "step": 5308
    },
    {
      "epoch": 0.47469599427753933,
      "grad_norm": 0.18286947590070762,
      "learning_rate": 0.00011302390632353241,
      "loss": 0.3826,
      "step": 5309
    },
    {
      "epoch": 0.4747854077253219,
      "grad_norm": 0.14481078978156606,
      "learning_rate": 0.00011299519233276037,
      "loss": 0.7106,
      "step": 5310
    },
    {
      "epoch": 0.4748748211731044,
      "grad_norm": 0.16781055417846139,
      "learning_rate": 0.00011296647725209726,
      "loss": 0.6724,
      "step": 5311
    },
    {
      "epoch": 0.474964234620887,
      "grad_norm": 0.15450327003801465,
      "learning_rate": 0.00011293776108395135,
      "loss": 0.698,
      "step": 5312
    },
    {
      "epoch": 0.4750536480686695,
      "grad_norm": 0.15858450924499223,
      "learning_rate": 0.00011290904383073104,
      "loss": 0.6513,
      "step": 5313
    },
    {
      "epoch": 0.4751430615164521,
      "grad_norm": 0.15676422755117328,
      "learning_rate": 0.0001128803254948448,
      "loss": 0.6324,
      "step": 5314
    },
    {
      "epoch": 0.4752324749642346,
      "grad_norm": 0.1494580699829127,
      "learning_rate": 0.00011285160607870124,
      "loss": 0.6661,
      "step": 5315
    },
    {
      "epoch": 0.47532188841201717,
      "grad_norm": 0.1498424170941588,
      "learning_rate": 0.000112822885584709,
      "loss": 0.6657,
      "step": 5316
    },
    {
      "epoch": 0.47541130185979974,
      "grad_norm": 0.15277178963823,
      "learning_rate": 0.0001127941640152768,
      "loss": 0.7175,
      "step": 5317
    },
    {
      "epoch": 0.47550071530758226,
      "grad_norm": 0.15742007194904806,
      "learning_rate": 0.00011276544137281355,
      "loss": 0.6819,
      "step": 5318
    },
    {
      "epoch": 0.4755901287553648,
      "grad_norm": 0.1892551115450373,
      "learning_rate": 0.00011273671765972813,
      "loss": 0.7055,
      "step": 5319
    },
    {
      "epoch": 0.47567954220314734,
      "grad_norm": 0.15537213407356507,
      "learning_rate": 0.00011270799287842957,
      "loss": 0.6992,
      "step": 5320
    },
    {
      "epoch": 0.4757689556509299,
      "grad_norm": 0.15084247516161936,
      "learning_rate": 0.00011267926703132703,
      "loss": 0.6715,
      "step": 5321
    },
    {
      "epoch": 0.47585836909871243,
      "grad_norm": 0.1333351169458907,
      "learning_rate": 0.00011265054012082967,
      "loss": 0.6399,
      "step": 5322
    },
    {
      "epoch": 0.475947782546495,
      "grad_norm": 0.16090538675833604,
      "learning_rate": 0.00011262181214934677,
      "loss": 0.6824,
      "step": 5323
    },
    {
      "epoch": 0.4760371959942775,
      "grad_norm": 0.14018437131296274,
      "learning_rate": 0.00011259308311928771,
      "loss": 0.6421,
      "step": 5324
    },
    {
      "epoch": 0.4761266094420601,
      "grad_norm": 0.17645158186493531,
      "learning_rate": 0.00011256435303306203,
      "loss": 0.6463,
      "step": 5325
    },
    {
      "epoch": 0.4762160228898426,
      "grad_norm": 0.1695147959732175,
      "learning_rate": 0.00011253562189307921,
      "loss": 0.6922,
      "step": 5326
    },
    {
      "epoch": 0.4763054363376252,
      "grad_norm": 0.1520365039846981,
      "learning_rate": 0.0001125068897017489,
      "loss": 0.6998,
      "step": 5327
    },
    {
      "epoch": 0.47639484978540775,
      "grad_norm": 0.1635632930126103,
      "learning_rate": 0.00011247815646148087,
      "loss": 0.7049,
      "step": 5328
    },
    {
      "epoch": 0.47648426323319026,
      "grad_norm": 0.17810943737059076,
      "learning_rate": 0.00011244942217468495,
      "loss": 0.7202,
      "step": 5329
    },
    {
      "epoch": 0.47657367668097284,
      "grad_norm": 0.12432961482038735,
      "learning_rate": 0.00011242068684377101,
      "loss": 0.6648,
      "step": 5330
    },
    {
      "epoch": 0.47666309012875535,
      "grad_norm": 0.1771332365724838,
      "learning_rate": 0.00011239195047114903,
      "loss": 0.6742,
      "step": 5331
    },
    {
      "epoch": 0.4767525035765379,
      "grad_norm": 0.13398526793054932,
      "learning_rate": 0.00011236321305922919,
      "loss": 0.6457,
      "step": 5332
    },
    {
      "epoch": 0.47684191702432044,
      "grad_norm": 0.1526467211129559,
      "learning_rate": 0.00011233447461042157,
      "loss": 0.712,
      "step": 5333
    },
    {
      "epoch": 0.476931330472103,
      "grad_norm": 0.15031796827959992,
      "learning_rate": 0.00011230573512713644,
      "loss": 0.6623,
      "step": 5334
    },
    {
      "epoch": 0.4770207439198855,
      "grad_norm": 0.155907686458449,
      "learning_rate": 0.00011227699461178423,
      "loss": 0.701,
      "step": 5335
    },
    {
      "epoch": 0.4771101573676681,
      "grad_norm": 0.1409775519071196,
      "learning_rate": 0.00011224825306677527,
      "loss": 0.6636,
      "step": 5336
    },
    {
      "epoch": 0.47719957081545067,
      "grad_norm": 0.15608088312931143,
      "learning_rate": 0.00011221951049452009,
      "loss": 0.6645,
      "step": 5337
    },
    {
      "epoch": 0.4772889842632332,
      "grad_norm": 0.15194502543043456,
      "learning_rate": 0.00011219076689742936,
      "loss": 0.6642,
      "step": 5338
    },
    {
      "epoch": 0.47737839771101576,
      "grad_norm": 0.15426098692270143,
      "learning_rate": 0.00011216202227791373,
      "loss": 0.6738,
      "step": 5339
    },
    {
      "epoch": 0.47746781115879827,
      "grad_norm": 0.13932702474933978,
      "learning_rate": 0.00011213327663838396,
      "loss": 0.6882,
      "step": 5340
    },
    {
      "epoch": 0.47755722460658084,
      "grad_norm": 0.15316032940534238,
      "learning_rate": 0.00011210452998125094,
      "loss": 0.7094,
      "step": 5341
    },
    {
      "epoch": 0.47764663805436336,
      "grad_norm": 0.132519496870109,
      "learning_rate": 0.00011207578230892562,
      "loss": 0.6506,
      "step": 5342
    },
    {
      "epoch": 0.47773605150214593,
      "grad_norm": 0.20883146531737348,
      "learning_rate": 0.00011204703362381903,
      "loss": 0.7066,
      "step": 5343
    },
    {
      "epoch": 0.47782546494992845,
      "grad_norm": 0.14118349486362994,
      "learning_rate": 0.00011201828392834223,
      "loss": 0.6756,
      "step": 5344
    },
    {
      "epoch": 0.477914878397711,
      "grad_norm": 0.174530427766596,
      "learning_rate": 0.00011198953322490653,
      "loss": 0.6965,
      "step": 5345
    },
    {
      "epoch": 0.4780042918454936,
      "grad_norm": 0.14133736206024575,
      "learning_rate": 0.00011196078151592314,
      "loss": 0.6585,
      "step": 5346
    },
    {
      "epoch": 0.4780937052932761,
      "grad_norm": 0.1251727516774447,
      "learning_rate": 0.00011193202880380343,
      "loss": 0.6668,
      "step": 5347
    },
    {
      "epoch": 0.4781831187410587,
      "grad_norm": 0.1553072552287919,
      "learning_rate": 0.00011190327509095889,
      "loss": 0.6692,
      "step": 5348
    },
    {
      "epoch": 0.4782725321888412,
      "grad_norm": 0.16004797733263929,
      "learning_rate": 0.00011187452037980104,
      "loss": 0.6654,
      "step": 5349
    },
    {
      "epoch": 0.47836194563662376,
      "grad_norm": 0.16070604380856007,
      "learning_rate": 0.0001118457646727415,
      "loss": 0.6833,
      "step": 5350
    },
    {
      "epoch": 0.4784513590844063,
      "grad_norm": 0.15478591016849022,
      "learning_rate": 0.00011181700797219199,
      "loss": 0.6817,
      "step": 5351
    },
    {
      "epoch": 0.47854077253218885,
      "grad_norm": 0.1286921382970697,
      "learning_rate": 0.0001117882502805643,
      "loss": 0.6148,
      "step": 5352
    },
    {
      "epoch": 0.47863018597997137,
      "grad_norm": 0.16259533965014633,
      "learning_rate": 0.00011175949160027031,
      "loss": 0.7022,
      "step": 5353
    },
    {
      "epoch": 0.47871959942775394,
      "grad_norm": 0.13124180111280082,
      "learning_rate": 0.0001117307319337219,
      "loss": 0.6514,
      "step": 5354
    },
    {
      "epoch": 0.47880901287553645,
      "grad_norm": 0.14913739248031152,
      "learning_rate": 0.00011170197128333122,
      "loss": 0.6937,
      "step": 5355
    },
    {
      "epoch": 0.478898426323319,
      "grad_norm": 0.13687426239419867,
      "learning_rate": 0.00011167320965151033,
      "loss": 0.5807,
      "step": 5356
    },
    {
      "epoch": 0.4789878397711016,
      "grad_norm": 0.15934127194978784,
      "learning_rate": 0.00011164444704067145,
      "loss": 0.677,
      "step": 5357
    },
    {
      "epoch": 0.4790772532188841,
      "grad_norm": 0.153238434208566,
      "learning_rate": 0.00011161568345322684,
      "loss": 0.6345,
      "step": 5358
    },
    {
      "epoch": 0.4791666666666667,
      "grad_norm": 0.13195683234764063,
      "learning_rate": 0.00011158691889158892,
      "loss": 0.6554,
      "step": 5359
    },
    {
      "epoch": 0.4792560801144492,
      "grad_norm": 0.14714058284653755,
      "learning_rate": 0.00011155815335817011,
      "loss": 0.662,
      "step": 5360
    },
    {
      "epoch": 0.4793454935622318,
      "grad_norm": 0.14039260052986682,
      "learning_rate": 0.00011152938685538287,
      "loss": 0.6627,
      "step": 5361
    },
    {
      "epoch": 0.4794349070100143,
      "grad_norm": 0.14208732107231278,
      "learning_rate": 0.00011150061938563993,
      "loss": 0.6339,
      "step": 5362
    },
    {
      "epoch": 0.47952432045779686,
      "grad_norm": 0.15175803780701966,
      "learning_rate": 0.00011147185095135395,
      "loss": 0.6854,
      "step": 5363
    },
    {
      "epoch": 0.4796137339055794,
      "grad_norm": 0.15428692153834517,
      "learning_rate": 0.00011144308155493763,
      "loss": 0.6518,
      "step": 5364
    },
    {
      "epoch": 0.47970314735336195,
      "grad_norm": 0.15213318792713942,
      "learning_rate": 0.00011141431119880392,
      "loss": 0.7187,
      "step": 5365
    },
    {
      "epoch": 0.4797925608011445,
      "grad_norm": 0.13635168265353204,
      "learning_rate": 0.00011138553988536571,
      "loss": 0.6417,
      "step": 5366
    },
    {
      "epoch": 0.47988197424892703,
      "grad_norm": 0.15109455723134702,
      "learning_rate": 0.000111356767617036,
      "loss": 0.6753,
      "step": 5367
    },
    {
      "epoch": 0.4799713876967096,
      "grad_norm": 0.1457211139873238,
      "learning_rate": 0.00011132799439622792,
      "loss": 0.6647,
      "step": 5368
    },
    {
      "epoch": 0.4800608011444921,
      "grad_norm": 0.1792376228132603,
      "learning_rate": 0.00011129922022535464,
      "loss": 0.4066,
      "step": 5369
    },
    {
      "epoch": 0.4801502145922747,
      "grad_norm": 0.1441336741837067,
      "learning_rate": 0.0001112704451068294,
      "loss": 0.6673,
      "step": 5370
    },
    {
      "epoch": 0.4802396280400572,
      "grad_norm": 0.16330723645268533,
      "learning_rate": 0.0001112416690430655,
      "loss": 0.6847,
      "step": 5371
    },
    {
      "epoch": 0.4803290414878398,
      "grad_norm": 0.15137368718345223,
      "learning_rate": 0.00011121289203647644,
      "loss": 0.6483,
      "step": 5372
    },
    {
      "epoch": 0.4804184549356223,
      "grad_norm": 0.1595295655151098,
      "learning_rate": 0.00011118411408947567,
      "loss": 0.6801,
      "step": 5373
    },
    {
      "epoch": 0.48050786838340487,
      "grad_norm": 0.15824543142568093,
      "learning_rate": 0.00011115533520447674,
      "loss": 0.6953,
      "step": 5374
    },
    {
      "epoch": 0.4805972818311874,
      "grad_norm": 0.1578428467361544,
      "learning_rate": 0.00011112655538389331,
      "loss": 0.6631,
      "step": 5375
    },
    {
      "epoch": 0.48068669527896996,
      "grad_norm": 0.1318122768893374,
      "learning_rate": 0.00011109777463013915,
      "loss": 0.6036,
      "step": 5376
    },
    {
      "epoch": 0.4807761087267525,
      "grad_norm": 0.1599845902779309,
      "learning_rate": 0.000111068992945628,
      "loss": 0.6714,
      "step": 5377
    },
    {
      "epoch": 0.48086552217453504,
      "grad_norm": 0.1765122678520295,
      "learning_rate": 0.00011104021033277379,
      "loss": 0.7205,
      "step": 5378
    },
    {
      "epoch": 0.4809549356223176,
      "grad_norm": 0.16002641076418295,
      "learning_rate": 0.00011101142679399049,
      "loss": 0.6484,
      "step": 5379
    },
    {
      "epoch": 0.48104434907010013,
      "grad_norm": 0.1504449525945059,
      "learning_rate": 0.00011098264233169211,
      "loss": 0.6689,
      "step": 5380
    },
    {
      "epoch": 0.4811337625178827,
      "grad_norm": 0.17677472719224546,
      "learning_rate": 0.00011095385694829278,
      "loss": 0.7309,
      "step": 5381
    },
    {
      "epoch": 0.4812231759656652,
      "grad_norm": 0.14893162960085407,
      "learning_rate": 0.0001109250706462067,
      "loss": 0.6633,
      "step": 5382
    },
    {
      "epoch": 0.4813125894134478,
      "grad_norm": 0.16392068286223277,
      "learning_rate": 0.00011089628342784814,
      "loss": 0.6742,
      "step": 5383
    },
    {
      "epoch": 0.4814020028612303,
      "grad_norm": 0.18936989942658655,
      "learning_rate": 0.00011086749529563143,
      "loss": 0.382,
      "step": 5384
    },
    {
      "epoch": 0.4814914163090129,
      "grad_norm": 0.15786637429089345,
      "learning_rate": 0.00011083870625197103,
      "loss": 0.6271,
      "step": 5385
    },
    {
      "epoch": 0.48158082975679545,
      "grad_norm": 0.13627259360698257,
      "learning_rate": 0.00011080991629928143,
      "loss": 0.6516,
      "step": 5386
    },
    {
      "epoch": 0.48167024320457796,
      "grad_norm": 0.15304636794799376,
      "learning_rate": 0.00011078112543997723,
      "loss": 0.658,
      "step": 5387
    },
    {
      "epoch": 0.48175965665236054,
      "grad_norm": 0.17372187256404292,
      "learning_rate": 0.00011075233367647302,
      "loss": 0.6972,
      "step": 5388
    },
    {
      "epoch": 0.48184907010014305,
      "grad_norm": 0.1480957667079636,
      "learning_rate": 0.00011072354101118357,
      "loss": 0.6676,
      "step": 5389
    },
    {
      "epoch": 0.4819384835479256,
      "grad_norm": 0.14071174233874545,
      "learning_rate": 0.00011069474744652371,
      "loss": 0.6151,
      "step": 5390
    },
    {
      "epoch": 0.48202789699570814,
      "grad_norm": 0.145489276817444,
      "learning_rate": 0.00011066595298490827,
      "loss": 0.6486,
      "step": 5391
    },
    {
      "epoch": 0.4821173104434907,
      "grad_norm": 0.16067294331256335,
      "learning_rate": 0.00011063715762875225,
      "loss": 0.628,
      "step": 5392
    },
    {
      "epoch": 0.4822067238912732,
      "grad_norm": 0.16986864248071917,
      "learning_rate": 0.00011060836138047066,
      "loss": 0.7202,
      "step": 5393
    },
    {
      "epoch": 0.4822961373390558,
      "grad_norm": 0.151498650637606,
      "learning_rate": 0.00011057956424247861,
      "loss": 0.6263,
      "step": 5394
    },
    {
      "epoch": 0.4823855507868383,
      "grad_norm": 0.16504152711192804,
      "learning_rate": 0.00011055076621719132,
      "loss": 0.6991,
      "step": 5395
    },
    {
      "epoch": 0.4824749642346209,
      "grad_norm": 0.15205485182003303,
      "learning_rate": 0.00011052196730702396,
      "loss": 0.6992,
      "step": 5396
    },
    {
      "epoch": 0.48256437768240346,
      "grad_norm": 0.15466192186899783,
      "learning_rate": 0.00011049316751439194,
      "loss": 0.6279,
      "step": 5397
    },
    {
      "epoch": 0.48265379113018597,
      "grad_norm": 0.16854529908711532,
      "learning_rate": 0.00011046436684171062,
      "loss": 0.7001,
      "step": 5398
    },
    {
      "epoch": 0.48274320457796854,
      "grad_norm": 0.167713637289923,
      "learning_rate": 0.00011043556529139549,
      "loss": 0.6473,
      "step": 5399
    },
    {
      "epoch": 0.48283261802575106,
      "grad_norm": 0.15859204156280093,
      "learning_rate": 0.00011040676286586211,
      "loss": 0.6587,
      "step": 5400
    },
    {
      "epoch": 0.48292203147353363,
      "grad_norm": 0.14299422689682043,
      "learning_rate": 0.00011037795956752608,
      "loss": 0.6746,
      "step": 5401
    },
    {
      "epoch": 0.48301144492131615,
      "grad_norm": 0.17659587450550385,
      "learning_rate": 0.00011034915539880313,
      "loss": 0.7332,
      "step": 5402
    },
    {
      "epoch": 0.4831008583690987,
      "grad_norm": 0.1345903862149813,
      "learning_rate": 0.00011032035036210901,
      "loss": 0.6552,
      "step": 5403
    },
    {
      "epoch": 0.48319027181688123,
      "grad_norm": 0.13419278264198042,
      "learning_rate": 0.00011029154445985961,
      "loss": 0.6749,
      "step": 5404
    },
    {
      "epoch": 0.4832796852646638,
      "grad_norm": 0.1615361399030392,
      "learning_rate": 0.00011026273769447076,
      "loss": 0.6748,
      "step": 5405
    },
    {
      "epoch": 0.4833690987124464,
      "grad_norm": 0.1457826104456252,
      "learning_rate": 0.00011023393006835847,
      "loss": 0.6712,
      "step": 5406
    },
    {
      "epoch": 0.4834585121602289,
      "grad_norm": 0.1539585325128839,
      "learning_rate": 0.00011020512158393887,
      "loss": 0.6215,
      "step": 5407
    },
    {
      "epoch": 0.48354792560801146,
      "grad_norm": 0.15909192778462256,
      "learning_rate": 0.00011017631224362803,
      "loss": 0.6665,
      "step": 5408
    },
    {
      "epoch": 0.483637339055794,
      "grad_norm": 0.14940036560394387,
      "learning_rate": 0.00011014750204984217,
      "loss": 0.6863,
      "step": 5409
    },
    {
      "epoch": 0.48372675250357655,
      "grad_norm": 0.13865716649110307,
      "learning_rate": 0.00011011869100499758,
      "loss": 0.6756,
      "step": 5410
    },
    {
      "epoch": 0.48381616595135907,
      "grad_norm": 0.14608580594848894,
      "learning_rate": 0.00011008987911151058,
      "loss": 0.6375,
      "step": 5411
    },
    {
      "epoch": 0.48390557939914164,
      "grad_norm": 0.17149458068300386,
      "learning_rate": 0.00011006106637179763,
      "loss": 0.6607,
      "step": 5412
    },
    {
      "epoch": 0.48399499284692415,
      "grad_norm": 0.15332831794418234,
      "learning_rate": 0.00011003225278827515,
      "loss": 0.6525,
      "step": 5413
    },
    {
      "epoch": 0.4840844062947067,
      "grad_norm": 0.15396882143536558,
      "learning_rate": 0.0001100034383633598,
      "loss": 0.6763,
      "step": 5414
    },
    {
      "epoch": 0.4841738197424893,
      "grad_norm": 0.1384390797159068,
      "learning_rate": 0.00010997462309946811,
      "loss": 0.638,
      "step": 5415
    },
    {
      "epoch": 0.4842632331902718,
      "grad_norm": 0.14548299295110378,
      "learning_rate": 0.00010994580699901684,
      "loss": 0.6473,
      "step": 5416
    },
    {
      "epoch": 0.4843526466380544,
      "grad_norm": 0.15709917819290242,
      "learning_rate": 0.00010991699006442275,
      "loss": 0.6798,
      "step": 5417
    },
    {
      "epoch": 0.4844420600858369,
      "grad_norm": 0.1428133041147595,
      "learning_rate": 0.00010988817229810268,
      "loss": 0.6572,
      "step": 5418
    },
    {
      "epoch": 0.4845314735336195,
      "grad_norm": 0.14985842250914994,
      "learning_rate": 0.00010985935370247355,
      "loss": 0.6603,
      "step": 5419
    },
    {
      "epoch": 0.484620886981402,
      "grad_norm": 0.1581143126814748,
      "learning_rate": 0.00010983053427995234,
      "loss": 0.6524,
      "step": 5420
    },
    {
      "epoch": 0.48471030042918456,
      "grad_norm": 0.12831456869081498,
      "learning_rate": 0.0001098017140329561,
      "loss": 0.6258,
      "step": 5421
    },
    {
      "epoch": 0.4847997138769671,
      "grad_norm": 0.15000581719765024,
      "learning_rate": 0.0001097728929639019,
      "loss": 0.6783,
      "step": 5422
    },
    {
      "epoch": 0.48488912732474965,
      "grad_norm": 0.12708928989985824,
      "learning_rate": 0.00010974407107520697,
      "loss": 0.6526,
      "step": 5423
    },
    {
      "epoch": 0.48497854077253216,
      "grad_norm": 0.16587131245383052,
      "learning_rate": 0.0001097152483692886,
      "loss": 0.7172,
      "step": 5424
    },
    {
      "epoch": 0.48506795422031473,
      "grad_norm": 0.15140507142186604,
      "learning_rate": 0.00010968642484856406,
      "loss": 0.6922,
      "step": 5425
    },
    {
      "epoch": 0.4851573676680973,
      "grad_norm": 0.15849648697697222,
      "learning_rate": 0.0001096576005154508,
      "loss": 0.6371,
      "step": 5426
    },
    {
      "epoch": 0.4852467811158798,
      "grad_norm": 0.1918548002849722,
      "learning_rate": 0.0001096287753723662,
      "loss": 0.3997,
      "step": 5427
    },
    {
      "epoch": 0.4853361945636624,
      "grad_norm": 0.13784759513106076,
      "learning_rate": 0.00010959994942172786,
      "loss": 0.6633,
      "step": 5428
    },
    {
      "epoch": 0.4854256080114449,
      "grad_norm": 0.15690644470126858,
      "learning_rate": 0.00010957112266595338,
      "loss": 0.6588,
      "step": 5429
    },
    {
      "epoch": 0.4855150214592275,
      "grad_norm": 0.15265301116344435,
      "learning_rate": 0.00010954229510746035,
      "loss": 0.658,
      "step": 5430
    },
    {
      "epoch": 0.48560443490701,
      "grad_norm": 0.1587642296152591,
      "learning_rate": 0.0001095134667486666,
      "loss": 0.6721,
      "step": 5431
    },
    {
      "epoch": 0.48569384835479257,
      "grad_norm": 0.13515183744858264,
      "learning_rate": 0.00010948463759198986,
      "loss": 0.6539,
      "step": 5432
    },
    {
      "epoch": 0.4857832618025751,
      "grad_norm": 0.14574834478733717,
      "learning_rate": 0.00010945580763984801,
      "loss": 0.6677,
      "step": 5433
    },
    {
      "epoch": 0.48587267525035766,
      "grad_norm": 0.16961673158844673,
      "learning_rate": 0.00010942697689465902,
      "loss": 0.7085,
      "step": 5434
    },
    {
      "epoch": 0.4859620886981402,
      "grad_norm": 0.13276276371311238,
      "learning_rate": 0.00010939814535884083,
      "loss": 0.6667,
      "step": 5435
    },
    {
      "epoch": 0.48605150214592274,
      "grad_norm": 0.16100382939200775,
      "learning_rate": 0.00010936931303481158,
      "loss": 0.6815,
      "step": 5436
    },
    {
      "epoch": 0.4861409155937053,
      "grad_norm": 0.16373176880078164,
      "learning_rate": 0.00010934047992498932,
      "loss": 0.6891,
      "step": 5437
    },
    {
      "epoch": 0.48623032904148783,
      "grad_norm": 0.18010494606053074,
      "learning_rate": 0.00010931164603179231,
      "loss": 0.7207,
      "step": 5438
    },
    {
      "epoch": 0.4863197424892704,
      "grad_norm": 0.16114019322544376,
      "learning_rate": 0.0001092828113576388,
      "loss": 0.6789,
      "step": 5439
    },
    {
      "epoch": 0.4864091559370529,
      "grad_norm": 0.14226711636032602,
      "learning_rate": 0.00010925397590494712,
      "loss": 0.6382,
      "step": 5440
    },
    {
      "epoch": 0.4864985693848355,
      "grad_norm": 0.15463002157254954,
      "learning_rate": 0.00010922513967613563,
      "loss": 0.6879,
      "step": 5441
    },
    {
      "epoch": 0.486587982832618,
      "grad_norm": 0.13147474901617898,
      "learning_rate": 0.00010919630267362282,
      "loss": 0.629,
      "step": 5442
    },
    {
      "epoch": 0.4866773962804006,
      "grad_norm": 0.1516609528946599,
      "learning_rate": 0.00010916746489982723,
      "loss": 0.6447,
      "step": 5443
    },
    {
      "epoch": 0.4867668097281831,
      "grad_norm": 0.1599016320277993,
      "learning_rate": 0.00010913862635716741,
      "loss": 0.6722,
      "step": 5444
    },
    {
      "epoch": 0.48685622317596566,
      "grad_norm": 0.14680762234836076,
      "learning_rate": 0.00010910978704806203,
      "loss": 0.6568,
      "step": 5445
    },
    {
      "epoch": 0.48694563662374823,
      "grad_norm": 0.15918168863397625,
      "learning_rate": 0.00010908094697492983,
      "loss": 0.669,
      "step": 5446
    },
    {
      "epoch": 0.48703505007153075,
      "grad_norm": 0.17010190120508245,
      "learning_rate": 0.00010905210614018957,
      "loss": 0.674,
      "step": 5447
    },
    {
      "epoch": 0.4871244635193133,
      "grad_norm": 0.14548697279205078,
      "learning_rate": 0.0001090232645462601,
      "loss": 0.698,
      "step": 5448
    },
    {
      "epoch": 0.48721387696709584,
      "grad_norm": 0.16101247413106504,
      "learning_rate": 0.00010899442219556033,
      "loss": 0.5997,
      "step": 5449
    },
    {
      "epoch": 0.4873032904148784,
      "grad_norm": 0.14098292630699358,
      "learning_rate": 0.00010896557909050927,
      "loss": 0.6536,
      "step": 5450
    },
    {
      "epoch": 0.4873927038626609,
      "grad_norm": 0.14502308696466884,
      "learning_rate": 0.00010893673523352585,
      "loss": 0.663,
      "step": 5451
    },
    {
      "epoch": 0.4874821173104435,
      "grad_norm": 0.1638721283154951,
      "learning_rate": 0.00010890789062702926,
      "loss": 0.7218,
      "step": 5452
    },
    {
      "epoch": 0.487571530758226,
      "grad_norm": 0.15713035178566404,
      "learning_rate": 0.00010887904527343866,
      "loss": 0.6627,
      "step": 5453
    },
    {
      "epoch": 0.4876609442060086,
      "grad_norm": 0.14826791482252402,
      "learning_rate": 0.00010885019917517325,
      "loss": 0.6789,
      "step": 5454
    },
    {
      "epoch": 0.48775035765379116,
      "grad_norm": 0.14691909035718867,
      "learning_rate": 0.00010882135233465232,
      "loss": 0.654,
      "step": 5455
    },
    {
      "epoch": 0.48783977110157367,
      "grad_norm": 0.14848053934893984,
      "learning_rate": 0.00010879250475429523,
      "loss": 0.6301,
      "step": 5456
    },
    {
      "epoch": 0.48792918454935624,
      "grad_norm": 0.14019868928094742,
      "learning_rate": 0.0001087636564365214,
      "loss": 0.693,
      "step": 5457
    },
    {
      "epoch": 0.48801859799713876,
      "grad_norm": 0.16370612541018817,
      "learning_rate": 0.00010873480738375024,
      "loss": 0.7175,
      "step": 5458
    },
    {
      "epoch": 0.48810801144492133,
      "grad_norm": 0.16867865583764305,
      "learning_rate": 0.00010870595759840137,
      "loss": 0.6699,
      "step": 5459
    },
    {
      "epoch": 0.48819742489270385,
      "grad_norm": 0.18911954943544174,
      "learning_rate": 0.00010867710708289434,
      "loss": 0.3878,
      "step": 5460
    },
    {
      "epoch": 0.4882868383404864,
      "grad_norm": 0.14884861283824463,
      "learning_rate": 0.00010864825583964882,
      "loss": 0.6629,
      "step": 5461
    },
    {
      "epoch": 0.48837625178826893,
      "grad_norm": 0.13199841646868626,
      "learning_rate": 0.00010861940387108451,
      "loss": 0.6187,
      "step": 5462
    },
    {
      "epoch": 0.4884656652360515,
      "grad_norm": 0.14237317957838833,
      "learning_rate": 0.00010859055117962125,
      "loss": 0.6568,
      "step": 5463
    },
    {
      "epoch": 0.488555078683834,
      "grad_norm": 0.14410386599097044,
      "learning_rate": 0.00010856169776767882,
      "loss": 0.6273,
      "step": 5464
    },
    {
      "epoch": 0.4886444921316166,
      "grad_norm": 0.14315432657875454,
      "learning_rate": 0.0001085328436376771,
      "loss": 0.6254,
      "step": 5465
    },
    {
      "epoch": 0.48873390557939916,
      "grad_norm": 0.16155882238350183,
      "learning_rate": 0.00010850398879203611,
      "loss": 0.6463,
      "step": 5466
    },
    {
      "epoch": 0.4888233190271817,
      "grad_norm": 0.15208324956490143,
      "learning_rate": 0.00010847513323317588,
      "loss": 0.6675,
      "step": 5467
    },
    {
      "epoch": 0.48891273247496425,
      "grad_norm": 0.15892753331623105,
      "learning_rate": 0.00010844627696351644,
      "loss": 0.6556,
      "step": 5468
    },
    {
      "epoch": 0.48900214592274677,
      "grad_norm": 0.1560424135879764,
      "learning_rate": 0.00010841741998547794,
      "loss": 0.64,
      "step": 5469
    },
    {
      "epoch": 0.48909155937052934,
      "grad_norm": 0.16086566004744565,
      "learning_rate": 0.00010838856230148063,
      "loss": 0.7039,
      "step": 5470
    },
    {
      "epoch": 0.48918097281831185,
      "grad_norm": 0.14201337053229215,
      "learning_rate": 0.0001083597039139447,
      "loss": 0.6835,
      "step": 5471
    },
    {
      "epoch": 0.4892703862660944,
      "grad_norm": 0.15529701857877704,
      "learning_rate": 0.00010833084482529048,
      "loss": 0.6813,
      "step": 5472
    },
    {
      "epoch": 0.48935979971387694,
      "grad_norm": 0.1615214920596154,
      "learning_rate": 0.0001083019850379384,
      "loss": 0.7024,
      "step": 5473
    },
    {
      "epoch": 0.4894492131616595,
      "grad_norm": 0.15513411439412236,
      "learning_rate": 0.00010827312455430884,
      "loss": 0.6952,
      "step": 5474
    },
    {
      "epoch": 0.4895386266094421,
      "grad_norm": 0.14308983618604215,
      "learning_rate": 0.00010824426337682235,
      "loss": 0.6635,
      "step": 5475
    },
    {
      "epoch": 0.4896280400572246,
      "grad_norm": 0.12549198512807894,
      "learning_rate": 0.00010821540150789939,
      "loss": 0.6488,
      "step": 5476
    },
    {
      "epoch": 0.48971745350500717,
      "grad_norm": 0.15695460386239224,
      "learning_rate": 0.00010818653894996067,
      "loss": 0.6735,
      "step": 5477
    },
    {
      "epoch": 0.4898068669527897,
      "grad_norm": 0.14282583977925967,
      "learning_rate": 0.00010815767570542681,
      "loss": 0.6659,
      "step": 5478
    },
    {
      "epoch": 0.48989628040057226,
      "grad_norm": 0.13334443120043157,
      "learning_rate": 0.00010812881177671852,
      "loss": 0.6487,
      "step": 5479
    },
    {
      "epoch": 0.4899856938483548,
      "grad_norm": 0.15421152681675088,
      "learning_rate": 0.00010809994716625662,
      "loss": 0.6592,
      "step": 5480
    },
    {
      "epoch": 0.49007510729613735,
      "grad_norm": 0.1637890186891768,
      "learning_rate": 0.00010807108187646195,
      "loss": 0.7055,
      "step": 5481
    },
    {
      "epoch": 0.49016452074391986,
      "grad_norm": 0.13874884822846964,
      "learning_rate": 0.00010804221590975535,
      "loss": 0.6548,
      "step": 5482
    },
    {
      "epoch": 0.49025393419170243,
      "grad_norm": 0.15024119441989675,
      "learning_rate": 0.00010801334926855784,
      "loss": 0.6876,
      "step": 5483
    },
    {
      "epoch": 0.490343347639485,
      "grad_norm": 0.15680436194286965,
      "learning_rate": 0.0001079844819552904,
      "loss": 0.6815,
      "step": 5484
    },
    {
      "epoch": 0.4904327610872675,
      "grad_norm": 0.1652935018642029,
      "learning_rate": 0.0001079556139723741,
      "loss": 0.6279,
      "step": 5485
    },
    {
      "epoch": 0.4905221745350501,
      "grad_norm": 0.13558485972638148,
      "learning_rate": 0.00010792674532223006,
      "loss": 0.6379,
      "step": 5486
    },
    {
      "epoch": 0.4906115879828326,
      "grad_norm": 0.16179861946985413,
      "learning_rate": 0.00010789787600727948,
      "loss": 0.6984,
      "step": 5487
    },
    {
      "epoch": 0.4907010014306152,
      "grad_norm": 0.1564344493659494,
      "learning_rate": 0.00010786900602994359,
      "loss": 0.6627,
      "step": 5488
    },
    {
      "epoch": 0.4907904148783977,
      "grad_norm": 0.15480416888820894,
      "learning_rate": 0.00010784013539264359,
      "loss": 0.699,
      "step": 5489
    },
    {
      "epoch": 0.49087982832618027,
      "grad_norm": 0.23792333223125153,
      "learning_rate": 0.00010781126409780098,
      "loss": 0.4064,
      "step": 5490
    },
    {
      "epoch": 0.4909692417739628,
      "grad_norm": 0.16224693662956438,
      "learning_rate": 0.00010778239214783708,
      "loss": 0.6771,
      "step": 5491
    },
    {
      "epoch": 0.49105865522174535,
      "grad_norm": 0.18389830724139009,
      "learning_rate": 0.00010775351954517332,
      "loss": 0.6508,
      "step": 5492
    },
    {
      "epoch": 0.49114806866952787,
      "grad_norm": 0.14726330361344092,
      "learning_rate": 0.00010772464629223124,
      "loss": 0.6688,
      "step": 5493
    },
    {
      "epoch": 0.49123748211731044,
      "grad_norm": 0.1503676186352315,
      "learning_rate": 0.00010769577239143242,
      "loss": 0.6621,
      "step": 5494
    },
    {
      "epoch": 0.491326895565093,
      "grad_norm": 0.16842582762230965,
      "learning_rate": 0.00010766689784519845,
      "loss": 0.6614,
      "step": 5495
    },
    {
      "epoch": 0.49141630901287553,
      "grad_norm": 0.1472893722134317,
      "learning_rate": 0.00010763802265595102,
      "loss": 0.6534,
      "step": 5496
    },
    {
      "epoch": 0.4915057224606581,
      "grad_norm": 0.14094862535358874,
      "learning_rate": 0.00010760914682611188,
      "loss": 0.6538,
      "step": 5497
    },
    {
      "epoch": 0.4915951359084406,
      "grad_norm": 0.14219243498926673,
      "learning_rate": 0.00010758027035810276,
      "loss": 0.6635,
      "step": 5498
    },
    {
      "epoch": 0.4916845493562232,
      "grad_norm": 0.1405758075728752,
      "learning_rate": 0.00010755139325434548,
      "loss": 0.6689,
      "step": 5499
    },
    {
      "epoch": 0.4917739628040057,
      "grad_norm": 0.15561458218831006,
      "learning_rate": 0.00010752251551726205,
      "loss": 0.6883,
      "step": 5500
    },
    {
      "epoch": 0.4918633762517883,
      "grad_norm": 0.15433979651038615,
      "learning_rate": 0.0001074936371492743,
      "loss": 0.6445,
      "step": 5501
    },
    {
      "epoch": 0.4919527896995708,
      "grad_norm": 0.14803996197877914,
      "learning_rate": 0.00010746475815280424,
      "loss": 0.691,
      "step": 5502
    },
    {
      "epoch": 0.49204220314735336,
      "grad_norm": 0.15719584000451278,
      "learning_rate": 0.00010743587853027391,
      "loss": 0.6932,
      "step": 5503
    },
    {
      "epoch": 0.49213161659513593,
      "grad_norm": 0.15641382240437962,
      "learning_rate": 0.00010740699828410545,
      "loss": 0.6429,
      "step": 5504
    },
    {
      "epoch": 0.49222103004291845,
      "grad_norm": 0.17175032713886523,
      "learning_rate": 0.00010737811741672101,
      "loss": 0.697,
      "step": 5505
    },
    {
      "epoch": 0.492310443490701,
      "grad_norm": 0.15688015976590564,
      "learning_rate": 0.00010734923593054271,
      "loss": 0.6438,
      "step": 5506
    },
    {
      "epoch": 0.49239985693848354,
      "grad_norm": 0.15021981236006265,
      "learning_rate": 0.00010732035382799293,
      "loss": 0.6207,
      "step": 5507
    },
    {
      "epoch": 0.4924892703862661,
      "grad_norm": 0.14973899677675043,
      "learning_rate": 0.00010729147111149392,
      "loss": 0.6433,
      "step": 5508
    },
    {
      "epoch": 0.4925786838340486,
      "grad_norm": 0.16390133569336077,
      "learning_rate": 0.00010726258778346798,
      "loss": 0.6594,
      "step": 5509
    },
    {
      "epoch": 0.4926680972818312,
      "grad_norm": 0.15837978151467982,
      "learning_rate": 0.0001072337038463376,
      "loss": 0.6726,
      "step": 5510
    },
    {
      "epoch": 0.4927575107296137,
      "grad_norm": 0.1530048536717251,
      "learning_rate": 0.00010720481930252524,
      "loss": 0.653,
      "step": 5511
    },
    {
      "epoch": 0.4928469241773963,
      "grad_norm": 0.1471417676103126,
      "learning_rate": 0.00010717593415445335,
      "loss": 0.6583,
      "step": 5512
    },
    {
      "epoch": 0.4929363376251788,
      "grad_norm": 0.15167861072109265,
      "learning_rate": 0.00010714704840454453,
      "loss": 0.6526,
      "step": 5513
    },
    {
      "epoch": 0.49302575107296137,
      "grad_norm": 0.2307711633734435,
      "learning_rate": 0.0001071181620552214,
      "loss": 0.4257,
      "step": 5514
    },
    {
      "epoch": 0.49311516452074394,
      "grad_norm": 0.14160801702642922,
      "learning_rate": 0.00010708927510890665,
      "loss": 0.6224,
      "step": 5515
    },
    {
      "epoch": 0.49320457796852646,
      "grad_norm": 0.17574152823995726,
      "learning_rate": 0.0001070603875680229,
      "loss": 0.3972,
      "step": 5516
    },
    {
      "epoch": 0.49329399141630903,
      "grad_norm": 0.16539398434759764,
      "learning_rate": 0.000107031499434993,
      "loss": 0.6738,
      "step": 5517
    },
    {
      "epoch": 0.49338340486409155,
      "grad_norm": 0.1434294446594344,
      "learning_rate": 0.00010700261071223973,
      "loss": 0.6629,
      "step": 5518
    },
    {
      "epoch": 0.4934728183118741,
      "grad_norm": 0.15437048792353525,
      "learning_rate": 0.00010697372140218596,
      "loss": 0.6637,
      "step": 5519
    },
    {
      "epoch": 0.49356223175965663,
      "grad_norm": 0.16299777041094518,
      "learning_rate": 0.00010694483150725458,
      "loss": 0.6356,
      "step": 5520
    },
    {
      "epoch": 0.4936516452074392,
      "grad_norm": 0.15508928692041984,
      "learning_rate": 0.00010691594102986861,
      "loss": 0.6878,
      "step": 5521
    },
    {
      "epoch": 0.4937410586552217,
      "grad_norm": 0.14445377288709688,
      "learning_rate": 0.000106887049972451,
      "loss": 0.6808,
      "step": 5522
    },
    {
      "epoch": 0.4938304721030043,
      "grad_norm": 0.1647743039071933,
      "learning_rate": 0.00010685815833742481,
      "loss": 0.697,
      "step": 5523
    },
    {
      "epoch": 0.49391988555078686,
      "grad_norm": 0.1670876281813288,
      "learning_rate": 0.00010682926612721315,
      "loss": 0.7097,
      "step": 5524
    },
    {
      "epoch": 0.4940092989985694,
      "grad_norm": 0.1684540136687708,
      "learning_rate": 0.00010680037334423925,
      "loss": 0.6623,
      "step": 5525
    },
    {
      "epoch": 0.49409871244635195,
      "grad_norm": 0.14575978629238018,
      "learning_rate": 0.00010677147999092618,
      "loss": 0.6749,
      "step": 5526
    },
    {
      "epoch": 0.49418812589413447,
      "grad_norm": 0.15128439388484183,
      "learning_rate": 0.00010674258606969729,
      "loss": 0.6657,
      "step": 5527
    },
    {
      "epoch": 0.49427753934191704,
      "grad_norm": 0.14392971813307717,
      "learning_rate": 0.00010671369158297586,
      "loss": 0.6606,
      "step": 5528
    },
    {
      "epoch": 0.49436695278969955,
      "grad_norm": 0.15496167725786494,
      "learning_rate": 0.00010668479653318522,
      "loss": 0.6366,
      "step": 5529
    },
    {
      "epoch": 0.4944563662374821,
      "grad_norm": 0.1403117081579515,
      "learning_rate": 0.00010665590092274876,
      "loss": 0.6504,
      "step": 5530
    },
    {
      "epoch": 0.49454577968526464,
      "grad_norm": 0.16045415527616305,
      "learning_rate": 0.00010662700475408994,
      "loss": 0.6814,
      "step": 5531
    },
    {
      "epoch": 0.4946351931330472,
      "grad_norm": 0.14338915058791354,
      "learning_rate": 0.00010659810802963224,
      "loss": 0.652,
      "step": 5532
    },
    {
      "epoch": 0.4947246065808298,
      "grad_norm": 0.1379778833293296,
      "learning_rate": 0.00010656921075179915,
      "loss": 0.6823,
      "step": 5533
    },
    {
      "epoch": 0.4948140200286123,
      "grad_norm": 0.16433899235131957,
      "learning_rate": 0.00010654031292301432,
      "loss": 0.6825,
      "step": 5534
    },
    {
      "epoch": 0.49490343347639487,
      "grad_norm": 0.15352019396365463,
      "learning_rate": 0.00010651141454570135,
      "loss": 0.6845,
      "step": 5535
    },
    {
      "epoch": 0.4949928469241774,
      "grad_norm": 0.13069594782941554,
      "learning_rate": 0.00010648251562228386,
      "loss": 0.6165,
      "step": 5536
    },
    {
      "epoch": 0.49508226037195996,
      "grad_norm": 0.1578295732178604,
      "learning_rate": 0.00010645361615518565,
      "loss": 0.7163,
      "step": 5537
    },
    {
      "epoch": 0.4951716738197425,
      "grad_norm": 0.13654372128837533,
      "learning_rate": 0.00010642471614683045,
      "loss": 0.6735,
      "step": 5538
    },
    {
      "epoch": 0.49526108726752505,
      "grad_norm": 0.1593958025254594,
      "learning_rate": 0.00010639581559964205,
      "loss": 0.6721,
      "step": 5539
    },
    {
      "epoch": 0.49535050071530756,
      "grad_norm": 0.13786166960213408,
      "learning_rate": 0.00010636691451604434,
      "loss": 0.603,
      "step": 5540
    },
    {
      "epoch": 0.49543991416309013,
      "grad_norm": 0.16182436629408103,
      "learning_rate": 0.00010633801289846119,
      "loss": 0.6664,
      "step": 5541
    },
    {
      "epoch": 0.49552932761087265,
      "grad_norm": 0.14778822629505214,
      "learning_rate": 0.00010630911074931655,
      "loss": 0.6398,
      "step": 5542
    },
    {
      "epoch": 0.4956187410586552,
      "grad_norm": 0.25902151367594756,
      "learning_rate": 0.00010628020807103441,
      "loss": 0.4054,
      "step": 5543
    },
    {
      "epoch": 0.4957081545064378,
      "grad_norm": 0.18021555793373348,
      "learning_rate": 0.00010625130486603878,
      "loss": 0.7089,
      "step": 5544
    },
    {
      "epoch": 0.4957975679542203,
      "grad_norm": 0.1526206126723494,
      "learning_rate": 0.00010622240113675382,
      "loss": 0.6761,
      "step": 5545
    },
    {
      "epoch": 0.4958869814020029,
      "grad_norm": 0.16492274796865838,
      "learning_rate": 0.00010619349688560354,
      "loss": 0.6941,
      "step": 5546
    },
    {
      "epoch": 0.4959763948497854,
      "grad_norm": 0.1594635897031172,
      "learning_rate": 0.00010616459211501217,
      "loss": 0.678,
      "step": 5547
    },
    {
      "epoch": 0.49606580829756797,
      "grad_norm": 0.146310936290596,
      "learning_rate": 0.00010613568682740391,
      "loss": 0.6785,
      "step": 5548
    },
    {
      "epoch": 0.4961552217453505,
      "grad_norm": 0.14428202175838375,
      "learning_rate": 0.00010610678102520301,
      "loss": 0.6456,
      "step": 5549
    },
    {
      "epoch": 0.49624463519313305,
      "grad_norm": 0.14680281059977096,
      "learning_rate": 0.00010607787471083375,
      "loss": 0.6522,
      "step": 5550
    },
    {
      "epoch": 0.49633404864091557,
      "grad_norm": 0.16611773173827127,
      "learning_rate": 0.00010604896788672048,
      "loss": 0.7628,
      "step": 5551
    },
    {
      "epoch": 0.49642346208869814,
      "grad_norm": 0.16794700956630113,
      "learning_rate": 0.0001060200605552876,
      "loss": 0.7049,
      "step": 5552
    },
    {
      "epoch": 0.4965128755364807,
      "grad_norm": 0.14563120389516482,
      "learning_rate": 0.00010599115271895948,
      "loss": 0.6521,
      "step": 5553
    },
    {
      "epoch": 0.49660228898426323,
      "grad_norm": 0.15628118318369766,
      "learning_rate": 0.00010596224438016063,
      "loss": 0.7022,
      "step": 5554
    },
    {
      "epoch": 0.4966917024320458,
      "grad_norm": 0.144339964051701,
      "learning_rate": 0.00010593333554131552,
      "loss": 0.665,
      "step": 5555
    },
    {
      "epoch": 0.4967811158798283,
      "grad_norm": 0.13397910805252627,
      "learning_rate": 0.00010590442620484875,
      "loss": 0.6636,
      "step": 5556
    },
    {
      "epoch": 0.4968705293276109,
      "grad_norm": 0.1671302512550555,
      "learning_rate": 0.00010587551637318489,
      "loss": 0.701,
      "step": 5557
    },
    {
      "epoch": 0.4969599427753934,
      "grad_norm": 0.13516524817821224,
      "learning_rate": 0.00010584660604874857,
      "loss": 0.6514,
      "step": 5558
    },
    {
      "epoch": 0.497049356223176,
      "grad_norm": 0.16496453701988614,
      "learning_rate": 0.00010581769523396445,
      "loss": 0.6878,
      "step": 5559
    },
    {
      "epoch": 0.4971387696709585,
      "grad_norm": 0.16856995633732916,
      "learning_rate": 0.00010578878393125724,
      "loss": 0.6838,
      "step": 5560
    },
    {
      "epoch": 0.49722818311874106,
      "grad_norm": 0.1787477412208883,
      "learning_rate": 0.00010575987214305174,
      "loss": 0.6836,
      "step": 5561
    },
    {
      "epoch": 0.4973175965665236,
      "grad_norm": 0.1626067799294747,
      "learning_rate": 0.0001057309598717727,
      "loss": 0.6356,
      "step": 5562
    },
    {
      "epoch": 0.49740701001430615,
      "grad_norm": 0.14777220125238522,
      "learning_rate": 0.000105702047119845,
      "loss": 0.7061,
      "step": 5563
    },
    {
      "epoch": 0.4974964234620887,
      "grad_norm": 0.15592205891693492,
      "learning_rate": 0.00010567313388969348,
      "loss": 0.67,
      "step": 5564
    },
    {
      "epoch": 0.49758583690987124,
      "grad_norm": 0.1504389068477687,
      "learning_rate": 0.00010564422018374307,
      "loss": 0.6701,
      "step": 5565
    },
    {
      "epoch": 0.4976752503576538,
      "grad_norm": 0.1596248548244069,
      "learning_rate": 0.00010561530600441873,
      "loss": 0.7012,
      "step": 5566
    },
    {
      "epoch": 0.4977646638054363,
      "grad_norm": 0.14769988030169684,
      "learning_rate": 0.00010558639135414545,
      "loss": 0.6379,
      "step": 5567
    },
    {
      "epoch": 0.4978540772532189,
      "grad_norm": 0.14307737549334068,
      "learning_rate": 0.00010555747623534831,
      "loss": 0.6459,
      "step": 5568
    },
    {
      "epoch": 0.4979434907010014,
      "grad_norm": 0.20793711475025708,
      "learning_rate": 0.00010552856065045232,
      "loss": 0.4113,
      "step": 5569
    },
    {
      "epoch": 0.498032904148784,
      "grad_norm": 0.1364256819754779,
      "learning_rate": 0.00010549964460188261,
      "loss": 0.6641,
      "step": 5570
    },
    {
      "epoch": 0.4981223175965665,
      "grad_norm": 0.14350946896332226,
      "learning_rate": 0.00010547072809206437,
      "loss": 0.6422,
      "step": 5571
    },
    {
      "epoch": 0.49821173104434907,
      "grad_norm": 0.15296337878915356,
      "learning_rate": 0.00010544181112342278,
      "loss": 0.6732,
      "step": 5572
    },
    {
      "epoch": 0.49830114449213164,
      "grad_norm": 0.14651591168547945,
      "learning_rate": 0.00010541289369838302,
      "loss": 0.6513,
      "step": 5573
    },
    {
      "epoch": 0.49839055793991416,
      "grad_norm": 0.1388474531314344,
      "learning_rate": 0.00010538397581937048,
      "loss": 0.666,
      "step": 5574
    },
    {
      "epoch": 0.49847997138769673,
      "grad_norm": 0.15312091767063474,
      "learning_rate": 0.00010535505748881031,
      "loss": 0.6816,
      "step": 5575
    },
    {
      "epoch": 0.49856938483547925,
      "grad_norm": 0.16180868752795205,
      "learning_rate": 0.00010532613870912799,
      "loss": 0.7068,
      "step": 5576
    },
    {
      "epoch": 0.4986587982832618,
      "grad_norm": 0.13893551094727938,
      "learning_rate": 0.00010529721948274882,
      "loss": 0.6614,
      "step": 5577
    },
    {
      "epoch": 0.49874821173104433,
      "grad_norm": 0.1539306715754525,
      "learning_rate": 0.00010526829981209827,
      "loss": 0.6548,
      "step": 5578
    },
    {
      "epoch": 0.4988376251788269,
      "grad_norm": 0.14932549022195302,
      "learning_rate": 0.00010523937969960176,
      "loss": 0.6394,
      "step": 5579
    },
    {
      "epoch": 0.4989270386266094,
      "grad_norm": 0.1590828926982602,
      "learning_rate": 0.00010521045914768482,
      "loss": 0.6561,
      "step": 5580
    },
    {
      "epoch": 0.499016452074392,
      "grad_norm": 0.18952669948125517,
      "learning_rate": 0.00010518153815877294,
      "loss": 0.4016,
      "step": 5581
    },
    {
      "epoch": 0.4991058655221745,
      "grad_norm": 0.161106939715482,
      "learning_rate": 0.00010515261673529173,
      "loss": 0.685,
      "step": 5582
    },
    {
      "epoch": 0.4991952789699571,
      "grad_norm": 0.17703629832577733,
      "learning_rate": 0.00010512369487966678,
      "loss": 0.6607,
      "step": 5583
    },
    {
      "epoch": 0.49928469241773965,
      "grad_norm": 0.13116534784493528,
      "learning_rate": 0.00010509477259432372,
      "loss": 0.6282,
      "step": 5584
    },
    {
      "epoch": 0.49937410586552217,
      "grad_norm": 0.14393658563156242,
      "learning_rate": 0.00010506584988168824,
      "loss": 0.6766,
      "step": 5585
    },
    {
      "epoch": 0.49946351931330474,
      "grad_norm": 0.14193439212754105,
      "learning_rate": 0.00010503692674418603,
      "loss": 0.6417,
      "step": 5586
    },
    {
      "epoch": 0.49955293276108725,
      "grad_norm": 0.14431805131125824,
      "learning_rate": 0.00010500800318424286,
      "loss": 0.6805,
      "step": 5587
    },
    {
      "epoch": 0.4996423462088698,
      "grad_norm": 0.13501774528657925,
      "learning_rate": 0.00010497907920428454,
      "loss": 0.6304,
      "step": 5588
    },
    {
      "epoch": 0.49973175965665234,
      "grad_norm": 0.16305208477341135,
      "learning_rate": 0.00010495015480673685,
      "loss": 0.6914,
      "step": 5589
    },
    {
      "epoch": 0.4998211731044349,
      "grad_norm": 0.1452744455618461,
      "learning_rate": 0.00010492122999402562,
      "loss": 0.6207,
      "step": 5590
    },
    {
      "epoch": 0.49991058655221743,
      "grad_norm": 0.1342508092832221,
      "learning_rate": 0.00010489230476857681,
      "loss": 0.6202,
      "step": 5591
    },
    {
      "epoch": 0.5,
      "grad_norm": 0.1659522358270193,
      "learning_rate": 0.00010486337913281632,
      "loss": 0.6793,
      "step": 5592
    },
    {
      "epoch": 0.5000894134477826,
      "grad_norm": 0.15777249036457808,
      "learning_rate": 0.00010483445308917006,
      "loss": 0.6998,
      "step": 5593
    },
    {
      "epoch": 0.5001788268955651,
      "grad_norm": 0.14505076055978938,
      "learning_rate": 0.00010480552664006406,
      "loss": 0.6537,
      "step": 5594
    },
    {
      "epoch": 0.5002682403433476,
      "grad_norm": 0.13044537978796758,
      "learning_rate": 0.00010477659978792438,
      "loss": 0.6414,
      "step": 5595
    },
    {
      "epoch": 0.5003576537911302,
      "grad_norm": 0.13388260730455293,
      "learning_rate": 0.00010474767253517701,
      "loss": 0.6795,
      "step": 5596
    },
    {
      "epoch": 0.5004470672389127,
      "grad_norm": 0.14810012029935085,
      "learning_rate": 0.00010471874488424808,
      "loss": 0.646,
      "step": 5597
    },
    {
      "epoch": 0.5005364806866953,
      "grad_norm": 0.16912375697018325,
      "learning_rate": 0.00010468981683756373,
      "loss": 0.6823,
      "step": 5598
    },
    {
      "epoch": 0.5006258941344778,
      "grad_norm": 0.13056234486935472,
      "learning_rate": 0.00010466088839755012,
      "loss": 0.6464,
      "step": 5599
    },
    {
      "epoch": 0.5007153075822603,
      "grad_norm": 0.14558241360248683,
      "learning_rate": 0.00010463195956663338,
      "loss": 0.6543,
      "step": 5600
    },
    {
      "epoch": 0.5008047210300429,
      "grad_norm": 0.16701900281587442,
      "learning_rate": 0.00010460303034723985,
      "loss": 0.6891,
      "step": 5601
    },
    {
      "epoch": 0.5008941344778255,
      "grad_norm": 0.14109844784518014,
      "learning_rate": 0.00010457410074179568,
      "loss": 0.6375,
      "step": 5602
    },
    {
      "epoch": 0.5009835479256081,
      "grad_norm": 0.1835218302720949,
      "learning_rate": 0.00010454517075272721,
      "loss": 0.39,
      "step": 5603
    },
    {
      "epoch": 0.5010729613733905,
      "grad_norm": 0.1584148091350232,
      "learning_rate": 0.00010451624038246075,
      "loss": 0.6636,
      "step": 5604
    },
    {
      "epoch": 0.5011623748211731,
      "grad_norm": 0.16660604687920064,
      "learning_rate": 0.00010448730963342268,
      "loss": 0.7139,
      "step": 5605
    },
    {
      "epoch": 0.5012517882689557,
      "grad_norm": 0.14808957998452177,
      "learning_rate": 0.00010445837850803939,
      "loss": 0.7014,
      "step": 5606
    },
    {
      "epoch": 0.5013412017167382,
      "grad_norm": 0.16111095979344678,
      "learning_rate": 0.00010442944700873722,
      "loss": 0.7014,
      "step": 5607
    },
    {
      "epoch": 0.5014306151645207,
      "grad_norm": 0.1408104467137551,
      "learning_rate": 0.00010440051513794271,
      "loss": 0.6672,
      "step": 5608
    },
    {
      "epoch": 0.5015200286123033,
      "grad_norm": 0.16718078281889212,
      "learning_rate": 0.00010437158289808233,
      "loss": 0.705,
      "step": 5609
    },
    {
      "epoch": 0.5016094420600858,
      "grad_norm": 0.16121746185965194,
      "learning_rate": 0.00010434265029158254,
      "loss": 0.6736,
      "step": 5610
    },
    {
      "epoch": 0.5016988555078684,
      "grad_norm": 0.15351589639658283,
      "learning_rate": 0.00010431371732086994,
      "loss": 0.6622,
      "step": 5611
    },
    {
      "epoch": 0.501788268955651,
      "grad_norm": 0.14894307908860166,
      "learning_rate": 0.00010428478398837107,
      "loss": 0.6962,
      "step": 5612
    },
    {
      "epoch": 0.5018776824034334,
      "grad_norm": 0.15754546882535636,
      "learning_rate": 0.00010425585029651252,
      "loss": 0.6856,
      "step": 5613
    },
    {
      "epoch": 0.501967095851216,
      "grad_norm": 0.15748320106488484,
      "learning_rate": 0.00010422691624772097,
      "loss": 0.712,
      "step": 5614
    },
    {
      "epoch": 0.5020565092989986,
      "grad_norm": 0.12446536798777962,
      "learning_rate": 0.00010419798184442307,
      "loss": 0.617,
      "step": 5615
    },
    {
      "epoch": 0.5021459227467812,
      "grad_norm": 0.1436822896620335,
      "learning_rate": 0.00010416904708904548,
      "loss": 0.624,
      "step": 5616
    },
    {
      "epoch": 0.5022353361945636,
      "grad_norm": 0.17410143494285701,
      "learning_rate": 0.00010414011198401492,
      "loss": 0.6858,
      "step": 5617
    },
    {
      "epoch": 0.5023247496423462,
      "grad_norm": 0.1292004999898173,
      "learning_rate": 0.00010411117653175821,
      "loss": 0.6537,
      "step": 5618
    },
    {
      "epoch": 0.5024141630901288,
      "grad_norm": 0.1708264653136988,
      "learning_rate": 0.0001040822407347021,
      "loss": 0.7191,
      "step": 5619
    },
    {
      "epoch": 0.5025035765379113,
      "grad_norm": 0.15378598180331135,
      "learning_rate": 0.00010405330459527336,
      "loss": 0.6597,
      "step": 5620
    },
    {
      "epoch": 0.5025929899856938,
      "grad_norm": 0.1746892820766876,
      "learning_rate": 0.00010402436811589887,
      "loss": 0.718,
      "step": 5621
    },
    {
      "epoch": 0.5026824034334764,
      "grad_norm": 0.16849371447260778,
      "learning_rate": 0.00010399543129900549,
      "loss": 0.7062,
      "step": 5622
    },
    {
      "epoch": 0.5027718168812589,
      "grad_norm": 0.15962222670715195,
      "learning_rate": 0.00010396649414702011,
      "loss": 0.6628,
      "step": 5623
    },
    {
      "epoch": 0.5028612303290415,
      "grad_norm": 0.15386315542900197,
      "learning_rate": 0.00010393755666236962,
      "loss": 0.6278,
      "step": 5624
    },
    {
      "epoch": 0.5029506437768241,
      "grad_norm": 0.182925137845079,
      "learning_rate": 0.00010390861884748107,
      "loss": 0.7106,
      "step": 5625
    },
    {
      "epoch": 0.5030400572246065,
      "grad_norm": 0.14197188671776587,
      "learning_rate": 0.00010387968070478136,
      "loss": 0.691,
      "step": 5626
    },
    {
      "epoch": 0.5031294706723891,
      "grad_norm": 0.15081522721840426,
      "learning_rate": 0.00010385074223669748,
      "loss": 0.62,
      "step": 5627
    },
    {
      "epoch": 0.5032188841201717,
      "grad_norm": 0.16529550527966475,
      "learning_rate": 0.0001038218034456565,
      "loss": 0.7041,
      "step": 5628
    },
    {
      "epoch": 0.5033082975679543,
      "grad_norm": 0.16429027610616306,
      "learning_rate": 0.00010379286433408553,
      "loss": 0.6844,
      "step": 5629
    },
    {
      "epoch": 0.5033977110157367,
      "grad_norm": 0.1869166264742404,
      "learning_rate": 0.00010376392490441158,
      "loss": 0.684,
      "step": 5630
    },
    {
      "epoch": 0.5034871244635193,
      "grad_norm": 0.1671520148960538,
      "learning_rate": 0.00010373498515906177,
      "loss": 0.7006,
      "step": 5631
    },
    {
      "epoch": 0.5035765379113019,
      "grad_norm": 0.16341988939128266,
      "learning_rate": 0.00010370604510046331,
      "loss": 0.6566,
      "step": 5632
    },
    {
      "epoch": 0.5036659513590844,
      "grad_norm": 0.15654403123396488,
      "learning_rate": 0.00010367710473104331,
      "loss": 0.6954,
      "step": 5633
    },
    {
      "epoch": 0.503755364806867,
      "grad_norm": 0.16927099060278272,
      "learning_rate": 0.00010364816405322895,
      "loss": 0.673,
      "step": 5634
    },
    {
      "epoch": 0.5038447782546495,
      "grad_norm": 0.13967573931310037,
      "learning_rate": 0.00010361922306944751,
      "loss": 0.6333,
      "step": 5635
    },
    {
      "epoch": 0.503934191702432,
      "grad_norm": 0.14529650677407382,
      "learning_rate": 0.0001035902817821262,
      "loss": 0.6548,
      "step": 5636
    },
    {
      "epoch": 0.5040236051502146,
      "grad_norm": 0.15413355249340244,
      "learning_rate": 0.00010356134019369227,
      "loss": 0.6508,
      "step": 5637
    },
    {
      "epoch": 0.5041130185979972,
      "grad_norm": 0.14842292731546736,
      "learning_rate": 0.00010353239830657304,
      "loss": 0.6136,
      "step": 5638
    },
    {
      "epoch": 0.5042024320457796,
      "grad_norm": 0.19547938133290968,
      "learning_rate": 0.00010350345612319586,
      "loss": 0.7091,
      "step": 5639
    },
    {
      "epoch": 0.5042918454935622,
      "grad_norm": 0.17194977645328607,
      "learning_rate": 0.00010347451364598804,
      "loss": 0.7013,
      "step": 5640
    },
    {
      "epoch": 0.5043812589413448,
      "grad_norm": 0.12008461055923177,
      "learning_rate": 0.00010344557087737692,
      "loss": 0.6157,
      "step": 5641
    },
    {
      "epoch": 0.5044706723891274,
      "grad_norm": 0.18302088421573093,
      "learning_rate": 0.00010341662781978996,
      "loss": 0.7208,
      "step": 5642
    },
    {
      "epoch": 0.5045600858369099,
      "grad_norm": 0.1630603093135501,
      "learning_rate": 0.00010338768447565457,
      "loss": 0.6533,
      "step": 5643
    },
    {
      "epoch": 0.5046494992846924,
      "grad_norm": 0.1520382041857524,
      "learning_rate": 0.00010335874084739814,
      "loss": 0.632,
      "step": 5644
    },
    {
      "epoch": 0.504738912732475,
      "grad_norm": 0.19054294154257298,
      "learning_rate": 0.00010332979693744815,
      "loss": 0.4187,
      "step": 5645
    },
    {
      "epoch": 0.5048283261802575,
      "grad_norm": 0.14768551198467533,
      "learning_rate": 0.00010330085274823218,
      "loss": 0.6885,
      "step": 5646
    },
    {
      "epoch": 0.5049177396280401,
      "grad_norm": 0.16076456935143588,
      "learning_rate": 0.00010327190828217763,
      "loss": 0.6846,
      "step": 5647
    },
    {
      "epoch": 0.5050071530758226,
      "grad_norm": 0.1773811677248872,
      "learning_rate": 0.00010324296354171207,
      "loss": 0.6607,
      "step": 5648
    },
    {
      "epoch": 0.5050965665236051,
      "grad_norm": 0.16374528236257796,
      "learning_rate": 0.00010321401852926312,
      "loss": 0.6888,
      "step": 5649
    },
    {
      "epoch": 0.5051859799713877,
      "grad_norm": 0.15523695282313069,
      "learning_rate": 0.0001031850732472583,
      "loss": 0.7038,
      "step": 5650
    },
    {
      "epoch": 0.5052753934191703,
      "grad_norm": 0.14300481448415805,
      "learning_rate": 0.00010315612769812524,
      "loss": 0.6512,
      "step": 5651
    },
    {
      "epoch": 0.5053648068669528,
      "grad_norm": 0.14575298476418738,
      "learning_rate": 0.00010312718188429154,
      "loss": 0.685,
      "step": 5652
    },
    {
      "epoch": 0.5054542203147353,
      "grad_norm": 0.16848897628390008,
      "learning_rate": 0.00010309823580818489,
      "loss": 0.7101,
      "step": 5653
    },
    {
      "epoch": 0.5055436337625179,
      "grad_norm": 0.14113346433178775,
      "learning_rate": 0.00010306928947223294,
      "loss": 0.6508,
      "step": 5654
    },
    {
      "epoch": 0.5056330472103004,
      "grad_norm": 0.1677276584169873,
      "learning_rate": 0.00010304034287886337,
      "loss": 0.7012,
      "step": 5655
    },
    {
      "epoch": 0.505722460658083,
      "grad_norm": 0.16114688120393214,
      "learning_rate": 0.00010301139603050394,
      "loss": 0.6541,
      "step": 5656
    },
    {
      "epoch": 0.5058118741058655,
      "grad_norm": 0.16123517731033213,
      "learning_rate": 0.00010298244892958235,
      "loss": 0.6655,
      "step": 5657
    },
    {
      "epoch": 0.505901287553648,
      "grad_norm": 0.14875867588919495,
      "learning_rate": 0.00010295350157852637,
      "loss": 0.6675,
      "step": 5658
    },
    {
      "epoch": 0.5059907010014306,
      "grad_norm": 0.1492662292310711,
      "learning_rate": 0.00010292455397976379,
      "loss": 0.6399,
      "step": 5659
    },
    {
      "epoch": 0.5060801144492132,
      "grad_norm": 0.15595872871706568,
      "learning_rate": 0.0001028956061357224,
      "loss": 0.6827,
      "step": 5660
    },
    {
      "epoch": 0.5061695278969958,
      "grad_norm": 0.1604055904261131,
      "learning_rate": 0.00010286665804883,
      "loss": 0.6801,
      "step": 5661
    },
    {
      "epoch": 0.5062589413447782,
      "grad_norm": 0.16881375395042222,
      "learning_rate": 0.00010283770972151445,
      "loss": 0.6781,
      "step": 5662
    },
    {
      "epoch": 0.5063483547925608,
      "grad_norm": 0.15922800841570883,
      "learning_rate": 0.00010280876115620365,
      "loss": 0.6848,
      "step": 5663
    },
    {
      "epoch": 0.5064377682403434,
      "grad_norm": 0.1512432127954356,
      "learning_rate": 0.00010277981235532541,
      "loss": 0.6768,
      "step": 5664
    },
    {
      "epoch": 0.5065271816881259,
      "grad_norm": 0.13539876383719282,
      "learning_rate": 0.00010275086332130768,
      "loss": 0.6302,
      "step": 5665
    },
    {
      "epoch": 0.5066165951359084,
      "grad_norm": 0.14330133952973717,
      "learning_rate": 0.00010272191405657836,
      "loss": 0.6737,
      "step": 5666
    },
    {
      "epoch": 0.506706008583691,
      "grad_norm": 0.1822399090540978,
      "learning_rate": 0.00010269296456356541,
      "loss": 0.3985,
      "step": 5667
    },
    {
      "epoch": 0.5067954220314735,
      "grad_norm": 0.1787344327571596,
      "learning_rate": 0.00010266401484469674,
      "loss": 0.4111,
      "step": 5668
    },
    {
      "epoch": 0.5068848354792561,
      "grad_norm": 0.17811331880784004,
      "learning_rate": 0.00010263506490240038,
      "loss": 0.7122,
      "step": 5669
    },
    {
      "epoch": 0.5069742489270386,
      "grad_norm": 0.17829528895923977,
      "learning_rate": 0.00010260611473910433,
      "loss": 0.3973,
      "step": 5670
    },
    {
      "epoch": 0.5070636623748211,
      "grad_norm": 0.1517181897715799,
      "learning_rate": 0.00010257716435723656,
      "loss": 0.6383,
      "step": 5671
    },
    {
      "epoch": 0.5071530758226037,
      "grad_norm": 0.13913463878679783,
      "learning_rate": 0.00010254821375922512,
      "loss": 0.6277,
      "step": 5672
    },
    {
      "epoch": 0.5072424892703863,
      "grad_norm": 0.15047461208945873,
      "learning_rate": 0.0001025192629474981,
      "loss": 0.6598,
      "step": 5673
    },
    {
      "epoch": 0.5073319027181689,
      "grad_norm": 0.17506922276194425,
      "learning_rate": 0.0001024903119244835,
      "loss": 0.6899,
      "step": 5674
    },
    {
      "epoch": 0.5074213161659513,
      "grad_norm": 0.17862252035946835,
      "learning_rate": 0.0001024613606926095,
      "loss": 0.6667,
      "step": 5675
    },
    {
      "epoch": 0.5075107296137339,
      "grad_norm": 0.1653738792054227,
      "learning_rate": 0.00010243240925430411,
      "loss": 0.697,
      "step": 5676
    },
    {
      "epoch": 0.5076001430615165,
      "grad_norm": 0.1425564813392971,
      "learning_rate": 0.00010240345761199553,
      "loss": 0.6463,
      "step": 5677
    },
    {
      "epoch": 0.507689556509299,
      "grad_norm": 0.14406662949449087,
      "learning_rate": 0.0001023745057681118,
      "loss": 0.6435,
      "step": 5678
    },
    {
      "epoch": 0.5077789699570815,
      "grad_norm": 0.13801398091375955,
      "learning_rate": 0.00010234555372508119,
      "loss": 0.6572,
      "step": 5679
    },
    {
      "epoch": 0.5078683834048641,
      "grad_norm": 0.1564889861065534,
      "learning_rate": 0.00010231660148533183,
      "loss": 0.6947,
      "step": 5680
    },
    {
      "epoch": 0.5079577968526466,
      "grad_norm": 0.19753258109372085,
      "learning_rate": 0.00010228764905129184,
      "loss": 0.3826,
      "step": 5681
    },
    {
      "epoch": 0.5080472103004292,
      "grad_norm": 0.16029340440286347,
      "learning_rate": 0.00010225869642538955,
      "loss": 0.6747,
      "step": 5682
    },
    {
      "epoch": 0.5081366237482118,
      "grad_norm": 0.162611949041038,
      "learning_rate": 0.00010222974361005309,
      "loss": 0.6419,
      "step": 5683
    },
    {
      "epoch": 0.5082260371959942,
      "grad_norm": 0.16408617270134837,
      "learning_rate": 0.00010220079060771075,
      "loss": 0.6756,
      "step": 5684
    },
    {
      "epoch": 0.5083154506437768,
      "grad_norm": 0.1561180798324116,
      "learning_rate": 0.00010217183742079073,
      "loss": 0.6763,
      "step": 5685
    },
    {
      "epoch": 0.5084048640915594,
      "grad_norm": 0.15868634119181999,
      "learning_rate": 0.00010214288405172133,
      "loss": 0.6731,
      "step": 5686
    },
    {
      "epoch": 0.508494277539342,
      "grad_norm": 0.13040204704635278,
      "learning_rate": 0.00010211393050293083,
      "loss": 0.6614,
      "step": 5687
    },
    {
      "epoch": 0.5085836909871244,
      "grad_norm": 0.15069978136084136,
      "learning_rate": 0.00010208497677684754,
      "loss": 0.6245,
      "step": 5688
    },
    {
      "epoch": 0.508673104434907,
      "grad_norm": 0.14514885728438093,
      "learning_rate": 0.0001020560228758998,
      "loss": 0.6671,
      "step": 5689
    },
    {
      "epoch": 0.5087625178826896,
      "grad_norm": 0.13743190262753582,
      "learning_rate": 0.00010202706880251584,
      "loss": 0.6556,
      "step": 5690
    },
    {
      "epoch": 0.5088519313304721,
      "grad_norm": 0.152397209833181,
      "learning_rate": 0.00010199811455912412,
      "loss": 0.7144,
      "step": 5691
    },
    {
      "epoch": 0.5089413447782547,
      "grad_norm": 0.13989576674782675,
      "learning_rate": 0.00010196916014815292,
      "loss": 0.5952,
      "step": 5692
    },
    {
      "epoch": 0.5090307582260372,
      "grad_norm": 0.13976769839206818,
      "learning_rate": 0.00010194020557203063,
      "loss": 0.645,
      "step": 5693
    },
    {
      "epoch": 0.5091201716738197,
      "grad_norm": 0.14233930535432646,
      "learning_rate": 0.00010191125083318566,
      "loss": 0.6345,
      "step": 5694
    },
    {
      "epoch": 0.5092095851216023,
      "grad_norm": 0.1326926431301067,
      "learning_rate": 0.00010188229593404639,
      "loss": 0.6629,
      "step": 5695
    },
    {
      "epoch": 0.5092989985693849,
      "grad_norm": 0.1401122284762573,
      "learning_rate": 0.00010185334087704124,
      "loss": 0.6523,
      "step": 5696
    },
    {
      "epoch": 0.5093884120171673,
      "grad_norm": 0.13924010200638767,
      "learning_rate": 0.0001018243856645986,
      "loss": 0.638,
      "step": 5697
    },
    {
      "epoch": 0.5094778254649499,
      "grad_norm": 0.18436688912620716,
      "learning_rate": 0.00010179543029914695,
      "loss": 0.7214,
      "step": 5698
    },
    {
      "epoch": 0.5095672389127325,
      "grad_norm": 0.15961224488872952,
      "learning_rate": 0.00010176647478311473,
      "loss": 0.6644,
      "step": 5699
    },
    {
      "epoch": 0.509656652360515,
      "grad_norm": 0.14180039595513802,
      "learning_rate": 0.00010173751911893041,
      "loss": 0.6434,
      "step": 5700
    },
    {
      "epoch": 0.5097460658082976,
      "grad_norm": 0.15684110499885462,
      "learning_rate": 0.00010170856330902247,
      "loss": 0.6854,
      "step": 5701
    },
    {
      "epoch": 0.5098354792560801,
      "grad_norm": 0.17724545482042683,
      "learning_rate": 0.00010167960735581936,
      "loss": 0.6987,
      "step": 5702
    },
    {
      "epoch": 0.5099248927038627,
      "grad_norm": 0.16689536608008854,
      "learning_rate": 0.00010165065126174962,
      "loss": 0.6359,
      "step": 5703
    },
    {
      "epoch": 0.5100143061516452,
      "grad_norm": 0.16585665974582658,
      "learning_rate": 0.00010162169502924177,
      "loss": 0.6663,
      "step": 5704
    },
    {
      "epoch": 0.5101037195994278,
      "grad_norm": 0.15536156778653,
      "learning_rate": 0.0001015927386607243,
      "loss": 0.6753,
      "step": 5705
    },
    {
      "epoch": 0.5101931330472103,
      "grad_norm": 0.1432502907571349,
      "learning_rate": 0.00010156378215862578,
      "loss": 0.6716,
      "step": 5706
    },
    {
      "epoch": 0.5102825464949928,
      "grad_norm": 0.1656596540089046,
      "learning_rate": 0.00010153482552537472,
      "loss": 0.7218,
      "step": 5707
    },
    {
      "epoch": 0.5103719599427754,
      "grad_norm": 0.15306039354480147,
      "learning_rate": 0.00010150586876339969,
      "loss": 0.6593,
      "step": 5708
    },
    {
      "epoch": 0.510461373390558,
      "grad_norm": 0.16203365779153198,
      "learning_rate": 0.00010147691187512928,
      "loss": 0.6584,
      "step": 5709
    },
    {
      "epoch": 0.5105507868383404,
      "grad_norm": 0.16187237631753876,
      "learning_rate": 0.00010144795486299205,
      "loss": 0.6351,
      "step": 5710
    },
    {
      "epoch": 0.510640200286123,
      "grad_norm": 0.15026643394042608,
      "learning_rate": 0.0001014189977294166,
      "loss": 0.6476,
      "step": 5711
    },
    {
      "epoch": 0.5107296137339056,
      "grad_norm": 0.1496280282416441,
      "learning_rate": 0.00010139004047683151,
      "loss": 0.6636,
      "step": 5712
    },
    {
      "epoch": 0.5108190271816881,
      "grad_norm": 0.13825020971425825,
      "learning_rate": 0.00010136108310766544,
      "loss": 0.6349,
      "step": 5713
    },
    {
      "epoch": 0.5109084406294707,
      "grad_norm": 0.1526295481284795,
      "learning_rate": 0.00010133212562434693,
      "loss": 0.6667,
      "step": 5714
    },
    {
      "epoch": 0.5109978540772532,
      "grad_norm": 0.17034143970786436,
      "learning_rate": 0.00010130316802930467,
      "loss": 0.6143,
      "step": 5715
    },
    {
      "epoch": 0.5110872675250357,
      "grad_norm": 0.16424354316261697,
      "learning_rate": 0.00010127421032496729,
      "loss": 0.6612,
      "step": 5716
    },
    {
      "epoch": 0.5111766809728183,
      "grad_norm": 0.17822569462909146,
      "learning_rate": 0.00010124525251376342,
      "loss": 0.6853,
      "step": 5717
    },
    {
      "epoch": 0.5112660944206009,
      "grad_norm": 0.1615889116389719,
      "learning_rate": 0.00010121629459812172,
      "loss": 0.6763,
      "step": 5718
    },
    {
      "epoch": 0.5113555078683834,
      "grad_norm": 0.16353201476089885,
      "learning_rate": 0.00010118733658047088,
      "loss": 0.6572,
      "step": 5719
    },
    {
      "epoch": 0.5114449213161659,
      "grad_norm": 0.159638109279348,
      "learning_rate": 0.00010115837846323954,
      "loss": 0.6917,
      "step": 5720
    },
    {
      "epoch": 0.5115343347639485,
      "grad_norm": 0.16779173282112456,
      "learning_rate": 0.00010112942024885639,
      "loss": 0.6957,
      "step": 5721
    },
    {
      "epoch": 0.5116237482117311,
      "grad_norm": 0.15925837454149297,
      "learning_rate": 0.00010110046193975014,
      "loss": 0.6751,
      "step": 5722
    },
    {
      "epoch": 0.5117131616595136,
      "grad_norm": 0.13570934263702394,
      "learning_rate": 0.0001010715035383495,
      "loss": 0.6504,
      "step": 5723
    },
    {
      "epoch": 0.5118025751072961,
      "grad_norm": 0.1403556792069874,
      "learning_rate": 0.00010104254504708311,
      "loss": 0.6459,
      "step": 5724
    },
    {
      "epoch": 0.5118919885550787,
      "grad_norm": 0.1654252677989566,
      "learning_rate": 0.00010101358646837971,
      "loss": 0.6749,
      "step": 5725
    },
    {
      "epoch": 0.5119814020028612,
      "grad_norm": 0.15541635203348245,
      "learning_rate": 0.00010098462780466808,
      "loss": 0.624,
      "step": 5726
    },
    {
      "epoch": 0.5120708154506438,
      "grad_norm": 0.18369225393431154,
      "learning_rate": 0.00010095566905837692,
      "loss": 0.3657,
      "step": 5727
    },
    {
      "epoch": 0.5121602288984263,
      "grad_norm": 0.14132204617293853,
      "learning_rate": 0.00010092671023193491,
      "loss": 0.6433,
      "step": 5728
    },
    {
      "epoch": 0.5122496423462088,
      "grad_norm": 0.17343058800992953,
      "learning_rate": 0.00010089775132777084,
      "loss": 0.6938,
      "step": 5729
    },
    {
      "epoch": 0.5123390557939914,
      "grad_norm": 0.13741327973294584,
      "learning_rate": 0.00010086879234831345,
      "loss": 0.6481,
      "step": 5730
    },
    {
      "epoch": 0.512428469241774,
      "grad_norm": 0.1460938706245825,
      "learning_rate": 0.00010083983329599151,
      "loss": 0.6271,
      "step": 5731
    },
    {
      "epoch": 0.5125178826895566,
      "grad_norm": 0.13497086940424308,
      "learning_rate": 0.00010081087417323374,
      "loss": 0.6337,
      "step": 5732
    },
    {
      "epoch": 0.512607296137339,
      "grad_norm": 0.1536931444213161,
      "learning_rate": 0.00010078191498246897,
      "loss": 0.6514,
      "step": 5733
    },
    {
      "epoch": 0.5126967095851216,
      "grad_norm": 0.1480853385427629,
      "learning_rate": 0.00010075295572612593,
      "loss": 0.6609,
      "step": 5734
    },
    {
      "epoch": 0.5127861230329042,
      "grad_norm": 0.1563032463107422,
      "learning_rate": 0.00010072399640663334,
      "loss": 0.6583,
      "step": 5735
    },
    {
      "epoch": 0.5128755364806867,
      "grad_norm": 0.14138759925105313,
      "learning_rate": 0.00010069503702642011,
      "loss": 0.6264,
      "step": 5736
    },
    {
      "epoch": 0.5129649499284692,
      "grad_norm": 0.15196740535998465,
      "learning_rate": 0.00010066607758791495,
      "loss": 0.6453,
      "step": 5737
    },
    {
      "epoch": 0.5130543633762518,
      "grad_norm": 0.18282094011036903,
      "learning_rate": 0.00010063711809354665,
      "loss": 0.6822,
      "step": 5738
    },
    {
      "epoch": 0.5131437768240343,
      "grad_norm": 0.14759594209247456,
      "learning_rate": 0.00010060815854574403,
      "loss": 0.6846,
      "step": 5739
    },
    {
      "epoch": 0.5132331902718169,
      "grad_norm": 0.14966339478215254,
      "learning_rate": 0.00010057919894693593,
      "loss": 0.6553,
      "step": 5740
    },
    {
      "epoch": 0.5133226037195995,
      "grad_norm": 0.16719264883103258,
      "learning_rate": 0.00010055023929955106,
      "loss": 0.7105,
      "step": 5741
    },
    {
      "epoch": 0.5134120171673819,
      "grad_norm": 0.14620704562902462,
      "learning_rate": 0.00010052127960601829,
      "loss": 0.6333,
      "step": 5742
    },
    {
      "epoch": 0.5135014306151645,
      "grad_norm": 0.17774029569416586,
      "learning_rate": 0.00010049231986876646,
      "loss": 0.388,
      "step": 5743
    },
    {
      "epoch": 0.5135908440629471,
      "grad_norm": 0.16597219355649356,
      "learning_rate": 0.00010046336009022435,
      "loss": 0.6568,
      "step": 5744
    },
    {
      "epoch": 0.5136802575107297,
      "grad_norm": 0.15359423533813618,
      "learning_rate": 0.00010043440027282078,
      "loss": 0.6797,
      "step": 5745
    },
    {
      "epoch": 0.5137696709585121,
      "grad_norm": 0.1427151638286077,
      "learning_rate": 0.00010040544041898456,
      "loss": 0.6498,
      "step": 5746
    },
    {
      "epoch": 0.5138590844062947,
      "grad_norm": 0.15195983867828294,
      "learning_rate": 0.0001003764805311446,
      "loss": 0.6892,
      "step": 5747
    },
    {
      "epoch": 0.5139484978540773,
      "grad_norm": 0.1546003107765643,
      "learning_rate": 0.00010034752061172961,
      "loss": 0.6947,
      "step": 5748
    },
    {
      "epoch": 0.5140379113018598,
      "grad_norm": 0.15153174725472587,
      "learning_rate": 0.00010031856066316852,
      "loss": 0.6547,
      "step": 5749
    },
    {
      "epoch": 0.5141273247496424,
      "grad_norm": 0.14397004892131685,
      "learning_rate": 0.00010028960068789012,
      "loss": 0.6166,
      "step": 5750
    },
    {
      "epoch": 0.5142167381974249,
      "grad_norm": 0.16816213412367,
      "learning_rate": 0.00010026064068832328,
      "loss": 0.7209,
      "step": 5751
    },
    {
      "epoch": 0.5143061516452074,
      "grad_norm": 0.16823937525440893,
      "learning_rate": 0.00010023168066689677,
      "loss": 0.6789,
      "step": 5752
    },
    {
      "epoch": 0.51439556509299,
      "grad_norm": 0.14458783238884806,
      "learning_rate": 0.00010020272062603953,
      "loss": 0.6511,
      "step": 5753
    },
    {
      "epoch": 0.5144849785407726,
      "grad_norm": 0.14967187416606292,
      "learning_rate": 0.00010017376056818035,
      "loss": 0.6617,
      "step": 5754
    },
    {
      "epoch": 0.514574391988555,
      "grad_norm": 0.15552975007527545,
      "learning_rate": 0.00010014480049574808,
      "loss": 0.6505,
      "step": 5755
    },
    {
      "epoch": 0.5146638054363376,
      "grad_norm": 0.15322891220358728,
      "learning_rate": 0.00010011584041117155,
      "loss": 0.666,
      "step": 5756
    },
    {
      "epoch": 0.5147532188841202,
      "grad_norm": 0.15183582506057106,
      "learning_rate": 0.00010008688031687964,
      "loss": 0.689,
      "step": 5757
    },
    {
      "epoch": 0.5148426323319027,
      "grad_norm": 0.17702858782279546,
      "learning_rate": 0.00010005792021530121,
      "loss": 0.658,
      "step": 5758
    },
    {
      "epoch": 0.5149320457796852,
      "grad_norm": 0.15469587039331623,
      "learning_rate": 0.000100028960108865,
      "loss": 0.6559,
      "step": 5759
    },
    {
      "epoch": 0.5150214592274678,
      "grad_norm": 0.1360716881400435,
      "learning_rate": 0.0001,
      "loss": 0.6175,
      "step": 5760
    },
    {
      "epoch": 0.5151108726752504,
      "grad_norm": 0.1445465757662741,
      "learning_rate": 9.997103989113501e-05,
      "loss": 0.6609,
      "step": 5761
    },
    {
      "epoch": 0.5152002861230329,
      "grad_norm": 0.14299242430052186,
      "learning_rate": 9.994207978469885e-05,
      "loss": 0.6794,
      "step": 5762
    },
    {
      "epoch": 0.5152896995708155,
      "grad_norm": 0.13558285833199174,
      "learning_rate": 9.991311968312039e-05,
      "loss": 0.6562,
      "step": 5763
    },
    {
      "epoch": 0.515379113018598,
      "grad_norm": 0.1418861828995991,
      "learning_rate": 9.988415958882845e-05,
      "loss": 0.6245,
      "step": 5764
    },
    {
      "epoch": 0.5154685264663805,
      "grad_norm": 0.1423067709514053,
      "learning_rate": 9.985519950425196e-05,
      "loss": 0.6458,
      "step": 5765
    },
    {
      "epoch": 0.5155579399141631,
      "grad_norm": 0.1450305443123626,
      "learning_rate": 9.982623943181966e-05,
      "loss": 0.6726,
      "step": 5766
    },
    {
      "epoch": 0.5156473533619457,
      "grad_norm": 0.1489635948606901,
      "learning_rate": 9.979727937396048e-05,
      "loss": 0.6641,
      "step": 5767
    },
    {
      "epoch": 0.5157367668097281,
      "grad_norm": 0.1879365046814601,
      "learning_rate": 9.976831933310324e-05,
      "loss": 0.3832,
      "step": 5768
    },
    {
      "epoch": 0.5158261802575107,
      "grad_norm": 0.15299226269824684,
      "learning_rate": 9.973935931167677e-05,
      "loss": 0.6739,
      "step": 5769
    },
    {
      "epoch": 0.5159155937052933,
      "grad_norm": 0.1494911846166943,
      "learning_rate": 9.971039931210993e-05,
      "loss": 0.6399,
      "step": 5770
    },
    {
      "epoch": 0.5160050071530758,
      "grad_norm": 0.17030691437211046,
      "learning_rate": 9.968143933683149e-05,
      "loss": 0.6976,
      "step": 5771
    },
    {
      "epoch": 0.5160944206008584,
      "grad_norm": 0.13446392733683865,
      "learning_rate": 9.965247938827041e-05,
      "loss": 0.6189,
      "step": 5772
    },
    {
      "epoch": 0.5161838340486409,
      "grad_norm": 0.16295011516069158,
      "learning_rate": 9.962351946885544e-05,
      "loss": 0.6475,
      "step": 5773
    },
    {
      "epoch": 0.5162732474964234,
      "grad_norm": 0.16858426938733595,
      "learning_rate": 9.959455958101546e-05,
      "loss": 0.719,
      "step": 5774
    },
    {
      "epoch": 0.516362660944206,
      "grad_norm": 0.16716181796023824,
      "learning_rate": 9.956559972717925e-05,
      "loss": 0.7011,
      "step": 5775
    },
    {
      "epoch": 0.5164520743919886,
      "grad_norm": 0.15980695435714723,
      "learning_rate": 9.953663990977568e-05,
      "loss": 0.6749,
      "step": 5776
    },
    {
      "epoch": 0.516541487839771,
      "grad_norm": 0.16295332772432283,
      "learning_rate": 9.950768013123358e-05,
      "loss": 0.6777,
      "step": 5777
    },
    {
      "epoch": 0.5166309012875536,
      "grad_norm": 0.17722231935050645,
      "learning_rate": 9.94787203939817e-05,
      "loss": 0.6837,
      "step": 5778
    },
    {
      "epoch": 0.5167203147353362,
      "grad_norm": 0.1322238776434916,
      "learning_rate": 9.944976070044894e-05,
      "loss": 0.651,
      "step": 5779
    },
    {
      "epoch": 0.5168097281831188,
      "grad_norm": 0.1348596441355082,
      "learning_rate": 9.94208010530641e-05,
      "loss": 0.6723,
      "step": 5780
    },
    {
      "epoch": 0.5168991416309013,
      "grad_norm": 0.15302793887587532,
      "learning_rate": 9.939184145425598e-05,
      "loss": 0.7021,
      "step": 5781
    },
    {
      "epoch": 0.5169885550786838,
      "grad_norm": 0.13826209852302623,
      "learning_rate": 9.936288190645336e-05,
      "loss": 0.6594,
      "step": 5782
    },
    {
      "epoch": 0.5170779685264664,
      "grad_norm": 0.14241796737421147,
      "learning_rate": 9.933392241208507e-05,
      "loss": 0.6507,
      "step": 5783
    },
    {
      "epoch": 0.5171673819742489,
      "grad_norm": 0.16692422332940982,
      "learning_rate": 9.930496297357993e-05,
      "loss": 0.6731,
      "step": 5784
    },
    {
      "epoch": 0.5172567954220315,
      "grad_norm": 0.16026466047515217,
      "learning_rate": 9.927600359336666e-05,
      "loss": 0.6906,
      "step": 5785
    },
    {
      "epoch": 0.517346208869814,
      "grad_norm": 0.1671833377160587,
      "learning_rate": 9.92470442738741e-05,
      "loss": 0.6407,
      "step": 5786
    },
    {
      "epoch": 0.5174356223175965,
      "grad_norm": 0.15053031314269283,
      "learning_rate": 9.921808501753106e-05,
      "loss": 0.6268,
      "step": 5787
    },
    {
      "epoch": 0.5175250357653791,
      "grad_norm": 0.16605984710616212,
      "learning_rate": 9.918912582676629e-05,
      "loss": 0.6643,
      "step": 5788
    },
    {
      "epoch": 0.5176144492131617,
      "grad_norm": 0.14922785909660738,
      "learning_rate": 9.916016670400851e-05,
      "loss": 0.6602,
      "step": 5789
    },
    {
      "epoch": 0.5177038626609443,
      "grad_norm": 0.14958684120322394,
      "learning_rate": 9.913120765168657e-05,
      "loss": 0.6352,
      "step": 5790
    },
    {
      "epoch": 0.5177932761087267,
      "grad_norm": 0.15525626469644113,
      "learning_rate": 9.910224867222921e-05,
      "loss": 0.676,
      "step": 5791
    },
    {
      "epoch": 0.5178826895565093,
      "grad_norm": 0.14993074402995693,
      "learning_rate": 9.907328976806511e-05,
      "loss": 0.663,
      "step": 5792
    },
    {
      "epoch": 0.5179721030042919,
      "grad_norm": 0.1578130362125,
      "learning_rate": 9.904433094162311e-05,
      "loss": 0.6649,
      "step": 5793
    },
    {
      "epoch": 0.5180615164520744,
      "grad_norm": 0.1591028051166114,
      "learning_rate": 9.901537219533194e-05,
      "loss": 0.6428,
      "step": 5794
    },
    {
      "epoch": 0.5181509298998569,
      "grad_norm": 0.14559381007465488,
      "learning_rate": 9.89864135316203e-05,
      "loss": 0.6661,
      "step": 5795
    },
    {
      "epoch": 0.5182403433476395,
      "grad_norm": 0.16513115120736402,
      "learning_rate": 9.895745495291693e-05,
      "loss": 0.6941,
      "step": 5796
    },
    {
      "epoch": 0.518329756795422,
      "grad_norm": 0.16567942031349508,
      "learning_rate": 9.892849646165057e-05,
      "loss": 0.6339,
      "step": 5797
    },
    {
      "epoch": 0.5184191702432046,
      "grad_norm": 0.15924629073182808,
      "learning_rate": 9.889953806024991e-05,
      "loss": 0.6857,
      "step": 5798
    },
    {
      "epoch": 0.5185085836909872,
      "grad_norm": 0.16095890903838395,
      "learning_rate": 9.887057975114362e-05,
      "loss": 0.6328,
      "step": 5799
    },
    {
      "epoch": 0.5185979971387696,
      "grad_norm": 0.20263107391316018,
      "learning_rate": 9.884162153676048e-05,
      "loss": 0.3879,
      "step": 5800
    },
    {
      "epoch": 0.5186874105865522,
      "grad_norm": 0.15691984715811755,
      "learning_rate": 9.881266341952915e-05,
      "loss": 0.6752,
      "step": 5801
    },
    {
      "epoch": 0.5187768240343348,
      "grad_norm": 0.17197994654377605,
      "learning_rate": 9.878370540187831e-05,
      "loss": 0.6448,
      "step": 5802
    },
    {
      "epoch": 0.5188662374821174,
      "grad_norm": 0.1512585667712321,
      "learning_rate": 9.875474748623661e-05,
      "loss": 0.6952,
      "step": 5803
    },
    {
      "epoch": 0.5189556509298998,
      "grad_norm": 0.1547926946555453,
      "learning_rate": 9.872578967503275e-05,
      "loss": 0.7136,
      "step": 5804
    },
    {
      "epoch": 0.5190450643776824,
      "grad_norm": 0.15478854914183068,
      "learning_rate": 9.869683197069533e-05,
      "loss": 0.6552,
      "step": 5805
    },
    {
      "epoch": 0.519134477825465,
      "grad_norm": 0.1647344464552666,
      "learning_rate": 9.866787437565308e-05,
      "loss": 0.6852,
      "step": 5806
    },
    {
      "epoch": 0.5192238912732475,
      "grad_norm": 0.1455100628126039,
      "learning_rate": 9.863891689233459e-05,
      "loss": 0.6386,
      "step": 5807
    },
    {
      "epoch": 0.51931330472103,
      "grad_norm": 0.14563644717931948,
      "learning_rate": 9.860995952316851e-05,
      "loss": 0.6324,
      "step": 5808
    },
    {
      "epoch": 0.5194027181688126,
      "grad_norm": 0.16753488526648375,
      "learning_rate": 9.858100227058342e-05,
      "loss": 0.6932,
      "step": 5809
    },
    {
      "epoch": 0.5194921316165951,
      "grad_norm": 0.15458660477155314,
      "learning_rate": 9.855204513700797e-05,
      "loss": 0.6829,
      "step": 5810
    },
    {
      "epoch": 0.5195815450643777,
      "grad_norm": 0.16285769467527378,
      "learning_rate": 9.852308812487075e-05,
      "loss": 0.6785,
      "step": 5811
    },
    {
      "epoch": 0.5196709585121603,
      "grad_norm": 0.14623142703118178,
      "learning_rate": 9.84941312366003e-05,
      "loss": 0.6575,
      "step": 5812
    },
    {
      "epoch": 0.5197603719599427,
      "grad_norm": 0.16362592848650087,
      "learning_rate": 9.846517447462527e-05,
      "loss": 0.6883,
      "step": 5813
    },
    {
      "epoch": 0.5198497854077253,
      "grad_norm": 0.15142431983713492,
      "learning_rate": 9.843621784137424e-05,
      "loss": 0.6431,
      "step": 5814
    },
    {
      "epoch": 0.5199391988555079,
      "grad_norm": 0.1686794561678524,
      "learning_rate": 9.840726133927571e-05,
      "loss": 0.6752,
      "step": 5815
    },
    {
      "epoch": 0.5200286123032904,
      "grad_norm": 0.1554599227976086,
      "learning_rate": 9.837830497075824e-05,
      "loss": 0.6548,
      "step": 5816
    },
    {
      "epoch": 0.5201180257510729,
      "grad_norm": 0.14769389848830494,
      "learning_rate": 9.834934873825038e-05,
      "loss": 0.6602,
      "step": 5817
    },
    {
      "epoch": 0.5202074391988555,
      "grad_norm": 0.14149454591668043,
      "learning_rate": 9.832039264418067e-05,
      "loss": 0.6665,
      "step": 5818
    },
    {
      "epoch": 0.520296852646638,
      "grad_norm": 0.15864884091597387,
      "learning_rate": 9.829143669097754e-05,
      "loss": 0.6884,
      "step": 5819
    },
    {
      "epoch": 0.5203862660944206,
      "grad_norm": 0.15064952403796925,
      "learning_rate": 9.826248088106959e-05,
      "loss": 0.6721,
      "step": 5820
    },
    {
      "epoch": 0.5204756795422032,
      "grad_norm": 0.1696781473621303,
      "learning_rate": 9.823352521688528e-05,
      "loss": 0.6656,
      "step": 5821
    },
    {
      "epoch": 0.5205650929899857,
      "grad_norm": 0.1351327728995291,
      "learning_rate": 9.820456970085307e-05,
      "loss": 0.6695,
      "step": 5822
    },
    {
      "epoch": 0.5206545064377682,
      "grad_norm": 0.17244376157812386,
      "learning_rate": 9.817561433540141e-05,
      "loss": 0.6844,
      "step": 5823
    },
    {
      "epoch": 0.5207439198855508,
      "grad_norm": 0.16045749635938542,
      "learning_rate": 9.81466591229588e-05,
      "loss": 0.6844,
      "step": 5824
    },
    {
      "epoch": 0.5208333333333334,
      "grad_norm": 0.16477864277972284,
      "learning_rate": 9.811770406595365e-05,
      "loss": 0.6966,
      "step": 5825
    },
    {
      "epoch": 0.5209227467811158,
      "grad_norm": 0.1347066655949994,
      "learning_rate": 9.808874916681436e-05,
      "loss": 0.6557,
      "step": 5826
    },
    {
      "epoch": 0.5210121602288984,
      "grad_norm": 0.1768027856962532,
      "learning_rate": 9.805979442796936e-05,
      "loss": 0.7057,
      "step": 5827
    },
    {
      "epoch": 0.521101573676681,
      "grad_norm": 0.15529696116617378,
      "learning_rate": 9.80308398518471e-05,
      "loss": 0.7029,
      "step": 5828
    },
    {
      "epoch": 0.5211909871244635,
      "grad_norm": 0.1518917537300373,
      "learning_rate": 9.800188544087592e-05,
      "loss": 0.6841,
      "step": 5829
    },
    {
      "epoch": 0.5212804005722461,
      "grad_norm": 0.152886112290314,
      "learning_rate": 9.797293119748417e-05,
      "loss": 0.6515,
      "step": 5830
    },
    {
      "epoch": 0.5213698140200286,
      "grad_norm": 0.13492576196353687,
      "learning_rate": 9.794397712410025e-05,
      "loss": 0.6366,
      "step": 5831
    },
    {
      "epoch": 0.5214592274678111,
      "grad_norm": 0.1401054311182616,
      "learning_rate": 9.791502322315249e-05,
      "loss": 0.6618,
      "step": 5832
    },
    {
      "epoch": 0.5215486409155937,
      "grad_norm": 0.16225349519367885,
      "learning_rate": 9.788606949706918e-05,
      "loss": 0.6933,
      "step": 5833
    },
    {
      "epoch": 0.5216380543633763,
      "grad_norm": 0.15489172865320727,
      "learning_rate": 9.785711594827868e-05,
      "loss": 0.6807,
      "step": 5834
    },
    {
      "epoch": 0.5217274678111588,
      "grad_norm": 0.13462616521792026,
      "learning_rate": 9.78281625792093e-05,
      "loss": 0.6584,
      "step": 5835
    },
    {
      "epoch": 0.5218168812589413,
      "grad_norm": 0.1432295135358371,
      "learning_rate": 9.779920939228928e-05,
      "loss": 0.6389,
      "step": 5836
    },
    {
      "epoch": 0.5219062947067239,
      "grad_norm": 0.13924560382199133,
      "learning_rate": 9.777025638994693e-05,
      "loss": 0.6375,
      "step": 5837
    },
    {
      "epoch": 0.5219957081545065,
      "grad_norm": 0.1535137878027857,
      "learning_rate": 9.774130357461049e-05,
      "loss": 0.6473,
      "step": 5838
    },
    {
      "epoch": 0.522085121602289,
      "grad_norm": 0.17896889414697906,
      "learning_rate": 9.771235094870817e-05,
      "loss": 0.7372,
      "step": 5839
    },
    {
      "epoch": 0.5221745350500715,
      "grad_norm": 0.16071851952899646,
      "learning_rate": 9.768339851466818e-05,
      "loss": 0.6949,
      "step": 5840
    },
    {
      "epoch": 0.5222639484978541,
      "grad_norm": 0.14669509289952812,
      "learning_rate": 9.765444627491882e-05,
      "loss": 0.6565,
      "step": 5841
    },
    {
      "epoch": 0.5223533619456366,
      "grad_norm": 0.15400891055370747,
      "learning_rate": 9.76254942318882e-05,
      "loss": 0.679,
      "step": 5842
    },
    {
      "epoch": 0.5224427753934192,
      "grad_norm": 0.16004797568035697,
      "learning_rate": 9.759654238800451e-05,
      "loss": 0.6983,
      "step": 5843
    },
    {
      "epoch": 0.5225321888412017,
      "grad_norm": 0.13862336152770707,
      "learning_rate": 9.756759074569591e-05,
      "loss": 0.6409,
      "step": 5844
    },
    {
      "epoch": 0.5226216022889842,
      "grad_norm": 0.16483071811128733,
      "learning_rate": 9.753863930739054e-05,
      "loss": 0.7043,
      "step": 5845
    },
    {
      "epoch": 0.5227110157367668,
      "grad_norm": 0.13559276414194354,
      "learning_rate": 9.75096880755165e-05,
      "loss": 0.6523,
      "step": 5846
    },
    {
      "epoch": 0.5228004291845494,
      "grad_norm": 0.15186935309933688,
      "learning_rate": 9.748073705250188e-05,
      "loss": 0.6479,
      "step": 5847
    },
    {
      "epoch": 0.522889842632332,
      "grad_norm": 0.13340284870053182,
      "learning_rate": 9.745178624077488e-05,
      "loss": 0.6654,
      "step": 5848
    },
    {
      "epoch": 0.5229792560801144,
      "grad_norm": 0.14560235920952536,
      "learning_rate": 9.742283564276347e-05,
      "loss": 0.6387,
      "step": 5849
    },
    {
      "epoch": 0.523068669527897,
      "grad_norm": 0.1385676852008384,
      "learning_rate": 9.739388526089568e-05,
      "loss": 0.6343,
      "step": 5850
    },
    {
      "epoch": 0.5231580829756796,
      "grad_norm": 0.13139033179086404,
      "learning_rate": 9.736493509759962e-05,
      "loss": 0.6569,
      "step": 5851
    },
    {
      "epoch": 0.5232474964234621,
      "grad_norm": 0.14332626706538956,
      "learning_rate": 9.733598515530328e-05,
      "loss": 0.6687,
      "step": 5852
    },
    {
      "epoch": 0.5233369098712446,
      "grad_norm": 0.14154017768152383,
      "learning_rate": 9.730703543643464e-05,
      "loss": 0.6762,
      "step": 5853
    },
    {
      "epoch": 0.5234263233190272,
      "grad_norm": 0.1413662166936894,
      "learning_rate": 9.727808594342164e-05,
      "loss": 0.6804,
      "step": 5854
    },
    {
      "epoch": 0.5235157367668097,
      "grad_norm": 0.15831998014508944,
      "learning_rate": 9.724913667869233e-05,
      "loss": 0.6719,
      "step": 5855
    },
    {
      "epoch": 0.5236051502145923,
      "grad_norm": 0.14315795706772183,
      "learning_rate": 9.722018764467461e-05,
      "loss": 0.6683,
      "step": 5856
    },
    {
      "epoch": 0.5236945636623748,
      "grad_norm": 0.16054304549593915,
      "learning_rate": 9.719123884379637e-05,
      "loss": 0.6752,
      "step": 5857
    },
    {
      "epoch": 0.5237839771101573,
      "grad_norm": 0.153638906844142,
      "learning_rate": 9.716229027848556e-05,
      "loss": 0.6597,
      "step": 5858
    },
    {
      "epoch": 0.5238733905579399,
      "grad_norm": 0.1617439380840365,
      "learning_rate": 9.713334195117004e-05,
      "loss": 0.7072,
      "step": 5859
    },
    {
      "epoch": 0.5239628040057225,
      "grad_norm": 0.1425355403945162,
      "learning_rate": 9.710439386427764e-05,
      "loss": 0.6813,
      "step": 5860
    },
    {
      "epoch": 0.524052217453505,
      "grad_norm": 0.15175636154054883,
      "learning_rate": 9.707544602023622e-05,
      "loss": 0.6707,
      "step": 5861
    },
    {
      "epoch": 0.5241416309012875,
      "grad_norm": 0.16819150102306615,
      "learning_rate": 9.704649842147364e-05,
      "loss": 0.6916,
      "step": 5862
    },
    {
      "epoch": 0.5242310443490701,
      "grad_norm": 0.16686336727996692,
      "learning_rate": 9.701755107041767e-05,
      "loss": 0.6398,
      "step": 5863
    },
    {
      "epoch": 0.5243204577968527,
      "grad_norm": 0.16370471303328843,
      "learning_rate": 9.698860396949608e-05,
      "loss": 0.7082,
      "step": 5864
    },
    {
      "epoch": 0.5244098712446352,
      "grad_norm": 0.16073643663553971,
      "learning_rate": 9.695965712113666e-05,
      "loss": 0.6534,
      "step": 5865
    },
    {
      "epoch": 0.5244992846924177,
      "grad_norm": 0.16931393682474052,
      "learning_rate": 9.69307105277671e-05,
      "loss": 0.7004,
      "step": 5866
    },
    {
      "epoch": 0.5245886981402003,
      "grad_norm": 0.15125404754499713,
      "learning_rate": 9.690176419181516e-05,
      "loss": 0.6967,
      "step": 5867
    },
    {
      "epoch": 0.5246781115879828,
      "grad_norm": 0.15732150247313642,
      "learning_rate": 9.687281811570847e-05,
      "loss": 0.7028,
      "step": 5868
    },
    {
      "epoch": 0.5247675250357654,
      "grad_norm": 0.1346476927679208,
      "learning_rate": 9.68438723018748e-05,
      "loss": 0.6595,
      "step": 5869
    },
    {
      "epoch": 0.524856938483548,
      "grad_norm": 0.14594458031777843,
      "learning_rate": 9.681492675274171e-05,
      "loss": 0.6524,
      "step": 5870
    },
    {
      "epoch": 0.5249463519313304,
      "grad_norm": 0.15518799683839662,
      "learning_rate": 9.678598147073689e-05,
      "loss": 0.6942,
      "step": 5871
    },
    {
      "epoch": 0.525035765379113,
      "grad_norm": 0.1497538235221995,
      "learning_rate": 9.675703645828794e-05,
      "loss": 0.6433,
      "step": 5872
    },
    {
      "epoch": 0.5251251788268956,
      "grad_norm": 0.1394943280719447,
      "learning_rate": 9.67280917178224e-05,
      "loss": 0.6159,
      "step": 5873
    },
    {
      "epoch": 0.5252145922746781,
      "grad_norm": 0.15185095299778573,
      "learning_rate": 9.669914725176787e-05,
      "loss": 0.6404,
      "step": 5874
    },
    {
      "epoch": 0.5253040057224606,
      "grad_norm": 0.14992194181346835,
      "learning_rate": 9.667020306255183e-05,
      "loss": 0.6485,
      "step": 5875
    },
    {
      "epoch": 0.5253934191702432,
      "grad_norm": 0.1516684609713369,
      "learning_rate": 9.66412591526019e-05,
      "loss": 0.6615,
      "step": 5876
    },
    {
      "epoch": 0.5254828326180258,
      "grad_norm": 0.128536242806738,
      "learning_rate": 9.661231552434546e-05,
      "loss": 0.6128,
      "step": 5877
    },
    {
      "epoch": 0.5255722460658083,
      "grad_norm": 0.14467138416533554,
      "learning_rate": 9.658337218021007e-05,
      "loss": 0.6879,
      "step": 5878
    },
    {
      "epoch": 0.5256616595135909,
      "grad_norm": 0.15612918674427678,
      "learning_rate": 9.655442912262311e-05,
      "loss": 0.6909,
      "step": 5879
    },
    {
      "epoch": 0.5257510729613734,
      "grad_norm": 0.1533345885612698,
      "learning_rate": 9.652548635401201e-05,
      "loss": 0.654,
      "step": 5880
    },
    {
      "epoch": 0.5258404864091559,
      "grad_norm": 0.16654441886571636,
      "learning_rate": 9.64965438768042e-05,
      "loss": 0.7072,
      "step": 5881
    },
    {
      "epoch": 0.5259298998569385,
      "grad_norm": 0.1455180949679573,
      "learning_rate": 9.646760169342696e-05,
      "loss": 0.6415,
      "step": 5882
    },
    {
      "epoch": 0.5260193133047211,
      "grad_norm": 0.1424149887324472,
      "learning_rate": 9.643865980630775e-05,
      "loss": 0.6833,
      "step": 5883
    },
    {
      "epoch": 0.5261087267525035,
      "grad_norm": 0.13615936690865238,
      "learning_rate": 9.640971821787382e-05,
      "loss": 0.6181,
      "step": 5884
    },
    {
      "epoch": 0.5261981402002861,
      "grad_norm": 0.1569256251438213,
      "learning_rate": 9.638077693055252e-05,
      "loss": 0.6685,
      "step": 5885
    },
    {
      "epoch": 0.5262875536480687,
      "grad_norm": 0.157162842971476,
      "learning_rate": 9.635183594677107e-05,
      "loss": 0.6492,
      "step": 5886
    },
    {
      "epoch": 0.5263769670958512,
      "grad_norm": 0.1502230867253286,
      "learning_rate": 9.632289526895672e-05,
      "loss": 0.6627,
      "step": 5887
    },
    {
      "epoch": 0.5264663805436338,
      "grad_norm": 0.14219143122277528,
      "learning_rate": 9.629395489953669e-05,
      "loss": 0.6693,
      "step": 5888
    },
    {
      "epoch": 0.5265557939914163,
      "grad_norm": 0.1534115204181161,
      "learning_rate": 9.626501484093823e-05,
      "loss": 0.6527,
      "step": 5889
    },
    {
      "epoch": 0.5266452074391988,
      "grad_norm": 0.15247469490919072,
      "learning_rate": 9.623607509558846e-05,
      "loss": 0.6696,
      "step": 5890
    },
    {
      "epoch": 0.5267346208869814,
      "grad_norm": 0.16062482623167648,
      "learning_rate": 9.620713566591449e-05,
      "loss": 0.6849,
      "step": 5891
    },
    {
      "epoch": 0.526824034334764,
      "grad_norm": 0.19428485715925103,
      "learning_rate": 9.61781965543435e-05,
      "loss": 0.7353,
      "step": 5892
    },
    {
      "epoch": 0.5269134477825465,
      "grad_norm": 0.159509239027955,
      "learning_rate": 9.614925776330254e-05,
      "loss": 0.6243,
      "step": 5893
    },
    {
      "epoch": 0.527002861230329,
      "grad_norm": 0.14198637611229584,
      "learning_rate": 9.612031929521869e-05,
      "loss": 0.6707,
      "step": 5894
    },
    {
      "epoch": 0.5270922746781116,
      "grad_norm": 0.154500560342627,
      "learning_rate": 9.609138115251894e-05,
      "loss": 0.6214,
      "step": 5895
    },
    {
      "epoch": 0.5271816881258942,
      "grad_norm": 0.13713583406113783,
      "learning_rate": 9.606244333763038e-05,
      "loss": 0.6605,
      "step": 5896
    },
    {
      "epoch": 0.5272711015736766,
      "grad_norm": 0.16894513832401692,
      "learning_rate": 9.603350585297991e-05,
      "loss": 0.7053,
      "step": 5897
    },
    {
      "epoch": 0.5273605150214592,
      "grad_norm": 0.14741928616522457,
      "learning_rate": 9.600456870099454e-05,
      "loss": 0.6473,
      "step": 5898
    },
    {
      "epoch": 0.5274499284692418,
      "grad_norm": 0.14632065521788862,
      "learning_rate": 9.597563188410116e-05,
      "loss": 0.6646,
      "step": 5899
    },
    {
      "epoch": 0.5275393419170243,
      "grad_norm": 0.14895880240271533,
      "learning_rate": 9.594669540472666e-05,
      "loss": 0.6356,
      "step": 5900
    },
    {
      "epoch": 0.5276287553648069,
      "grad_norm": 0.14705871895633782,
      "learning_rate": 9.591775926529793e-05,
      "loss": 0.6585,
      "step": 5901
    },
    {
      "epoch": 0.5277181688125894,
      "grad_norm": 0.1610948463326484,
      "learning_rate": 9.588882346824177e-05,
      "loss": 0.6192,
      "step": 5902
    },
    {
      "epoch": 0.5278075822603719,
      "grad_norm": 0.15152421983721598,
      "learning_rate": 9.585988801598506e-05,
      "loss": 0.6221,
      "step": 5903
    },
    {
      "epoch": 0.5278969957081545,
      "grad_norm": 0.152026170588076,
      "learning_rate": 9.583095291095453e-05,
      "loss": 0.6582,
      "step": 5904
    },
    {
      "epoch": 0.5279864091559371,
      "grad_norm": 0.16716282465005403,
      "learning_rate": 9.580201815557695e-05,
      "loss": 0.6872,
      "step": 5905
    },
    {
      "epoch": 0.5280758226037195,
      "grad_norm": 0.14391678331806895,
      "learning_rate": 9.577308375227906e-05,
      "loss": 0.683,
      "step": 5906
    },
    {
      "epoch": 0.5281652360515021,
      "grad_norm": 0.1571282515565406,
      "learning_rate": 9.574414970348749e-05,
      "loss": 0.7017,
      "step": 5907
    },
    {
      "epoch": 0.5282546494992847,
      "grad_norm": 0.1805390602693019,
      "learning_rate": 9.571521601162897e-05,
      "loss": 0.7039,
      "step": 5908
    },
    {
      "epoch": 0.5283440629470673,
      "grad_norm": 0.14678777524705805,
      "learning_rate": 9.568628267913007e-05,
      "loss": 0.6115,
      "step": 5909
    },
    {
      "epoch": 0.5284334763948498,
      "grad_norm": 0.14828267209750562,
      "learning_rate": 9.565734970841747e-05,
      "loss": 0.6404,
      "step": 5910
    },
    {
      "epoch": 0.5285228898426323,
      "grad_norm": 0.1293468854323699,
      "learning_rate": 9.562841710191769e-05,
      "loss": 0.6283,
      "step": 5911
    },
    {
      "epoch": 0.5286123032904149,
      "grad_norm": 0.15403251950459654,
      "learning_rate": 9.55994848620573e-05,
      "loss": 0.6753,
      "step": 5912
    },
    {
      "epoch": 0.5287017167381974,
      "grad_norm": 0.16357583660948985,
      "learning_rate": 9.55705529912628e-05,
      "loss": 0.6792,
      "step": 5913
    },
    {
      "epoch": 0.52879113018598,
      "grad_norm": 0.16019885883910734,
      "learning_rate": 9.554162149196066e-05,
      "loss": 0.6659,
      "step": 5914
    },
    {
      "epoch": 0.5288805436337625,
      "grad_norm": 0.13947256228826221,
      "learning_rate": 9.551269036657736e-05,
      "loss": 0.6311,
      "step": 5915
    },
    {
      "epoch": 0.528969957081545,
      "grad_norm": 0.18459381978778505,
      "learning_rate": 9.548375961753926e-05,
      "loss": 0.3726,
      "step": 5916
    },
    {
      "epoch": 0.5290593705293276,
      "grad_norm": 0.16341998136362668,
      "learning_rate": 9.545482924727282e-05,
      "loss": 0.7111,
      "step": 5917
    },
    {
      "epoch": 0.5291487839771102,
      "grad_norm": 0.1753875877249714,
      "learning_rate": 9.542589925820435e-05,
      "loss": 0.6979,
      "step": 5918
    },
    {
      "epoch": 0.5292381974248928,
      "grad_norm": 0.1636426580891589,
      "learning_rate": 9.539696965276019e-05,
      "loss": 0.6878,
      "step": 5919
    },
    {
      "epoch": 0.5293276108726752,
      "grad_norm": 0.15891815011485402,
      "learning_rate": 9.536804043336664e-05,
      "loss": 0.6595,
      "step": 5920
    },
    {
      "epoch": 0.5294170243204578,
      "grad_norm": 0.15910806313306772,
      "learning_rate": 9.533911160244993e-05,
      "loss": 0.6812,
      "step": 5921
    },
    {
      "epoch": 0.5295064377682404,
      "grad_norm": 0.1684824932536405,
      "learning_rate": 9.53101831624363e-05,
      "loss": 0.7126,
      "step": 5922
    },
    {
      "epoch": 0.5295958512160229,
      "grad_norm": 0.14693239102951866,
      "learning_rate": 9.528125511575193e-05,
      "loss": 0.643,
      "step": 5923
    },
    {
      "epoch": 0.5296852646638054,
      "grad_norm": 0.1380157726094972,
      "learning_rate": 9.525232746482301e-05,
      "loss": 0.6456,
      "step": 5924
    },
    {
      "epoch": 0.529774678111588,
      "grad_norm": 0.1412346044715208,
      "learning_rate": 9.522340021207564e-05,
      "loss": 0.6477,
      "step": 5925
    },
    {
      "epoch": 0.5298640915593705,
      "grad_norm": 0.14727664179199515,
      "learning_rate": 9.519447335993595e-05,
      "loss": 0.6625,
      "step": 5926
    },
    {
      "epoch": 0.5299535050071531,
      "grad_norm": 0.12707504069194006,
      "learning_rate": 9.516554691082995e-05,
      "loss": 0.6154,
      "step": 5927
    },
    {
      "epoch": 0.5300429184549357,
      "grad_norm": 0.142776926121997,
      "learning_rate": 9.513662086718372e-05,
      "loss": 0.6505,
      "step": 5928
    },
    {
      "epoch": 0.5301323319027181,
      "grad_norm": 0.15546252110202755,
      "learning_rate": 9.510769523142322e-05,
      "loss": 0.6511,
      "step": 5929
    },
    {
      "epoch": 0.5302217453505007,
      "grad_norm": 0.16186467815892322,
      "learning_rate": 9.507877000597437e-05,
      "loss": 0.6706,
      "step": 5930
    },
    {
      "epoch": 0.5303111587982833,
      "grad_norm": 0.16057387393225694,
      "learning_rate": 9.504984519326316e-05,
      "loss": 0.6848,
      "step": 5931
    },
    {
      "epoch": 0.5304005722460658,
      "grad_norm": 0.16456365626449315,
      "learning_rate": 9.502092079571547e-05,
      "loss": 0.649,
      "step": 5932
    },
    {
      "epoch": 0.5304899856938483,
      "grad_norm": 0.15100639162683488,
      "learning_rate": 9.499199681575716e-05,
      "loss": 0.6697,
      "step": 5933
    },
    {
      "epoch": 0.5305793991416309,
      "grad_norm": 0.144522450625704,
      "learning_rate": 9.496307325581398e-05,
      "loss": 0.6596,
      "step": 5934
    },
    {
      "epoch": 0.5306688125894135,
      "grad_norm": 0.1494470431912151,
      "learning_rate": 9.49341501183118e-05,
      "loss": 0.6476,
      "step": 5935
    },
    {
      "epoch": 0.530758226037196,
      "grad_norm": 0.15184532856377606,
      "learning_rate": 9.490522740567633e-05,
      "loss": 0.6577,
      "step": 5936
    },
    {
      "epoch": 0.5308476394849786,
      "grad_norm": 0.16333885805720727,
      "learning_rate": 9.487630512033325e-05,
      "loss": 0.6736,
      "step": 5937
    },
    {
      "epoch": 0.530937052932761,
      "grad_norm": 0.14932975419958167,
      "learning_rate": 9.484738326470828e-05,
      "loss": 0.6645,
      "step": 5938
    },
    {
      "epoch": 0.5310264663805436,
      "grad_norm": 0.14819592447725355,
      "learning_rate": 9.481846184122707e-05,
      "loss": 0.6525,
      "step": 5939
    },
    {
      "epoch": 0.5311158798283262,
      "grad_norm": 0.14605677736807363,
      "learning_rate": 9.478954085231522e-05,
      "loss": 0.6439,
      "step": 5940
    },
    {
      "epoch": 0.5312052932761088,
      "grad_norm": 0.14046335926678488,
      "learning_rate": 9.476062030039825e-05,
      "loss": 0.6323,
      "step": 5941
    },
    {
      "epoch": 0.5312947067238912,
      "grad_norm": 0.15000198636993384,
      "learning_rate": 9.473170018790176e-05,
      "loss": 0.6613,
      "step": 5942
    },
    {
      "epoch": 0.5313841201716738,
      "grad_norm": 0.1671863644052143,
      "learning_rate": 9.470278051725122e-05,
      "loss": 0.6827,
      "step": 5943
    },
    {
      "epoch": 0.5314735336194564,
      "grad_norm": 0.1774562274511648,
      "learning_rate": 9.467386129087202e-05,
      "loss": 0.6978,
      "step": 5944
    },
    {
      "epoch": 0.531562947067239,
      "grad_norm": 0.13027529042615882,
      "learning_rate": 9.464494251118968e-05,
      "loss": 0.6242,
      "step": 5945
    },
    {
      "epoch": 0.5316523605150214,
      "grad_norm": 0.14717441376280288,
      "learning_rate": 9.461602418062956e-05,
      "loss": 0.6587,
      "step": 5946
    },
    {
      "epoch": 0.531741773962804,
      "grad_norm": 0.16309905882275694,
      "learning_rate": 9.458710630161698e-05,
      "loss": 0.6508,
      "step": 5947
    },
    {
      "epoch": 0.5318311874105865,
      "grad_norm": 0.1483712022137572,
      "learning_rate": 9.455818887657725e-05,
      "loss": 0.6959,
      "step": 5948
    },
    {
      "epoch": 0.5319206008583691,
      "grad_norm": 0.16579275005954036,
      "learning_rate": 9.452927190793566e-05,
      "loss": 0.685,
      "step": 5949
    },
    {
      "epoch": 0.5320100143061517,
      "grad_norm": 0.16228209955493691,
      "learning_rate": 9.450035539811741e-05,
      "loss": 0.6758,
      "step": 5950
    },
    {
      "epoch": 0.5320994277539342,
      "grad_norm": 0.15155341673112818,
      "learning_rate": 9.447143934954771e-05,
      "loss": 0.6329,
      "step": 5951
    },
    {
      "epoch": 0.5321888412017167,
      "grad_norm": 0.13499100167595712,
      "learning_rate": 9.444252376465171e-05,
      "loss": 0.6507,
      "step": 5952
    },
    {
      "epoch": 0.5322782546494993,
      "grad_norm": 0.16537489362396987,
      "learning_rate": 9.441360864585456e-05,
      "loss": 0.6874,
      "step": 5953
    },
    {
      "epoch": 0.5323676680972819,
      "grad_norm": 0.14903920009984092,
      "learning_rate": 9.438469399558128e-05,
      "loss": 0.6479,
      "step": 5954
    },
    {
      "epoch": 0.5324570815450643,
      "grad_norm": 0.16120681697320632,
      "learning_rate": 9.435577981625697e-05,
      "loss": 0.6779,
      "step": 5955
    },
    {
      "epoch": 0.5325464949928469,
      "grad_norm": 0.13737011657927575,
      "learning_rate": 9.432686611030657e-05,
      "loss": 0.6388,
      "step": 5956
    },
    {
      "epoch": 0.5326359084406295,
      "grad_norm": 0.1385066931775262,
      "learning_rate": 9.429795288015504e-05,
      "loss": 0.6515,
      "step": 5957
    },
    {
      "epoch": 0.532725321888412,
      "grad_norm": 0.14770313767232512,
      "learning_rate": 9.42690401282273e-05,
      "loss": 0.6352,
      "step": 5958
    },
    {
      "epoch": 0.5328147353361946,
      "grad_norm": 0.16047107297454433,
      "learning_rate": 9.424012785694827e-05,
      "loss": 0.6969,
      "step": 5959
    },
    {
      "epoch": 0.5329041487839771,
      "grad_norm": 0.142845663892146,
      "learning_rate": 9.421121606874278e-05,
      "loss": 0.6318,
      "step": 5960
    },
    {
      "epoch": 0.5329935622317596,
      "grad_norm": 0.14551220325787745,
      "learning_rate": 9.418230476603558e-05,
      "loss": 0.6423,
      "step": 5961
    },
    {
      "epoch": 0.5330829756795422,
      "grad_norm": 0.15788337658223903,
      "learning_rate": 9.415339395125147e-05,
      "loss": 0.6458,
      "step": 5962
    },
    {
      "epoch": 0.5331723891273248,
      "grad_norm": 0.13941442159936837,
      "learning_rate": 9.412448362681516e-05,
      "loss": 0.6419,
      "step": 5963
    },
    {
      "epoch": 0.5332618025751072,
      "grad_norm": 0.1462775063899294,
      "learning_rate": 9.409557379515127e-05,
      "loss": 0.6529,
      "step": 5964
    },
    {
      "epoch": 0.5333512160228898,
      "grad_norm": 0.16731513798508124,
      "learning_rate": 9.406666445868448e-05,
      "loss": 0.6708,
      "step": 5965
    },
    {
      "epoch": 0.5334406294706724,
      "grad_norm": 0.15583969431147499,
      "learning_rate": 9.40377556198394e-05,
      "loss": 0.6632,
      "step": 5966
    },
    {
      "epoch": 0.533530042918455,
      "grad_norm": 0.15606940616451362,
      "learning_rate": 9.400884728104056e-05,
      "loss": 0.66,
      "step": 5967
    },
    {
      "epoch": 0.5336194563662375,
      "grad_norm": 0.14864006195728993,
      "learning_rate": 9.397993944471244e-05,
      "loss": 0.6579,
      "step": 5968
    },
    {
      "epoch": 0.53370886981402,
      "grad_norm": 0.1379956622866906,
      "learning_rate": 9.395103211327955e-05,
      "loss": 0.6294,
      "step": 5969
    },
    {
      "epoch": 0.5337982832618026,
      "grad_norm": 0.14560313342851822,
      "learning_rate": 9.39221252891663e-05,
      "loss": 0.6534,
      "step": 5970
    },
    {
      "epoch": 0.5338876967095851,
      "grad_norm": 0.15706659840032433,
      "learning_rate": 9.389321897479703e-05,
      "loss": 0.6364,
      "step": 5971
    },
    {
      "epoch": 0.5339771101573677,
      "grad_norm": 0.15821176354840333,
      "learning_rate": 9.386431317259609e-05,
      "loss": 0.6498,
      "step": 5972
    },
    {
      "epoch": 0.5340665236051502,
      "grad_norm": 0.16123347073293204,
      "learning_rate": 9.383540788498784e-05,
      "loss": 0.6957,
      "step": 5973
    },
    {
      "epoch": 0.5341559370529327,
      "grad_norm": 0.14544123126865038,
      "learning_rate": 9.380650311439649e-05,
      "loss": 0.682,
      "step": 5974
    },
    {
      "epoch": 0.5342453505007153,
      "grad_norm": 0.1618187422464365,
      "learning_rate": 9.37775988632462e-05,
      "loss": 0.6833,
      "step": 5975
    },
    {
      "epoch": 0.5343347639484979,
      "grad_norm": 0.1442233568052194,
      "learning_rate": 9.374869513396123e-05,
      "loss": 0.7044,
      "step": 5976
    },
    {
      "epoch": 0.5344241773962805,
      "grad_norm": 0.1414256188194143,
      "learning_rate": 9.371979192896564e-05,
      "loss": 0.6763,
      "step": 5977
    },
    {
      "epoch": 0.5345135908440629,
      "grad_norm": 0.14782292742851444,
      "learning_rate": 9.369088925068347e-05,
      "loss": 0.6802,
      "step": 5978
    },
    {
      "epoch": 0.5346030042918455,
      "grad_norm": 0.13705426129793782,
      "learning_rate": 9.366198710153882e-05,
      "loss": 0.6625,
      "step": 5979
    },
    {
      "epoch": 0.5346924177396281,
      "grad_norm": 0.17672893762394534,
      "learning_rate": 9.363308548395568e-05,
      "loss": 0.6446,
      "step": 5980
    },
    {
      "epoch": 0.5347818311874106,
      "grad_norm": 0.16542371574900128,
      "learning_rate": 9.360418440035796e-05,
      "loss": 0.6458,
      "step": 5981
    },
    {
      "epoch": 0.5348712446351931,
      "grad_norm": 0.14255406332918572,
      "learning_rate": 9.357528385316958e-05,
      "loss": 0.6793,
      "step": 5982
    },
    {
      "epoch": 0.5349606580829757,
      "grad_norm": 0.15319019088102792,
      "learning_rate": 9.354638384481437e-05,
      "loss": 0.6075,
      "step": 5983
    },
    {
      "epoch": 0.5350500715307582,
      "grad_norm": 0.13754263164332614,
      "learning_rate": 9.351748437771615e-05,
      "loss": 0.6583,
      "step": 5984
    },
    {
      "epoch": 0.5351394849785408,
      "grad_norm": 0.15199411524033435,
      "learning_rate": 9.348858545429868e-05,
      "loss": 0.6555,
      "step": 5985
    },
    {
      "epoch": 0.5352288984263234,
      "grad_norm": 0.1743624553522423,
      "learning_rate": 9.345968707698569e-05,
      "loss": 0.6674,
      "step": 5986
    },
    {
      "epoch": 0.5353183118741058,
      "grad_norm": 0.16193926786686413,
      "learning_rate": 9.343078924820087e-05,
      "loss": 0.6762,
      "step": 5987
    },
    {
      "epoch": 0.5354077253218884,
      "grad_norm": 0.15755664198143976,
      "learning_rate": 9.340189197036779e-05,
      "loss": 0.665,
      "step": 5988
    },
    {
      "epoch": 0.535497138769671,
      "grad_norm": 0.13806549414989663,
      "learning_rate": 9.337299524591009e-05,
      "loss": 0.6272,
      "step": 5989
    },
    {
      "epoch": 0.5355865522174535,
      "grad_norm": 0.17153460254713493,
      "learning_rate": 9.334409907725128e-05,
      "loss": 0.6552,
      "step": 5990
    },
    {
      "epoch": 0.535675965665236,
      "grad_norm": 0.14290044007008265,
      "learning_rate": 9.33152034668148e-05,
      "loss": 0.6456,
      "step": 5991
    },
    {
      "epoch": 0.5357653791130186,
      "grad_norm": 0.16436069801627876,
      "learning_rate": 9.328630841702414e-05,
      "loss": 0.6638,
      "step": 5992
    },
    {
      "epoch": 0.5358547925608012,
      "grad_norm": 0.16015248644512892,
      "learning_rate": 9.32574139303027e-05,
      "loss": 0.6731,
      "step": 5993
    },
    {
      "epoch": 0.5359442060085837,
      "grad_norm": 0.16377504948704,
      "learning_rate": 9.322852000907383e-05,
      "loss": 0.6709,
      "step": 5994
    },
    {
      "epoch": 0.5360336194563662,
      "grad_norm": 0.1720834157875958,
      "learning_rate": 9.319962665576078e-05,
      "loss": 0.6729,
      "step": 5995
    },
    {
      "epoch": 0.5361230329041488,
      "grad_norm": 0.17064343347109928,
      "learning_rate": 9.317073387278686e-05,
      "loss": 0.6846,
      "step": 5996
    },
    {
      "epoch": 0.5362124463519313,
      "grad_norm": 0.14331667079837662,
      "learning_rate": 9.314184166257524e-05,
      "loss": 0.6302,
      "step": 5997
    },
    {
      "epoch": 0.5363018597997139,
      "grad_norm": 0.1585828977509953,
      "learning_rate": 9.311295002754905e-05,
      "loss": 0.6903,
      "step": 5998
    },
    {
      "epoch": 0.5363912732474965,
      "grad_norm": 0.14621866048215504,
      "learning_rate": 9.30840589701314e-05,
      "loss": 0.6861,
      "step": 5999
    },
    {
      "epoch": 0.5364806866952789,
      "grad_norm": 0.16052958463676237,
      "learning_rate": 9.305516849274541e-05,
      "loss": 0.6284,
      "step": 6000
    },
    {
      "epoch": 0.5365701001430615,
      "grad_norm": 0.15039389859217409,
      "learning_rate": 9.302627859781406e-05,
      "loss": 0.6472,
      "step": 6001
    },
    {
      "epoch": 0.5366595135908441,
      "grad_norm": 0.15370547791786593,
      "learning_rate": 9.299738928776029e-05,
      "loss": 0.6446,
      "step": 6002
    },
    {
      "epoch": 0.5367489270386266,
      "grad_norm": 0.1576321565952753,
      "learning_rate": 9.296850056500703e-05,
      "loss": 0.6902,
      "step": 6003
    },
    {
      "epoch": 0.5368383404864091,
      "grad_norm": 0.17820842225058808,
      "learning_rate": 9.293961243197715e-05,
      "loss": 0.6974,
      "step": 6004
    },
    {
      "epoch": 0.5369277539341917,
      "grad_norm": 0.1659238556337091,
      "learning_rate": 9.29107248910934e-05,
      "loss": 0.667,
      "step": 6005
    },
    {
      "epoch": 0.5370171673819742,
      "grad_norm": 0.1506721633487485,
      "learning_rate": 9.28818379447786e-05,
      "loss": 0.6442,
      "step": 6006
    },
    {
      "epoch": 0.5371065808297568,
      "grad_norm": 0.1485831984757742,
      "learning_rate": 9.285295159545547e-05,
      "loss": 0.665,
      "step": 6007
    },
    {
      "epoch": 0.5371959942775394,
      "grad_norm": 0.15225349852436598,
      "learning_rate": 9.282406584554668e-05,
      "loss": 0.6767,
      "step": 6008
    },
    {
      "epoch": 0.5372854077253219,
      "grad_norm": 0.14958347633831845,
      "learning_rate": 9.279518069747479e-05,
      "loss": 0.6857,
      "step": 6009
    },
    {
      "epoch": 0.5373748211731044,
      "grad_norm": 0.1316371735982628,
      "learning_rate": 9.276629615366242e-05,
      "loss": 0.6229,
      "step": 6010
    },
    {
      "epoch": 0.537464234620887,
      "grad_norm": 0.16099855210507552,
      "learning_rate": 9.273741221653204e-05,
      "loss": 0.6493,
      "step": 6011
    },
    {
      "epoch": 0.5375536480686696,
      "grad_norm": 0.15351712288246422,
      "learning_rate": 9.270852888850615e-05,
      "loss": 0.672,
      "step": 6012
    },
    {
      "epoch": 0.537643061516452,
      "grad_norm": 0.14481097553320937,
      "learning_rate": 9.267964617200707e-05,
      "loss": 0.6444,
      "step": 6013
    },
    {
      "epoch": 0.5377324749642346,
      "grad_norm": 0.1526877238796846,
      "learning_rate": 9.265076406945727e-05,
      "loss": 0.6559,
      "step": 6014
    },
    {
      "epoch": 0.5378218884120172,
      "grad_norm": 0.1713000145243642,
      "learning_rate": 9.262188258327901e-05,
      "loss": 0.6897,
      "step": 6015
    },
    {
      "epoch": 0.5379113018597997,
      "grad_norm": 0.1569085265413008,
      "learning_rate": 9.259300171589456e-05,
      "loss": 0.6508,
      "step": 6016
    },
    {
      "epoch": 0.5380007153075823,
      "grad_norm": 0.14537664128144862,
      "learning_rate": 9.256412146972611e-05,
      "loss": 0.6297,
      "step": 6017
    },
    {
      "epoch": 0.5380901287553648,
      "grad_norm": 0.14253079186955658,
      "learning_rate": 9.25352418471958e-05,
      "loss": 0.6681,
      "step": 6018
    },
    {
      "epoch": 0.5381795422031473,
      "grad_norm": 0.13687218186586353,
      "learning_rate": 9.250636285072574e-05,
      "loss": 0.6532,
      "step": 6019
    },
    {
      "epoch": 0.5382689556509299,
      "grad_norm": 0.15951013227632924,
      "learning_rate": 9.247748448273796e-05,
      "loss": 0.6644,
      "step": 6020
    },
    {
      "epoch": 0.5383583690987125,
      "grad_norm": 0.15716232087677567,
      "learning_rate": 9.24486067456545e-05,
      "loss": 0.6545,
      "step": 6021
    },
    {
      "epoch": 0.538447782546495,
      "grad_norm": 0.1513279556171732,
      "learning_rate": 9.241972964189726e-05,
      "loss": 0.6251,
      "step": 6022
    },
    {
      "epoch": 0.5385371959942775,
      "grad_norm": 0.15224633578479704,
      "learning_rate": 9.239085317388816e-05,
      "loss": 0.6562,
      "step": 6023
    },
    {
      "epoch": 0.5386266094420601,
      "grad_norm": 0.15173607152854598,
      "learning_rate": 9.236197734404901e-05,
      "loss": 0.6811,
      "step": 6024
    },
    {
      "epoch": 0.5387160228898427,
      "grad_norm": 0.16853620852961995,
      "learning_rate": 9.233310215480157e-05,
      "loss": 0.6672,
      "step": 6025
    },
    {
      "epoch": 0.5388054363376252,
      "grad_norm": 0.1596348114983132,
      "learning_rate": 9.230422760856762e-05,
      "loss": 0.6517,
      "step": 6026
    },
    {
      "epoch": 0.5388948497854077,
      "grad_norm": 0.1518513864819099,
      "learning_rate": 9.227535370776877e-05,
      "loss": 0.6701,
      "step": 6027
    },
    {
      "epoch": 0.5389842632331903,
      "grad_norm": 0.14248837599456413,
      "learning_rate": 9.22464804548267e-05,
      "loss": 0.61,
      "step": 6028
    },
    {
      "epoch": 0.5390736766809728,
      "grad_norm": 0.14738001375368937,
      "learning_rate": 9.221760785216295e-05,
      "loss": 0.6317,
      "step": 6029
    },
    {
      "epoch": 0.5391630901287554,
      "grad_norm": 0.15976335376091017,
      "learning_rate": 9.218873590219905e-05,
      "loss": 0.6951,
      "step": 6030
    },
    {
      "epoch": 0.5392525035765379,
      "grad_norm": 0.15875734249032666,
      "learning_rate": 9.215986460735642e-05,
      "loss": 0.6625,
      "step": 6031
    },
    {
      "epoch": 0.5393419170243204,
      "grad_norm": 0.14485734994483762,
      "learning_rate": 9.213099397005646e-05,
      "loss": 0.6693,
      "step": 6032
    },
    {
      "epoch": 0.539431330472103,
      "grad_norm": 0.14637626819225633,
      "learning_rate": 9.210212399272056e-05,
      "loss": 0.6183,
      "step": 6033
    },
    {
      "epoch": 0.5395207439198856,
      "grad_norm": 0.1558339758824108,
      "learning_rate": 9.207325467776993e-05,
      "loss": 0.6589,
      "step": 6034
    },
    {
      "epoch": 0.539610157367668,
      "grad_norm": 0.16898196559828355,
      "learning_rate": 9.204438602762592e-05,
      "loss": 0.6862,
      "step": 6035
    },
    {
      "epoch": 0.5396995708154506,
      "grad_norm": 0.17375404735745928,
      "learning_rate": 9.201551804470962e-05,
      "loss": 0.6663,
      "step": 6036
    },
    {
      "epoch": 0.5397889842632332,
      "grad_norm": 0.15771696166353624,
      "learning_rate": 9.198665073144218e-05,
      "loss": 0.6454,
      "step": 6037
    },
    {
      "epoch": 0.5398783977110158,
      "grad_norm": 0.14858500847558445,
      "learning_rate": 9.195778409024468e-05,
      "loss": 0.6792,
      "step": 6038
    },
    {
      "epoch": 0.5399678111587983,
      "grad_norm": 0.14317171845702836,
      "learning_rate": 9.19289181235381e-05,
      "loss": 0.623,
      "step": 6039
    },
    {
      "epoch": 0.5400572246065808,
      "grad_norm": 0.14958844189423828,
      "learning_rate": 9.190005283374343e-05,
      "loss": 0.6707,
      "step": 6040
    },
    {
      "epoch": 0.5401466380543634,
      "grad_norm": 0.15659824781907136,
      "learning_rate": 9.187118822328149e-05,
      "loss": 0.6854,
      "step": 6041
    },
    {
      "epoch": 0.5402360515021459,
      "grad_norm": 0.15729032006285784,
      "learning_rate": 9.184232429457323e-05,
      "loss": 0.6827,
      "step": 6042
    },
    {
      "epoch": 0.5403254649499285,
      "grad_norm": 0.15304740108587367,
      "learning_rate": 9.181346105003936e-05,
      "loss": 0.6353,
      "step": 6043
    },
    {
      "epoch": 0.540414878397711,
      "grad_norm": 0.14199818793432714,
      "learning_rate": 9.178459849210063e-05,
      "loss": 0.6663,
      "step": 6044
    },
    {
      "epoch": 0.5405042918454935,
      "grad_norm": 0.1505903744794183,
      "learning_rate": 9.175573662317769e-05,
      "loss": 0.6683,
      "step": 6045
    },
    {
      "epoch": 0.5405937052932761,
      "grad_norm": 0.15227242587042608,
      "learning_rate": 9.172687544569118e-05,
      "loss": 0.6469,
      "step": 6046
    },
    {
      "epoch": 0.5406831187410587,
      "grad_norm": 0.15316694068324746,
      "learning_rate": 9.169801496206165e-05,
      "loss": 0.6784,
      "step": 6047
    },
    {
      "epoch": 0.5407725321888412,
      "grad_norm": 0.17234605058317562,
      "learning_rate": 9.166915517470953e-05,
      "loss": 0.7278,
      "step": 6048
    },
    {
      "epoch": 0.5408619456366237,
      "grad_norm": 0.14421292710834738,
      "learning_rate": 9.164029608605531e-05,
      "loss": 0.6263,
      "step": 6049
    },
    {
      "epoch": 0.5409513590844063,
      "grad_norm": 0.14733379088575982,
      "learning_rate": 9.161143769851941e-05,
      "loss": 0.6546,
      "step": 6050
    },
    {
      "epoch": 0.5410407725321889,
      "grad_norm": 0.16046581176057714,
      "learning_rate": 9.158258001452208e-05,
      "loss": 0.693,
      "step": 6051
    },
    {
      "epoch": 0.5411301859799714,
      "grad_norm": 0.14424176160558758,
      "learning_rate": 9.155372303648359e-05,
      "loss": 0.6557,
      "step": 6052
    },
    {
      "epoch": 0.5412195994277539,
      "grad_norm": 0.1537250814663125,
      "learning_rate": 9.152486676682415e-05,
      "loss": 0.6862,
      "step": 6053
    },
    {
      "epoch": 0.5413090128755365,
      "grad_norm": 0.16951606328217964,
      "learning_rate": 9.149601120796391e-05,
      "loss": 0.6773,
      "step": 6054
    },
    {
      "epoch": 0.541398426323319,
      "grad_norm": 0.15674589672268147,
      "learning_rate": 9.146715636232291e-05,
      "loss": 0.6869,
      "step": 6055
    },
    {
      "epoch": 0.5414878397711016,
      "grad_norm": 0.14907353839557178,
      "learning_rate": 9.14383022323212e-05,
      "loss": 0.6953,
      "step": 6056
    },
    {
      "epoch": 0.5415772532188842,
      "grad_norm": 0.1427855379532341,
      "learning_rate": 9.140944882037879e-05,
      "loss": 0.6416,
      "step": 6057
    },
    {
      "epoch": 0.5416666666666666,
      "grad_norm": 0.1388447651109442,
      "learning_rate": 9.138059612891551e-05,
      "loss": 0.6332,
      "step": 6058
    },
    {
      "epoch": 0.5417560801144492,
      "grad_norm": 0.16540804698160774,
      "learning_rate": 9.13517441603512e-05,
      "loss": 0.6859,
      "step": 6059
    },
    {
      "epoch": 0.5418454935622318,
      "grad_norm": 0.1494934034965563,
      "learning_rate": 9.13228929171057e-05,
      "loss": 0.6983,
      "step": 6060
    },
    {
      "epoch": 0.5419349070100143,
      "grad_norm": 0.14185172523219944,
      "learning_rate": 9.129404240159864e-05,
      "loss": 0.6489,
      "step": 6061
    },
    {
      "epoch": 0.5420243204577968,
      "grad_norm": 0.15604233206284288,
      "learning_rate": 9.126519261624977e-05,
      "loss": 0.6867,
      "step": 6062
    },
    {
      "epoch": 0.5421137339055794,
      "grad_norm": 0.12920844433845935,
      "learning_rate": 9.123634356347863e-05,
      "loss": 0.6232,
      "step": 6063
    },
    {
      "epoch": 0.542203147353362,
      "grad_norm": 0.1468943976809618,
      "learning_rate": 9.12074952457048e-05,
      "loss": 0.6712,
      "step": 6064
    },
    {
      "epoch": 0.5422925608011445,
      "grad_norm": 0.1390131162482025,
      "learning_rate": 9.117864766534772e-05,
      "loss": 0.6194,
      "step": 6065
    },
    {
      "epoch": 0.5423819742489271,
      "grad_norm": 0.13260261149686076,
      "learning_rate": 9.114980082482677e-05,
      "loss": 0.6532,
      "step": 6066
    },
    {
      "epoch": 0.5424713876967096,
      "grad_norm": 0.16404474286993376,
      "learning_rate": 9.112095472656137e-05,
      "loss": 0.6859,
      "step": 6067
    },
    {
      "epoch": 0.5425608011444921,
      "grad_norm": 0.1560550550542068,
      "learning_rate": 9.109210937297074e-05,
      "loss": 0.666,
      "step": 6068
    },
    {
      "epoch": 0.5426502145922747,
      "grad_norm": 0.15458003099511924,
      "learning_rate": 9.106326476647417e-05,
      "loss": 0.6639,
      "step": 6069
    },
    {
      "epoch": 0.5427396280400573,
      "grad_norm": 0.13468715008223753,
      "learning_rate": 9.103442090949077e-05,
      "loss": 0.6169,
      "step": 6070
    },
    {
      "epoch": 0.5428290414878397,
      "grad_norm": 0.15904650907305037,
      "learning_rate": 9.100557780443968e-05,
      "loss": 0.6819,
      "step": 6071
    },
    {
      "epoch": 0.5429184549356223,
      "grad_norm": 0.14704703625769622,
      "learning_rate": 9.09767354537399e-05,
      "loss": 0.6301,
      "step": 6072
    },
    {
      "epoch": 0.5430078683834049,
      "grad_norm": 0.15008085763604778,
      "learning_rate": 9.094789385981045e-05,
      "loss": 0.686,
      "step": 6073
    },
    {
      "epoch": 0.5430972818311874,
      "grad_norm": 0.1471644605022294,
      "learning_rate": 9.09190530250702e-05,
      "loss": 0.6432,
      "step": 6074
    },
    {
      "epoch": 0.54318669527897,
      "grad_norm": 0.14974308027124866,
      "learning_rate": 9.089021295193796e-05,
      "loss": 0.655,
      "step": 6075
    },
    {
      "epoch": 0.5432761087267525,
      "grad_norm": 0.1581288330529147,
      "learning_rate": 9.08613736428326e-05,
      "loss": 0.6239,
      "step": 6076
    },
    {
      "epoch": 0.543365522174535,
      "grad_norm": 0.18443263727813736,
      "learning_rate": 9.083253510017279e-05,
      "loss": 0.6778,
      "step": 6077
    },
    {
      "epoch": 0.5434549356223176,
      "grad_norm": 0.12900306230721043,
      "learning_rate": 9.08036973263772e-05,
      "loss": 0.6249,
      "step": 6078
    },
    {
      "epoch": 0.5435443490701002,
      "grad_norm": 0.14074919380839726,
      "learning_rate": 9.077486032386439e-05,
      "loss": 0.658,
      "step": 6079
    },
    {
      "epoch": 0.5436337625178826,
      "grad_norm": 0.15467714673081215,
      "learning_rate": 9.074602409505293e-05,
      "loss": 0.664,
      "step": 6080
    },
    {
      "epoch": 0.5437231759656652,
      "grad_norm": 0.15847437446843884,
      "learning_rate": 9.071718864236125e-05,
      "loss": 0.6816,
      "step": 6081
    },
    {
      "epoch": 0.5438125894134478,
      "grad_norm": 0.15829933831930035,
      "learning_rate": 9.06883539682077e-05,
      "loss": 0.6798,
      "step": 6082
    },
    {
      "epoch": 0.5439020028612304,
      "grad_norm": 0.1580909080101421,
      "learning_rate": 9.065952007501067e-05,
      "loss": 0.645,
      "step": 6083
    },
    {
      "epoch": 0.5439914163090128,
      "grad_norm": 0.17857136979776034,
      "learning_rate": 9.063068696518843e-05,
      "loss": 0.6876,
      "step": 6084
    },
    {
      "epoch": 0.5440808297567954,
      "grad_norm": 0.15013808454184713,
      "learning_rate": 9.060185464115918e-05,
      "loss": 0.702,
      "step": 6085
    },
    {
      "epoch": 0.544170243204578,
      "grad_norm": 0.16144910217571953,
      "learning_rate": 9.0573023105341e-05,
      "loss": 0.666,
      "step": 6086
    },
    {
      "epoch": 0.5442596566523605,
      "grad_norm": 0.16470497423151922,
      "learning_rate": 9.054419236015201e-05,
      "loss": 0.6615,
      "step": 6087
    },
    {
      "epoch": 0.5443490701001431,
      "grad_norm": 0.16936192199779926,
      "learning_rate": 9.05153624080102e-05,
      "loss": 0.6672,
      "step": 6088
    },
    {
      "epoch": 0.5444384835479256,
      "grad_norm": 0.166816870921426,
      "learning_rate": 9.048653325133343e-05,
      "loss": 0.6557,
      "step": 6089
    },
    {
      "epoch": 0.5445278969957081,
      "grad_norm": 0.15723029502658012,
      "learning_rate": 9.045770489253965e-05,
      "loss": 0.676,
      "step": 6090
    },
    {
      "epoch": 0.5446173104434907,
      "grad_norm": 0.14956098820347974,
      "learning_rate": 9.042887733404666e-05,
      "loss": 0.6444,
      "step": 6091
    },
    {
      "epoch": 0.5447067238912733,
      "grad_norm": 0.14099627315051694,
      "learning_rate": 9.040005057827216e-05,
      "loss": 0.6376,
      "step": 6092
    },
    {
      "epoch": 0.5447961373390557,
      "grad_norm": 0.15644916415436144,
      "learning_rate": 9.037122462763383e-05,
      "loss": 0.6745,
      "step": 6093
    },
    {
      "epoch": 0.5448855507868383,
      "grad_norm": 0.15865289073170344,
      "learning_rate": 9.034239948454925e-05,
      "loss": 0.6579,
      "step": 6094
    },
    {
      "epoch": 0.5449749642346209,
      "grad_norm": 0.15777732594243565,
      "learning_rate": 9.031357515143599e-05,
      "loss": 0.6346,
      "step": 6095
    },
    {
      "epoch": 0.5450643776824035,
      "grad_norm": 0.14829166038578676,
      "learning_rate": 9.028475163071141e-05,
      "loss": 0.6432,
      "step": 6096
    },
    {
      "epoch": 0.545153791130186,
      "grad_norm": 0.14019920416949005,
      "learning_rate": 9.025592892479303e-05,
      "loss": 0.6499,
      "step": 6097
    },
    {
      "epoch": 0.5452432045779685,
      "grad_norm": 0.15461510228852268,
      "learning_rate": 9.022710703609814e-05,
      "loss": 0.6376,
      "step": 6098
    },
    {
      "epoch": 0.5453326180257511,
      "grad_norm": 0.13602825074024394,
      "learning_rate": 9.019828596704394e-05,
      "loss": 0.6476,
      "step": 6099
    },
    {
      "epoch": 0.5454220314735336,
      "grad_norm": 0.14859923094148272,
      "learning_rate": 9.01694657200477e-05,
      "loss": 0.6825,
      "step": 6100
    },
    {
      "epoch": 0.5455114449213162,
      "grad_norm": 0.13909500982040016,
      "learning_rate": 9.014064629752647e-05,
      "loss": 0.643,
      "step": 6101
    },
    {
      "epoch": 0.5456008583690987,
      "grad_norm": 0.15788578589721255,
      "learning_rate": 9.011182770189733e-05,
      "loss": 0.6838,
      "step": 6102
    },
    {
      "epoch": 0.5456902718168812,
      "grad_norm": 0.14669747702870775,
      "learning_rate": 9.008300993557723e-05,
      "loss": 0.6625,
      "step": 6103
    },
    {
      "epoch": 0.5457796852646638,
      "grad_norm": 0.17044937553846087,
      "learning_rate": 9.005419300098316e-05,
      "loss": 0.6542,
      "step": 6104
    },
    {
      "epoch": 0.5458690987124464,
      "grad_norm": 0.14661876550230193,
      "learning_rate": 9.002537690053191e-05,
      "loss": 0.6571,
      "step": 6105
    },
    {
      "epoch": 0.545958512160229,
      "grad_norm": 0.14032832456409436,
      "learning_rate": 8.999656163664023e-05,
      "loss": 0.6857,
      "step": 6106
    },
    {
      "epoch": 0.5460479256080114,
      "grad_norm": 0.13512574517585363,
      "learning_rate": 8.996774721172487e-05,
      "loss": 0.6424,
      "step": 6107
    },
    {
      "epoch": 0.546137339055794,
      "grad_norm": 0.14225499578594716,
      "learning_rate": 8.993893362820241e-05,
      "loss": 0.6663,
      "step": 6108
    },
    {
      "epoch": 0.5462267525035766,
      "grad_norm": 0.14214555231825174,
      "learning_rate": 8.991012088848944e-05,
      "loss": 0.6469,
      "step": 6109
    },
    {
      "epoch": 0.5463161659513591,
      "grad_norm": 0.1537955546329494,
      "learning_rate": 8.988130899500243e-05,
      "loss": 0.6669,
      "step": 6110
    },
    {
      "epoch": 0.5464055793991416,
      "grad_norm": 0.16267361469559855,
      "learning_rate": 8.985249795015784e-05,
      "loss": 0.6696,
      "step": 6111
    },
    {
      "epoch": 0.5464949928469242,
      "grad_norm": 0.1583072140822948,
      "learning_rate": 8.9823687756372e-05,
      "loss": 0.6626,
      "step": 6112
    },
    {
      "epoch": 0.5465844062947067,
      "grad_norm": 0.1663886821384328,
      "learning_rate": 8.979487841606115e-05,
      "loss": 0.668,
      "step": 6113
    },
    {
      "epoch": 0.5466738197424893,
      "grad_norm": 0.15685324594104552,
      "learning_rate": 8.976606993164155e-05,
      "loss": 0.6437,
      "step": 6114
    },
    {
      "epoch": 0.5467632331902719,
      "grad_norm": 0.14759458524360183,
      "learning_rate": 8.97372623055293e-05,
      "loss": 0.6661,
      "step": 6115
    },
    {
      "epoch": 0.5468526466380543,
      "grad_norm": 0.14087413616720232,
      "learning_rate": 8.970845554014044e-05,
      "loss": 0.639,
      "step": 6116
    },
    {
      "epoch": 0.5469420600858369,
      "grad_norm": 0.13981826845245143,
      "learning_rate": 8.967964963789097e-05,
      "loss": 0.6609,
      "step": 6117
    },
    {
      "epoch": 0.5470314735336195,
      "grad_norm": 0.15875454465685504,
      "learning_rate": 8.965084460119687e-05,
      "loss": 0.6653,
      "step": 6118
    },
    {
      "epoch": 0.547120886981402,
      "grad_norm": 0.16461888445848857,
      "learning_rate": 8.962204043247393e-05,
      "loss": 0.7095,
      "step": 6119
    },
    {
      "epoch": 0.5472103004291845,
      "grad_norm": 0.1511960075477892,
      "learning_rate": 8.959323713413791e-05,
      "loss": 0.6577,
      "step": 6120
    },
    {
      "epoch": 0.5472997138769671,
      "grad_norm": 0.13822097441935394,
      "learning_rate": 8.956443470860453e-05,
      "loss": 0.6356,
      "step": 6121
    },
    {
      "epoch": 0.5473891273247496,
      "grad_norm": 0.15080589254987142,
      "learning_rate": 8.953563315828942e-05,
      "loss": 0.6745,
      "step": 6122
    },
    {
      "epoch": 0.5474785407725322,
      "grad_norm": 0.15201220205584828,
      "learning_rate": 8.95068324856081e-05,
      "loss": 0.635,
      "step": 6123
    },
    {
      "epoch": 0.5475679542203148,
      "grad_norm": 0.14773860646532663,
      "learning_rate": 8.947803269297604e-05,
      "loss": 0.6513,
      "step": 6124
    },
    {
      "epoch": 0.5476573676680973,
      "grad_norm": 0.18321048920719013,
      "learning_rate": 8.944923378280871e-05,
      "loss": 0.7263,
      "step": 6125
    },
    {
      "epoch": 0.5477467811158798,
      "grad_norm": 0.1484929889711229,
      "learning_rate": 8.942043575752141e-05,
      "loss": 0.6646,
      "step": 6126
    },
    {
      "epoch": 0.5478361945636624,
      "grad_norm": 0.14778110607383954,
      "learning_rate": 8.939163861952935e-05,
      "loss": 0.606,
      "step": 6127
    },
    {
      "epoch": 0.547925608011445,
      "grad_norm": 0.17366085557924063,
      "learning_rate": 8.936284237124778e-05,
      "loss": 0.6787,
      "step": 6128
    },
    {
      "epoch": 0.5480150214592274,
      "grad_norm": 0.1408268495896862,
      "learning_rate": 8.933404701509175e-05,
      "loss": 0.6312,
      "step": 6129
    },
    {
      "epoch": 0.54810443490701,
      "grad_norm": 0.15368433253687616,
      "learning_rate": 8.930525255347634e-05,
      "loss": 0.6941,
      "step": 6130
    },
    {
      "epoch": 0.5481938483547926,
      "grad_norm": 0.13742367844772213,
      "learning_rate": 8.927645898881644e-05,
      "loss": 0.6392,
      "step": 6131
    },
    {
      "epoch": 0.5482832618025751,
      "grad_norm": 0.13887160075679175,
      "learning_rate": 8.924766632352702e-05,
      "loss": 0.6595,
      "step": 6132
    },
    {
      "epoch": 0.5483726752503576,
      "grad_norm": 0.15516210458143448,
      "learning_rate": 8.92188745600228e-05,
      "loss": 0.6713,
      "step": 6133
    },
    {
      "epoch": 0.5484620886981402,
      "grad_norm": 0.13880959626167294,
      "learning_rate": 8.919008370071859e-05,
      "loss": 0.6568,
      "step": 6134
    },
    {
      "epoch": 0.5485515021459227,
      "grad_norm": 0.16348430435659014,
      "learning_rate": 8.916129374802899e-05,
      "loss": 0.6456,
      "step": 6135
    },
    {
      "epoch": 0.5486409155937053,
      "grad_norm": 0.1529977241593141,
      "learning_rate": 8.913250470436858e-05,
      "loss": 0.67,
      "step": 6136
    },
    {
      "epoch": 0.5487303290414879,
      "grad_norm": 0.14582811568208967,
      "learning_rate": 8.910371657215191e-05,
      "loss": 0.6404,
      "step": 6137
    },
    {
      "epoch": 0.5488197424892703,
      "grad_norm": 0.1468268698855911,
      "learning_rate": 8.907492935379331e-05,
      "loss": 0.625,
      "step": 6138
    },
    {
      "epoch": 0.5489091559370529,
      "grad_norm": 0.17904933894106828,
      "learning_rate": 8.904614305170724e-05,
      "loss": 0.6628,
      "step": 6139
    },
    {
      "epoch": 0.5489985693848355,
      "grad_norm": 0.16153052005381544,
      "learning_rate": 8.90173576683079e-05,
      "loss": 0.688,
      "step": 6140
    },
    {
      "epoch": 0.5490879828326181,
      "grad_norm": 0.15114385001197236,
      "learning_rate": 8.898857320600952e-05,
      "loss": 0.6541,
      "step": 6141
    },
    {
      "epoch": 0.5491773962804005,
      "grad_norm": 0.14792969949149837,
      "learning_rate": 8.895978966722623e-05,
      "loss": 0.6387,
      "step": 6142
    },
    {
      "epoch": 0.5492668097281831,
      "grad_norm": 0.17139076234776257,
      "learning_rate": 8.893100705437201e-05,
      "loss": 0.6998,
      "step": 6143
    },
    {
      "epoch": 0.5493562231759657,
      "grad_norm": 0.15965648349540285,
      "learning_rate": 8.890222536986085e-05,
      "loss": 0.664,
      "step": 6144
    },
    {
      "epoch": 0.5494456366237482,
      "grad_norm": 0.16334595104348268,
      "learning_rate": 8.887344461610668e-05,
      "loss": 0.678,
      "step": 6145
    },
    {
      "epoch": 0.5495350500715308,
      "grad_norm": 0.16253689040733085,
      "learning_rate": 8.884466479552328e-05,
      "loss": 0.6617,
      "step": 6146
    },
    {
      "epoch": 0.5496244635193133,
      "grad_norm": 0.17540448889780283,
      "learning_rate": 8.881588591052434e-05,
      "loss": 0.6671,
      "step": 6147
    },
    {
      "epoch": 0.5497138769670958,
      "grad_norm": 0.1941375150410333,
      "learning_rate": 8.878710796352358e-05,
      "loss": 0.3677,
      "step": 6148
    },
    {
      "epoch": 0.5498032904148784,
      "grad_norm": 0.14667666586559389,
      "learning_rate": 8.875833095693451e-05,
      "loss": 0.6336,
      "step": 6149
    },
    {
      "epoch": 0.549892703862661,
      "grad_norm": 0.1493227288190776,
      "learning_rate": 8.872955489317063e-05,
      "loss": 0.6837,
      "step": 6150
    },
    {
      "epoch": 0.5499821173104434,
      "grad_norm": 0.1646129141242276,
      "learning_rate": 8.870077977464537e-05,
      "loss": 0.6748,
      "step": 6151
    },
    {
      "epoch": 0.550071530758226,
      "grad_norm": 0.15543482483704243,
      "learning_rate": 8.867200560377209e-05,
      "loss": 0.6418,
      "step": 6152
    },
    {
      "epoch": 0.5501609442060086,
      "grad_norm": 0.14334100097083233,
      "learning_rate": 8.864323238296401e-05,
      "loss": 0.6653,
      "step": 6153
    },
    {
      "epoch": 0.5502503576537912,
      "grad_norm": 0.14823152013538965,
      "learning_rate": 8.861446011463432e-05,
      "loss": 0.6445,
      "step": 6154
    },
    {
      "epoch": 0.5503397711015737,
      "grad_norm": 0.14619372420761112,
      "learning_rate": 8.858568880119611e-05,
      "loss": 0.6449,
      "step": 6155
    },
    {
      "epoch": 0.5504291845493562,
      "grad_norm": 0.15779653796681886,
      "learning_rate": 8.855691844506238e-05,
      "loss": 0.6542,
      "step": 6156
    },
    {
      "epoch": 0.5505185979971388,
      "grad_norm": 0.14664923217699413,
      "learning_rate": 8.852814904864611e-05,
      "loss": 0.6844,
      "step": 6157
    },
    {
      "epoch": 0.5506080114449213,
      "grad_norm": 0.1585667879799285,
      "learning_rate": 8.849938061436006e-05,
      "loss": 0.6597,
      "step": 6158
    },
    {
      "epoch": 0.5506974248927039,
      "grad_norm": 0.14428558750082454,
      "learning_rate": 8.847061314461714e-05,
      "loss": 0.6468,
      "step": 6159
    },
    {
      "epoch": 0.5507868383404864,
      "grad_norm": 0.1536523702371531,
      "learning_rate": 8.844184664182993e-05,
      "loss": 0.6355,
      "step": 6160
    },
    {
      "epoch": 0.5508762517882689,
      "grad_norm": 0.13526273935390162,
      "learning_rate": 8.84130811084111e-05,
      "loss": 0.6271,
      "step": 6161
    },
    {
      "epoch": 0.5509656652360515,
      "grad_norm": 0.1456140694437746,
      "learning_rate": 8.838431654677317e-05,
      "loss": 0.6427,
      "step": 6162
    },
    {
      "epoch": 0.5510550786838341,
      "grad_norm": 0.15281901390491304,
      "learning_rate": 8.835555295932857e-05,
      "loss": 0.664,
      "step": 6163
    },
    {
      "epoch": 0.5511444921316166,
      "grad_norm": 0.1631978384810566,
      "learning_rate": 8.832679034848969e-05,
      "loss": 0.6861,
      "step": 6164
    },
    {
      "epoch": 0.5512339055793991,
      "grad_norm": 0.16488345518530606,
      "learning_rate": 8.829802871666877e-05,
      "loss": 0.6713,
      "step": 6165
    },
    {
      "epoch": 0.5513233190271817,
      "grad_norm": 0.1466827118970752,
      "learning_rate": 8.82692680662781e-05,
      "loss": 0.6472,
      "step": 6166
    },
    {
      "epoch": 0.5514127324749643,
      "grad_norm": 0.14865611961237368,
      "learning_rate": 8.824050839972973e-05,
      "loss": 0.6277,
      "step": 6167
    },
    {
      "epoch": 0.5515021459227468,
      "grad_norm": 0.1560482634019954,
      "learning_rate": 8.821174971943572e-05,
      "loss": 0.6391,
      "step": 6168
    },
    {
      "epoch": 0.5515915593705293,
      "grad_norm": 0.15779767754014784,
      "learning_rate": 8.818299202780805e-05,
      "loss": 0.6557,
      "step": 6169
    },
    {
      "epoch": 0.5516809728183119,
      "grad_norm": 0.154411778228252,
      "learning_rate": 8.815423532725852e-05,
      "loss": 0.6547,
      "step": 6170
    },
    {
      "epoch": 0.5517703862660944,
      "grad_norm": 0.13534661959759992,
      "learning_rate": 8.8125479620199e-05,
      "loss": 0.6334,
      "step": 6171
    },
    {
      "epoch": 0.551859799713877,
      "grad_norm": 0.14061133063154485,
      "learning_rate": 8.809672490904111e-05,
      "loss": 0.6639,
      "step": 6172
    },
    {
      "epoch": 0.5519492131616596,
      "grad_norm": 0.14489395790930862,
      "learning_rate": 8.806797119619658e-05,
      "loss": 0.6217,
      "step": 6173
    },
    {
      "epoch": 0.552038626609442,
      "grad_norm": 0.15634088470894686,
      "learning_rate": 8.803921848407687e-05,
      "loss": 0.6894,
      "step": 6174
    },
    {
      "epoch": 0.5521280400572246,
      "grad_norm": 0.1635405822104401,
      "learning_rate": 8.80104667750935e-05,
      "loss": 0.6675,
      "step": 6175
    },
    {
      "epoch": 0.5522174535050072,
      "grad_norm": 0.1640952261196094,
      "learning_rate": 8.798171607165778e-05,
      "loss": 0.7142,
      "step": 6176
    },
    {
      "epoch": 0.5523068669527897,
      "grad_norm": 0.17547558601382082,
      "learning_rate": 8.795296637618101e-05,
      "loss": 0.6703,
      "step": 6177
    },
    {
      "epoch": 0.5523962804005722,
      "grad_norm": 0.1555127531115298,
      "learning_rate": 8.792421769107442e-05,
      "loss": 0.6823,
      "step": 6178
    },
    {
      "epoch": 0.5524856938483548,
      "grad_norm": 0.13111588439352845,
      "learning_rate": 8.789547001874906e-05,
      "loss": 0.6004,
      "step": 6179
    },
    {
      "epoch": 0.5525751072961373,
      "grad_norm": 0.13943263939460995,
      "learning_rate": 8.786672336161605e-05,
      "loss": 0.642,
      "step": 6180
    },
    {
      "epoch": 0.5526645207439199,
      "grad_norm": 0.1573481892764528,
      "learning_rate": 8.783797772208628e-05,
      "loss": 0.6706,
      "step": 6181
    },
    {
      "epoch": 0.5527539341917024,
      "grad_norm": 0.17926929237412523,
      "learning_rate": 8.780923310257067e-05,
      "loss": 0.6924,
      "step": 6182
    },
    {
      "epoch": 0.552843347639485,
      "grad_norm": 0.1775992207659337,
      "learning_rate": 8.778048950547994e-05,
      "loss": 0.7008,
      "step": 6183
    },
    {
      "epoch": 0.5529327610872675,
      "grad_norm": 0.1541210380569129,
      "learning_rate": 8.775174693322478e-05,
      "loss": 0.6944,
      "step": 6184
    },
    {
      "epoch": 0.5530221745350501,
      "grad_norm": 0.14463680027097353,
      "learning_rate": 8.772300538821583e-05,
      "loss": 0.6407,
      "step": 6185
    },
    {
      "epoch": 0.5531115879828327,
      "grad_norm": 0.16625575395722517,
      "learning_rate": 8.769426487286356e-05,
      "loss": 0.67,
      "step": 6186
    },
    {
      "epoch": 0.5532010014306151,
      "grad_norm": 0.16612815663221886,
      "learning_rate": 8.766552538957846e-05,
      "loss": 0.6561,
      "step": 6187
    },
    {
      "epoch": 0.5532904148783977,
      "grad_norm": 0.1654582278647245,
      "learning_rate": 8.763678694077083e-05,
      "loss": 0.7016,
      "step": 6188
    },
    {
      "epoch": 0.5533798283261803,
      "grad_norm": 0.18536786469280303,
      "learning_rate": 8.760804952885098e-05,
      "loss": 0.4017,
      "step": 6189
    },
    {
      "epoch": 0.5534692417739628,
      "grad_norm": 0.1411041452862673,
      "learning_rate": 8.757931315622903e-05,
      "loss": 0.6457,
      "step": 6190
    },
    {
      "epoch": 0.5535586552217453,
      "grad_norm": 0.15343341967910146,
      "learning_rate": 8.755057782531509e-05,
      "loss": 0.6599,
      "step": 6191
    },
    {
      "epoch": 0.5536480686695279,
      "grad_norm": 0.15349203457464478,
      "learning_rate": 8.752184353851916e-05,
      "loss": 0.6881,
      "step": 6192
    },
    {
      "epoch": 0.5537374821173104,
      "grad_norm": 0.12745786785019758,
      "learning_rate": 8.749311029825111e-05,
      "loss": 0.6317,
      "step": 6193
    },
    {
      "epoch": 0.553826895565093,
      "grad_norm": 0.150538840004483,
      "learning_rate": 8.74643781069208e-05,
      "loss": 0.6982,
      "step": 6194
    },
    {
      "epoch": 0.5539163090128756,
      "grad_norm": 0.16211562711452174,
      "learning_rate": 8.7435646966938e-05,
      "loss": 0.6553,
      "step": 6195
    },
    {
      "epoch": 0.554005722460658,
      "grad_norm": 0.13871088061794226,
      "learning_rate": 8.74069168807123e-05,
      "loss": 0.6333,
      "step": 6196
    },
    {
      "epoch": 0.5540951359084406,
      "grad_norm": 0.15754722840371638,
      "learning_rate": 8.737818785065326e-05,
      "loss": 0.6381,
      "step": 6197
    },
    {
      "epoch": 0.5541845493562232,
      "grad_norm": 0.14200884456793159,
      "learning_rate": 8.734945987917038e-05,
      "loss": 0.6456,
      "step": 6198
    },
    {
      "epoch": 0.5542739628040058,
      "grad_norm": 0.16218759010815334,
      "learning_rate": 8.732073296867303e-05,
      "loss": 0.6696,
      "step": 6199
    },
    {
      "epoch": 0.5543633762517882,
      "grad_norm": 0.1696463160162376,
      "learning_rate": 8.729200712157043e-05,
      "loss": 0.6809,
      "step": 6200
    },
    {
      "epoch": 0.5544527896995708,
      "grad_norm": 0.1396457304787274,
      "learning_rate": 8.726328234027188e-05,
      "loss": 0.6161,
      "step": 6201
    },
    {
      "epoch": 0.5545422031473534,
      "grad_norm": 0.1749820614026397,
      "learning_rate": 8.723455862718649e-05,
      "loss": 0.6821,
      "step": 6202
    },
    {
      "epoch": 0.5546316165951359,
      "grad_norm": 0.1671438039085545,
      "learning_rate": 8.720583598472322e-05,
      "loss": 0.6519,
      "step": 6203
    },
    {
      "epoch": 0.5547210300429185,
      "grad_norm": 0.1596290031025968,
      "learning_rate": 8.717711441529104e-05,
      "loss": 0.6863,
      "step": 6204
    },
    {
      "epoch": 0.554810443490701,
      "grad_norm": 0.17834809887516956,
      "learning_rate": 8.71483939212988e-05,
      "loss": 0.6793,
      "step": 6205
    },
    {
      "epoch": 0.5548998569384835,
      "grad_norm": 0.14100059187545289,
      "learning_rate": 8.711967450515524e-05,
      "loss": 0.6622,
      "step": 6206
    },
    {
      "epoch": 0.5549892703862661,
      "grad_norm": 0.14031089223866614,
      "learning_rate": 8.709095616926897e-05,
      "loss": 0.6516,
      "step": 6207
    },
    {
      "epoch": 0.5550786838340487,
      "grad_norm": 0.1310918773772696,
      "learning_rate": 8.706223891604866e-05,
      "loss": 0.6518,
      "step": 6208
    },
    {
      "epoch": 0.5551680972818311,
      "grad_norm": 0.15173899147094183,
      "learning_rate": 8.703352274790276e-05,
      "loss": 0.6684,
      "step": 6209
    },
    {
      "epoch": 0.5552575107296137,
      "grad_norm": 0.1624754234387083,
      "learning_rate": 8.700480766723964e-05,
      "loss": 0.6831,
      "step": 6210
    },
    {
      "epoch": 0.5553469241773963,
      "grad_norm": 0.15871025378949574,
      "learning_rate": 8.69760936764676e-05,
      "loss": 0.6177,
      "step": 6211
    },
    {
      "epoch": 0.5554363376251789,
      "grad_norm": 0.14431484320321628,
      "learning_rate": 8.694738077799488e-05,
      "loss": 0.6218,
      "step": 6212
    },
    {
      "epoch": 0.5555257510729614,
      "grad_norm": 0.14780002114707777,
      "learning_rate": 8.691866897422952e-05,
      "loss": 0.6183,
      "step": 6213
    },
    {
      "epoch": 0.5556151645207439,
      "grad_norm": 0.1524457208129203,
      "learning_rate": 8.688995826757961e-05,
      "loss": 0.6537,
      "step": 6214
    },
    {
      "epoch": 0.5557045779685265,
      "grad_norm": 0.1409406289185369,
      "learning_rate": 8.686124866045308e-05,
      "loss": 0.6369,
      "step": 6215
    },
    {
      "epoch": 0.555793991416309,
      "grad_norm": 0.14542023456848516,
      "learning_rate": 8.683254015525776e-05,
      "loss": 0.6513,
      "step": 6216
    },
    {
      "epoch": 0.5558834048640916,
      "grad_norm": 0.14444622509585583,
      "learning_rate": 8.680383275440138e-05,
      "loss": 0.6628,
      "step": 6217
    },
    {
      "epoch": 0.5559728183118741,
      "grad_norm": 0.14599427348766553,
      "learning_rate": 8.677512646029163e-05,
      "loss": 0.6806,
      "step": 6218
    },
    {
      "epoch": 0.5560622317596566,
      "grad_norm": 0.1538834038973982,
      "learning_rate": 8.674642127533605e-05,
      "loss": 0.6493,
      "step": 6219
    },
    {
      "epoch": 0.5561516452074392,
      "grad_norm": 0.1483229556656701,
      "learning_rate": 8.671771720194211e-05,
      "loss": 0.6545,
      "step": 6220
    },
    {
      "epoch": 0.5562410586552218,
      "grad_norm": 0.1524853550577582,
      "learning_rate": 8.668901424251714e-05,
      "loss": 0.6931,
      "step": 6221
    },
    {
      "epoch": 0.5563304721030042,
      "grad_norm": 0.14930062608455233,
      "learning_rate": 8.666031239946852e-05,
      "loss": 0.6701,
      "step": 6222
    },
    {
      "epoch": 0.5564198855507868,
      "grad_norm": 0.15819005585000206,
      "learning_rate": 8.66316116752034e-05,
      "loss": 0.6471,
      "step": 6223
    },
    {
      "epoch": 0.5565092989985694,
      "grad_norm": 0.14553600527864108,
      "learning_rate": 8.660291207212882e-05,
      "loss": 0.6798,
      "step": 6224
    },
    {
      "epoch": 0.556598712446352,
      "grad_norm": 0.1527042432924091,
      "learning_rate": 8.657421359265188e-05,
      "loss": 0.6512,
      "step": 6225
    },
    {
      "epoch": 0.5566881258941345,
      "grad_norm": 0.13391496016253013,
      "learning_rate": 8.654551623917941e-05,
      "loss": 0.6493,
      "step": 6226
    },
    {
      "epoch": 0.556777539341917,
      "grad_norm": 0.145661226479325,
      "learning_rate": 8.651682001411821e-05,
      "loss": 0.6349,
      "step": 6227
    },
    {
      "epoch": 0.5568669527896996,
      "grad_norm": 0.16044163556762384,
      "learning_rate": 8.648812491987504e-05,
      "loss": 0.7055,
      "step": 6228
    },
    {
      "epoch": 0.5569563662374821,
      "grad_norm": 0.14768243664848824,
      "learning_rate": 8.645943095885655e-05,
      "loss": 0.659,
      "step": 6229
    },
    {
      "epoch": 0.5570457796852647,
      "grad_norm": 0.14644797736982962,
      "learning_rate": 8.643073813346922e-05,
      "loss": 0.6621,
      "step": 6230
    },
    {
      "epoch": 0.5571351931330472,
      "grad_norm": 0.16234036604316415,
      "learning_rate": 8.640204644611948e-05,
      "loss": 0.6775,
      "step": 6231
    },
    {
      "epoch": 0.5572246065808297,
      "grad_norm": 0.15189823793080312,
      "learning_rate": 8.63733558992137e-05,
      "loss": 0.6924,
      "step": 6232
    },
    {
      "epoch": 0.5573140200286123,
      "grad_norm": 0.16168135384471463,
      "learning_rate": 8.634466649515811e-05,
      "loss": 0.6763,
      "step": 6233
    },
    {
      "epoch": 0.5574034334763949,
      "grad_norm": 0.18265511274907334,
      "learning_rate": 8.63159782363588e-05,
      "loss": 0.3967,
      "step": 6234
    },
    {
      "epoch": 0.5574928469241774,
      "grad_norm": 0.15179469255661346,
      "learning_rate": 8.62872911252219e-05,
      "loss": 0.6688,
      "step": 6235
    },
    {
      "epoch": 0.5575822603719599,
      "grad_norm": 0.15991197290044035,
      "learning_rate": 8.625860516415335e-05,
      "loss": 0.6735,
      "step": 6236
    },
    {
      "epoch": 0.5576716738197425,
      "grad_norm": 0.16538107930510115,
      "learning_rate": 8.6229920355559e-05,
      "loss": 0.6773,
      "step": 6237
    },
    {
      "epoch": 0.557761087267525,
      "grad_norm": 0.1876527379804114,
      "learning_rate": 8.620123670184455e-05,
      "loss": 0.7014,
      "step": 6238
    },
    {
      "epoch": 0.5578505007153076,
      "grad_norm": 0.1552364791153843,
      "learning_rate": 8.617255420541576e-05,
      "loss": 0.6649,
      "step": 6239
    },
    {
      "epoch": 0.5579399141630901,
      "grad_norm": 0.1563025823045277,
      "learning_rate": 8.614387286867814e-05,
      "loss": 0.6666,
      "step": 6240
    },
    {
      "epoch": 0.5580293276108726,
      "grad_norm": 0.1427038215431827,
      "learning_rate": 8.611519269403712e-05,
      "loss": 0.622,
      "step": 6241
    },
    {
      "epoch": 0.5581187410586552,
      "grad_norm": 0.1694218908076472,
      "learning_rate": 8.608651368389815e-05,
      "loss": 0.7001,
      "step": 6242
    },
    {
      "epoch": 0.5582081545064378,
      "grad_norm": 0.15823738905464638,
      "learning_rate": 8.605783584066649e-05,
      "loss": 0.6539,
      "step": 6243
    },
    {
      "epoch": 0.5582975679542204,
      "grad_norm": 0.14342998171745816,
      "learning_rate": 8.602915916674731e-05,
      "loss": 0.6264,
      "step": 6244
    },
    {
      "epoch": 0.5583869814020028,
      "grad_norm": 0.13626115236715683,
      "learning_rate": 8.600048366454565e-05,
      "loss": 0.6505,
      "step": 6245
    },
    {
      "epoch": 0.5584763948497854,
      "grad_norm": 0.15669191292568427,
      "learning_rate": 8.597180933646653e-05,
      "loss": 0.6518,
      "step": 6246
    },
    {
      "epoch": 0.558565808297568,
      "grad_norm": 0.176855195893186,
      "learning_rate": 8.594313618491481e-05,
      "loss": 0.6755,
      "step": 6247
    },
    {
      "epoch": 0.5586552217453505,
      "grad_norm": 0.17272996522213369,
      "learning_rate": 8.591446421229528e-05,
      "loss": 0.6642,
      "step": 6248
    },
    {
      "epoch": 0.558744635193133,
      "grad_norm": 0.16131470251855107,
      "learning_rate": 8.588579342101263e-05,
      "loss": 0.6419,
      "step": 6249
    },
    {
      "epoch": 0.5588340486409156,
      "grad_norm": 0.1762896392851137,
      "learning_rate": 8.585712381347145e-05,
      "loss": 0.3866,
      "step": 6250
    },
    {
      "epoch": 0.5589234620886981,
      "grad_norm": 0.14493998072677924,
      "learning_rate": 8.58284553920762e-05,
      "loss": 0.6729,
      "step": 6251
    },
    {
      "epoch": 0.5590128755364807,
      "grad_norm": 0.1487484815819696,
      "learning_rate": 8.57997881592313e-05,
      "loss": 0.6394,
      "step": 6252
    },
    {
      "epoch": 0.5591022889842633,
      "grad_norm": 0.14537951498944096,
      "learning_rate": 8.577112211734104e-05,
      "loss": 0.6575,
      "step": 6253
    },
    {
      "epoch": 0.5591917024320457,
      "grad_norm": 0.15547704866331674,
      "learning_rate": 8.574245726880953e-05,
      "loss": 0.6614,
      "step": 6254
    },
    {
      "epoch": 0.5592811158798283,
      "grad_norm": 0.15646889923028437,
      "learning_rate": 8.571379361604091e-05,
      "loss": 0.648,
      "step": 6255
    },
    {
      "epoch": 0.5593705293276109,
      "grad_norm": 0.14635212779947737,
      "learning_rate": 8.568513116143919e-05,
      "loss": 0.6167,
      "step": 6256
    },
    {
      "epoch": 0.5594599427753935,
      "grad_norm": 0.14560957574292085,
      "learning_rate": 8.565646990740824e-05,
      "loss": 0.5895,
      "step": 6257
    },
    {
      "epoch": 0.5595493562231759,
      "grad_norm": 0.14448758384135543,
      "learning_rate": 8.562780985635183e-05,
      "loss": 0.6279,
      "step": 6258
    },
    {
      "epoch": 0.5596387696709585,
      "grad_norm": 0.16009405372207622,
      "learning_rate": 8.559915101067366e-05,
      "loss": 0.6629,
      "step": 6259
    },
    {
      "epoch": 0.5597281831187411,
      "grad_norm": 0.14291519393537988,
      "learning_rate": 8.55704933727773e-05,
      "loss": 0.6635,
      "step": 6260
    },
    {
      "epoch": 0.5598175965665236,
      "grad_norm": 0.1521333919328628,
      "learning_rate": 8.554183694506622e-05,
      "loss": 0.6537,
      "step": 6261
    },
    {
      "epoch": 0.5599070100143062,
      "grad_norm": 0.140142426053228,
      "learning_rate": 8.551318172994378e-05,
      "loss": 0.6704,
      "step": 6262
    },
    {
      "epoch": 0.5599964234620887,
      "grad_norm": 0.1716999641511326,
      "learning_rate": 8.548452772981334e-05,
      "loss": 0.6393,
      "step": 6263
    },
    {
      "epoch": 0.5600858369098712,
      "grad_norm": 0.15466634188552847,
      "learning_rate": 8.545587494707803e-05,
      "loss": 0.6504,
      "step": 6264
    },
    {
      "epoch": 0.5601752503576538,
      "grad_norm": 0.1633496429770901,
      "learning_rate": 8.54272233841409e-05,
      "loss": 0.6841,
      "step": 6265
    },
    {
      "epoch": 0.5602646638054364,
      "grad_norm": 0.15320991878752277,
      "learning_rate": 8.539857304340498e-05,
      "loss": 0.6615,
      "step": 6266
    },
    {
      "epoch": 0.5603540772532188,
      "grad_norm": 0.17012342842890132,
      "learning_rate": 8.53699239272731e-05,
      "loss": 0.7028,
      "step": 6267
    },
    {
      "epoch": 0.5604434907010014,
      "grad_norm": 0.16785153737596614,
      "learning_rate": 8.5341276038148e-05,
      "loss": 0.6852,
      "step": 6268
    },
    {
      "epoch": 0.560532904148784,
      "grad_norm": 0.1405792574974652,
      "learning_rate": 8.531262937843236e-05,
      "loss": 0.6241,
      "step": 6269
    },
    {
      "epoch": 0.5606223175965666,
      "grad_norm": 0.18693041810882122,
      "learning_rate": 8.528398395052879e-05,
      "loss": 0.696,
      "step": 6270
    },
    {
      "epoch": 0.560711731044349,
      "grad_norm": 0.14542739201166568,
      "learning_rate": 8.525533975683972e-05,
      "loss": 0.6666,
      "step": 6271
    },
    {
      "epoch": 0.5608011444921316,
      "grad_norm": 0.16880797263251893,
      "learning_rate": 8.522669679976749e-05,
      "loss": 0.7236,
      "step": 6272
    },
    {
      "epoch": 0.5608905579399142,
      "grad_norm": 0.1530815612714256,
      "learning_rate": 8.519805508171437e-05,
      "loss": 0.6767,
      "step": 6273
    },
    {
      "epoch": 0.5609799713876967,
      "grad_norm": 0.1424734950868713,
      "learning_rate": 8.516941460508247e-05,
      "loss": 0.6672,
      "step": 6274
    },
    {
      "epoch": 0.5610693848354793,
      "grad_norm": 0.1397223737058263,
      "learning_rate": 8.514077537227388e-05,
      "loss": 0.6418,
      "step": 6275
    },
    {
      "epoch": 0.5611587982832618,
      "grad_norm": 0.16894915948028252,
      "learning_rate": 8.511213738569046e-05,
      "loss": 0.6642,
      "step": 6276
    },
    {
      "epoch": 0.5612482117310443,
      "grad_norm": 0.16613007859623646,
      "learning_rate": 8.508350064773415e-05,
      "loss": 0.7039,
      "step": 6277
    },
    {
      "epoch": 0.5613376251788269,
      "grad_norm": 0.18557732126914106,
      "learning_rate": 8.50548651608066e-05,
      "loss": 0.6633,
      "step": 6278
    },
    {
      "epoch": 0.5614270386266095,
      "grad_norm": 0.15360631520502227,
      "learning_rate": 8.50262309273095e-05,
      "loss": 0.6334,
      "step": 6279
    },
    {
      "epoch": 0.5615164520743919,
      "grad_norm": 0.1566974522042328,
      "learning_rate": 8.49975979496443e-05,
      "loss": 0.6558,
      "step": 6280
    },
    {
      "epoch": 0.5616058655221745,
      "grad_norm": 0.15547357622707228,
      "learning_rate": 8.496896623021245e-05,
      "loss": 0.6729,
      "step": 6281
    },
    {
      "epoch": 0.5616952789699571,
      "grad_norm": 0.14730698589421545,
      "learning_rate": 8.494033577141525e-05,
      "loss": 0.643,
      "step": 6282
    },
    {
      "epoch": 0.5617846924177397,
      "grad_norm": 0.16250648940306217,
      "learning_rate": 8.491170657565386e-05,
      "loss": 0.6311,
      "step": 6283
    },
    {
      "epoch": 0.5618741058655222,
      "grad_norm": 0.15544917547316967,
      "learning_rate": 8.488307864532946e-05,
      "loss": 0.6554,
      "step": 6284
    },
    {
      "epoch": 0.5619635193133047,
      "grad_norm": 0.15958189833000336,
      "learning_rate": 8.485445198284298e-05,
      "loss": 0.6599,
      "step": 6285
    },
    {
      "epoch": 0.5620529327610873,
      "grad_norm": 0.18688192945063742,
      "learning_rate": 8.482582659059534e-05,
      "loss": 0.4016,
      "step": 6286
    },
    {
      "epoch": 0.5621423462088698,
      "grad_norm": 0.1677373803722555,
      "learning_rate": 8.47972024709873e-05,
      "loss": 0.6797,
      "step": 6287
    },
    {
      "epoch": 0.5622317596566524,
      "grad_norm": 0.17217776194804763,
      "learning_rate": 8.47685796264195e-05,
      "loss": 0.688,
      "step": 6288
    },
    {
      "epoch": 0.5623211731044349,
      "grad_norm": 0.15493025523329942,
      "learning_rate": 8.473995805929257e-05,
      "loss": 0.6646,
      "step": 6289
    },
    {
      "epoch": 0.5624105865522174,
      "grad_norm": 0.15773056626817256,
      "learning_rate": 8.471133777200688e-05,
      "loss": 0.6542,
      "step": 6290
    },
    {
      "epoch": 0.5625,
      "grad_norm": 0.146415714179841,
      "learning_rate": 8.468271876696286e-05,
      "loss": 0.642,
      "step": 6291
    },
    {
      "epoch": 0.5625894134477826,
      "grad_norm": 0.13688932094503928,
      "learning_rate": 8.46541010465607e-05,
      "loss": 0.6265,
      "step": 6292
    },
    {
      "epoch": 0.5626788268955651,
      "grad_norm": 0.15908687626605,
      "learning_rate": 8.462548461320057e-05,
      "loss": 0.6305,
      "step": 6293
    },
    {
      "epoch": 0.5627682403433476,
      "grad_norm": 0.16977150290083873,
      "learning_rate": 8.459686946928249e-05,
      "loss": 0.688,
      "step": 6294
    },
    {
      "epoch": 0.5628576537911302,
      "grad_norm": 0.1351626878615326,
      "learning_rate": 8.456825561720634e-05,
      "loss": 0.6501,
      "step": 6295
    },
    {
      "epoch": 0.5629470672389127,
      "grad_norm": 0.14850071786818742,
      "learning_rate": 8.453964305937197e-05,
      "loss": 0.6588,
      "step": 6296
    },
    {
      "epoch": 0.5630364806866953,
      "grad_norm": 0.16059947620571913,
      "learning_rate": 8.451103179817903e-05,
      "loss": 0.7024,
      "step": 6297
    },
    {
      "epoch": 0.5631258941344778,
      "grad_norm": 0.14388749351844174,
      "learning_rate": 8.448242183602719e-05,
      "loss": 0.6472,
      "step": 6298
    },
    {
      "epoch": 0.5632153075822603,
      "grad_norm": 0.1702424902023019,
      "learning_rate": 8.445381317531586e-05,
      "loss": 0.6815,
      "step": 6299
    },
    {
      "epoch": 0.5633047210300429,
      "grad_norm": 0.15722338853370096,
      "learning_rate": 8.442520581844447e-05,
      "loss": 0.6945,
      "step": 6300
    },
    {
      "epoch": 0.5633941344778255,
      "grad_norm": 0.17099751833717902,
      "learning_rate": 8.439659976781226e-05,
      "loss": 0.6617,
      "step": 6301
    },
    {
      "epoch": 0.5634835479256081,
      "grad_norm": 0.1584944770597338,
      "learning_rate": 8.436799502581836e-05,
      "loss": 0.6719,
      "step": 6302
    },
    {
      "epoch": 0.5635729613733905,
      "grad_norm": 0.16869042436894713,
      "learning_rate": 8.433939159486186e-05,
      "loss": 0.6442,
      "step": 6303
    },
    {
      "epoch": 0.5636623748211731,
      "grad_norm": 0.15377486165824808,
      "learning_rate": 8.431078947734164e-05,
      "loss": 0.6568,
      "step": 6304
    },
    {
      "epoch": 0.5637517882689557,
      "grad_norm": 0.17440241231383294,
      "learning_rate": 8.428218867565659e-05,
      "loss": 0.6596,
      "step": 6305
    },
    {
      "epoch": 0.5638412017167382,
      "grad_norm": 0.16755809185204246,
      "learning_rate": 8.425358919220537e-05,
      "loss": 0.6922,
      "step": 6306
    },
    {
      "epoch": 0.5639306151645207,
      "grad_norm": 0.14318615676934543,
      "learning_rate": 8.422499102938663e-05,
      "loss": 0.6188,
      "step": 6307
    },
    {
      "epoch": 0.5640200286123033,
      "grad_norm": 0.1528359014989888,
      "learning_rate": 8.419639418959884e-05,
      "loss": 0.7032,
      "step": 6308
    },
    {
      "epoch": 0.5641094420600858,
      "grad_norm": 0.16553878214661327,
      "learning_rate": 8.416779867524039e-05,
      "loss": 0.6809,
      "step": 6309
    },
    {
      "epoch": 0.5641988555078684,
      "grad_norm": 0.18403201894462418,
      "learning_rate": 8.413920448870954e-05,
      "loss": 0.6828,
      "step": 6310
    },
    {
      "epoch": 0.564288268955651,
      "grad_norm": 0.1551849483491413,
      "learning_rate": 8.411061163240441e-05,
      "loss": 0.6289,
      "step": 6311
    },
    {
      "epoch": 0.5643776824034334,
      "grad_norm": 0.16659713677818852,
      "learning_rate": 8.408202010872312e-05,
      "loss": 0.657,
      "step": 6312
    },
    {
      "epoch": 0.564467095851216,
      "grad_norm": 0.14692586238779728,
      "learning_rate": 8.40534299200636e-05,
      "loss": 0.6617,
      "step": 6313
    },
    {
      "epoch": 0.5645565092989986,
      "grad_norm": 0.1712736830533694,
      "learning_rate": 8.402484106882364e-05,
      "loss": 0.6426,
      "step": 6314
    },
    {
      "epoch": 0.5646459227467812,
      "grad_norm": 0.16556210787869868,
      "learning_rate": 8.399625355740097e-05,
      "loss": 0.6657,
      "step": 6315
    },
    {
      "epoch": 0.5647353361945636,
      "grad_norm": 0.17026083151244012,
      "learning_rate": 8.396766738819319e-05,
      "loss": 0.654,
      "step": 6316
    },
    {
      "epoch": 0.5648247496423462,
      "grad_norm": 0.1402495553245151,
      "learning_rate": 8.393908256359776e-05,
      "loss": 0.6651,
      "step": 6317
    },
    {
      "epoch": 0.5649141630901288,
      "grad_norm": 0.15239038945571926,
      "learning_rate": 8.39104990860121e-05,
      "loss": 0.6539,
      "step": 6318
    },
    {
      "epoch": 0.5650035765379113,
      "grad_norm": 0.15673242956474964,
      "learning_rate": 8.388191695783345e-05,
      "loss": 0.6977,
      "step": 6319
    },
    {
      "epoch": 0.5650929899856938,
      "grad_norm": 0.16569253458508248,
      "learning_rate": 8.385333618145896e-05,
      "loss": 0.7032,
      "step": 6320
    },
    {
      "epoch": 0.5651824034334764,
      "grad_norm": 0.1516796216479769,
      "learning_rate": 8.382475675928568e-05,
      "loss": 0.6556,
      "step": 6321
    },
    {
      "epoch": 0.5652718168812589,
      "grad_norm": 0.16494912136024692,
      "learning_rate": 8.379617869371049e-05,
      "loss": 0.6849,
      "step": 6322
    },
    {
      "epoch": 0.5653612303290415,
      "grad_norm": 0.14415941657931447,
      "learning_rate": 8.376760198713024e-05,
      "loss": 0.6266,
      "step": 6323
    },
    {
      "epoch": 0.5654506437768241,
      "grad_norm": 0.1528903302921458,
      "learning_rate": 8.373902664194156e-05,
      "loss": 0.6648,
      "step": 6324
    },
    {
      "epoch": 0.5655400572246065,
      "grad_norm": 0.1499036163737901,
      "learning_rate": 8.371045266054114e-05,
      "loss": 0.6843,
      "step": 6325
    },
    {
      "epoch": 0.5656294706723891,
      "grad_norm": 0.1804052484575965,
      "learning_rate": 8.368188004532535e-05,
      "loss": 0.6614,
      "step": 6326
    },
    {
      "epoch": 0.5657188841201717,
      "grad_norm": 0.1645832806932513,
      "learning_rate": 8.365330879869059e-05,
      "loss": 0.6593,
      "step": 6327
    },
    {
      "epoch": 0.5658082975679543,
      "grad_norm": 0.15493644339141413,
      "learning_rate": 8.362473892303308e-05,
      "loss": 0.6633,
      "step": 6328
    },
    {
      "epoch": 0.5658977110157367,
      "grad_norm": 0.1581007429913496,
      "learning_rate": 8.359617042074891e-05,
      "loss": 0.6795,
      "step": 6329
    },
    {
      "epoch": 0.5659871244635193,
      "grad_norm": 0.15936429169401561,
      "learning_rate": 8.356760329423417e-05,
      "loss": 0.6448,
      "step": 6330
    },
    {
      "epoch": 0.5660765379113019,
      "grad_norm": 0.15031123622936393,
      "learning_rate": 8.353903754588463e-05,
      "loss": 0.6847,
      "step": 6331
    },
    {
      "epoch": 0.5661659513590844,
      "grad_norm": 0.135318352045151,
      "learning_rate": 8.351047317809617e-05,
      "loss": 0.6757,
      "step": 6332
    },
    {
      "epoch": 0.566255364806867,
      "grad_norm": 0.15800207628837407,
      "learning_rate": 8.34819101932644e-05,
      "loss": 0.6676,
      "step": 6333
    },
    {
      "epoch": 0.5663447782546495,
      "grad_norm": 0.17887454431781905,
      "learning_rate": 8.345334859378489e-05,
      "loss": 0.6807,
      "step": 6334
    },
    {
      "epoch": 0.566434191702432,
      "grad_norm": 0.12826438650271704,
      "learning_rate": 8.342478838205302e-05,
      "loss": 0.6316,
      "step": 6335
    },
    {
      "epoch": 0.5665236051502146,
      "grad_norm": 0.15372556131132173,
      "learning_rate": 8.339622956046417e-05,
      "loss": 0.6683,
      "step": 6336
    },
    {
      "epoch": 0.5666130185979972,
      "grad_norm": 0.1355402724449355,
      "learning_rate": 8.336767213141348e-05,
      "loss": 0.6304,
      "step": 6337
    },
    {
      "epoch": 0.5667024320457796,
      "grad_norm": 0.13543029750527508,
      "learning_rate": 8.333911609729601e-05,
      "loss": 0.6041,
      "step": 6338
    },
    {
      "epoch": 0.5667918454935622,
      "grad_norm": 0.16134978998940244,
      "learning_rate": 8.331056146050676e-05,
      "loss": 0.6387,
      "step": 6339
    },
    {
      "epoch": 0.5668812589413448,
      "grad_norm": 0.1511959909893467,
      "learning_rate": 8.328200822344058e-05,
      "loss": 0.6208,
      "step": 6340
    },
    {
      "epoch": 0.5669706723891274,
      "grad_norm": 0.169551175316374,
      "learning_rate": 8.325345638849221e-05,
      "loss": 0.6835,
      "step": 6341
    },
    {
      "epoch": 0.5670600858369099,
      "grad_norm": 0.15130620512363543,
      "learning_rate": 8.322490595805619e-05,
      "loss": 0.6788,
      "step": 6342
    },
    {
      "epoch": 0.5671494992846924,
      "grad_norm": 0.12029778770272913,
      "learning_rate": 8.319635693452707e-05,
      "loss": 0.6332,
      "step": 6343
    },
    {
      "epoch": 0.567238912732475,
      "grad_norm": 0.14970909979679828,
      "learning_rate": 8.31678093202992e-05,
      "loss": 0.642,
      "step": 6344
    },
    {
      "epoch": 0.5673283261802575,
      "grad_norm": 0.16064258752677976,
      "learning_rate": 8.313926311776678e-05,
      "loss": 0.6664,
      "step": 6345
    },
    {
      "epoch": 0.5674177396280401,
      "grad_norm": 0.13834094512283526,
      "learning_rate": 8.311071832932404e-05,
      "loss": 0.615,
      "step": 6346
    },
    {
      "epoch": 0.5675071530758226,
      "grad_norm": 0.16431294795313373,
      "learning_rate": 8.308217495736496e-05,
      "loss": 0.6764,
      "step": 6347
    },
    {
      "epoch": 0.5675965665236051,
      "grad_norm": 0.14643731873627203,
      "learning_rate": 8.305363300428346e-05,
      "loss": 0.6841,
      "step": 6348
    },
    {
      "epoch": 0.5676859799713877,
      "grad_norm": 0.15301637756788106,
      "learning_rate": 8.302509247247325e-05,
      "loss": 0.6414,
      "step": 6349
    },
    {
      "epoch": 0.5677753934191703,
      "grad_norm": 0.15327127671744373,
      "learning_rate": 8.299655336432806e-05,
      "loss": 0.6668,
      "step": 6350
    },
    {
      "epoch": 0.5678648068669528,
      "grad_norm": 0.14292659077883335,
      "learning_rate": 8.296801568224142e-05,
      "loss": 0.6728,
      "step": 6351
    },
    {
      "epoch": 0.5679542203147353,
      "grad_norm": 0.17166656733742783,
      "learning_rate": 8.293947942860666e-05,
      "loss": 0.7154,
      "step": 6352
    },
    {
      "epoch": 0.5680436337625179,
      "grad_norm": 0.14507100573094475,
      "learning_rate": 8.291094460581721e-05,
      "loss": 0.6732,
      "step": 6353
    },
    {
      "epoch": 0.5681330472103004,
      "grad_norm": 0.16403910816104708,
      "learning_rate": 8.288241121626621e-05,
      "loss": 0.6721,
      "step": 6354
    },
    {
      "epoch": 0.568222460658083,
      "grad_norm": 0.13659956578220087,
      "learning_rate": 8.28538792623467e-05,
      "loss": 0.6319,
      "step": 6355
    },
    {
      "epoch": 0.5683118741058655,
      "grad_norm": 0.17108351745881914,
      "learning_rate": 8.282534874645162e-05,
      "loss": 0.6886,
      "step": 6356
    },
    {
      "epoch": 0.568401287553648,
      "grad_norm": 0.143865926221515,
      "learning_rate": 8.279681967097381e-05,
      "loss": 0.6149,
      "step": 6357
    },
    {
      "epoch": 0.5684907010014306,
      "grad_norm": 0.1535183536771339,
      "learning_rate": 8.276829203830596e-05,
      "loss": 0.6451,
      "step": 6358
    },
    {
      "epoch": 0.5685801144492132,
      "grad_norm": 0.1489284244574096,
      "learning_rate": 8.27397658508406e-05,
      "loss": 0.6209,
      "step": 6359
    },
    {
      "epoch": 0.5686695278969958,
      "grad_norm": 0.1496574096783196,
      "learning_rate": 8.271124111097026e-05,
      "loss": 0.6331,
      "step": 6360
    },
    {
      "epoch": 0.5687589413447782,
      "grad_norm": 0.17728220208026008,
      "learning_rate": 8.268271782108727e-05,
      "loss": 0.7092,
      "step": 6361
    },
    {
      "epoch": 0.5688483547925608,
      "grad_norm": 0.17129214785545724,
      "learning_rate": 8.265419598358381e-05,
      "loss": 0.6773,
      "step": 6362
    },
    {
      "epoch": 0.5689377682403434,
      "grad_norm": 0.16517339838872147,
      "learning_rate": 8.262567560085199e-05,
      "loss": 0.6282,
      "step": 6363
    },
    {
      "epoch": 0.5690271816881259,
      "grad_norm": 0.15762105678121904,
      "learning_rate": 8.259715667528377e-05,
      "loss": 0.6485,
      "step": 6364
    },
    {
      "epoch": 0.5691165951359084,
      "grad_norm": 0.1578531697115691,
      "learning_rate": 8.256863920927099e-05,
      "loss": 0.7152,
      "step": 6365
    },
    {
      "epoch": 0.569206008583691,
      "grad_norm": 0.1754230906766955,
      "learning_rate": 8.254012320520539e-05,
      "loss": 0.7084,
      "step": 6366
    },
    {
      "epoch": 0.5692954220314735,
      "grad_norm": 0.15371153481034747,
      "learning_rate": 8.251160866547857e-05,
      "loss": 0.6614,
      "step": 6367
    },
    {
      "epoch": 0.5693848354792561,
      "grad_norm": 0.14851991465036998,
      "learning_rate": 8.248309559248203e-05,
      "loss": 0.6419,
      "step": 6368
    },
    {
      "epoch": 0.5694742489270386,
      "grad_norm": 0.18121833505034363,
      "learning_rate": 8.245458398860709e-05,
      "loss": 0.6588,
      "step": 6369
    },
    {
      "epoch": 0.5695636623748211,
      "grad_norm": 0.15990778538024952,
      "learning_rate": 8.242607385624501e-05,
      "loss": 0.662,
      "step": 6370
    },
    {
      "epoch": 0.5696530758226037,
      "grad_norm": 0.15952099738605677,
      "learning_rate": 8.23975651977869e-05,
      "loss": 0.6561,
      "step": 6371
    },
    {
      "epoch": 0.5697424892703863,
      "grad_norm": 0.15239559233281014,
      "learning_rate": 8.236905801562373e-05,
      "loss": 0.633,
      "step": 6372
    },
    {
      "epoch": 0.5698319027181689,
      "grad_norm": 0.14052883149603795,
      "learning_rate": 8.234055231214634e-05,
      "loss": 0.6557,
      "step": 6373
    },
    {
      "epoch": 0.5699213161659513,
      "grad_norm": 0.15607802844451113,
      "learning_rate": 8.231204808974554e-05,
      "loss": 0.652,
      "step": 6374
    },
    {
      "epoch": 0.5700107296137339,
      "grad_norm": 0.1670227146028467,
      "learning_rate": 8.228354535081191e-05,
      "loss": 0.6644,
      "step": 6375
    },
    {
      "epoch": 0.5701001430615165,
      "grad_norm": 0.1658963097712082,
      "learning_rate": 8.225504409773591e-05,
      "loss": 0.6663,
      "step": 6376
    },
    {
      "epoch": 0.570189556509299,
      "grad_norm": 0.15424879578560127,
      "learning_rate": 8.222654433290795e-05,
      "loss": 0.6499,
      "step": 6377
    },
    {
      "epoch": 0.5702789699570815,
      "grad_norm": 0.15341055313114157,
      "learning_rate": 8.219804605871826e-05,
      "loss": 0.6334,
      "step": 6378
    },
    {
      "epoch": 0.5703683834048641,
      "grad_norm": 0.16559989713063733,
      "learning_rate": 8.216954927755692e-05,
      "loss": 0.6898,
      "step": 6379
    },
    {
      "epoch": 0.5704577968526466,
      "grad_norm": 0.1751255690509401,
      "learning_rate": 8.214105399181393e-05,
      "loss": 0.7266,
      "step": 6380
    },
    {
      "epoch": 0.5705472103004292,
      "grad_norm": 0.15194367558188115,
      "learning_rate": 8.21125602038792e-05,
      "loss": 0.6504,
      "step": 6381
    },
    {
      "epoch": 0.5706366237482118,
      "grad_norm": 0.16562876859235565,
      "learning_rate": 8.208406791614247e-05,
      "loss": 0.7059,
      "step": 6382
    },
    {
      "epoch": 0.5707260371959942,
      "grad_norm": 0.1641184251591517,
      "learning_rate": 8.20555771309933e-05,
      "loss": 0.6761,
      "step": 6383
    },
    {
      "epoch": 0.5708154506437768,
      "grad_norm": 0.15003842790319985,
      "learning_rate": 8.202708785082121e-05,
      "loss": 0.6834,
      "step": 6384
    },
    {
      "epoch": 0.5709048640915594,
      "grad_norm": 0.15090755460775,
      "learning_rate": 8.199860007801557e-05,
      "loss": 0.6512,
      "step": 6385
    },
    {
      "epoch": 0.570994277539342,
      "grad_norm": 0.16545839251447766,
      "learning_rate": 8.197011381496558e-05,
      "loss": 0.6348,
      "step": 6386
    },
    {
      "epoch": 0.5710836909871244,
      "grad_norm": 0.15970551246562067,
      "learning_rate": 8.194162906406033e-05,
      "loss": 0.6549,
      "step": 6387
    },
    {
      "epoch": 0.571173104434907,
      "grad_norm": 0.1574241626138834,
      "learning_rate": 8.191314582768891e-05,
      "loss": 0.6532,
      "step": 6388
    },
    {
      "epoch": 0.5712625178826896,
      "grad_norm": 0.15972410474067053,
      "learning_rate": 8.18846641082401e-05,
      "loss": 0.6854,
      "step": 6389
    },
    {
      "epoch": 0.5713519313304721,
      "grad_norm": 0.12896809127815662,
      "learning_rate": 8.18561839081026e-05,
      "loss": 0.6214,
      "step": 6390
    },
    {
      "epoch": 0.5714413447782547,
      "grad_norm": 0.141634707075275,
      "learning_rate": 8.182770522966507e-05,
      "loss": 0.6471,
      "step": 6391
    },
    {
      "epoch": 0.5715307582260372,
      "grad_norm": 0.1659134997601582,
      "learning_rate": 8.179922807531594e-05,
      "loss": 0.6526,
      "step": 6392
    },
    {
      "epoch": 0.5716201716738197,
      "grad_norm": 0.14934706101079318,
      "learning_rate": 8.177075244744358e-05,
      "loss": 0.6381,
      "step": 6393
    },
    {
      "epoch": 0.5717095851216023,
      "grad_norm": 0.14131558243263886,
      "learning_rate": 8.174227834843617e-05,
      "loss": 0.657,
      "step": 6394
    },
    {
      "epoch": 0.5717989985693849,
      "grad_norm": 0.14237950305201624,
      "learning_rate": 8.171380578068185e-05,
      "loss": 0.6359,
      "step": 6395
    },
    {
      "epoch": 0.5718884120171673,
      "grad_norm": 0.17830649083606806,
      "learning_rate": 8.168533474656855e-05,
      "loss": 0.6547,
      "step": 6396
    },
    {
      "epoch": 0.5719778254649499,
      "grad_norm": 0.16774961028061894,
      "learning_rate": 8.165686524848411e-05,
      "loss": 0.6809,
      "step": 6397
    },
    {
      "epoch": 0.5720672389127325,
      "grad_norm": 0.1750110958448243,
      "learning_rate": 8.162839728881625e-05,
      "loss": 0.6359,
      "step": 6398
    },
    {
      "epoch": 0.572156652360515,
      "grad_norm": 0.1399811073726303,
      "learning_rate": 8.159993086995249e-05,
      "loss": 0.6503,
      "step": 6399
    },
    {
      "epoch": 0.5722460658082976,
      "grad_norm": 0.17273787911020347,
      "learning_rate": 8.157146599428028e-05,
      "loss": 0.6595,
      "step": 6400
    },
    {
      "epoch": 0.5723354792560801,
      "grad_norm": 0.15887714169691447,
      "learning_rate": 8.154300266418702e-05,
      "loss": 0.6763,
      "step": 6401
    },
    {
      "epoch": 0.5724248927038627,
      "grad_norm": 0.15378387322128773,
      "learning_rate": 8.151454088205982e-05,
      "loss": 0.6529,
      "step": 6402
    },
    {
      "epoch": 0.5725143061516452,
      "grad_norm": 0.15955532924224047,
      "learning_rate": 8.148608065028574e-05,
      "loss": 0.6615,
      "step": 6403
    },
    {
      "epoch": 0.5726037195994278,
      "grad_norm": 0.16271255007624894,
      "learning_rate": 8.145762197125173e-05,
      "loss": 0.6821,
      "step": 6404
    },
    {
      "epoch": 0.5726931330472103,
      "grad_norm": 0.15749161171922485,
      "learning_rate": 8.142916484734458e-05,
      "loss": 0.6642,
      "step": 6405
    },
    {
      "epoch": 0.5727825464949928,
      "grad_norm": 0.1568976036398395,
      "learning_rate": 8.140070928095092e-05,
      "loss": 0.6573,
      "step": 6406
    },
    {
      "epoch": 0.5728719599427754,
      "grad_norm": 0.15087701190595063,
      "learning_rate": 8.137225527445727e-05,
      "loss": 0.6825,
      "step": 6407
    },
    {
      "epoch": 0.572961373390558,
      "grad_norm": 0.16607656252860914,
      "learning_rate": 8.134380283025014e-05,
      "loss": 0.6999,
      "step": 6408
    },
    {
      "epoch": 0.5730507868383404,
      "grad_norm": 0.1475256686885926,
      "learning_rate": 8.131535195071574e-05,
      "loss": 0.624,
      "step": 6409
    },
    {
      "epoch": 0.573140200286123,
      "grad_norm": 0.14186895374849753,
      "learning_rate": 8.128690263824017e-05,
      "loss": 0.6461,
      "step": 6410
    },
    {
      "epoch": 0.5732296137339056,
      "grad_norm": 0.1365380856996363,
      "learning_rate": 8.12584548952095e-05,
      "loss": 0.6324,
      "step": 6411
    },
    {
      "epoch": 0.5733190271816881,
      "grad_norm": 0.1656798887945329,
      "learning_rate": 8.123000872400959e-05,
      "loss": 0.7123,
      "step": 6412
    },
    {
      "epoch": 0.5734084406294707,
      "grad_norm": 0.15758242155427551,
      "learning_rate": 8.120156412702615e-05,
      "loss": 0.6724,
      "step": 6413
    },
    {
      "epoch": 0.5734978540772532,
      "grad_norm": 0.140053441328023,
      "learning_rate": 8.117312110664482e-05,
      "loss": 0.6202,
      "step": 6414
    },
    {
      "epoch": 0.5735872675250357,
      "grad_norm": 0.13974455984951817,
      "learning_rate": 8.114467966525112e-05,
      "loss": 0.646,
      "step": 6415
    },
    {
      "epoch": 0.5736766809728183,
      "grad_norm": 0.1477498647865894,
      "learning_rate": 8.111623980523035e-05,
      "loss": 0.6804,
      "step": 6416
    },
    {
      "epoch": 0.5737660944206009,
      "grad_norm": 0.16851712378307115,
      "learning_rate": 8.108780152896773e-05,
      "loss": 0.6481,
      "step": 6417
    },
    {
      "epoch": 0.5738555078683834,
      "grad_norm": 0.16521353474214895,
      "learning_rate": 8.105936483884838e-05,
      "loss": 0.7013,
      "step": 6418
    },
    {
      "epoch": 0.5739449213161659,
      "grad_norm": 0.13507053117246284,
      "learning_rate": 8.103092973725724e-05,
      "loss": 0.6571,
      "step": 6419
    },
    {
      "epoch": 0.5740343347639485,
      "grad_norm": 0.1572997474113716,
      "learning_rate": 8.100249622657907e-05,
      "loss": 0.6266,
      "step": 6420
    },
    {
      "epoch": 0.5741237482117311,
      "grad_norm": 0.15912669684330932,
      "learning_rate": 8.097406430919858e-05,
      "loss": 0.6699,
      "step": 6421
    },
    {
      "epoch": 0.5742131616595136,
      "grad_norm": 0.16202986655009854,
      "learning_rate": 8.094563398750039e-05,
      "loss": 0.6762,
      "step": 6422
    },
    {
      "epoch": 0.5743025751072961,
      "grad_norm": 0.1625768922228926,
      "learning_rate": 8.091720526386886e-05,
      "loss": 0.6579,
      "step": 6423
    },
    {
      "epoch": 0.5743919885550787,
      "grad_norm": 0.16973939200209873,
      "learning_rate": 8.088877814068827e-05,
      "loss": 0.6771,
      "step": 6424
    },
    {
      "epoch": 0.5744814020028612,
      "grad_norm": 0.16574800793697206,
      "learning_rate": 8.086035262034278e-05,
      "loss": 0.6834,
      "step": 6425
    },
    {
      "epoch": 0.5745708154506438,
      "grad_norm": 0.16440043742707083,
      "learning_rate": 8.083192870521638e-05,
      "loss": 0.6655,
      "step": 6426
    },
    {
      "epoch": 0.5746602288984263,
      "grad_norm": 0.16021337044844045,
      "learning_rate": 8.0803506397693e-05,
      "loss": 0.6671,
      "step": 6427
    },
    {
      "epoch": 0.5747496423462088,
      "grad_norm": 0.14267697533517912,
      "learning_rate": 8.077508570015632e-05,
      "loss": 0.6468,
      "step": 6428
    },
    {
      "epoch": 0.5748390557939914,
      "grad_norm": 0.14455832340445343,
      "learning_rate": 8.074666661499002e-05,
      "loss": 0.6435,
      "step": 6429
    },
    {
      "epoch": 0.574928469241774,
      "grad_norm": 0.14899540563475316,
      "learning_rate": 8.071824914457751e-05,
      "loss": 0.6709,
      "step": 6430
    },
    {
      "epoch": 0.5750178826895566,
      "grad_norm": 0.17963658693880621,
      "learning_rate": 8.068983329130218e-05,
      "loss": 0.6284,
      "step": 6431
    },
    {
      "epoch": 0.575107296137339,
      "grad_norm": 0.14867392638480406,
      "learning_rate": 8.066141905754723e-05,
      "loss": 0.6791,
      "step": 6432
    },
    {
      "epoch": 0.5751967095851216,
      "grad_norm": 0.16699065123516657,
      "learning_rate": 8.063300644569567e-05,
      "loss": 0.6511,
      "step": 6433
    },
    {
      "epoch": 0.5752861230329042,
      "grad_norm": 0.17596966862959143,
      "learning_rate": 8.060459545813049e-05,
      "loss": 0.3445,
      "step": 6434
    },
    {
      "epoch": 0.5753755364806867,
      "grad_norm": 0.15320151031653112,
      "learning_rate": 8.057618609723443e-05,
      "loss": 0.679,
      "step": 6435
    },
    {
      "epoch": 0.5754649499284692,
      "grad_norm": 0.15318141866928237,
      "learning_rate": 8.054777836539022e-05,
      "loss": 0.6874,
      "step": 6436
    },
    {
      "epoch": 0.5755543633762518,
      "grad_norm": 0.16237668438622496,
      "learning_rate": 8.051937226498034e-05,
      "loss": 0.6545,
      "step": 6437
    },
    {
      "epoch": 0.5756437768240343,
      "grad_norm": 0.1267887867541153,
      "learning_rate": 8.049096779838719e-05,
      "loss": 0.629,
      "step": 6438
    },
    {
      "epoch": 0.5757331902718169,
      "grad_norm": 0.1942754248066486,
      "learning_rate": 8.0462564967993e-05,
      "loss": 0.3791,
      "step": 6439
    },
    {
      "epoch": 0.5758226037195995,
      "grad_norm": 0.15434387221588533,
      "learning_rate": 8.043416377617988e-05,
      "loss": 0.6672,
      "step": 6440
    },
    {
      "epoch": 0.5759120171673819,
      "grad_norm": 0.1674844285292634,
      "learning_rate": 8.040576422532984e-05,
      "loss": 0.6232,
      "step": 6441
    },
    {
      "epoch": 0.5760014306151645,
      "grad_norm": 0.16602662169905966,
      "learning_rate": 8.037736631782465e-05,
      "loss": 0.6739,
      "step": 6442
    },
    {
      "epoch": 0.5760908440629471,
      "grad_norm": 0.14361524947488147,
      "learning_rate": 8.034897005604608e-05,
      "loss": 0.6169,
      "step": 6443
    },
    {
      "epoch": 0.5761802575107297,
      "grad_norm": 0.17877151491363177,
      "learning_rate": 8.032057544237565e-05,
      "loss": 0.6957,
      "step": 6444
    },
    {
      "epoch": 0.5762696709585121,
      "grad_norm": 0.16876356181306382,
      "learning_rate": 8.02921824791948e-05,
      "loss": 0.6904,
      "step": 6445
    },
    {
      "epoch": 0.5763590844062947,
      "grad_norm": 0.16738549442398745,
      "learning_rate": 8.026379116888481e-05,
      "loss": 0.6527,
      "step": 6446
    },
    {
      "epoch": 0.5764484978540773,
      "grad_norm": 0.17752418083973787,
      "learning_rate": 8.02354015138268e-05,
      "loss": 0.6741,
      "step": 6447
    },
    {
      "epoch": 0.5765379113018598,
      "grad_norm": 0.14365008028747991,
      "learning_rate": 8.020701351640182e-05,
      "loss": 0.6453,
      "step": 6448
    },
    {
      "epoch": 0.5766273247496424,
      "grad_norm": 0.1606757055477616,
      "learning_rate": 8.017862717899066e-05,
      "loss": 0.6852,
      "step": 6449
    },
    {
      "epoch": 0.5767167381974249,
      "grad_norm": 0.15700732002541684,
      "learning_rate": 8.015024250397415e-05,
      "loss": 0.5995,
      "step": 6450
    },
    {
      "epoch": 0.5768061516452074,
      "grad_norm": 0.1512111262231968,
      "learning_rate": 8.01218594937328e-05,
      "loss": 0.6205,
      "step": 6451
    },
    {
      "epoch": 0.57689556509299,
      "grad_norm": 0.15552009427235836,
      "learning_rate": 8.009347815064712e-05,
      "loss": 0.6662,
      "step": 6452
    },
    {
      "epoch": 0.5769849785407726,
      "grad_norm": 0.1521763709450754,
      "learning_rate": 8.006509847709735e-05,
      "loss": 0.6908,
      "step": 6453
    },
    {
      "epoch": 0.577074391988555,
      "grad_norm": 0.16796437314181073,
      "learning_rate": 8.003672047546373e-05,
      "loss": 0.6736,
      "step": 6454
    },
    {
      "epoch": 0.5771638054363376,
      "grad_norm": 0.16744398434947502,
      "learning_rate": 8.000834414812625e-05,
      "loss": 0.6527,
      "step": 6455
    },
    {
      "epoch": 0.5772532188841202,
      "grad_norm": 0.13641542123267256,
      "learning_rate": 7.997996949746477e-05,
      "loss": 0.6644,
      "step": 6456
    },
    {
      "epoch": 0.5773426323319027,
      "grad_norm": 0.1440428936380194,
      "learning_rate": 7.995159652585908e-05,
      "loss": 0.6372,
      "step": 6457
    },
    {
      "epoch": 0.5774320457796852,
      "grad_norm": 0.15543911855036685,
      "learning_rate": 7.99232252356888e-05,
      "loss": 0.66,
      "step": 6458
    },
    {
      "epoch": 0.5775214592274678,
      "grad_norm": 0.17432436624140657,
      "learning_rate": 7.989485562933338e-05,
      "loss": 0.6602,
      "step": 6459
    },
    {
      "epoch": 0.5776108726752504,
      "grad_norm": 0.16267739083551,
      "learning_rate": 7.98664877091721e-05,
      "loss": 0.626,
      "step": 6460
    },
    {
      "epoch": 0.5777002861230329,
      "grad_norm": 0.14550509846310114,
      "learning_rate": 7.983812147758422e-05,
      "loss": 0.6571,
      "step": 6461
    },
    {
      "epoch": 0.5777896995708155,
      "grad_norm": 0.14626607614162004,
      "learning_rate": 7.980975693694872e-05,
      "loss": 0.6432,
      "step": 6462
    },
    {
      "epoch": 0.577879113018598,
      "grad_norm": 0.1712029475614608,
      "learning_rate": 7.97813940896445e-05,
      "loss": 0.6762,
      "step": 6463
    },
    {
      "epoch": 0.5779685264663805,
      "grad_norm": 0.17749492585712706,
      "learning_rate": 7.975303293805035e-05,
      "loss": 0.6813,
      "step": 6464
    },
    {
      "epoch": 0.5780579399141631,
      "grad_norm": 0.16959584665067903,
      "learning_rate": 7.97246734845449e-05,
      "loss": 0.644,
      "step": 6465
    },
    {
      "epoch": 0.5781473533619457,
      "grad_norm": 0.16957608612901373,
      "learning_rate": 7.96963157315066e-05,
      "loss": 0.7187,
      "step": 6466
    },
    {
      "epoch": 0.5782367668097281,
      "grad_norm": 0.1506205252851635,
      "learning_rate": 7.966795968131377e-05,
      "loss": 0.6603,
      "step": 6467
    },
    {
      "epoch": 0.5783261802575107,
      "grad_norm": 0.16061576536878544,
      "learning_rate": 7.963960533634461e-05,
      "loss": 0.6707,
      "step": 6468
    },
    {
      "epoch": 0.5784155937052933,
      "grad_norm": 0.14971247227111048,
      "learning_rate": 7.961125269897716e-05,
      "loss": 0.619,
      "step": 6469
    },
    {
      "epoch": 0.5785050071530758,
      "grad_norm": 0.1658463443838099,
      "learning_rate": 7.95829017715893e-05,
      "loss": 0.6258,
      "step": 6470
    },
    {
      "epoch": 0.5785944206008584,
      "grad_norm": 0.1476128262980419,
      "learning_rate": 7.955455255655881e-05,
      "loss": 0.6359,
      "step": 6471
    },
    {
      "epoch": 0.5786838340486409,
      "grad_norm": 0.1436596105972735,
      "learning_rate": 7.952620505626333e-05,
      "loss": 0.6378,
      "step": 6472
    },
    {
      "epoch": 0.5787732474964234,
      "grad_norm": 0.15844451881464985,
      "learning_rate": 7.949785927308032e-05,
      "loss": 0.6733,
      "step": 6473
    },
    {
      "epoch": 0.578862660944206,
      "grad_norm": 0.14954172960407872,
      "learning_rate": 7.946951520938706e-05,
      "loss": 0.6583,
      "step": 6474
    },
    {
      "epoch": 0.5789520743919886,
      "grad_norm": 0.14284440665015466,
      "learning_rate": 7.944117286756079e-05,
      "loss": 0.6089,
      "step": 6475
    },
    {
      "epoch": 0.579041487839771,
      "grad_norm": 0.1380763339621578,
      "learning_rate": 7.94128322499785e-05,
      "loss": 0.6576,
      "step": 6476
    },
    {
      "epoch": 0.5791309012875536,
      "grad_norm": 0.14086501970121815,
      "learning_rate": 7.93844933590171e-05,
      "loss": 0.6358,
      "step": 6477
    },
    {
      "epoch": 0.5792203147353362,
      "grad_norm": 0.1731690469154778,
      "learning_rate": 7.935615619705334e-05,
      "loss": 0.6671,
      "step": 6478
    },
    {
      "epoch": 0.5793097281831188,
      "grad_norm": 0.16254146265232236,
      "learning_rate": 7.932782076646386e-05,
      "loss": 0.6562,
      "step": 6479
    },
    {
      "epoch": 0.5793991416309013,
      "grad_norm": 0.1596734451522726,
      "learning_rate": 7.929948706962508e-05,
      "loss": 0.6616,
      "step": 6480
    },
    {
      "epoch": 0.5794885550786838,
      "grad_norm": 0.1643699255462473,
      "learning_rate": 7.927115510891332e-05,
      "loss": 0.638,
      "step": 6481
    },
    {
      "epoch": 0.5795779685264664,
      "grad_norm": 0.16198015806786342,
      "learning_rate": 7.924282488670476e-05,
      "loss": 0.6682,
      "step": 6482
    },
    {
      "epoch": 0.5796673819742489,
      "grad_norm": 0.18393587523008748,
      "learning_rate": 7.921449640537535e-05,
      "loss": 0.3903,
      "step": 6483
    },
    {
      "epoch": 0.5797567954220315,
      "grad_norm": 0.1476962843790933,
      "learning_rate": 7.918616966730108e-05,
      "loss": 0.6468,
      "step": 6484
    },
    {
      "epoch": 0.579846208869814,
      "grad_norm": 0.1803100945511086,
      "learning_rate": 7.91578446748576e-05,
      "loss": 0.6555,
      "step": 6485
    },
    {
      "epoch": 0.5799356223175965,
      "grad_norm": 0.17222997857576514,
      "learning_rate": 7.912952143042052e-05,
      "loss": 0.6841,
      "step": 6486
    },
    {
      "epoch": 0.5800250357653791,
      "grad_norm": 0.14574878052616613,
      "learning_rate": 7.910119993636528e-05,
      "loss": 0.6576,
      "step": 6487
    },
    {
      "epoch": 0.5801144492131617,
      "grad_norm": 0.17208588036600758,
      "learning_rate": 7.907288019506717e-05,
      "loss": 0.7124,
      "step": 6488
    },
    {
      "epoch": 0.5802038626609443,
      "grad_norm": 0.15829922074001182,
      "learning_rate": 7.904456220890132e-05,
      "loss": 0.6819,
      "step": 6489
    },
    {
      "epoch": 0.5802932761087267,
      "grad_norm": 0.15459925230586563,
      "learning_rate": 7.901624598024269e-05,
      "loss": 0.6557,
      "step": 6490
    },
    {
      "epoch": 0.5803826895565093,
      "grad_norm": 0.1444670150609446,
      "learning_rate": 7.89879315114662e-05,
      "loss": 0.6503,
      "step": 6491
    },
    {
      "epoch": 0.5804721030042919,
      "grad_norm": 0.16174566639607452,
      "learning_rate": 7.895961880494652e-05,
      "loss": 0.6902,
      "step": 6492
    },
    {
      "epoch": 0.5805615164520744,
      "grad_norm": 0.15745811314778566,
      "learning_rate": 7.893130786305821e-05,
      "loss": 0.6531,
      "step": 6493
    },
    {
      "epoch": 0.5806509298998569,
      "grad_norm": 0.157330864235468,
      "learning_rate": 7.890299868817564e-05,
      "loss": 0.7014,
      "step": 6494
    },
    {
      "epoch": 0.5807403433476395,
      "grad_norm": 0.1380032571467774,
      "learning_rate": 7.887469128267312e-05,
      "loss": 0.6548,
      "step": 6495
    },
    {
      "epoch": 0.580829756795422,
      "grad_norm": 0.12696147940553393,
      "learning_rate": 7.884638564892472e-05,
      "loss": 0.6154,
      "step": 6496
    },
    {
      "epoch": 0.5809191702432046,
      "grad_norm": 0.13425333535211803,
      "learning_rate": 7.881808178930438e-05,
      "loss": 0.6284,
      "step": 6497
    },
    {
      "epoch": 0.5810085836909872,
      "grad_norm": 0.1787379222851999,
      "learning_rate": 7.878977970618595e-05,
      "loss": 0.684,
      "step": 6498
    },
    {
      "epoch": 0.5810979971387696,
      "grad_norm": 0.15285161822172225,
      "learning_rate": 7.876147940194311e-05,
      "loss": 0.6655,
      "step": 6499
    },
    {
      "epoch": 0.5811874105865522,
      "grad_norm": 0.1751692515420209,
      "learning_rate": 7.873318087894933e-05,
      "loss": 0.6705,
      "step": 6500
    },
    {
      "epoch": 0.5812768240343348,
      "grad_norm": 0.14683917420411355,
      "learning_rate": 7.870488413957797e-05,
      "loss": 0.6182,
      "step": 6501
    },
    {
      "epoch": 0.5813662374821174,
      "grad_norm": 0.1518318898607904,
      "learning_rate": 7.867658918620229e-05,
      "loss": 0.6092,
      "step": 6502
    },
    {
      "epoch": 0.5814556509298998,
      "grad_norm": 0.1649050210744287,
      "learning_rate": 7.86482960211953e-05,
      "loss": 0.6729,
      "step": 6503
    },
    {
      "epoch": 0.5815450643776824,
      "grad_norm": 0.1745846295665887,
      "learning_rate": 7.862000464692991e-05,
      "loss": 0.6492,
      "step": 6504
    },
    {
      "epoch": 0.581634477825465,
      "grad_norm": 0.1406512599916724,
      "learning_rate": 7.859171506577893e-05,
      "loss": 0.6381,
      "step": 6505
    },
    {
      "epoch": 0.5817238912732475,
      "grad_norm": 0.16329332624990756,
      "learning_rate": 7.856342728011498e-05,
      "loss": 0.6475,
      "step": 6506
    },
    {
      "epoch": 0.58181330472103,
      "grad_norm": 0.18978721541297952,
      "learning_rate": 7.853514129231049e-05,
      "loss": 0.4011,
      "step": 6507
    },
    {
      "epoch": 0.5819027181688126,
      "grad_norm": 0.1574009183485694,
      "learning_rate": 7.850685710473775e-05,
      "loss": 0.7198,
      "step": 6508
    },
    {
      "epoch": 0.5819921316165951,
      "grad_norm": 0.15716682679674024,
      "learning_rate": 7.847857471976897e-05,
      "loss": 0.6185,
      "step": 6509
    },
    {
      "epoch": 0.5820815450643777,
      "grad_norm": 0.15064886118242385,
      "learning_rate": 7.845029413977613e-05,
      "loss": 0.6729,
      "step": 6510
    },
    {
      "epoch": 0.5821709585121603,
      "grad_norm": 0.15308016122709198,
      "learning_rate": 7.842201536713107e-05,
      "loss": 0.6643,
      "step": 6511
    },
    {
      "epoch": 0.5822603719599427,
      "grad_norm": 0.16079849381178457,
      "learning_rate": 7.839373840420554e-05,
      "loss": 0.6491,
      "step": 6512
    },
    {
      "epoch": 0.5823497854077253,
      "grad_norm": 0.14085816469206147,
      "learning_rate": 7.83654632533711e-05,
      "loss": 0.6386,
      "step": 6513
    },
    {
      "epoch": 0.5824391988555079,
      "grad_norm": 0.14660258069030024,
      "learning_rate": 7.83371899169991e-05,
      "loss": 0.6507,
      "step": 6514
    },
    {
      "epoch": 0.5825286123032904,
      "grad_norm": 0.14855872663867198,
      "learning_rate": 7.830891839746083e-05,
      "loss": 0.6529,
      "step": 6515
    },
    {
      "epoch": 0.5826180257510729,
      "grad_norm": 0.16289998921815105,
      "learning_rate": 7.828064869712739e-05,
      "loss": 0.6363,
      "step": 6516
    },
    {
      "epoch": 0.5827074391988555,
      "grad_norm": 0.15761789607568058,
      "learning_rate": 7.82523808183697e-05,
      "loss": 0.6924,
      "step": 6517
    },
    {
      "epoch": 0.582796852646638,
      "grad_norm": 0.15353295724764712,
      "learning_rate": 7.822411476355854e-05,
      "loss": 0.6859,
      "step": 6518
    },
    {
      "epoch": 0.5828862660944206,
      "grad_norm": 0.16010837443839168,
      "learning_rate": 7.819585053506461e-05,
      "loss": 0.6517,
      "step": 6519
    },
    {
      "epoch": 0.5829756795422032,
      "grad_norm": 0.14651832768833034,
      "learning_rate": 7.816758813525836e-05,
      "loss": 0.6465,
      "step": 6520
    },
    {
      "epoch": 0.5830650929899857,
      "grad_norm": 0.17279029256307565,
      "learning_rate": 7.813932756651012e-05,
      "loss": 0.7048,
      "step": 6521
    },
    {
      "epoch": 0.5831545064377682,
      "grad_norm": 0.1612486524097293,
      "learning_rate": 7.811106883119008e-05,
      "loss": 0.6863,
      "step": 6522
    },
    {
      "epoch": 0.5832439198855508,
      "grad_norm": 0.15241908468457652,
      "learning_rate": 7.808281193166829e-05,
      "loss": 0.678,
      "step": 6523
    },
    {
      "epoch": 0.5833333333333334,
      "grad_norm": 0.15622965002702502,
      "learning_rate": 7.805455687031455e-05,
      "loss": 0.6455,
      "step": 6524
    },
    {
      "epoch": 0.5834227467811158,
      "grad_norm": 0.1544632500077361,
      "learning_rate": 7.80263036494986e-05,
      "loss": 0.634,
      "step": 6525
    },
    {
      "epoch": 0.5835121602288984,
      "grad_norm": 0.1891259446314259,
      "learning_rate": 7.799805227159007e-05,
      "loss": 0.4062,
      "step": 6526
    },
    {
      "epoch": 0.583601573676681,
      "grad_norm": 0.14242132588089446,
      "learning_rate": 7.796980273895833e-05,
      "loss": 0.6159,
      "step": 6527
    },
    {
      "epoch": 0.5836909871244635,
      "grad_norm": 0.15267807796254354,
      "learning_rate": 7.794155505397261e-05,
      "loss": 0.6576,
      "step": 6528
    },
    {
      "epoch": 0.5837804005722461,
      "grad_norm": 0.13787410392889501,
      "learning_rate": 7.791330921900205e-05,
      "loss": 0.6616,
      "step": 6529
    },
    {
      "epoch": 0.5838698140200286,
      "grad_norm": 0.12528436897517006,
      "learning_rate": 7.788506523641556e-05,
      "loss": 0.645,
      "step": 6530
    },
    {
      "epoch": 0.5839592274678111,
      "grad_norm": 0.14499109778983996,
      "learning_rate": 7.785682310858193e-05,
      "loss": 0.6505,
      "step": 6531
    },
    {
      "epoch": 0.5840486409155937,
      "grad_norm": 0.1655025198976373,
      "learning_rate": 7.782858283786976e-05,
      "loss": 0.6447,
      "step": 6532
    },
    {
      "epoch": 0.5841380543633763,
      "grad_norm": 0.15988503180747865,
      "learning_rate": 7.780034442664764e-05,
      "loss": 0.639,
      "step": 6533
    },
    {
      "epoch": 0.5842274678111588,
      "grad_norm": 0.14583461042654705,
      "learning_rate": 7.777210787728382e-05,
      "loss": 0.6541,
      "step": 6534
    },
    {
      "epoch": 0.5843168812589413,
      "grad_norm": 0.15844236258028366,
      "learning_rate": 7.774387319214643e-05,
      "loss": 0.6226,
      "step": 6535
    },
    {
      "epoch": 0.5844062947067239,
      "grad_norm": 0.16775827109527391,
      "learning_rate": 7.771564037360355e-05,
      "loss": 0.6674,
      "step": 6536
    },
    {
      "epoch": 0.5844957081545065,
      "grad_norm": 0.15637252068910734,
      "learning_rate": 7.768740942402301e-05,
      "loss": 0.6606,
      "step": 6537
    },
    {
      "epoch": 0.584585121602289,
      "grad_norm": 0.1570647881450568,
      "learning_rate": 7.765918034577245e-05,
      "loss": 0.7209,
      "step": 6538
    },
    {
      "epoch": 0.5846745350500715,
      "grad_norm": 0.15635819444606142,
      "learning_rate": 7.763095314121945e-05,
      "loss": 0.6819,
      "step": 6539
    },
    {
      "epoch": 0.5847639484978541,
      "grad_norm": 0.151749442116383,
      "learning_rate": 7.760272781273142e-05,
      "loss": 0.6687,
      "step": 6540
    },
    {
      "epoch": 0.5848533619456366,
      "grad_norm": 0.17069621779388056,
      "learning_rate": 7.757450436267558e-05,
      "loss": 0.6893,
      "step": 6541
    },
    {
      "epoch": 0.5849427753934192,
      "grad_norm": 0.14973683547000408,
      "learning_rate": 7.754628279341895e-05,
      "loss": 0.6493,
      "step": 6542
    },
    {
      "epoch": 0.5850321888412017,
      "grad_norm": 0.16970959135273192,
      "learning_rate": 7.751806310732847e-05,
      "loss": 0.6629,
      "step": 6543
    },
    {
      "epoch": 0.5851216022889842,
      "grad_norm": 0.1540497390350465,
      "learning_rate": 7.748984530677089e-05,
      "loss": 0.6529,
      "step": 6544
    },
    {
      "epoch": 0.5852110157367668,
      "grad_norm": 0.14916387880883183,
      "learning_rate": 7.746162939411279e-05,
      "loss": 0.6433,
      "step": 6545
    },
    {
      "epoch": 0.5853004291845494,
      "grad_norm": 0.14340688900770718,
      "learning_rate": 7.74334153717206e-05,
      "loss": 0.6272,
      "step": 6546
    },
    {
      "epoch": 0.585389842632332,
      "grad_norm": 0.13797926794615814,
      "learning_rate": 7.740520324196064e-05,
      "loss": 0.6804,
      "step": 6547
    },
    {
      "epoch": 0.5854792560801144,
      "grad_norm": 0.15131890261443337,
      "learning_rate": 7.737699300719896e-05,
      "loss": 0.6386,
      "step": 6548
    },
    {
      "epoch": 0.585568669527897,
      "grad_norm": 0.1493521507315761,
      "learning_rate": 7.734878466980159e-05,
      "loss": 0.6827,
      "step": 6549
    },
    {
      "epoch": 0.5856580829756796,
      "grad_norm": 0.14991095949675315,
      "learning_rate": 7.73205782321343e-05,
      "loss": 0.6591,
      "step": 6550
    },
    {
      "epoch": 0.5857474964234621,
      "grad_norm": 0.1605847081254028,
      "learning_rate": 7.729237369656269e-05,
      "loss": 0.7151,
      "step": 6551
    },
    {
      "epoch": 0.5858369098712446,
      "grad_norm": 0.15143373066113808,
      "learning_rate": 7.72641710654523e-05,
      "loss": 0.6469,
      "step": 6552
    },
    {
      "epoch": 0.5859263233190272,
      "grad_norm": 0.14905894002498038,
      "learning_rate": 7.723597034116838e-05,
      "loss": 0.6721,
      "step": 6553
    },
    {
      "epoch": 0.5860157367668097,
      "grad_norm": 0.1514801713885345,
      "learning_rate": 7.720777152607619e-05,
      "loss": 0.6987,
      "step": 6554
    },
    {
      "epoch": 0.5861051502145923,
      "grad_norm": 0.17369642764271315,
      "learning_rate": 7.717957462254065e-05,
      "loss": 0.6467,
      "step": 6555
    },
    {
      "epoch": 0.5861945636623748,
      "grad_norm": 0.1488563977403936,
      "learning_rate": 7.715137963292665e-05,
      "loss": 0.6465,
      "step": 6556
    },
    {
      "epoch": 0.5862839771101573,
      "grad_norm": 0.18016339291172134,
      "learning_rate": 7.712318655959884e-05,
      "loss": 0.6872,
      "step": 6557
    },
    {
      "epoch": 0.5863733905579399,
      "grad_norm": 0.14563045683629636,
      "learning_rate": 7.709499540492171e-05,
      "loss": 0.6658,
      "step": 6558
    },
    {
      "epoch": 0.5864628040057225,
      "grad_norm": 0.14789190332606694,
      "learning_rate": 7.70668061712597e-05,
      "loss": 0.6313,
      "step": 6559
    },
    {
      "epoch": 0.586552217453505,
      "grad_norm": 0.15646881277822092,
      "learning_rate": 7.70386188609769e-05,
      "loss": 0.6987,
      "step": 6560
    },
    {
      "epoch": 0.5866416309012875,
      "grad_norm": 0.15393005844363883,
      "learning_rate": 7.701043347643747e-05,
      "loss": 0.614,
      "step": 6561
    },
    {
      "epoch": 0.5867310443490701,
      "grad_norm": 0.15470397259840651,
      "learning_rate": 7.698225002000516e-05,
      "loss": 0.6883,
      "step": 6562
    },
    {
      "epoch": 0.5868204577968527,
      "grad_norm": 0.1526897127042945,
      "learning_rate": 7.695406849404379e-05,
      "loss": 0.6608,
      "step": 6563
    },
    {
      "epoch": 0.5869098712446352,
      "grad_norm": 0.15368996135123242,
      "learning_rate": 7.692588890091686e-05,
      "loss": 0.6448,
      "step": 6564
    },
    {
      "epoch": 0.5869992846924177,
      "grad_norm": 0.13073010834160997,
      "learning_rate": 7.689771124298774e-05,
      "loss": 0.6249,
      "step": 6565
    },
    {
      "epoch": 0.5870886981402003,
      "grad_norm": 0.13568888001820076,
      "learning_rate": 7.686953552261966e-05,
      "loss": 0.6415,
      "step": 6566
    },
    {
      "epoch": 0.5871781115879828,
      "grad_norm": 0.159447704993377,
      "learning_rate": 7.684136174217574e-05,
      "loss": 0.6571,
      "step": 6567
    },
    {
      "epoch": 0.5872675250357654,
      "grad_norm": 0.16040712624067127,
      "learning_rate": 7.681318990401885e-05,
      "loss": 0.6747,
      "step": 6568
    },
    {
      "epoch": 0.587356938483548,
      "grad_norm": 0.1648396838080043,
      "learning_rate": 7.678502001051168e-05,
      "loss": 0.6604,
      "step": 6569
    },
    {
      "epoch": 0.5874463519313304,
      "grad_norm": 0.1451004389142686,
      "learning_rate": 7.675685206401689e-05,
      "loss": 0.611,
      "step": 6570
    },
    {
      "epoch": 0.587535765379113,
      "grad_norm": 0.16658847905414823,
      "learning_rate": 7.67286860668968e-05,
      "loss": 0.6636,
      "step": 6571
    },
    {
      "epoch": 0.5876251788268956,
      "grad_norm": 0.16556305416507583,
      "learning_rate": 7.670052202151374e-05,
      "loss": 0.6575,
      "step": 6572
    },
    {
      "epoch": 0.5877145922746781,
      "grad_norm": 0.1646032171262615,
      "learning_rate": 7.667235993022972e-05,
      "loss": 0.6907,
      "step": 6573
    },
    {
      "epoch": 0.5878040057224606,
      "grad_norm": 0.15078814507658525,
      "learning_rate": 7.664419979540673e-05,
      "loss": 0.6466,
      "step": 6574
    },
    {
      "epoch": 0.5878934191702432,
      "grad_norm": 0.17563984034159338,
      "learning_rate": 7.66160416194065e-05,
      "loss": 0.6658,
      "step": 6575
    },
    {
      "epoch": 0.5879828326180258,
      "grad_norm": 0.16990377681848,
      "learning_rate": 7.658788540459062e-05,
      "loss": 0.6807,
      "step": 6576
    },
    {
      "epoch": 0.5880722460658083,
      "grad_norm": 0.14891814451636073,
      "learning_rate": 7.655973115332052e-05,
      "loss": 0.6134,
      "step": 6577
    },
    {
      "epoch": 0.5881616595135909,
      "grad_norm": 0.1714929070483477,
      "learning_rate": 7.653157886795744e-05,
      "loss": 0.6746,
      "step": 6578
    },
    {
      "epoch": 0.5882510729613734,
      "grad_norm": 0.13270959575391703,
      "learning_rate": 7.65034285508625e-05,
      "loss": 0.6414,
      "step": 6579
    },
    {
      "epoch": 0.5883404864091559,
      "grad_norm": 0.16638152688117933,
      "learning_rate": 7.647528020439662e-05,
      "loss": 0.6468,
      "step": 6580
    },
    {
      "epoch": 0.5884298998569385,
      "grad_norm": 0.1439342357838051,
      "learning_rate": 7.64471338309206e-05,
      "loss": 0.6588,
      "step": 6581
    },
    {
      "epoch": 0.5885193133047211,
      "grad_norm": 0.16501241125983157,
      "learning_rate": 7.641898943279501e-05,
      "loss": 0.6951,
      "step": 6582
    },
    {
      "epoch": 0.5886087267525035,
      "grad_norm": 0.14606982812652503,
      "learning_rate": 7.639084701238032e-05,
      "loss": 0.645,
      "step": 6583
    },
    {
      "epoch": 0.5886981402002861,
      "grad_norm": 0.16352804359927897,
      "learning_rate": 7.636270657203677e-05,
      "loss": 0.6532,
      "step": 6584
    },
    {
      "epoch": 0.5887875536480687,
      "grad_norm": 0.1503993841664719,
      "learning_rate": 7.633456811412446e-05,
      "loss": 0.635,
      "step": 6585
    },
    {
      "epoch": 0.5888769670958512,
      "grad_norm": 0.14680673261746385,
      "learning_rate": 7.630643164100335e-05,
      "loss": 0.6353,
      "step": 6586
    },
    {
      "epoch": 0.5889663805436338,
      "grad_norm": 0.15980271929148,
      "learning_rate": 7.627829715503317e-05,
      "loss": 0.6824,
      "step": 6587
    },
    {
      "epoch": 0.5890557939914163,
      "grad_norm": 0.1512889392340896,
      "learning_rate": 7.625016465857361e-05,
      "loss": 0.6901,
      "step": 6588
    },
    {
      "epoch": 0.5891452074391988,
      "grad_norm": 0.14403432175837394,
      "learning_rate": 7.622203415398402e-05,
      "loss": 0.6633,
      "step": 6589
    },
    {
      "epoch": 0.5892346208869814,
      "grad_norm": 0.1757921481511769,
      "learning_rate": 7.619390564362374e-05,
      "loss": 0.6984,
      "step": 6590
    },
    {
      "epoch": 0.589324034334764,
      "grad_norm": 0.18103522631724134,
      "learning_rate": 7.616577912985185e-05,
      "loss": 0.3566,
      "step": 6591
    },
    {
      "epoch": 0.5894134477825465,
      "grad_norm": 0.15791529801464665,
      "learning_rate": 7.613765461502724e-05,
      "loss": 0.6773,
      "step": 6592
    },
    {
      "epoch": 0.589502861230329,
      "grad_norm": 0.15096836206745412,
      "learning_rate": 7.610953210150875e-05,
      "loss": 0.6514,
      "step": 6593
    },
    {
      "epoch": 0.5895922746781116,
      "grad_norm": 0.15420909029305882,
      "learning_rate": 7.608141159165492e-05,
      "loss": 0.6888,
      "step": 6594
    },
    {
      "epoch": 0.5896816881258942,
      "grad_norm": 0.1669842207598747,
      "learning_rate": 7.605329308782423e-05,
      "loss": 0.6822,
      "step": 6595
    },
    {
      "epoch": 0.5897711015736766,
      "grad_norm": 0.17649068164611187,
      "learning_rate": 7.602517659237492e-05,
      "loss": 0.3484,
      "step": 6596
    },
    {
      "epoch": 0.5898605150214592,
      "grad_norm": 0.15687476750461066,
      "learning_rate": 7.599706210766513e-05,
      "loss": 0.6381,
      "step": 6597
    },
    {
      "epoch": 0.5899499284692418,
      "grad_norm": 0.16009348654691472,
      "learning_rate": 7.596894963605274e-05,
      "loss": 0.6574,
      "step": 6598
    },
    {
      "epoch": 0.5900393419170243,
      "grad_norm": 0.14504027441653267,
      "learning_rate": 7.594083917989549e-05,
      "loss": 0.6373,
      "step": 6599
    },
    {
      "epoch": 0.5901287553648069,
      "grad_norm": 0.13203563463622633,
      "learning_rate": 7.591273074155104e-05,
      "loss": 0.6681,
      "step": 6600
    },
    {
      "epoch": 0.5902181688125894,
      "grad_norm": 0.1403995062503025,
      "learning_rate": 7.588462432337672e-05,
      "loss": 0.6683,
      "step": 6601
    },
    {
      "epoch": 0.5903075822603719,
      "grad_norm": 0.16792666240487597,
      "learning_rate": 7.585651992772988e-05,
      "loss": 0.6587,
      "step": 6602
    },
    {
      "epoch": 0.5903969957081545,
      "grad_norm": 0.1961863906356469,
      "learning_rate": 7.582841755696754e-05,
      "loss": 0.3765,
      "step": 6603
    },
    {
      "epoch": 0.5904864091559371,
      "grad_norm": 0.15671556758322847,
      "learning_rate": 7.580031721344663e-05,
      "loss": 0.6401,
      "step": 6604
    },
    {
      "epoch": 0.5905758226037195,
      "grad_norm": 0.15146683231277716,
      "learning_rate": 7.577221889952389e-05,
      "loss": 0.6301,
      "step": 6605
    },
    {
      "epoch": 0.5906652360515021,
      "grad_norm": 0.14954111484722973,
      "learning_rate": 7.57441226175559e-05,
      "loss": 0.6692,
      "step": 6606
    },
    {
      "epoch": 0.5907546494992847,
      "grad_norm": 0.14778926598231257,
      "learning_rate": 7.571602836989906e-05,
      "loss": 0.651,
      "step": 6607
    },
    {
      "epoch": 0.5908440629470673,
      "grad_norm": 0.14254132336688874,
      "learning_rate": 7.568793615890954e-05,
      "loss": 0.622,
      "step": 6608
    },
    {
      "epoch": 0.5909334763948498,
      "grad_norm": 0.15455394804904424,
      "learning_rate": 7.565984598694349e-05,
      "loss": 0.6673,
      "step": 6609
    },
    {
      "epoch": 0.5910228898426323,
      "grad_norm": 0.15614056440536928,
      "learning_rate": 7.563175785635678e-05,
      "loss": 0.6667,
      "step": 6610
    },
    {
      "epoch": 0.5911123032904149,
      "grad_norm": 0.17258104010116845,
      "learning_rate": 7.56036717695051e-05,
      "loss": 0.6941,
      "step": 6611
    },
    {
      "epoch": 0.5912017167381974,
      "grad_norm": 0.1600113334249228,
      "learning_rate": 7.557558772874398e-05,
      "loss": 0.6394,
      "step": 6612
    },
    {
      "epoch": 0.59129113018598,
      "grad_norm": 0.15537224080656487,
      "learning_rate": 7.554750573642886e-05,
      "loss": 0.6858,
      "step": 6613
    },
    {
      "epoch": 0.5913805436337625,
      "grad_norm": 0.13248396264374918,
      "learning_rate": 7.551942579491489e-05,
      "loss": 0.6683,
      "step": 6614
    },
    {
      "epoch": 0.591469957081545,
      "grad_norm": 0.15439300457129665,
      "learning_rate": 7.549134790655708e-05,
      "loss": 0.6454,
      "step": 6615
    },
    {
      "epoch": 0.5915593705293276,
      "grad_norm": 0.14634091868113516,
      "learning_rate": 7.546327207371033e-05,
      "loss": 0.621,
      "step": 6616
    },
    {
      "epoch": 0.5916487839771102,
      "grad_norm": 0.15381757106704858,
      "learning_rate": 7.543519829872934e-05,
      "loss": 0.636,
      "step": 6617
    },
    {
      "epoch": 0.5917381974248928,
      "grad_norm": 0.1427205808080491,
      "learning_rate": 7.54071265839686e-05,
      "loss": 0.6388,
      "step": 6618
    },
    {
      "epoch": 0.5918276108726752,
      "grad_norm": 0.16286787843438427,
      "learning_rate": 7.537905693178245e-05,
      "loss": 0.671,
      "step": 6619
    },
    {
      "epoch": 0.5919170243204578,
      "grad_norm": 0.19168326842080793,
      "learning_rate": 7.535098934452508e-05,
      "loss": 0.7094,
      "step": 6620
    },
    {
      "epoch": 0.5920064377682404,
      "grad_norm": 0.15266275967143883,
      "learning_rate": 7.532292382455044e-05,
      "loss": 0.6738,
      "step": 6621
    },
    {
      "epoch": 0.5920958512160229,
      "grad_norm": 0.1398980470656855,
      "learning_rate": 7.529486037421235e-05,
      "loss": 0.6427,
      "step": 6622
    },
    {
      "epoch": 0.5921852646638054,
      "grad_norm": 0.1324811351419343,
      "learning_rate": 7.52667989958645e-05,
      "loss": 0.648,
      "step": 6623
    },
    {
      "epoch": 0.592274678111588,
      "grad_norm": 0.15387626471694016,
      "learning_rate": 7.523873969186039e-05,
      "loss": 0.6468,
      "step": 6624
    },
    {
      "epoch": 0.5923640915593705,
      "grad_norm": 0.1486580190667548,
      "learning_rate": 7.521068246455325e-05,
      "loss": 0.6481,
      "step": 6625
    },
    {
      "epoch": 0.5924535050071531,
      "grad_norm": 0.16825580900791173,
      "learning_rate": 7.518262731629623e-05,
      "loss": 0.6853,
      "step": 6626
    },
    {
      "epoch": 0.5925429184549357,
      "grad_norm": 0.1851427315219087,
      "learning_rate": 7.51545742494423e-05,
      "loss": 0.3696,
      "step": 6627
    },
    {
      "epoch": 0.5926323319027181,
      "grad_norm": 0.1636875487733691,
      "learning_rate": 7.512652326634421e-05,
      "loss": 0.6875,
      "step": 6628
    },
    {
      "epoch": 0.5927217453505007,
      "grad_norm": 0.1462312750026088,
      "learning_rate": 7.509847436935455e-05,
      "loss": 0.6518,
      "step": 6629
    },
    {
      "epoch": 0.5928111587982833,
      "grad_norm": 0.13887834407455568,
      "learning_rate": 7.50704275608258e-05,
      "loss": 0.6296,
      "step": 6630
    },
    {
      "epoch": 0.5929005722460658,
      "grad_norm": 0.15929444628030565,
      "learning_rate": 7.504238284311019e-05,
      "loss": 0.6753,
      "step": 6631
    },
    {
      "epoch": 0.5929899856938483,
      "grad_norm": 0.1632198593583922,
      "learning_rate": 7.501434021855977e-05,
      "loss": 0.7008,
      "step": 6632
    },
    {
      "epoch": 0.5930793991416309,
      "grad_norm": 0.15302154895653566,
      "learning_rate": 7.498629968952648e-05,
      "loss": 0.6505,
      "step": 6633
    },
    {
      "epoch": 0.5931688125894135,
      "grad_norm": 0.1698754952781303,
      "learning_rate": 7.495826125836203e-05,
      "loss": 0.658,
      "step": 6634
    },
    {
      "epoch": 0.593258226037196,
      "grad_norm": 0.13319823071531214,
      "learning_rate": 7.493022492741795e-05,
      "loss": 0.5991,
      "step": 6635
    },
    {
      "epoch": 0.5933476394849786,
      "grad_norm": 0.16981084823988196,
      "learning_rate": 7.49021906990456e-05,
      "loss": 0.6871,
      "step": 6636
    },
    {
      "epoch": 0.593437052932761,
      "grad_norm": 0.13038850928373674,
      "learning_rate": 7.487415857559625e-05,
      "loss": 0.6495,
      "step": 6637
    },
    {
      "epoch": 0.5935264663805436,
      "grad_norm": 0.15188943547267436,
      "learning_rate": 7.484612855942088e-05,
      "loss": 0.6744,
      "step": 6638
    },
    {
      "epoch": 0.5936158798283262,
      "grad_norm": 0.14648976164701177,
      "learning_rate": 7.481810065287029e-05,
      "loss": 0.6187,
      "step": 6639
    },
    {
      "epoch": 0.5937052932761088,
      "grad_norm": 0.1881404151159211,
      "learning_rate": 7.479007485829523e-05,
      "loss": 0.3864,
      "step": 6640
    },
    {
      "epoch": 0.5937947067238912,
      "grad_norm": 0.13292502306812284,
      "learning_rate": 7.476205117804614e-05,
      "loss": 0.673,
      "step": 6641
    },
    {
      "epoch": 0.5938841201716738,
      "grad_norm": 0.15055462743628556,
      "learning_rate": 7.47340296144733e-05,
      "loss": 0.6546,
      "step": 6642
    },
    {
      "epoch": 0.5939735336194564,
      "grad_norm": 0.15845028886362064,
      "learning_rate": 7.470601016992687e-05,
      "loss": 0.6422,
      "step": 6643
    },
    {
      "epoch": 0.594062947067239,
      "grad_norm": 0.17491378329352367,
      "learning_rate": 7.467799284675687e-05,
      "loss": 0.6615,
      "step": 6644
    },
    {
      "epoch": 0.5941523605150214,
      "grad_norm": 0.18236906291986943,
      "learning_rate": 7.464997764731304e-05,
      "loss": 0.6561,
      "step": 6645
    },
    {
      "epoch": 0.594241773962804,
      "grad_norm": 0.13780413995139681,
      "learning_rate": 7.462196457394493e-05,
      "loss": 0.6719,
      "step": 6646
    },
    {
      "epoch": 0.5943311874105865,
      "grad_norm": 0.14122816204006405,
      "learning_rate": 7.459395362900201e-05,
      "loss": 0.6459,
      "step": 6647
    },
    {
      "epoch": 0.5944206008583691,
      "grad_norm": 0.16617161587037735,
      "learning_rate": 7.456594481483355e-05,
      "loss": 0.6518,
      "step": 6648
    },
    {
      "epoch": 0.5945100143061517,
      "grad_norm": 0.1582661071462874,
      "learning_rate": 7.453793813378853e-05,
      "loss": 0.6646,
      "step": 6649
    },
    {
      "epoch": 0.5945994277539342,
      "grad_norm": 0.1511792178520668,
      "learning_rate": 7.450993358821589e-05,
      "loss": 0.6674,
      "step": 6650
    },
    {
      "epoch": 0.5946888412017167,
      "grad_norm": 0.150745906016437,
      "learning_rate": 7.448193118046435e-05,
      "loss": 0.6681,
      "step": 6651
    },
    {
      "epoch": 0.5947782546494993,
      "grad_norm": 0.16938499486214348,
      "learning_rate": 7.445393091288247e-05,
      "loss": 0.6795,
      "step": 6652
    },
    {
      "epoch": 0.5948676680972819,
      "grad_norm": 0.15727245824667987,
      "learning_rate": 7.442593278781848e-05,
      "loss": 0.6792,
      "step": 6653
    },
    {
      "epoch": 0.5949570815450643,
      "grad_norm": 0.12850177273594968,
      "learning_rate": 7.439793680762068e-05,
      "loss": 0.6313,
      "step": 6654
    },
    {
      "epoch": 0.5950464949928469,
      "grad_norm": 0.13976978030931098,
      "learning_rate": 7.436994297463698e-05,
      "loss": 0.6271,
      "step": 6655
    },
    {
      "epoch": 0.5951359084406295,
      "grad_norm": 0.17036081706417747,
      "learning_rate": 7.434195129121518e-05,
      "loss": 0.6572,
      "step": 6656
    },
    {
      "epoch": 0.595225321888412,
      "grad_norm": 0.15549687248671726,
      "learning_rate": 7.431396175970296e-05,
      "loss": 0.6683,
      "step": 6657
    },
    {
      "epoch": 0.5953147353361946,
      "grad_norm": 0.1565806621641658,
      "learning_rate": 7.428597438244776e-05,
      "loss": 0.7053,
      "step": 6658
    },
    {
      "epoch": 0.5954041487839771,
      "grad_norm": 0.145647346981643,
      "learning_rate": 7.425798916179683e-05,
      "loss": 0.6692,
      "step": 6659
    },
    {
      "epoch": 0.5954935622317596,
      "grad_norm": 0.14401128449573855,
      "learning_rate": 7.423000610009725e-05,
      "loss": 0.6272,
      "step": 6660
    },
    {
      "epoch": 0.5955829756795422,
      "grad_norm": 0.15678269399431125,
      "learning_rate": 7.420202519969595e-05,
      "loss": 0.6488,
      "step": 6661
    },
    {
      "epoch": 0.5956723891273248,
      "grad_norm": 0.15635093520142138,
      "learning_rate": 7.417404646293961e-05,
      "loss": 0.665,
      "step": 6662
    },
    {
      "epoch": 0.5957618025751072,
      "grad_norm": 0.19287194503150676,
      "learning_rate": 7.414606989217482e-05,
      "loss": 0.3815,
      "step": 6663
    },
    {
      "epoch": 0.5958512160228898,
      "grad_norm": 0.14431574403364775,
      "learning_rate": 7.411809548974792e-05,
      "loss": 0.6335,
      "step": 6664
    },
    {
      "epoch": 0.5959406294706724,
      "grad_norm": 0.132254257642138,
      "learning_rate": 7.409012325800511e-05,
      "loss": 0.6482,
      "step": 6665
    },
    {
      "epoch": 0.596030042918455,
      "grad_norm": 0.1472501211321581,
      "learning_rate": 7.406215319929235e-05,
      "loss": 0.6542,
      "step": 6666
    },
    {
      "epoch": 0.5961194563662375,
      "grad_norm": 0.15407804032828407,
      "learning_rate": 7.403418531595551e-05,
      "loss": 0.6621,
      "step": 6667
    },
    {
      "epoch": 0.59620886981402,
      "grad_norm": 0.18711136466814915,
      "learning_rate": 7.400621961034018e-05,
      "loss": 0.7152,
      "step": 6668
    },
    {
      "epoch": 0.5962982832618026,
      "grad_norm": 0.1632463164758681,
      "learning_rate": 7.39782560847918e-05,
      "loss": 0.6278,
      "step": 6669
    },
    {
      "epoch": 0.5963876967095851,
      "grad_norm": 0.15498857066364538,
      "learning_rate": 7.395029474165562e-05,
      "loss": 0.6762,
      "step": 6670
    },
    {
      "epoch": 0.5964771101573677,
      "grad_norm": 0.13666543911165074,
      "learning_rate": 7.392233558327683e-05,
      "loss": 0.6576,
      "step": 6671
    },
    {
      "epoch": 0.5965665236051502,
      "grad_norm": 0.1523616312518648,
      "learning_rate": 7.389437861200024e-05,
      "loss": 0.6664,
      "step": 6672
    },
    {
      "epoch": 0.5966559370529327,
      "grad_norm": 0.15125844376373251,
      "learning_rate": 7.386642383017057e-05,
      "loss": 0.6411,
      "step": 6673
    },
    {
      "epoch": 0.5967453505007153,
      "grad_norm": 0.14549216548387822,
      "learning_rate": 7.383847124013239e-05,
      "loss": 0.6263,
      "step": 6674
    },
    {
      "epoch": 0.5968347639484979,
      "grad_norm": 0.14346737938718318,
      "learning_rate": 7.381052084423005e-05,
      "loss": 0.6209,
      "step": 6675
    },
    {
      "epoch": 0.5969241773962805,
      "grad_norm": 0.16261417082245613,
      "learning_rate": 7.378257264480766e-05,
      "loss": 0.6464,
      "step": 6676
    },
    {
      "epoch": 0.5970135908440629,
      "grad_norm": 0.15256698335739838,
      "learning_rate": 7.375462664420922e-05,
      "loss": 0.662,
      "step": 6677
    },
    {
      "epoch": 0.5971030042918455,
      "grad_norm": 0.14411054823743186,
      "learning_rate": 7.37266828447786e-05,
      "loss": 0.649,
      "step": 6678
    },
    {
      "epoch": 0.5971924177396281,
      "grad_norm": 0.14648546892349307,
      "learning_rate": 7.369874124885934e-05,
      "loss": 0.6264,
      "step": 6679
    },
    {
      "epoch": 0.5972818311874106,
      "grad_norm": 0.1439245926669237,
      "learning_rate": 7.367080185879489e-05,
      "loss": 0.6392,
      "step": 6680
    },
    {
      "epoch": 0.5973712446351931,
      "grad_norm": 0.15277611180858303,
      "learning_rate": 7.364286467692848e-05,
      "loss": 0.6875,
      "step": 6681
    },
    {
      "epoch": 0.5974606580829757,
      "grad_norm": 0.161192936147945,
      "learning_rate": 7.361492970560322e-05,
      "loss": 0.7002,
      "step": 6682
    },
    {
      "epoch": 0.5975500715307582,
      "grad_norm": 0.14822752534429676,
      "learning_rate": 7.358699694716189e-05,
      "loss": 0.6351,
      "step": 6683
    },
    {
      "epoch": 0.5976394849785408,
      "grad_norm": 0.16009784730963533,
      "learning_rate": 7.35590664039472e-05,
      "loss": 0.6476,
      "step": 6684
    },
    {
      "epoch": 0.5977288984263234,
      "grad_norm": 0.16292491415154942,
      "learning_rate": 7.353113807830175e-05,
      "loss": 0.6334,
      "step": 6685
    },
    {
      "epoch": 0.5978183118741058,
      "grad_norm": 0.14839116588264345,
      "learning_rate": 7.350321197256777e-05,
      "loss": 0.674,
      "step": 6686
    },
    {
      "epoch": 0.5979077253218884,
      "grad_norm": 0.155905652420029,
      "learning_rate": 7.347528808908737e-05,
      "loss": 0.6667,
      "step": 6687
    },
    {
      "epoch": 0.597997138769671,
      "grad_norm": 0.14306715348746674,
      "learning_rate": 7.344736643020256e-05,
      "loss": 0.6613,
      "step": 6688
    },
    {
      "epoch": 0.5980865522174535,
      "grad_norm": 0.15338012311005023,
      "learning_rate": 7.341944699825503e-05,
      "loss": 0.6682,
      "step": 6689
    },
    {
      "epoch": 0.598175965665236,
      "grad_norm": 0.156193035283421,
      "learning_rate": 7.33915297955864e-05,
      "loss": 0.6436,
      "step": 6690
    },
    {
      "epoch": 0.5982653791130186,
      "grad_norm": 0.1427101477696175,
      "learning_rate": 7.3363614824538e-05,
      "loss": 0.6318,
      "step": 6691
    },
    {
      "epoch": 0.5983547925608012,
      "grad_norm": 0.14061544992052835,
      "learning_rate": 7.333570208745109e-05,
      "loss": 0.6307,
      "step": 6692
    },
    {
      "epoch": 0.5984442060085837,
      "grad_norm": 0.15805656148408637,
      "learning_rate": 7.330779158666661e-05,
      "loss": 0.6553,
      "step": 6693
    },
    {
      "epoch": 0.5985336194563662,
      "grad_norm": 0.1725138713077395,
      "learning_rate": 7.327988332452545e-05,
      "loss": 0.6445,
      "step": 6694
    },
    {
      "epoch": 0.5986230329041488,
      "grad_norm": 0.17454632070056508,
      "learning_rate": 7.325197730336819e-05,
      "loss": 0.6655,
      "step": 6695
    },
    {
      "epoch": 0.5987124463519313,
      "grad_norm": 0.15657218966911723,
      "learning_rate": 7.322407352553529e-05,
      "loss": 0.6929,
      "step": 6696
    },
    {
      "epoch": 0.5988018597997139,
      "grad_norm": 0.16317991394641576,
      "learning_rate": 7.319617199336701e-05,
      "loss": 0.6774,
      "step": 6697
    },
    {
      "epoch": 0.5988912732474965,
      "grad_norm": 0.13381624041778387,
      "learning_rate": 7.316827270920339e-05,
      "loss": 0.6273,
      "step": 6698
    },
    {
      "epoch": 0.5989806866952789,
      "grad_norm": 0.15791601777639758,
      "learning_rate": 7.314037567538436e-05,
      "loss": 0.6576,
      "step": 6699
    },
    {
      "epoch": 0.5990701001430615,
      "grad_norm": 0.14889854060167956,
      "learning_rate": 7.311248089424958e-05,
      "loss": 0.6466,
      "step": 6700
    },
    {
      "epoch": 0.5991595135908441,
      "grad_norm": 0.14920785165313608,
      "learning_rate": 7.308458836813856e-05,
      "loss": 0.6298,
      "step": 6701
    },
    {
      "epoch": 0.5992489270386266,
      "grad_norm": 0.16606857985012963,
      "learning_rate": 7.305669809939062e-05,
      "loss": 0.6499,
      "step": 6702
    },
    {
      "epoch": 0.5993383404864091,
      "grad_norm": 0.17282969082005747,
      "learning_rate": 7.302881009034484e-05,
      "loss": 0.3401,
      "step": 6703
    },
    {
      "epoch": 0.5994277539341917,
      "grad_norm": 0.1707384846024771,
      "learning_rate": 7.30009243433402e-05,
      "loss": 0.6801,
      "step": 6704
    },
    {
      "epoch": 0.5995171673819742,
      "grad_norm": 0.1510840744440226,
      "learning_rate": 7.29730408607154e-05,
      "loss": 0.6728,
      "step": 6705
    },
    {
      "epoch": 0.5996065808297568,
      "grad_norm": 0.14452754489288833,
      "learning_rate": 7.294515964480906e-05,
      "loss": 0.6575,
      "step": 6706
    },
    {
      "epoch": 0.5996959942775394,
      "grad_norm": 0.14794423184140393,
      "learning_rate": 7.291728069795948e-05,
      "loss": 0.6076,
      "step": 6707
    },
    {
      "epoch": 0.5997854077253219,
      "grad_norm": 0.1484203245493183,
      "learning_rate": 7.28894040225049e-05,
      "loss": 0.6677,
      "step": 6708
    },
    {
      "epoch": 0.5998748211731044,
      "grad_norm": 0.179486724202163,
      "learning_rate": 7.286152962078326e-05,
      "loss": 0.6548,
      "step": 6709
    },
    {
      "epoch": 0.599964234620887,
      "grad_norm": 0.16050669211610055,
      "learning_rate": 7.283365749513231e-05,
      "loss": 0.686,
      "step": 6710
    },
    {
      "epoch": 0.6000536480686696,
      "grad_norm": 0.17375115360987717,
      "learning_rate": 7.280578764788975e-05,
      "loss": 0.3822,
      "step": 6711
    },
    {
      "epoch": 0.600143061516452,
      "grad_norm": 0.15897338513388695,
      "learning_rate": 7.277792008139287e-05,
      "loss": 0.636,
      "step": 6712
    },
    {
      "epoch": 0.6002324749642346,
      "grad_norm": 0.1568917853773584,
      "learning_rate": 7.2750054797979e-05,
      "loss": 0.676,
      "step": 6713
    },
    {
      "epoch": 0.6003218884120172,
      "grad_norm": 0.16349722713185047,
      "learning_rate": 7.272219179998511e-05,
      "loss": 0.6527,
      "step": 6714
    },
    {
      "epoch": 0.6004113018597997,
      "grad_norm": 0.15454973944119949,
      "learning_rate": 7.269433108974809e-05,
      "loss": 0.654,
      "step": 6715
    },
    {
      "epoch": 0.6005007153075823,
      "grad_norm": 0.1825224003095534,
      "learning_rate": 7.266647266960452e-05,
      "loss": 0.7339,
      "step": 6716
    },
    {
      "epoch": 0.6005901287553648,
      "grad_norm": 0.16904571440612332,
      "learning_rate": 7.263861654189086e-05,
      "loss": 0.6944,
      "step": 6717
    },
    {
      "epoch": 0.6006795422031473,
      "grad_norm": 0.16263856376160168,
      "learning_rate": 7.261076270894342e-05,
      "loss": 0.6399,
      "step": 6718
    },
    {
      "epoch": 0.6007689556509299,
      "grad_norm": 0.17117232911704475,
      "learning_rate": 7.258291117309817e-05,
      "loss": 0.6827,
      "step": 6719
    },
    {
      "epoch": 0.6008583690987125,
      "grad_norm": 0.15069006766043944,
      "learning_rate": 7.25550619366911e-05,
      "loss": 0.6545,
      "step": 6720
    },
    {
      "epoch": 0.600947782546495,
      "grad_norm": 0.18004509322571324,
      "learning_rate": 7.252721500205783e-05,
      "loss": 0.6377,
      "step": 6721
    },
    {
      "epoch": 0.6010371959942775,
      "grad_norm": 0.16504988248298472,
      "learning_rate": 7.249937037153387e-05,
      "loss": 0.6555,
      "step": 6722
    },
    {
      "epoch": 0.6011266094420601,
      "grad_norm": 0.12351304269466626,
      "learning_rate": 7.24715280474545e-05,
      "loss": 0.63,
      "step": 6723
    },
    {
      "epoch": 0.6012160228898427,
      "grad_norm": 0.14490501883854312,
      "learning_rate": 7.244368803215482e-05,
      "loss": 0.6681,
      "step": 6724
    },
    {
      "epoch": 0.6013054363376252,
      "grad_norm": 0.17561186505525145,
      "learning_rate": 7.241585032796977e-05,
      "loss": 0.3921,
      "step": 6725
    },
    {
      "epoch": 0.6013948497854077,
      "grad_norm": 0.14627527062096327,
      "learning_rate": 7.238801493723398e-05,
      "loss": 0.6531,
      "step": 6726
    },
    {
      "epoch": 0.6014842632331903,
      "grad_norm": 0.157848081541042,
      "learning_rate": 7.236018186228206e-05,
      "loss": 0.6835,
      "step": 6727
    },
    {
      "epoch": 0.6015736766809728,
      "grad_norm": 0.15600023173540997,
      "learning_rate": 7.233235110544833e-05,
      "loss": 0.6792,
      "step": 6728
    },
    {
      "epoch": 0.6016630901287554,
      "grad_norm": 0.1467807048201273,
      "learning_rate": 7.230452266906689e-05,
      "loss": 0.6717,
      "step": 6729
    },
    {
      "epoch": 0.6017525035765379,
      "grad_norm": 0.1537119381994752,
      "learning_rate": 7.227669655547167e-05,
      "loss": 0.6756,
      "step": 6730
    },
    {
      "epoch": 0.6018419170243204,
      "grad_norm": 0.12961482400915114,
      "learning_rate": 7.224887276699645e-05,
      "loss": 0.6288,
      "step": 6731
    },
    {
      "epoch": 0.601931330472103,
      "grad_norm": 0.1503103402783213,
      "learning_rate": 7.222105130597477e-05,
      "loss": 0.6899,
      "step": 6732
    },
    {
      "epoch": 0.6020207439198856,
      "grad_norm": 0.15218430819554646,
      "learning_rate": 7.21932321747399e-05,
      "loss": 0.6393,
      "step": 6733
    },
    {
      "epoch": 0.602110157367668,
      "grad_norm": 0.1704977704537732,
      "learning_rate": 7.21654153756251e-05,
      "loss": 0.6534,
      "step": 6734
    },
    {
      "epoch": 0.6021995708154506,
      "grad_norm": 0.14279055367734783,
      "learning_rate": 7.213760091096331e-05,
      "loss": 0.6286,
      "step": 6735
    },
    {
      "epoch": 0.6022889842632332,
      "grad_norm": 0.14257268556764446,
      "learning_rate": 7.210978878308729e-05,
      "loss": 0.6592,
      "step": 6736
    },
    {
      "epoch": 0.6023783977110158,
      "grad_norm": 0.13317002634495262,
      "learning_rate": 7.208197899432958e-05,
      "loss": 0.6239,
      "step": 6737
    },
    {
      "epoch": 0.6024678111587983,
      "grad_norm": 0.15428214535543644,
      "learning_rate": 7.20541715470226e-05,
      "loss": 0.6545,
      "step": 6738
    },
    {
      "epoch": 0.6025572246065808,
      "grad_norm": 0.14455380567944645,
      "learning_rate": 7.202636644349845e-05,
      "loss": 0.6321,
      "step": 6739
    },
    {
      "epoch": 0.6026466380543634,
      "grad_norm": 0.15890708309368415,
      "learning_rate": 7.199856368608922e-05,
      "loss": 0.6991,
      "step": 6740
    },
    {
      "epoch": 0.6027360515021459,
      "grad_norm": 0.1597526282989651,
      "learning_rate": 7.197076327712659e-05,
      "loss": 0.6544,
      "step": 6741
    },
    {
      "epoch": 0.6028254649499285,
      "grad_norm": 0.149650331513329,
      "learning_rate": 7.194296521894223e-05,
      "loss": 0.6415,
      "step": 6742
    },
    {
      "epoch": 0.602914878397711,
      "grad_norm": 0.15783581594025298,
      "learning_rate": 7.191516951386751e-05,
      "loss": 0.6448,
      "step": 6743
    },
    {
      "epoch": 0.6030042918454935,
      "grad_norm": 0.1647079221350447,
      "learning_rate": 7.188737616423356e-05,
      "loss": 0.6575,
      "step": 6744
    },
    {
      "epoch": 0.6030937052932761,
      "grad_norm": 0.15598675703274156,
      "learning_rate": 7.185958517237146e-05,
      "loss": 0.6365,
      "step": 6745
    },
    {
      "epoch": 0.6031831187410587,
      "grad_norm": 0.14735660316558047,
      "learning_rate": 7.183179654061191e-05,
      "loss": 0.6576,
      "step": 6746
    },
    {
      "epoch": 0.6032725321888412,
      "grad_norm": 0.14001143648433723,
      "learning_rate": 7.18040102712856e-05,
      "loss": 0.6213,
      "step": 6747
    },
    {
      "epoch": 0.6033619456366237,
      "grad_norm": 0.16288322889800314,
      "learning_rate": 7.17762263667229e-05,
      "loss": 0.6785,
      "step": 6748
    },
    {
      "epoch": 0.6034513590844063,
      "grad_norm": 0.14899168866751075,
      "learning_rate": 7.1748444829254e-05,
      "loss": 0.6659,
      "step": 6749
    },
    {
      "epoch": 0.6035407725321889,
      "grad_norm": 0.1341923840565931,
      "learning_rate": 7.172066566120892e-05,
      "loss": 0.6413,
      "step": 6750
    },
    {
      "epoch": 0.6036301859799714,
      "grad_norm": 0.17210089010536242,
      "learning_rate": 7.169288886491746e-05,
      "loss": 0.6599,
      "step": 6751
    },
    {
      "epoch": 0.6037195994277539,
      "grad_norm": 0.14948305081717536,
      "learning_rate": 7.166511444270924e-05,
      "loss": 0.6666,
      "step": 6752
    },
    {
      "epoch": 0.6038090128755365,
      "grad_norm": 0.15207368807907112,
      "learning_rate": 7.16373423969136e-05,
      "loss": 0.6123,
      "step": 6753
    },
    {
      "epoch": 0.603898426323319,
      "grad_norm": 0.16918600058474917,
      "learning_rate": 7.160957272985982e-05,
      "loss": 0.6729,
      "step": 6754
    },
    {
      "epoch": 0.6039878397711016,
      "grad_norm": 0.17097631019824863,
      "learning_rate": 7.158180544387691e-05,
      "loss": 0.6424,
      "step": 6755
    },
    {
      "epoch": 0.6040772532188842,
      "grad_norm": 0.15782669939338065,
      "learning_rate": 7.155404054129366e-05,
      "loss": 0.6828,
      "step": 6756
    },
    {
      "epoch": 0.6041666666666666,
      "grad_norm": 0.1572467329379838,
      "learning_rate": 7.152627802443866e-05,
      "loss": 0.6543,
      "step": 6757
    },
    {
      "epoch": 0.6042560801144492,
      "grad_norm": 0.14303869143238174,
      "learning_rate": 7.149851789564034e-05,
      "loss": 0.6236,
      "step": 6758
    },
    {
      "epoch": 0.6043454935622318,
      "grad_norm": 0.16353526569104546,
      "learning_rate": 7.147076015722691e-05,
      "loss": 0.6418,
      "step": 6759
    },
    {
      "epoch": 0.6044349070100143,
      "grad_norm": 0.16069916444871243,
      "learning_rate": 7.144300481152633e-05,
      "loss": 0.6443,
      "step": 6760
    },
    {
      "epoch": 0.6045243204577968,
      "grad_norm": 0.16349965507852096,
      "learning_rate": 7.141525186086647e-05,
      "loss": 0.6616,
      "step": 6761
    },
    {
      "epoch": 0.6046137339055794,
      "grad_norm": 0.15538070927592934,
      "learning_rate": 7.138750130757493e-05,
      "loss": 0.6398,
      "step": 6762
    },
    {
      "epoch": 0.604703147353362,
      "grad_norm": 0.15562057301805976,
      "learning_rate": 7.135975315397912e-05,
      "loss": 0.6486,
      "step": 6763
    },
    {
      "epoch": 0.6047925608011445,
      "grad_norm": 0.1417884959982476,
      "learning_rate": 7.133200740240618e-05,
      "loss": 0.6354,
      "step": 6764
    },
    {
      "epoch": 0.6048819742489271,
      "grad_norm": 0.15403434760213489,
      "learning_rate": 7.130426405518318e-05,
      "loss": 0.7039,
      "step": 6765
    },
    {
      "epoch": 0.6049713876967096,
      "grad_norm": 0.1654167002107765,
      "learning_rate": 7.127652311463691e-05,
      "loss": 0.682,
      "step": 6766
    },
    {
      "epoch": 0.6050608011444921,
      "grad_norm": 0.15111571183530742,
      "learning_rate": 7.124878458309391e-05,
      "loss": 0.6381,
      "step": 6767
    },
    {
      "epoch": 0.6051502145922747,
      "grad_norm": 0.1749880268215445,
      "learning_rate": 7.122104846288064e-05,
      "loss": 0.6887,
      "step": 6768
    },
    {
      "epoch": 0.6052396280400573,
      "grad_norm": 0.16769869513672636,
      "learning_rate": 7.119331475632332e-05,
      "loss": 0.6631,
      "step": 6769
    },
    {
      "epoch": 0.6053290414878397,
      "grad_norm": 0.15394782488192027,
      "learning_rate": 7.116558346574788e-05,
      "loss": 0.6801,
      "step": 6770
    },
    {
      "epoch": 0.6054184549356223,
      "grad_norm": 0.14663548876589982,
      "learning_rate": 7.113785459348012e-05,
      "loss": 0.6483,
      "step": 6771
    },
    {
      "epoch": 0.6055078683834049,
      "grad_norm": 0.15034001612046397,
      "learning_rate": 7.111012814184566e-05,
      "loss": 0.6499,
      "step": 6772
    },
    {
      "epoch": 0.6055972818311874,
      "grad_norm": 0.14056810619538612,
      "learning_rate": 7.108240411316986e-05,
      "loss": 0.6418,
      "step": 6773
    },
    {
      "epoch": 0.60568669527897,
      "grad_norm": 0.1551510779354927,
      "learning_rate": 7.105468250977786e-05,
      "loss": 0.633,
      "step": 6774
    },
    {
      "epoch": 0.6057761087267525,
      "grad_norm": 0.1568696246440158,
      "learning_rate": 7.10269633339947e-05,
      "loss": 0.679,
      "step": 6775
    },
    {
      "epoch": 0.605865522174535,
      "grad_norm": 0.14800012420925654,
      "learning_rate": 7.099924658814517e-05,
      "loss": 0.6261,
      "step": 6776
    },
    {
      "epoch": 0.6059549356223176,
      "grad_norm": 0.16747103650525919,
      "learning_rate": 7.097153227455379e-05,
      "loss": 0.6585,
      "step": 6777
    },
    {
      "epoch": 0.6060443490701002,
      "grad_norm": 0.16816806530798198,
      "learning_rate": 7.094382039554493e-05,
      "loss": 0.7108,
      "step": 6778
    },
    {
      "epoch": 0.6061337625178826,
      "grad_norm": 0.13818780438006625,
      "learning_rate": 7.091611095344277e-05,
      "loss": 0.6696,
      "step": 6779
    },
    {
      "epoch": 0.6062231759656652,
      "grad_norm": 0.14793562809870392,
      "learning_rate": 7.088840395057124e-05,
      "loss": 0.6508,
      "step": 6780
    },
    {
      "epoch": 0.6063125894134478,
      "grad_norm": 0.1459563453533083,
      "learning_rate": 7.086069938925411e-05,
      "loss": 0.6493,
      "step": 6781
    },
    {
      "epoch": 0.6064020028612304,
      "grad_norm": 0.1468143462377746,
      "learning_rate": 7.083299727181495e-05,
      "loss": 0.634,
      "step": 6782
    },
    {
      "epoch": 0.6064914163090128,
      "grad_norm": 0.16432797489321496,
      "learning_rate": 7.080529760057709e-05,
      "loss": 0.6527,
      "step": 6783
    },
    {
      "epoch": 0.6065808297567954,
      "grad_norm": 0.16045691324592573,
      "learning_rate": 7.077760037786365e-05,
      "loss": 0.6553,
      "step": 6784
    },
    {
      "epoch": 0.606670243204578,
      "grad_norm": 0.17806054674166313,
      "learning_rate": 7.074990560599759e-05,
      "loss": 0.6528,
      "step": 6785
    },
    {
      "epoch": 0.6067596566523605,
      "grad_norm": 0.17709575789364682,
      "learning_rate": 7.072221328730162e-05,
      "loss": 0.6915,
      "step": 6786
    },
    {
      "epoch": 0.6068490701001431,
      "grad_norm": 0.14627967277852685,
      "learning_rate": 7.069452342409825e-05,
      "loss": 0.6781,
      "step": 6787
    },
    {
      "epoch": 0.6069384835479256,
      "grad_norm": 0.16375448423262515,
      "learning_rate": 7.066683601870978e-05,
      "loss": 0.6414,
      "step": 6788
    },
    {
      "epoch": 0.6070278969957081,
      "grad_norm": 0.1516871517367815,
      "learning_rate": 7.063915107345839e-05,
      "loss": 0.6663,
      "step": 6789
    },
    {
      "epoch": 0.6071173104434907,
      "grad_norm": 0.17089422491862066,
      "learning_rate": 7.061146859066594e-05,
      "loss": 0.6503,
      "step": 6790
    },
    {
      "epoch": 0.6072067238912733,
      "grad_norm": 0.14358827088621193,
      "learning_rate": 7.058378857265411e-05,
      "loss": 0.6592,
      "step": 6791
    },
    {
      "epoch": 0.6072961373390557,
      "grad_norm": 0.13373615983847473,
      "learning_rate": 7.055611102174442e-05,
      "loss": 0.6258,
      "step": 6792
    },
    {
      "epoch": 0.6073855507868383,
      "grad_norm": 0.15008871579477745,
      "learning_rate": 7.052843594025815e-05,
      "loss": 0.6325,
      "step": 6793
    },
    {
      "epoch": 0.6074749642346209,
      "grad_norm": 0.17701129041837296,
      "learning_rate": 7.050076333051634e-05,
      "loss": 0.673,
      "step": 6794
    },
    {
      "epoch": 0.6075643776824035,
      "grad_norm": 0.16443169501448104,
      "learning_rate": 7.047309319483985e-05,
      "loss": 0.6873,
      "step": 6795
    },
    {
      "epoch": 0.607653791130186,
      "grad_norm": 0.15961338950541842,
      "learning_rate": 7.044542553554943e-05,
      "loss": 0.6766,
      "step": 6796
    },
    {
      "epoch": 0.6077432045779685,
      "grad_norm": 0.18754920995008287,
      "learning_rate": 7.041776035496547e-05,
      "loss": 0.4239,
      "step": 6797
    },
    {
      "epoch": 0.6078326180257511,
      "grad_norm": 0.16735856665949833,
      "learning_rate": 7.039009765540822e-05,
      "loss": 0.634,
      "step": 6798
    },
    {
      "epoch": 0.6079220314735336,
      "grad_norm": 0.15434439729232202,
      "learning_rate": 7.036243743919773e-05,
      "loss": 0.6409,
      "step": 6799
    },
    {
      "epoch": 0.6080114449213162,
      "grad_norm": 0.14202324934425678,
      "learning_rate": 7.033477970865381e-05,
      "loss": 0.6456,
      "step": 6800
    },
    {
      "epoch": 0.6081008583690987,
      "grad_norm": 0.16430369637314102,
      "learning_rate": 7.030712446609608e-05,
      "loss": 0.6645,
      "step": 6801
    },
    {
      "epoch": 0.6081902718168812,
      "grad_norm": 0.15097487346077842,
      "learning_rate": 7.027947171384394e-05,
      "loss": 0.6519,
      "step": 6802
    },
    {
      "epoch": 0.6082796852646638,
      "grad_norm": 0.1521500142691298,
      "learning_rate": 7.025182145421665e-05,
      "loss": 0.6453,
      "step": 6803
    },
    {
      "epoch": 0.6083690987124464,
      "grad_norm": 0.14848087757715092,
      "learning_rate": 7.022417368953317e-05,
      "loss": 0.6344,
      "step": 6804
    },
    {
      "epoch": 0.608458512160229,
      "grad_norm": 0.1863768476203763,
      "learning_rate": 7.019652842211226e-05,
      "loss": 0.7069,
      "step": 6805
    },
    {
      "epoch": 0.6085479256080114,
      "grad_norm": 0.14603582174570934,
      "learning_rate": 7.016888565427253e-05,
      "loss": 0.6325,
      "step": 6806
    },
    {
      "epoch": 0.608637339055794,
      "grad_norm": 0.1518763052843461,
      "learning_rate": 7.014124538833234e-05,
      "loss": 0.6451,
      "step": 6807
    },
    {
      "epoch": 0.6087267525035766,
      "grad_norm": 0.14680995862276158,
      "learning_rate": 7.011360762660983e-05,
      "loss": 0.6383,
      "step": 6808
    },
    {
      "epoch": 0.6088161659513591,
      "grad_norm": 0.1468295927243866,
      "learning_rate": 7.008597237142293e-05,
      "loss": 0.6555,
      "step": 6809
    },
    {
      "epoch": 0.6089055793991416,
      "grad_norm": 0.13695551187193844,
      "learning_rate": 7.005833962508943e-05,
      "loss": 0.6469,
      "step": 6810
    },
    {
      "epoch": 0.6089949928469242,
      "grad_norm": 0.1440460550067863,
      "learning_rate": 7.003070938992682e-05,
      "loss": 0.6712,
      "step": 6811
    },
    {
      "epoch": 0.6090844062947067,
      "grad_norm": 0.15956855931870673,
      "learning_rate": 7.000308166825243e-05,
      "loss": 0.6718,
      "step": 6812
    },
    {
      "epoch": 0.6091738197424893,
      "grad_norm": 0.13425679389765985,
      "learning_rate": 6.997545646238335e-05,
      "loss": 0.6307,
      "step": 6813
    },
    {
      "epoch": 0.6092632331902719,
      "grad_norm": 0.1580176049436838,
      "learning_rate": 6.994783377463645e-05,
      "loss": 0.668,
      "step": 6814
    },
    {
      "epoch": 0.6093526466380543,
      "grad_norm": 0.1488248944117652,
      "learning_rate": 6.992021360732848e-05,
      "loss": 0.6204,
      "step": 6815
    },
    {
      "epoch": 0.6094420600858369,
      "grad_norm": 0.13439380467149042,
      "learning_rate": 6.989259596277582e-05,
      "loss": 0.5975,
      "step": 6816
    },
    {
      "epoch": 0.6095314735336195,
      "grad_norm": 0.17684047303452147,
      "learning_rate": 6.98649808432948e-05,
      "loss": 0.6699,
      "step": 6817
    },
    {
      "epoch": 0.609620886981402,
      "grad_norm": 0.19125023818697015,
      "learning_rate": 6.983736825120144e-05,
      "loss": 0.391,
      "step": 6818
    },
    {
      "epoch": 0.6097103004291845,
      "grad_norm": 0.16646845498048354,
      "learning_rate": 6.980975818881159e-05,
      "loss": 0.6706,
      "step": 6819
    },
    {
      "epoch": 0.6097997138769671,
      "grad_norm": 0.13993599431041956,
      "learning_rate": 6.978215065844087e-05,
      "loss": 0.6292,
      "step": 6820
    },
    {
      "epoch": 0.6098891273247496,
      "grad_norm": 0.13305592156574803,
      "learning_rate": 6.975454566240465e-05,
      "loss": 0.6474,
      "step": 6821
    },
    {
      "epoch": 0.6099785407725322,
      "grad_norm": 0.17042974186447105,
      "learning_rate": 6.972694320301813e-05,
      "loss": 0.7074,
      "step": 6822
    },
    {
      "epoch": 0.6100679542203148,
      "grad_norm": 0.15312511610034665,
      "learning_rate": 6.969934328259637e-05,
      "loss": 0.6442,
      "step": 6823
    },
    {
      "epoch": 0.6101573676680973,
      "grad_norm": 0.14454092307746097,
      "learning_rate": 6.96717459034541e-05,
      "loss": 0.6102,
      "step": 6824
    },
    {
      "epoch": 0.6102467811158798,
      "grad_norm": 0.14778022853789516,
      "learning_rate": 6.964415106790586e-05,
      "loss": 0.6512,
      "step": 6825
    },
    {
      "epoch": 0.6103361945636624,
      "grad_norm": 0.149902871383118,
      "learning_rate": 6.961655877826603e-05,
      "loss": 0.6526,
      "step": 6826
    },
    {
      "epoch": 0.610425608011445,
      "grad_norm": 0.1490346866981613,
      "learning_rate": 6.95889690368487e-05,
      "loss": 0.6648,
      "step": 6827
    },
    {
      "epoch": 0.6105150214592274,
      "grad_norm": 0.16120002102819334,
      "learning_rate": 6.956138184596782e-05,
      "loss": 0.6945,
      "step": 6828
    },
    {
      "epoch": 0.61060443490701,
      "grad_norm": 0.14861985954797213,
      "learning_rate": 6.953379720793703e-05,
      "loss": 0.6106,
      "step": 6829
    },
    {
      "epoch": 0.6106938483547926,
      "grad_norm": 0.14387627448228868,
      "learning_rate": 6.950621512506993e-05,
      "loss": 0.6592,
      "step": 6830
    },
    {
      "epoch": 0.6107832618025751,
      "grad_norm": 0.17330733623746605,
      "learning_rate": 6.947863559967976e-05,
      "loss": 0.6939,
      "step": 6831
    },
    {
      "epoch": 0.6108726752503576,
      "grad_norm": 0.1632881556485106,
      "learning_rate": 6.945105863407951e-05,
      "loss": 0.6483,
      "step": 6832
    },
    {
      "epoch": 0.6109620886981402,
      "grad_norm": 0.16919222075309515,
      "learning_rate": 6.942348423058212e-05,
      "loss": 0.6513,
      "step": 6833
    },
    {
      "epoch": 0.6110515021459227,
      "grad_norm": 0.15313453104171434,
      "learning_rate": 6.939591239150014e-05,
      "loss": 0.6519,
      "step": 6834
    },
    {
      "epoch": 0.6111409155937053,
      "grad_norm": 0.14867123539399918,
      "learning_rate": 6.936834311914606e-05,
      "loss": 0.6483,
      "step": 6835
    },
    {
      "epoch": 0.6112303290414879,
      "grad_norm": 0.1599527573965094,
      "learning_rate": 6.934077641583201e-05,
      "loss": 0.6862,
      "step": 6836
    },
    {
      "epoch": 0.6113197424892703,
      "grad_norm": 0.1714518788887899,
      "learning_rate": 6.931321228387005e-05,
      "loss": 0.6355,
      "step": 6837
    },
    {
      "epoch": 0.6114091559370529,
      "grad_norm": 0.1617662494914007,
      "learning_rate": 6.928565072557191e-05,
      "loss": 0.6479,
      "step": 6838
    },
    {
      "epoch": 0.6114985693848355,
      "grad_norm": 0.15382149694426356,
      "learning_rate": 6.925809174324915e-05,
      "loss": 0.6584,
      "step": 6839
    },
    {
      "epoch": 0.6115879828326181,
      "grad_norm": 0.17304011449850298,
      "learning_rate": 6.923053533921312e-05,
      "loss": 0.6946,
      "step": 6840
    },
    {
      "epoch": 0.6116773962804005,
      "grad_norm": 0.16225789464959603,
      "learning_rate": 6.920298151577491e-05,
      "loss": 0.6555,
      "step": 6841
    },
    {
      "epoch": 0.6117668097281831,
      "grad_norm": 0.17672382697070066,
      "learning_rate": 6.917543027524546e-05,
      "loss": 0.6568,
      "step": 6842
    },
    {
      "epoch": 0.6118562231759657,
      "grad_norm": 0.14775369718066755,
      "learning_rate": 6.914788161993542e-05,
      "loss": 0.67,
      "step": 6843
    },
    {
      "epoch": 0.6119456366237482,
      "grad_norm": 0.16720757015744603,
      "learning_rate": 6.912033555215532e-05,
      "loss": 0.6019,
      "step": 6844
    },
    {
      "epoch": 0.6120350500715308,
      "grad_norm": 0.16649185339453426,
      "learning_rate": 6.909279207421536e-05,
      "loss": 0.631,
      "step": 6845
    },
    {
      "epoch": 0.6121244635193133,
      "grad_norm": 0.1486187227814877,
      "learning_rate": 6.906525118842563e-05,
      "loss": 0.6723,
      "step": 6846
    },
    {
      "epoch": 0.6122138769670958,
      "grad_norm": 0.14451347017145652,
      "learning_rate": 6.903771289709591e-05,
      "loss": 0.6278,
      "step": 6847
    },
    {
      "epoch": 0.6123032904148784,
      "grad_norm": 0.16489778524346754,
      "learning_rate": 6.901017720253583e-05,
      "loss": 0.6923,
      "step": 6848
    },
    {
      "epoch": 0.612392703862661,
      "grad_norm": 0.14967241188408723,
      "learning_rate": 6.898264410705475e-05,
      "loss": 0.6364,
      "step": 6849
    },
    {
      "epoch": 0.6124821173104434,
      "grad_norm": 0.17019583967851423,
      "learning_rate": 6.89551136129618e-05,
      "loss": 0.6886,
      "step": 6850
    },
    {
      "epoch": 0.612571530758226,
      "grad_norm": 0.14970448780957696,
      "learning_rate": 6.892758572256604e-05,
      "loss": 0.6516,
      "step": 6851
    },
    {
      "epoch": 0.6126609442060086,
      "grad_norm": 0.15094103788197527,
      "learning_rate": 6.890006043817612e-05,
      "loss": 0.6622,
      "step": 6852
    },
    {
      "epoch": 0.6127503576537912,
      "grad_norm": 0.15625288986012886,
      "learning_rate": 6.887253776210058e-05,
      "loss": 0.6966,
      "step": 6853
    },
    {
      "epoch": 0.6128397711015737,
      "grad_norm": 0.1657648625265905,
      "learning_rate": 6.884501769664773e-05,
      "loss": 0.6947,
      "step": 6854
    },
    {
      "epoch": 0.6129291845493562,
      "grad_norm": 0.15432606346779415,
      "learning_rate": 6.881750024412557e-05,
      "loss": 0.6584,
      "step": 6855
    },
    {
      "epoch": 0.6130185979971388,
      "grad_norm": 0.15242042613076048,
      "learning_rate": 6.878998540684206e-05,
      "loss": 0.652,
      "step": 6856
    },
    {
      "epoch": 0.6131080114449213,
      "grad_norm": 0.16161269726068556,
      "learning_rate": 6.876247318710471e-05,
      "loss": 0.6996,
      "step": 6857
    },
    {
      "epoch": 0.6131974248927039,
      "grad_norm": 0.1707352560547453,
      "learning_rate": 6.873496358722105e-05,
      "loss": 0.6606,
      "step": 6858
    },
    {
      "epoch": 0.6132868383404864,
      "grad_norm": 0.15099196519881442,
      "learning_rate": 6.870745660949822e-05,
      "loss": 0.6361,
      "step": 6859
    },
    {
      "epoch": 0.6133762517882689,
      "grad_norm": 0.16072983427855717,
      "learning_rate": 6.867995225624324e-05,
      "loss": 0.6596,
      "step": 6860
    },
    {
      "epoch": 0.6134656652360515,
      "grad_norm": 0.1544013885086443,
      "learning_rate": 6.865245052976284e-05,
      "loss": 0.6638,
      "step": 6861
    },
    {
      "epoch": 0.6135550786838341,
      "grad_norm": 0.1540460297767353,
      "learning_rate": 6.862495143236353e-05,
      "loss": 0.634,
      "step": 6862
    },
    {
      "epoch": 0.6136444921316166,
      "grad_norm": 0.16459563499661475,
      "learning_rate": 6.859745496635167e-05,
      "loss": 0.6409,
      "step": 6863
    },
    {
      "epoch": 0.6137339055793991,
      "grad_norm": 0.16491083109063437,
      "learning_rate": 6.85699611340333e-05,
      "loss": 0.6383,
      "step": 6864
    },
    {
      "epoch": 0.6138233190271817,
      "grad_norm": 0.15900259890298163,
      "learning_rate": 6.854246993771438e-05,
      "loss": 0.6868,
      "step": 6865
    },
    {
      "epoch": 0.6139127324749643,
      "grad_norm": 0.15528570943349426,
      "learning_rate": 6.851498137970049e-05,
      "loss": 0.7103,
      "step": 6866
    },
    {
      "epoch": 0.6140021459227468,
      "grad_norm": 0.14032180750164017,
      "learning_rate": 6.84874954622971e-05,
      "loss": 0.6478,
      "step": 6867
    },
    {
      "epoch": 0.6140915593705293,
      "grad_norm": 0.1649442176859392,
      "learning_rate": 6.84600121878094e-05,
      "loss": 0.6854,
      "step": 6868
    },
    {
      "epoch": 0.6141809728183119,
      "grad_norm": 0.14952087538753767,
      "learning_rate": 6.843253155854239e-05,
      "loss": 0.6377,
      "step": 6869
    },
    {
      "epoch": 0.6142703862660944,
      "grad_norm": 0.13576856456978076,
      "learning_rate": 6.840505357680085e-05,
      "loss": 0.6611,
      "step": 6870
    },
    {
      "epoch": 0.614359799713877,
      "grad_norm": 0.1857024207995828,
      "learning_rate": 6.837757824488927e-05,
      "loss": 0.3843,
      "step": 6871
    },
    {
      "epoch": 0.6144492131616596,
      "grad_norm": 0.14686564825102313,
      "learning_rate": 6.835010556511201e-05,
      "loss": 0.6597,
      "step": 6872
    },
    {
      "epoch": 0.614538626609442,
      "grad_norm": 0.15387671884626047,
      "learning_rate": 6.832263553977321e-05,
      "loss": 0.6499,
      "step": 6873
    },
    {
      "epoch": 0.6146280400572246,
      "grad_norm": 0.15102908194732664,
      "learning_rate": 6.829516817117671e-05,
      "loss": 0.6826,
      "step": 6874
    },
    {
      "epoch": 0.6147174535050072,
      "grad_norm": 0.16779113806130985,
      "learning_rate": 6.826770346162614e-05,
      "loss": 0.6743,
      "step": 6875
    },
    {
      "epoch": 0.6148068669527897,
      "grad_norm": 0.15289332752008522,
      "learning_rate": 6.8240241413425e-05,
      "loss": 0.6378,
      "step": 6876
    },
    {
      "epoch": 0.6148962804005722,
      "grad_norm": 0.17057449379537815,
      "learning_rate": 6.821278202887643e-05,
      "loss": 0.6446,
      "step": 6877
    },
    {
      "epoch": 0.6149856938483548,
      "grad_norm": 0.16823688204742604,
      "learning_rate": 6.818532531028342e-05,
      "loss": 0.6189,
      "step": 6878
    },
    {
      "epoch": 0.6150751072961373,
      "grad_norm": 0.1753141370258435,
      "learning_rate": 6.815787125994875e-05,
      "loss": 0.3962,
      "step": 6879
    },
    {
      "epoch": 0.6151645207439199,
      "grad_norm": 0.16987425610697185,
      "learning_rate": 6.813041988017501e-05,
      "loss": 0.6602,
      "step": 6880
    },
    {
      "epoch": 0.6152539341917024,
      "grad_norm": 0.1570115208667393,
      "learning_rate": 6.810297117326445e-05,
      "loss": 0.6477,
      "step": 6881
    },
    {
      "epoch": 0.615343347639485,
      "grad_norm": 0.16101643783774844,
      "learning_rate": 6.807552514151915e-05,
      "loss": 0.6743,
      "step": 6882
    },
    {
      "epoch": 0.6154327610872675,
      "grad_norm": 0.1585199811772215,
      "learning_rate": 6.804808178724105e-05,
      "loss": 0.6493,
      "step": 6883
    },
    {
      "epoch": 0.6155221745350501,
      "grad_norm": 0.14907705038381666,
      "learning_rate": 6.802064111273173e-05,
      "loss": 0.6563,
      "step": 6884
    },
    {
      "epoch": 0.6156115879828327,
      "grad_norm": 0.16487403685464527,
      "learning_rate": 6.799320312029256e-05,
      "loss": 0.637,
      "step": 6885
    },
    {
      "epoch": 0.6157010014306151,
      "grad_norm": 0.1541291962814921,
      "learning_rate": 6.796576781222481e-05,
      "loss": 0.6165,
      "step": 6886
    },
    {
      "epoch": 0.6157904148783977,
      "grad_norm": 0.1748096422879787,
      "learning_rate": 6.793833519082946e-05,
      "loss": 0.6816,
      "step": 6887
    },
    {
      "epoch": 0.6158798283261803,
      "grad_norm": 0.15669936353730976,
      "learning_rate": 6.791090525840722e-05,
      "loss": 0.6585,
      "step": 6888
    },
    {
      "epoch": 0.6159692417739628,
      "grad_norm": 0.161725125851283,
      "learning_rate": 6.788347801725859e-05,
      "loss": 0.6357,
      "step": 6889
    },
    {
      "epoch": 0.6160586552217453,
      "grad_norm": 0.15490567246434175,
      "learning_rate": 6.785605346968386e-05,
      "loss": 0.6846,
      "step": 6890
    },
    {
      "epoch": 0.6161480686695279,
      "grad_norm": 0.13556396667798537,
      "learning_rate": 6.782863161798311e-05,
      "loss": 0.6265,
      "step": 6891
    },
    {
      "epoch": 0.6162374821173104,
      "grad_norm": 0.13882784102019785,
      "learning_rate": 6.780121246445617e-05,
      "loss": 0.6111,
      "step": 6892
    },
    {
      "epoch": 0.616326895565093,
      "grad_norm": 0.15784898905192846,
      "learning_rate": 6.777379601140264e-05,
      "loss": 0.6447,
      "step": 6893
    },
    {
      "epoch": 0.6164163090128756,
      "grad_norm": 0.1877038456386921,
      "learning_rate": 6.774638226112195e-05,
      "loss": 0.413,
      "step": 6894
    },
    {
      "epoch": 0.616505722460658,
      "grad_norm": 0.17371802803904857,
      "learning_rate": 6.771897121591321e-05,
      "loss": 0.6755,
      "step": 6895
    },
    {
      "epoch": 0.6165951359084406,
      "grad_norm": 0.17332225403939724,
      "learning_rate": 6.76915628780754e-05,
      "loss": 0.6917,
      "step": 6896
    },
    {
      "epoch": 0.6166845493562232,
      "grad_norm": 0.13871928296421188,
      "learning_rate": 6.766415724990718e-05,
      "loss": 0.6123,
      "step": 6897
    },
    {
      "epoch": 0.6167739628040058,
      "grad_norm": 0.15109094453892713,
      "learning_rate": 6.7636754333707e-05,
      "loss": 0.5999,
      "step": 6898
    },
    {
      "epoch": 0.6168633762517882,
      "grad_norm": 0.16394813517876763,
      "learning_rate": 6.760935413177316e-05,
      "loss": 0.6582,
      "step": 6899
    },
    {
      "epoch": 0.6169527896995708,
      "grad_norm": 0.15793591554007325,
      "learning_rate": 6.75819566464037e-05,
      "loss": 0.6926,
      "step": 6900
    },
    {
      "epoch": 0.6170422031473534,
      "grad_norm": 0.1413510122529387,
      "learning_rate": 6.755456187989637e-05,
      "loss": 0.6465,
      "step": 6901
    },
    {
      "epoch": 0.6171316165951359,
      "grad_norm": 0.1482763335191618,
      "learning_rate": 6.752716983454875e-05,
      "loss": 0.605,
      "step": 6902
    },
    {
      "epoch": 0.6172210300429185,
      "grad_norm": 0.16420317515596022,
      "learning_rate": 6.749978051265819e-05,
      "loss": 0.6625,
      "step": 6903
    },
    {
      "epoch": 0.617310443490701,
      "grad_norm": 0.1596089546252031,
      "learning_rate": 6.74723939165218e-05,
      "loss": 0.5983,
      "step": 6904
    },
    {
      "epoch": 0.6173998569384835,
      "grad_norm": 0.15370758731190096,
      "learning_rate": 6.74450100484364e-05,
      "loss": 0.6586,
      "step": 6905
    },
    {
      "epoch": 0.6174892703862661,
      "grad_norm": 0.16134934045881338,
      "learning_rate": 6.741762891069871e-05,
      "loss": 0.627,
      "step": 6906
    },
    {
      "epoch": 0.6175786838340487,
      "grad_norm": 0.15055574456806867,
      "learning_rate": 6.739025050560514e-05,
      "loss": 0.6479,
      "step": 6907
    },
    {
      "epoch": 0.6176680972818311,
      "grad_norm": 0.1438637555643153,
      "learning_rate": 6.736287483545191e-05,
      "loss": 0.6314,
      "step": 6908
    },
    {
      "epoch": 0.6177575107296137,
      "grad_norm": 0.15768995755528464,
      "learning_rate": 6.73355019025349e-05,
      "loss": 0.6319,
      "step": 6909
    },
    {
      "epoch": 0.6178469241773963,
      "grad_norm": 0.15452285009695624,
      "learning_rate": 6.730813170914993e-05,
      "loss": 0.6755,
      "step": 6910
    },
    {
      "epoch": 0.6179363376251789,
      "grad_norm": 0.1358686982133799,
      "learning_rate": 6.72807642575925e-05,
      "loss": 0.6337,
      "step": 6911
    },
    {
      "epoch": 0.6180257510729614,
      "grad_norm": 0.15973632258439802,
      "learning_rate": 6.725339955015777e-05,
      "loss": 0.6863,
      "step": 6912
    },
    {
      "epoch": 0.6181151645207439,
      "grad_norm": 0.1522813891198699,
      "learning_rate": 6.722603758914092e-05,
      "loss": 0.6535,
      "step": 6913
    },
    {
      "epoch": 0.6182045779685265,
      "grad_norm": 0.16559120514236275,
      "learning_rate": 6.719867837683672e-05,
      "loss": 0.6581,
      "step": 6914
    },
    {
      "epoch": 0.618293991416309,
      "grad_norm": 0.16773559223776416,
      "learning_rate": 6.717132191553977e-05,
      "loss": 0.6536,
      "step": 6915
    },
    {
      "epoch": 0.6183834048640916,
      "grad_norm": 0.16070784568197385,
      "learning_rate": 6.714396820754436e-05,
      "loss": 0.6816,
      "step": 6916
    },
    {
      "epoch": 0.6184728183118741,
      "grad_norm": 0.15661404772277465,
      "learning_rate": 6.711661725514469e-05,
      "loss": 0.6607,
      "step": 6917
    },
    {
      "epoch": 0.6185622317596566,
      "grad_norm": 0.15294958924238508,
      "learning_rate": 6.708926906063462e-05,
      "loss": 0.6735,
      "step": 6918
    },
    {
      "epoch": 0.6186516452074392,
      "grad_norm": 0.1567887928880889,
      "learning_rate": 6.706192362630776e-05,
      "loss": 0.6456,
      "step": 6919
    },
    {
      "epoch": 0.6187410586552218,
      "grad_norm": 0.1822019200311437,
      "learning_rate": 6.70345809544576e-05,
      "loss": 0.7051,
      "step": 6920
    },
    {
      "epoch": 0.6188304721030042,
      "grad_norm": 0.1516805023621902,
      "learning_rate": 6.700724104737736e-05,
      "loss": 0.6474,
      "step": 6921
    },
    {
      "epoch": 0.6189198855507868,
      "grad_norm": 0.16455589596072004,
      "learning_rate": 6.697990390735997e-05,
      "loss": 0.6756,
      "step": 6922
    },
    {
      "epoch": 0.6190092989985694,
      "grad_norm": 0.14577460768968284,
      "learning_rate": 6.695256953669812e-05,
      "loss": 0.6525,
      "step": 6923
    },
    {
      "epoch": 0.619098712446352,
      "grad_norm": 0.1573218605303485,
      "learning_rate": 6.69252379376844e-05,
      "loss": 0.6665,
      "step": 6924
    },
    {
      "epoch": 0.6191881258941345,
      "grad_norm": 0.15489427623503169,
      "learning_rate": 6.689790911261099e-05,
      "loss": 0.6768,
      "step": 6925
    },
    {
      "epoch": 0.619277539341917,
      "grad_norm": 0.14718145093952084,
      "learning_rate": 6.687058306376997e-05,
      "loss": 0.6071,
      "step": 6926
    },
    {
      "epoch": 0.6193669527896996,
      "grad_norm": 0.16215436858292626,
      "learning_rate": 6.684325979345315e-05,
      "loss": 0.67,
      "step": 6927
    },
    {
      "epoch": 0.6194563662374821,
      "grad_norm": 0.16361639971296393,
      "learning_rate": 6.681593930395209e-05,
      "loss": 0.6732,
      "step": 6928
    },
    {
      "epoch": 0.6195457796852647,
      "grad_norm": 0.15459908642783365,
      "learning_rate": 6.678862159755809e-05,
      "loss": 0.6475,
      "step": 6929
    },
    {
      "epoch": 0.6196351931330472,
      "grad_norm": 0.14177524416054826,
      "learning_rate": 6.676130667656235e-05,
      "loss": 0.6434,
      "step": 6930
    },
    {
      "epoch": 0.6197246065808297,
      "grad_norm": 0.15057463929711812,
      "learning_rate": 6.673399454325565e-05,
      "loss": 0.6721,
      "step": 6931
    },
    {
      "epoch": 0.6198140200286123,
      "grad_norm": 0.15492275818801055,
      "learning_rate": 6.670668519992864e-05,
      "loss": 0.6559,
      "step": 6932
    },
    {
      "epoch": 0.6199034334763949,
      "grad_norm": 0.15511522253495283,
      "learning_rate": 6.66793786488717e-05,
      "loss": 0.642,
      "step": 6933
    },
    {
      "epoch": 0.6199928469241774,
      "grad_norm": 0.1415045723473422,
      "learning_rate": 6.66520748923751e-05,
      "loss": 0.6738,
      "step": 6934
    },
    {
      "epoch": 0.6200822603719599,
      "grad_norm": 0.1782701182630905,
      "learning_rate": 6.662477393272869e-05,
      "loss": 0.6905,
      "step": 6935
    },
    {
      "epoch": 0.6201716738197425,
      "grad_norm": 0.16648873273207135,
      "learning_rate": 6.659747577222216e-05,
      "loss": 0.6475,
      "step": 6936
    },
    {
      "epoch": 0.620261087267525,
      "grad_norm": 0.13472042995925798,
      "learning_rate": 6.657018041314502e-05,
      "loss": 0.6276,
      "step": 6937
    },
    {
      "epoch": 0.6203505007153076,
      "grad_norm": 0.1293422568099288,
      "learning_rate": 6.654288785778646e-05,
      "loss": 0.664,
      "step": 6938
    },
    {
      "epoch": 0.6204399141630901,
      "grad_norm": 0.16047120797405087,
      "learning_rate": 6.651559810843548e-05,
      "loss": 0.6525,
      "step": 6939
    },
    {
      "epoch": 0.6205293276108726,
      "grad_norm": 0.16639005773204107,
      "learning_rate": 6.648831116738083e-05,
      "loss": 0.6917,
      "step": 6940
    },
    {
      "epoch": 0.6206187410586552,
      "grad_norm": 0.16723513186794467,
      "learning_rate": 6.646102703691111e-05,
      "loss": 0.6529,
      "step": 6941
    },
    {
      "epoch": 0.6207081545064378,
      "grad_norm": 0.1688491693051347,
      "learning_rate": 6.643374571931451e-05,
      "loss": 0.6409,
      "step": 6942
    },
    {
      "epoch": 0.6207975679542204,
      "grad_norm": 0.15069948330309638,
      "learning_rate": 6.640646721687913e-05,
      "loss": 0.679,
      "step": 6943
    },
    {
      "epoch": 0.6208869814020028,
      "grad_norm": 0.16374371958224138,
      "learning_rate": 6.637919153189279e-05,
      "loss": 0.6872,
      "step": 6944
    },
    {
      "epoch": 0.6209763948497854,
      "grad_norm": 0.18543022337022913,
      "learning_rate": 6.635191866664303e-05,
      "loss": 0.7036,
      "step": 6945
    },
    {
      "epoch": 0.621065808297568,
      "grad_norm": 0.14790763955001165,
      "learning_rate": 6.632464862341721e-05,
      "loss": 0.6432,
      "step": 6946
    },
    {
      "epoch": 0.6211552217453505,
      "grad_norm": 0.16454344400389076,
      "learning_rate": 6.629738140450241e-05,
      "loss": 0.6565,
      "step": 6947
    },
    {
      "epoch": 0.621244635193133,
      "grad_norm": 0.14434889732903478,
      "learning_rate": 6.62701170121856e-05,
      "loss": 0.6076,
      "step": 6948
    },
    {
      "epoch": 0.6213340486409156,
      "grad_norm": 0.14739493093573164,
      "learning_rate": 6.62428554487533e-05,
      "loss": 0.6546,
      "step": 6949
    },
    {
      "epoch": 0.6214234620886981,
      "grad_norm": 0.16559128157124967,
      "learning_rate": 6.621559671649196e-05,
      "loss": 0.6453,
      "step": 6950
    },
    {
      "epoch": 0.6215128755364807,
      "grad_norm": 0.15525722941948328,
      "learning_rate": 6.618834081768772e-05,
      "loss": 0.628,
      "step": 6951
    },
    {
      "epoch": 0.6216022889842633,
      "grad_norm": 0.16101172216169077,
      "learning_rate": 6.616108775462649e-05,
      "loss": 0.632,
      "step": 6952
    },
    {
      "epoch": 0.6216917024320457,
      "grad_norm": 0.13483499911194616,
      "learning_rate": 6.613383752959398e-05,
      "loss": 0.6475,
      "step": 6953
    },
    {
      "epoch": 0.6217811158798283,
      "grad_norm": 0.14665014808677893,
      "learning_rate": 6.610659014487557e-05,
      "loss": 0.676,
      "step": 6954
    },
    {
      "epoch": 0.6218705293276109,
      "grad_norm": 0.1534654606715901,
      "learning_rate": 6.607934560275657e-05,
      "loss": 0.7068,
      "step": 6955
    },
    {
      "epoch": 0.6219599427753935,
      "grad_norm": 0.15577614671395146,
      "learning_rate": 6.605210390552185e-05,
      "loss": 0.659,
      "step": 6956
    },
    {
      "epoch": 0.6220493562231759,
      "grad_norm": 0.15700353361453667,
      "learning_rate": 6.602486505545621e-05,
      "loss": 0.6703,
      "step": 6957
    },
    {
      "epoch": 0.6221387696709585,
      "grad_norm": 0.14324997671251768,
      "learning_rate": 6.59976290548441e-05,
      "loss": 0.6031,
      "step": 6958
    },
    {
      "epoch": 0.6222281831187411,
      "grad_norm": 0.1491100140601177,
      "learning_rate": 6.597039590596976e-05,
      "loss": 0.6476,
      "step": 6959
    },
    {
      "epoch": 0.6223175965665236,
      "grad_norm": 0.15304546828053303,
      "learning_rate": 6.594316561111724e-05,
      "loss": 0.6706,
      "step": 6960
    },
    {
      "epoch": 0.6224070100143062,
      "grad_norm": 0.18759820027158608,
      "learning_rate": 6.591593817257025e-05,
      "loss": 0.3917,
      "step": 6961
    },
    {
      "epoch": 0.6224964234620887,
      "grad_norm": 0.14355771906381626,
      "learning_rate": 6.58887135926124e-05,
      "loss": 0.6513,
      "step": 6962
    },
    {
      "epoch": 0.6225858369098712,
      "grad_norm": 0.16517764462940007,
      "learning_rate": 6.58614918735269e-05,
      "loss": 0.6479,
      "step": 6963
    },
    {
      "epoch": 0.6226752503576538,
      "grad_norm": 0.163275788141175,
      "learning_rate": 6.58342730175969e-05,
      "loss": 0.657,
      "step": 6964
    },
    {
      "epoch": 0.6227646638054364,
      "grad_norm": 0.15973307872833192,
      "learning_rate": 6.580705702710514e-05,
      "loss": 0.6727,
      "step": 6965
    },
    {
      "epoch": 0.6228540772532188,
      "grad_norm": 0.16187997287436484,
      "learning_rate": 6.577984390433421e-05,
      "loss": 0.678,
      "step": 6966
    },
    {
      "epoch": 0.6229434907010014,
      "grad_norm": 0.1411698458728765,
      "learning_rate": 6.575263365156647e-05,
      "loss": 0.6591,
      "step": 6967
    },
    {
      "epoch": 0.623032904148784,
      "grad_norm": 0.14806852284122524,
      "learning_rate": 6.572542627108393e-05,
      "loss": 0.6434,
      "step": 6968
    },
    {
      "epoch": 0.6231223175965666,
      "grad_norm": 0.15790656641948903,
      "learning_rate": 6.569822176516853e-05,
      "loss": 0.6585,
      "step": 6969
    },
    {
      "epoch": 0.623211731044349,
      "grad_norm": 0.13273583182132764,
      "learning_rate": 6.567102013610184e-05,
      "loss": 0.5963,
      "step": 6970
    },
    {
      "epoch": 0.6233011444921316,
      "grad_norm": 0.16024598589900113,
      "learning_rate": 6.564382138616526e-05,
      "loss": 0.6866,
      "step": 6971
    },
    {
      "epoch": 0.6233905579399142,
      "grad_norm": 0.17359864975522596,
      "learning_rate": 6.561662551763984e-05,
      "loss": 0.6263,
      "step": 6972
    },
    {
      "epoch": 0.6234799713876967,
      "grad_norm": 0.1350901475866107,
      "learning_rate": 6.558943253280654e-05,
      "loss": 0.608,
      "step": 6973
    },
    {
      "epoch": 0.6235693848354793,
      "grad_norm": 0.1576538311751163,
      "learning_rate": 6.556224243394599e-05,
      "loss": 0.6658,
      "step": 6974
    },
    {
      "epoch": 0.6236587982832618,
      "grad_norm": 0.14708496094085818,
      "learning_rate": 6.553505522333853e-05,
      "loss": 0.6315,
      "step": 6975
    },
    {
      "epoch": 0.6237482117310443,
      "grad_norm": 0.158702728162294,
      "learning_rate": 6.55078709032644e-05,
      "loss": 0.6649,
      "step": 6976
    },
    {
      "epoch": 0.6238376251788269,
      "grad_norm": 0.13613150475237162,
      "learning_rate": 6.548068947600346e-05,
      "loss": 0.642,
      "step": 6977
    },
    {
      "epoch": 0.6239270386266095,
      "grad_norm": 0.14326665998585916,
      "learning_rate": 6.545351094383544e-05,
      "loss": 0.6245,
      "step": 6978
    },
    {
      "epoch": 0.6240164520743919,
      "grad_norm": 0.14434409736054085,
      "learning_rate": 6.542633530903972e-05,
      "loss": 0.6636,
      "step": 6979
    },
    {
      "epoch": 0.6241058655221745,
      "grad_norm": 0.19108775244898285,
      "learning_rate": 6.53991625738955e-05,
      "loss": 0.7273,
      "step": 6980
    },
    {
      "epoch": 0.6241952789699571,
      "grad_norm": 0.1386345881653238,
      "learning_rate": 6.537199274068173e-05,
      "loss": 0.6543,
      "step": 6981
    },
    {
      "epoch": 0.6242846924177397,
      "grad_norm": 0.17129894402928142,
      "learning_rate": 6.534482581167707e-05,
      "loss": 0.7087,
      "step": 6982
    },
    {
      "epoch": 0.6243741058655222,
      "grad_norm": 0.1610836067852047,
      "learning_rate": 6.531766178916008e-05,
      "loss": 0.6862,
      "step": 6983
    },
    {
      "epoch": 0.6244635193133047,
      "grad_norm": 0.1374122610000617,
      "learning_rate": 6.529050067540887e-05,
      "loss": 0.6284,
      "step": 6984
    },
    {
      "epoch": 0.6245529327610873,
      "grad_norm": 0.14419368363416518,
      "learning_rate": 6.526334247270147e-05,
      "loss": 0.6575,
      "step": 6985
    },
    {
      "epoch": 0.6246423462088698,
      "grad_norm": 0.1435371016443705,
      "learning_rate": 6.523618718331557e-05,
      "loss": 0.6705,
      "step": 6986
    },
    {
      "epoch": 0.6247317596566524,
      "grad_norm": 0.16766644625907615,
      "learning_rate": 6.520903480952869e-05,
      "loss": 0.6758,
      "step": 6987
    },
    {
      "epoch": 0.6248211731044349,
      "grad_norm": 0.1461557198916918,
      "learning_rate": 6.518188535361803e-05,
      "loss": 0.6517,
      "step": 6988
    },
    {
      "epoch": 0.6249105865522174,
      "grad_norm": 0.13814008925594165,
      "learning_rate": 6.515473881786056e-05,
      "loss": 0.6345,
      "step": 6989
    },
    {
      "epoch": 0.625,
      "grad_norm": 0.1660822053651048,
      "learning_rate": 6.512759520453308e-05,
      "loss": 0.6504,
      "step": 6990
    },
    {
      "epoch": 0.6250894134477826,
      "grad_norm": 0.19085758650761042,
      "learning_rate": 6.510045451591211e-05,
      "loss": 0.3663,
      "step": 6991
    },
    {
      "epoch": 0.6251788268955651,
      "grad_norm": 0.15559615656031797,
      "learning_rate": 6.507331675427387e-05,
      "loss": 0.6514,
      "step": 6992
    },
    {
      "epoch": 0.6252682403433476,
      "grad_norm": 0.16377521997256253,
      "learning_rate": 6.504618192189435e-05,
      "loss": 0.6526,
      "step": 6993
    },
    {
      "epoch": 0.6253576537911302,
      "grad_norm": 0.17288994828068577,
      "learning_rate": 6.501905002104935e-05,
      "loss": 0.6342,
      "step": 6994
    },
    {
      "epoch": 0.6254470672389127,
      "grad_norm": 0.15742154608593392,
      "learning_rate": 6.499192105401435e-05,
      "loss": 0.6466,
      "step": 6995
    },
    {
      "epoch": 0.6255364806866953,
      "grad_norm": 0.14344805224908316,
      "learning_rate": 6.49647950230647e-05,
      "loss": 0.6202,
      "step": 6996
    },
    {
      "epoch": 0.6256258941344778,
      "grad_norm": 0.15303298609791452,
      "learning_rate": 6.493767193047534e-05,
      "loss": 0.6351,
      "step": 6997
    },
    {
      "epoch": 0.6257153075822603,
      "grad_norm": 0.1504639011701542,
      "learning_rate": 6.491055177852111e-05,
      "loss": 0.6709,
      "step": 6998
    },
    {
      "epoch": 0.6258047210300429,
      "grad_norm": 0.16239353847473473,
      "learning_rate": 6.488343456947654e-05,
      "loss": 0.6588,
      "step": 6999
    },
    {
      "epoch": 0.6258941344778255,
      "grad_norm": 0.15806091131922656,
      "learning_rate": 6.485632030561587e-05,
      "loss": 0.6538,
      "step": 7000
    },
    {
      "epoch": 0.6259835479256081,
      "grad_norm": 0.16116317418201515,
      "learning_rate": 6.48292089892132e-05,
      "loss": 0.6599,
      "step": 7001
    },
    {
      "epoch": 0.6260729613733905,
      "grad_norm": 0.15068493089028132,
      "learning_rate": 6.480210062254225e-05,
      "loss": 0.6605,
      "step": 7002
    },
    {
      "epoch": 0.6261623748211731,
      "grad_norm": 0.16692791222079045,
      "learning_rate": 6.477499520787665e-05,
      "loss": 0.6187,
      "step": 7003
    },
    {
      "epoch": 0.6262517882689557,
      "grad_norm": 0.15866815363302667,
      "learning_rate": 6.474789274748964e-05,
      "loss": 0.678,
      "step": 7004
    },
    {
      "epoch": 0.6263412017167382,
      "grad_norm": 0.1596055736956596,
      "learning_rate": 6.472079324365433e-05,
      "loss": 0.6577,
      "step": 7005
    },
    {
      "epoch": 0.6264306151645207,
      "grad_norm": 0.1660972793030899,
      "learning_rate": 6.469369669864346e-05,
      "loss": 0.6598,
      "step": 7006
    },
    {
      "epoch": 0.6265200286123033,
      "grad_norm": 0.16885854162039482,
      "learning_rate": 6.466660311472962e-05,
      "loss": 0.3808,
      "step": 7007
    },
    {
      "epoch": 0.6266094420600858,
      "grad_norm": 0.15899521793904597,
      "learning_rate": 6.46395124941851e-05,
      "loss": 0.6948,
      "step": 7008
    },
    {
      "epoch": 0.6266988555078684,
      "grad_norm": 0.14716332817598274,
      "learning_rate": 6.461242483928194e-05,
      "loss": 0.6282,
      "step": 7009
    },
    {
      "epoch": 0.626788268955651,
      "grad_norm": 0.1356900466697975,
      "learning_rate": 6.4585340152292e-05,
      "loss": 0.6272,
      "step": 7010
    },
    {
      "epoch": 0.6268776824034334,
      "grad_norm": 0.16811209365318974,
      "learning_rate": 6.455825843548678e-05,
      "loss": 0.6611,
      "step": 7011
    },
    {
      "epoch": 0.626967095851216,
      "grad_norm": 0.1306471440144741,
      "learning_rate": 6.453117969113767e-05,
      "loss": 0.6276,
      "step": 7012
    },
    {
      "epoch": 0.6270565092989986,
      "grad_norm": 0.16130178790920405,
      "learning_rate": 6.450410392151564e-05,
      "loss": 0.6762,
      "step": 7013
    },
    {
      "epoch": 0.6271459227467812,
      "grad_norm": 0.15607629359200365,
      "learning_rate": 6.447703112889158e-05,
      "loss": 0.6147,
      "step": 7014
    },
    {
      "epoch": 0.6272353361945636,
      "grad_norm": 0.14465663901242234,
      "learning_rate": 6.4449961315536e-05,
      "loss": 0.6615,
      "step": 7015
    },
    {
      "epoch": 0.6273247496423462,
      "grad_norm": 0.13683512562566327,
      "learning_rate": 6.44228944837192e-05,
      "loss": 0.6382,
      "step": 7016
    },
    {
      "epoch": 0.6274141630901288,
      "grad_norm": 0.1588957258339809,
      "learning_rate": 6.43958306357113e-05,
      "loss": 0.6608,
      "step": 7017
    },
    {
      "epoch": 0.6275035765379113,
      "grad_norm": 0.1725096878417772,
      "learning_rate": 6.43687697737821e-05,
      "loss": 0.7265,
      "step": 7018
    },
    {
      "epoch": 0.6275929899856938,
      "grad_norm": 0.14265113893200815,
      "learning_rate": 6.434171190020116e-05,
      "loss": 0.5977,
      "step": 7019
    },
    {
      "epoch": 0.6276824034334764,
      "grad_norm": 0.1392360382057395,
      "learning_rate": 6.431465701723774e-05,
      "loss": 0.6208,
      "step": 7020
    },
    {
      "epoch": 0.6277718168812589,
      "grad_norm": 0.1544713578927783,
      "learning_rate": 6.428760512716096e-05,
      "loss": 0.6119,
      "step": 7021
    },
    {
      "epoch": 0.6278612303290415,
      "grad_norm": 0.16584281441149365,
      "learning_rate": 6.426055623223963e-05,
      "loss": 0.6452,
      "step": 7022
    },
    {
      "epoch": 0.6279506437768241,
      "grad_norm": 0.16821352655181496,
      "learning_rate": 6.423351033474223e-05,
      "loss": 0.6472,
      "step": 7023
    },
    {
      "epoch": 0.6280400572246065,
      "grad_norm": 0.16746864008250129,
      "learning_rate": 6.420646743693714e-05,
      "loss": 0.6634,
      "step": 7024
    },
    {
      "epoch": 0.6281294706723891,
      "grad_norm": 0.15855758349481455,
      "learning_rate": 6.417942754109242e-05,
      "loss": 0.6282,
      "step": 7025
    },
    {
      "epoch": 0.6282188841201717,
      "grad_norm": 0.15300405612125192,
      "learning_rate": 6.415239064947587e-05,
      "loss": 0.6641,
      "step": 7026
    },
    {
      "epoch": 0.6283082975679543,
      "grad_norm": 0.17918320084153933,
      "learning_rate": 6.412535676435498e-05,
      "loss": 0.3624,
      "step": 7027
    },
    {
      "epoch": 0.6283977110157367,
      "grad_norm": 0.1405175737504599,
      "learning_rate": 6.409832588799713e-05,
      "loss": 0.6337,
      "step": 7028
    },
    {
      "epoch": 0.6284871244635193,
      "grad_norm": 0.1581880331421036,
      "learning_rate": 6.407129802266932e-05,
      "loss": 0.6622,
      "step": 7029
    },
    {
      "epoch": 0.6285765379113019,
      "grad_norm": 0.1745132395023698,
      "learning_rate": 6.404427317063832e-05,
      "loss": 0.6697,
      "step": 7030
    },
    {
      "epoch": 0.6286659513590844,
      "grad_norm": 0.17439231501052163,
      "learning_rate": 6.401725133417071e-05,
      "loss": 0.6836,
      "step": 7031
    },
    {
      "epoch": 0.628755364806867,
      "grad_norm": 0.1604066805164297,
      "learning_rate": 6.39902325155328e-05,
      "loss": 0.6772,
      "step": 7032
    },
    {
      "epoch": 0.6288447782546495,
      "grad_norm": 0.15433846321854916,
      "learning_rate": 6.396321671699061e-05,
      "loss": 0.6208,
      "step": 7033
    },
    {
      "epoch": 0.628934191702432,
      "grad_norm": 0.16227107304935012,
      "learning_rate": 6.39362039408099e-05,
      "loss": 0.644,
      "step": 7034
    },
    {
      "epoch": 0.6290236051502146,
      "grad_norm": 0.15891331918057963,
      "learning_rate": 6.39091941892562e-05,
      "loss": 0.668,
      "step": 7035
    },
    {
      "epoch": 0.6291130185979972,
      "grad_norm": 0.1712983448326402,
      "learning_rate": 6.388218746459483e-05,
      "loss": 0.682,
      "step": 7036
    },
    {
      "epoch": 0.6292024320457796,
      "grad_norm": 0.1562911705399184,
      "learning_rate": 6.385518376909072e-05,
      "loss": 0.6799,
      "step": 7037
    },
    {
      "epoch": 0.6292918454935622,
      "grad_norm": 0.14459002016030004,
      "learning_rate": 6.382818310500871e-05,
      "loss": 0.6645,
      "step": 7038
    },
    {
      "epoch": 0.6293812589413448,
      "grad_norm": 0.16028399772888152,
      "learning_rate": 6.380118547461334e-05,
      "loss": 0.6744,
      "step": 7039
    },
    {
      "epoch": 0.6294706723891274,
      "grad_norm": 0.15467355821183637,
      "learning_rate": 6.377419088016881e-05,
      "loss": 0.6616,
      "step": 7040
    },
    {
      "epoch": 0.6295600858369099,
      "grad_norm": 0.16346524618954072,
      "learning_rate": 6.374719932393913e-05,
      "loss": 0.6569,
      "step": 7041
    },
    {
      "epoch": 0.6296494992846924,
      "grad_norm": 0.17709846044712593,
      "learning_rate": 6.372021080818808e-05,
      "loss": 0.6755,
      "step": 7042
    },
    {
      "epoch": 0.629738912732475,
      "grad_norm": 0.18100634525076825,
      "learning_rate": 6.36932253351791e-05,
      "loss": 0.6267,
      "step": 7043
    },
    {
      "epoch": 0.6298283261802575,
      "grad_norm": 0.15775210541486592,
      "learning_rate": 6.366624290717548e-05,
      "loss": 0.6594,
      "step": 7044
    },
    {
      "epoch": 0.6299177396280401,
      "grad_norm": 0.14596632116375652,
      "learning_rate": 6.363926352644019e-05,
      "loss": 0.6758,
      "step": 7045
    },
    {
      "epoch": 0.6300071530758226,
      "grad_norm": 0.157218228988123,
      "learning_rate": 6.361228719523595e-05,
      "loss": 0.6478,
      "step": 7046
    },
    {
      "epoch": 0.6300965665236051,
      "grad_norm": 0.14119681855695398,
      "learning_rate": 6.358531391582525e-05,
      "loss": 0.6394,
      "step": 7047
    },
    {
      "epoch": 0.6301859799713877,
      "grad_norm": 0.15043194479590255,
      "learning_rate": 6.355834369047029e-05,
      "loss": 0.6629,
      "step": 7048
    },
    {
      "epoch": 0.6302753934191703,
      "grad_norm": 0.1607704732099198,
      "learning_rate": 6.353137652143305e-05,
      "loss": 0.6436,
      "step": 7049
    },
    {
      "epoch": 0.6303648068669528,
      "grad_norm": 0.15662493011494774,
      "learning_rate": 6.350441241097518e-05,
      "loss": 0.6286,
      "step": 7050
    },
    {
      "epoch": 0.6304542203147353,
      "grad_norm": 0.1577259938461075,
      "learning_rate": 6.347745136135816e-05,
      "loss": 0.6759,
      "step": 7051
    },
    {
      "epoch": 0.6305436337625179,
      "grad_norm": 0.19751996728893584,
      "learning_rate": 6.345049337484323e-05,
      "loss": 0.706,
      "step": 7052
    },
    {
      "epoch": 0.6306330472103004,
      "grad_norm": 0.1589326187763387,
      "learning_rate": 6.342353845369127e-05,
      "loss": 0.6518,
      "step": 7053
    },
    {
      "epoch": 0.630722460658083,
      "grad_norm": 0.17513756469584923,
      "learning_rate": 6.339658660016295e-05,
      "loss": 0.3606,
      "step": 7054
    },
    {
      "epoch": 0.6308118741058655,
      "grad_norm": 0.18485016159522208,
      "learning_rate": 6.336963781651873e-05,
      "loss": 0.3831,
      "step": 7055
    },
    {
      "epoch": 0.630901287553648,
      "grad_norm": 0.15468660485270044,
      "learning_rate": 6.334269210501875e-05,
      "loss": 0.6413,
      "step": 7056
    },
    {
      "epoch": 0.6309907010014306,
      "grad_norm": 0.14634170041853317,
      "learning_rate": 6.331574946792288e-05,
      "loss": 0.6509,
      "step": 7057
    },
    {
      "epoch": 0.6310801144492132,
      "grad_norm": 0.13735293219099048,
      "learning_rate": 6.328880990749079e-05,
      "loss": 0.6339,
      "step": 7058
    },
    {
      "epoch": 0.6311695278969958,
      "grad_norm": 0.17538254067916617,
      "learning_rate": 6.32618734259819e-05,
      "loss": 0.7435,
      "step": 7059
    },
    {
      "epoch": 0.6312589413447782,
      "grad_norm": 0.15108131353646456,
      "learning_rate": 6.323494002565534e-05,
      "loss": 0.6908,
      "step": 7060
    },
    {
      "epoch": 0.6313483547925608,
      "grad_norm": 0.14542012977066707,
      "learning_rate": 6.320800970876992e-05,
      "loss": 0.6457,
      "step": 7061
    },
    {
      "epoch": 0.6314377682403434,
      "grad_norm": 0.14079100121042473,
      "learning_rate": 6.318108247758434e-05,
      "loss": 0.6497,
      "step": 7062
    },
    {
      "epoch": 0.6315271816881259,
      "grad_norm": 0.14620116666715094,
      "learning_rate": 6.315415833435687e-05,
      "loss": 0.6891,
      "step": 7063
    },
    {
      "epoch": 0.6316165951359084,
      "grad_norm": 0.1767070344686821,
      "learning_rate": 6.312723728134565e-05,
      "loss": 0.3832,
      "step": 7064
    },
    {
      "epoch": 0.631706008583691,
      "grad_norm": 0.15181984598835452,
      "learning_rate": 6.310031932080847e-05,
      "loss": 0.6425,
      "step": 7065
    },
    {
      "epoch": 0.6317954220314735,
      "grad_norm": 0.15085978975197023,
      "learning_rate": 6.3073404455003e-05,
      "loss": 0.6332,
      "step": 7066
    },
    {
      "epoch": 0.6318848354792561,
      "grad_norm": 0.16944897269335946,
      "learning_rate": 6.304649268618649e-05,
      "loss": 0.6582,
      "step": 7067
    },
    {
      "epoch": 0.6319742489270386,
      "grad_norm": 0.15917625666837545,
      "learning_rate": 6.3019584016616e-05,
      "loss": 0.6727,
      "step": 7068
    },
    {
      "epoch": 0.6320636623748211,
      "grad_norm": 0.14131960344302585,
      "learning_rate": 6.299267844854835e-05,
      "loss": 0.6613,
      "step": 7069
    },
    {
      "epoch": 0.6321530758226037,
      "grad_norm": 0.14936275912187838,
      "learning_rate": 6.296577598424004e-05,
      "loss": 0.6678,
      "step": 7070
    },
    {
      "epoch": 0.6322424892703863,
      "grad_norm": 0.1777612988563612,
      "learning_rate": 6.293887662594742e-05,
      "loss": 0.681,
      "step": 7071
    },
    {
      "epoch": 0.6323319027181689,
      "grad_norm": 0.14015708828711823,
      "learning_rate": 6.291198037592639e-05,
      "loss": 0.6243,
      "step": 7072
    },
    {
      "epoch": 0.6324213161659513,
      "grad_norm": 0.1608753670516349,
      "learning_rate": 6.288508723643283e-05,
      "loss": 0.6722,
      "step": 7073
    },
    {
      "epoch": 0.6325107296137339,
      "grad_norm": 0.14760848523101108,
      "learning_rate": 6.285819720972214e-05,
      "loss": 0.6424,
      "step": 7074
    },
    {
      "epoch": 0.6326001430615165,
      "grad_norm": 0.13030487466960805,
      "learning_rate": 6.283131029804963e-05,
      "loss": 0.6327,
      "step": 7075
    },
    {
      "epoch": 0.632689556509299,
      "grad_norm": 0.13784982944435947,
      "learning_rate": 6.280442650367025e-05,
      "loss": 0.6341,
      "step": 7076
    },
    {
      "epoch": 0.6327789699570815,
      "grad_norm": 0.12125127053251361,
      "learning_rate": 6.277754582883866e-05,
      "loss": 0.6149,
      "step": 7077
    },
    {
      "epoch": 0.6328683834048641,
      "grad_norm": 0.15836454248255905,
      "learning_rate": 6.275066827580933e-05,
      "loss": 0.6529,
      "step": 7078
    },
    {
      "epoch": 0.6329577968526466,
      "grad_norm": 0.17422885946222094,
      "learning_rate": 6.272379384683651e-05,
      "loss": 0.6661,
      "step": 7079
    },
    {
      "epoch": 0.6330472103004292,
      "grad_norm": 0.16616616350945673,
      "learning_rate": 6.269692254417408e-05,
      "loss": 0.7039,
      "step": 7080
    },
    {
      "epoch": 0.6331366237482118,
      "grad_norm": 0.14770745932093832,
      "learning_rate": 6.267005437007567e-05,
      "loss": 0.6038,
      "step": 7081
    },
    {
      "epoch": 0.6332260371959942,
      "grad_norm": 0.14590808966880597,
      "learning_rate": 6.264318932679476e-05,
      "loss": 0.6741,
      "step": 7082
    },
    {
      "epoch": 0.6333154506437768,
      "grad_norm": 0.1591347711469283,
      "learning_rate": 6.261632741658443e-05,
      "loss": 0.6649,
      "step": 7083
    },
    {
      "epoch": 0.6334048640915594,
      "grad_norm": 0.14259098485091135,
      "learning_rate": 6.258946864169757e-05,
      "loss": 0.6107,
      "step": 7084
    },
    {
      "epoch": 0.633494277539342,
      "grad_norm": 0.14445723922545653,
      "learning_rate": 6.256261300438676e-05,
      "loss": 0.6234,
      "step": 7085
    },
    {
      "epoch": 0.6335836909871244,
      "grad_norm": 0.14762772704720456,
      "learning_rate": 6.253576050690442e-05,
      "loss": 0.6361,
      "step": 7086
    },
    {
      "epoch": 0.633673104434907,
      "grad_norm": 0.16246320678697154,
      "learning_rate": 6.250891115150261e-05,
      "loss": 0.6453,
      "step": 7087
    },
    {
      "epoch": 0.6337625178826896,
      "grad_norm": 0.18875819622947965,
      "learning_rate": 6.248206494043313e-05,
      "loss": 0.6764,
      "step": 7088
    },
    {
      "epoch": 0.6338519313304721,
      "grad_norm": 0.158030750113573,
      "learning_rate": 6.245522187594757e-05,
      "loss": 0.6443,
      "step": 7089
    },
    {
      "epoch": 0.6339413447782547,
      "grad_norm": 0.13599383936556023,
      "learning_rate": 6.242838196029719e-05,
      "loss": 0.62,
      "step": 7090
    },
    {
      "epoch": 0.6340307582260372,
      "grad_norm": 0.17257408107974273,
      "learning_rate": 6.240154519573304e-05,
      "loss": 0.6792,
      "step": 7091
    },
    {
      "epoch": 0.6341201716738197,
      "grad_norm": 0.17965512393381308,
      "learning_rate": 6.237471158450585e-05,
      "loss": 0.6827,
      "step": 7092
    },
    {
      "epoch": 0.6342095851216023,
      "grad_norm": 0.14837729429174676,
      "learning_rate": 6.234788112886623e-05,
      "loss": 0.6147,
      "step": 7093
    },
    {
      "epoch": 0.6342989985693849,
      "grad_norm": 0.1625116642027572,
      "learning_rate": 6.232105383106432e-05,
      "loss": 0.6593,
      "step": 7094
    },
    {
      "epoch": 0.6343884120171673,
      "grad_norm": 0.15642621595583292,
      "learning_rate": 6.22942296933501e-05,
      "loss": 0.6747,
      "step": 7095
    },
    {
      "epoch": 0.6344778254649499,
      "grad_norm": 0.15564321850484208,
      "learning_rate": 6.226740871797334e-05,
      "loss": 0.6564,
      "step": 7096
    },
    {
      "epoch": 0.6345672389127325,
      "grad_norm": 0.16109590273303565,
      "learning_rate": 6.224059090718341e-05,
      "loss": 0.6822,
      "step": 7097
    },
    {
      "epoch": 0.634656652360515,
      "grad_norm": 0.16404442302395492,
      "learning_rate": 6.221377626322953e-05,
      "loss": 0.6472,
      "step": 7098
    },
    {
      "epoch": 0.6347460658082976,
      "grad_norm": 0.15701890366681587,
      "learning_rate": 6.218696478836058e-05,
      "loss": 0.6124,
      "step": 7099
    },
    {
      "epoch": 0.6348354792560801,
      "grad_norm": 0.16391366735471166,
      "learning_rate": 6.216015648482525e-05,
      "loss": 0.6509,
      "step": 7100
    },
    {
      "epoch": 0.6349248927038627,
      "grad_norm": 0.138253860802126,
      "learning_rate": 6.21333513548719e-05,
      "loss": 0.6122,
      "step": 7101
    },
    {
      "epoch": 0.6350143061516452,
      "grad_norm": 0.16236062035536877,
      "learning_rate": 6.210654940074861e-05,
      "loss": 0.6386,
      "step": 7102
    },
    {
      "epoch": 0.6351037195994278,
      "grad_norm": 0.14280323468810405,
      "learning_rate": 6.20797506247033e-05,
      "loss": 0.6143,
      "step": 7103
    },
    {
      "epoch": 0.6351931330472103,
      "grad_norm": 0.1673387207720936,
      "learning_rate": 6.205295502898348e-05,
      "loss": 0.7021,
      "step": 7104
    },
    {
      "epoch": 0.6352825464949928,
      "grad_norm": 0.15569549201083457,
      "learning_rate": 6.202616261583652e-05,
      "loss": 0.6767,
      "step": 7105
    },
    {
      "epoch": 0.6353719599427754,
      "grad_norm": 0.147649943366679,
      "learning_rate": 6.199937338750939e-05,
      "loss": 0.6163,
      "step": 7106
    },
    {
      "epoch": 0.635461373390558,
      "grad_norm": 0.16341385699580288,
      "learning_rate": 6.197258734624896e-05,
      "loss": 0.6703,
      "step": 7107
    },
    {
      "epoch": 0.6355507868383404,
      "grad_norm": 0.16671202553890518,
      "learning_rate": 6.194580449430168e-05,
      "loss": 0.6273,
      "step": 7108
    },
    {
      "epoch": 0.635640200286123,
      "grad_norm": 0.15828693998591442,
      "learning_rate": 6.191902483391386e-05,
      "loss": 0.6485,
      "step": 7109
    },
    {
      "epoch": 0.6357296137339056,
      "grad_norm": 0.1931038638458852,
      "learning_rate": 6.18922483673314e-05,
      "loss": 0.3965,
      "step": 7110
    },
    {
      "epoch": 0.6358190271816881,
      "grad_norm": 0.14734593344042526,
      "learning_rate": 6.186547509680007e-05,
      "loss": 0.6336,
      "step": 7111
    },
    {
      "epoch": 0.6359084406294707,
      "grad_norm": 0.17419735824460217,
      "learning_rate": 6.183870502456529e-05,
      "loss": 0.6719,
      "step": 7112
    },
    {
      "epoch": 0.6359978540772532,
      "grad_norm": 0.15100445180789557,
      "learning_rate": 6.181193815287218e-05,
      "loss": 0.667,
      "step": 7113
    },
    {
      "epoch": 0.6360872675250357,
      "grad_norm": 0.15149487157210623,
      "learning_rate": 6.178517448396575e-05,
      "loss": 0.6891,
      "step": 7114
    },
    {
      "epoch": 0.6361766809728183,
      "grad_norm": 0.14313068584517627,
      "learning_rate": 6.175841402009058e-05,
      "loss": 0.6601,
      "step": 7115
    },
    {
      "epoch": 0.6362660944206009,
      "grad_norm": 0.16204954899762591,
      "learning_rate": 6.173165676349103e-05,
      "loss": 0.6724,
      "step": 7116
    },
    {
      "epoch": 0.6363555078683834,
      "grad_norm": 0.14944180804634027,
      "learning_rate": 6.170490271641123e-05,
      "loss": 0.6432,
      "step": 7117
    },
    {
      "epoch": 0.6364449213161659,
      "grad_norm": 0.16171445816772034,
      "learning_rate": 6.167815188109496e-05,
      "loss": 0.6424,
      "step": 7118
    },
    {
      "epoch": 0.6365343347639485,
      "grad_norm": 0.1652718832063958,
      "learning_rate": 6.165140425978584e-05,
      "loss": 0.6233,
      "step": 7119
    },
    {
      "epoch": 0.6366237482117311,
      "grad_norm": 0.1628206652635057,
      "learning_rate": 6.16246598547271e-05,
      "loss": 0.6448,
      "step": 7120
    },
    {
      "epoch": 0.6367131616595136,
      "grad_norm": 0.1764891501727085,
      "learning_rate": 6.159791866816182e-05,
      "loss": 0.6684,
      "step": 7121
    },
    {
      "epoch": 0.6368025751072961,
      "grad_norm": 0.17399437119600333,
      "learning_rate": 6.157118070233269e-05,
      "loss": 0.6867,
      "step": 7122
    },
    {
      "epoch": 0.6368919885550787,
      "grad_norm": 0.15452658646313278,
      "learning_rate": 6.154444595948227e-05,
      "loss": 0.6651,
      "step": 7123
    },
    {
      "epoch": 0.6369814020028612,
      "grad_norm": 0.14453296769174112,
      "learning_rate": 6.15177144418527e-05,
      "loss": 0.6446,
      "step": 7124
    },
    {
      "epoch": 0.6370708154506438,
      "grad_norm": 0.1647169561264362,
      "learning_rate": 6.149098615168594e-05,
      "loss": 0.6597,
      "step": 7125
    },
    {
      "epoch": 0.6371602288984263,
      "grad_norm": 0.15622808275049094,
      "learning_rate": 6.14642610912237e-05,
      "loss": 0.6477,
      "step": 7126
    },
    {
      "epoch": 0.6372496423462088,
      "grad_norm": 0.1594713199937113,
      "learning_rate": 6.143753926270727e-05,
      "loss": 0.6493,
      "step": 7127
    },
    {
      "epoch": 0.6373390557939914,
      "grad_norm": 0.17059276386792324,
      "learning_rate": 6.141082066837791e-05,
      "loss": 0.6847,
      "step": 7128
    },
    {
      "epoch": 0.637428469241774,
      "grad_norm": 0.17599085656558153,
      "learning_rate": 6.13841053104764e-05,
      "loss": 0.6916,
      "step": 7129
    },
    {
      "epoch": 0.6375178826895566,
      "grad_norm": 0.1630793673405072,
      "learning_rate": 6.135739319124335e-05,
      "loss": 0.6273,
      "step": 7130
    },
    {
      "epoch": 0.637607296137339,
      "grad_norm": 0.1656604480580773,
      "learning_rate": 6.133068431291904e-05,
      "loss": 0.6438,
      "step": 7131
    },
    {
      "epoch": 0.6376967095851216,
      "grad_norm": 0.16286670354010496,
      "learning_rate": 6.130397867774357e-05,
      "loss": 0.6988,
      "step": 7132
    },
    {
      "epoch": 0.6377861230329042,
      "grad_norm": 0.16114963661705664,
      "learning_rate": 6.127727628795668e-05,
      "loss": 0.6459,
      "step": 7133
    },
    {
      "epoch": 0.6378755364806867,
      "grad_norm": 0.16336457549054606,
      "learning_rate": 6.12505771457978e-05,
      "loss": 0.6851,
      "step": 7134
    },
    {
      "epoch": 0.6379649499284692,
      "grad_norm": 0.16361583704793603,
      "learning_rate": 6.122388125350625e-05,
      "loss": 0.654,
      "step": 7135
    },
    {
      "epoch": 0.6380543633762518,
      "grad_norm": 0.16041487187640008,
      "learning_rate": 6.119718861332098e-05,
      "loss": 0.6552,
      "step": 7136
    },
    {
      "epoch": 0.6381437768240343,
      "grad_norm": 0.16914767856383892,
      "learning_rate": 6.117049922748063e-05,
      "loss": 0.6349,
      "step": 7137
    },
    {
      "epoch": 0.6382331902718169,
      "grad_norm": 0.15038909638517448,
      "learning_rate": 6.114381309822359e-05,
      "loss": 0.6496,
      "step": 7138
    },
    {
      "epoch": 0.6383226037195995,
      "grad_norm": 0.14241713468795827,
      "learning_rate": 6.111713022778804e-05,
      "loss": 0.6322,
      "step": 7139
    },
    {
      "epoch": 0.6384120171673819,
      "grad_norm": 0.17322464062781842,
      "learning_rate": 6.109045061841183e-05,
      "loss": 0.6693,
      "step": 7140
    },
    {
      "epoch": 0.6385014306151645,
      "grad_norm": 0.1773790713268366,
      "learning_rate": 6.106377427233247e-05,
      "loss": 0.6597,
      "step": 7141
    },
    {
      "epoch": 0.6385908440629471,
      "grad_norm": 0.14349609514895448,
      "learning_rate": 6.103710119178738e-05,
      "loss": 0.6041,
      "step": 7142
    },
    {
      "epoch": 0.6386802575107297,
      "grad_norm": 0.14446007098435285,
      "learning_rate": 6.1010431379013585e-05,
      "loss": 0.6673,
      "step": 7143
    },
    {
      "epoch": 0.6387696709585121,
      "grad_norm": 0.1480968478658082,
      "learning_rate": 6.098376483624781e-05,
      "loss": 0.6384,
      "step": 7144
    },
    {
      "epoch": 0.6388590844062947,
      "grad_norm": 0.14436894075363868,
      "learning_rate": 6.095710156572654e-05,
      "loss": 0.6331,
      "step": 7145
    },
    {
      "epoch": 0.6389484978540773,
      "grad_norm": 0.17818787164373848,
      "learning_rate": 6.0930441569686036e-05,
      "loss": 0.6345,
      "step": 7146
    },
    {
      "epoch": 0.6390379113018598,
      "grad_norm": 0.14621929777596462,
      "learning_rate": 6.090378485036221e-05,
      "loss": 0.6637,
      "step": 7147
    },
    {
      "epoch": 0.6391273247496424,
      "grad_norm": 0.14356300001823039,
      "learning_rate": 6.0877131409990684e-05,
      "loss": 0.6516,
      "step": 7148
    },
    {
      "epoch": 0.6392167381974249,
      "grad_norm": 0.16508491348301782,
      "learning_rate": 6.085048125080692e-05,
      "loss": 0.6711,
      "step": 7149
    },
    {
      "epoch": 0.6393061516452074,
      "grad_norm": 0.1566463126152046,
      "learning_rate": 6.082383437504604e-05,
      "loss": 0.6384,
      "step": 7150
    },
    {
      "epoch": 0.63939556509299,
      "grad_norm": 0.17333081333404862,
      "learning_rate": 6.079719078494286e-05,
      "loss": 0.3835,
      "step": 7151
    },
    {
      "epoch": 0.6394849785407726,
      "grad_norm": 0.15506783393181067,
      "learning_rate": 6.0770550482731924e-05,
      "loss": 0.6242,
      "step": 7152
    },
    {
      "epoch": 0.639574391988555,
      "grad_norm": 0.16202362121883668,
      "learning_rate": 6.0743913470647564e-05,
      "loss": 0.6463,
      "step": 7153
    },
    {
      "epoch": 0.6396638054363376,
      "grad_norm": 0.14807871790488394,
      "learning_rate": 6.071727975092376e-05,
      "loss": 0.6692,
      "step": 7154
    },
    {
      "epoch": 0.6397532188841202,
      "grad_norm": 0.1446569122932435,
      "learning_rate": 6.069064932579423e-05,
      "loss": 0.6625,
      "step": 7155
    },
    {
      "epoch": 0.6398426323319027,
      "grad_norm": 0.16529189151226506,
      "learning_rate": 6.0664022197492475e-05,
      "loss": 0.66,
      "step": 7156
    },
    {
      "epoch": 0.6399320457796852,
      "grad_norm": 0.154904096613346,
      "learning_rate": 6.0637398368251705e-05,
      "loss": 0.6621,
      "step": 7157
    },
    {
      "epoch": 0.6400214592274678,
      "grad_norm": 0.1545878679497828,
      "learning_rate": 6.06107778403048e-05,
      "loss": 0.6069,
      "step": 7158
    },
    {
      "epoch": 0.6401108726752504,
      "grad_norm": 0.17282868519053335,
      "learning_rate": 6.058416061588434e-05,
      "loss": 0.628,
      "step": 7159
    },
    {
      "epoch": 0.6402002861230329,
      "grad_norm": 0.14463479430489876,
      "learning_rate": 6.055754669722278e-05,
      "loss": 0.6334,
      "step": 7160
    },
    {
      "epoch": 0.6402896995708155,
      "grad_norm": 0.16898339468215337,
      "learning_rate": 6.0530936086552095e-05,
      "loss": 0.6668,
      "step": 7161
    },
    {
      "epoch": 0.640379113018598,
      "grad_norm": 0.16454224437174653,
      "learning_rate": 6.050432878610417e-05,
      "loss": 0.6684,
      "step": 7162
    },
    {
      "epoch": 0.6404685264663805,
      "grad_norm": 0.17078494007050818,
      "learning_rate": 6.047772479811047e-05,
      "loss": 0.6943,
      "step": 7163
    },
    {
      "epoch": 0.6405579399141631,
      "grad_norm": 0.14940165023748567,
      "learning_rate": 6.0451124124802275e-05,
      "loss": 0.644,
      "step": 7164
    },
    {
      "epoch": 0.6406473533619457,
      "grad_norm": 0.14398137514519246,
      "learning_rate": 6.042452676841053e-05,
      "loss": 0.6397,
      "step": 7165
    },
    {
      "epoch": 0.6407367668097281,
      "grad_norm": 0.13937352367713046,
      "learning_rate": 6.039793273116594e-05,
      "loss": 0.6535,
      "step": 7166
    },
    {
      "epoch": 0.6408261802575107,
      "grad_norm": 0.15780937701948572,
      "learning_rate": 6.03713420152989e-05,
      "loss": 0.6363,
      "step": 7167
    },
    {
      "epoch": 0.6409155937052933,
      "grad_norm": 0.15374841948171722,
      "learning_rate": 6.034475462303952e-05,
      "loss": 0.6642,
      "step": 7168
    },
    {
      "epoch": 0.6410050071530758,
      "grad_norm": 0.1609803421991997,
      "learning_rate": 6.031817055661769e-05,
      "loss": 0.6474,
      "step": 7169
    },
    {
      "epoch": 0.6410944206008584,
      "grad_norm": 0.15358202840214116,
      "learning_rate": 6.029158981826299e-05,
      "loss": 0.6461,
      "step": 7170
    },
    {
      "epoch": 0.6411838340486409,
      "grad_norm": 0.15708053756861223,
      "learning_rate": 6.02650124102047e-05,
      "loss": 0.6435,
      "step": 7171
    },
    {
      "epoch": 0.6412732474964234,
      "grad_norm": 0.1747653872275134,
      "learning_rate": 6.023843833467182e-05,
      "loss": 0.6931,
      "step": 7172
    },
    {
      "epoch": 0.641362660944206,
      "grad_norm": 0.16761657730550342,
      "learning_rate": 6.02118675938931e-05,
      "loss": 0.651,
      "step": 7173
    },
    {
      "epoch": 0.6414520743919886,
      "grad_norm": 0.1715545945214883,
      "learning_rate": 6.0185300190097004e-05,
      "loss": 0.6494,
      "step": 7174
    },
    {
      "epoch": 0.641541487839771,
      "grad_norm": 0.1632531315631592,
      "learning_rate": 6.0158736125511664e-05,
      "loss": 0.6695,
      "step": 7175
    },
    {
      "epoch": 0.6416309012875536,
      "grad_norm": 0.16065321465097404,
      "learning_rate": 6.013217540236502e-05,
      "loss": 0.6412,
      "step": 7176
    },
    {
      "epoch": 0.6417203147353362,
      "grad_norm": 0.1671347433657952,
      "learning_rate": 6.0105618022884694e-05,
      "loss": 0.6907,
      "step": 7177
    },
    {
      "epoch": 0.6418097281831188,
      "grad_norm": 0.14726694279498395,
      "learning_rate": 6.0079063989298e-05,
      "loss": 0.6462,
      "step": 7178
    },
    {
      "epoch": 0.6418991416309013,
      "grad_norm": 0.1579763057587919,
      "learning_rate": 6.005251330383199e-05,
      "loss": 0.6831,
      "step": 7179
    },
    {
      "epoch": 0.6419885550786838,
      "grad_norm": 0.15476406454502675,
      "learning_rate": 6.002596596871346e-05,
      "loss": 0.6565,
      "step": 7180
    },
    {
      "epoch": 0.6420779685264664,
      "grad_norm": 0.1475307910170982,
      "learning_rate": 5.999942198616888e-05,
      "loss": 0.6593,
      "step": 7181
    },
    {
      "epoch": 0.6421673819742489,
      "grad_norm": 0.1470104353558792,
      "learning_rate": 5.9972881358424436e-05,
      "loss": 0.6301,
      "step": 7182
    },
    {
      "epoch": 0.6422567954220315,
      "grad_norm": 0.18097879486455307,
      "learning_rate": 5.994634408770612e-05,
      "loss": 0.6854,
      "step": 7183
    },
    {
      "epoch": 0.642346208869814,
      "grad_norm": 0.15007325914162703,
      "learning_rate": 5.991981017623955e-05,
      "loss": 0.6639,
      "step": 7184
    },
    {
      "epoch": 0.6424356223175965,
      "grad_norm": 0.18205955950129815,
      "learning_rate": 5.9893279626250124e-05,
      "loss": 0.6731,
      "step": 7185
    },
    {
      "epoch": 0.6425250357653791,
      "grad_norm": 0.14579565777915676,
      "learning_rate": 5.986675243996286e-05,
      "loss": 0.6318,
      "step": 7186
    },
    {
      "epoch": 0.6426144492131617,
      "grad_norm": 0.15175698291243395,
      "learning_rate": 5.9840228619602636e-05,
      "loss": 0.6772,
      "step": 7187
    },
    {
      "epoch": 0.6427038626609443,
      "grad_norm": 0.16712109403499273,
      "learning_rate": 5.981370816739389e-05,
      "loss": 0.6804,
      "step": 7188
    },
    {
      "epoch": 0.6427932761087267,
      "grad_norm": 0.1481900771568536,
      "learning_rate": 5.978719108556094e-05,
      "loss": 0.6251,
      "step": 7189
    },
    {
      "epoch": 0.6428826895565093,
      "grad_norm": 0.1369241376942781,
      "learning_rate": 5.976067737632769e-05,
      "loss": 0.6314,
      "step": 7190
    },
    {
      "epoch": 0.6429721030042919,
      "grad_norm": 0.14399451668680288,
      "learning_rate": 5.9734167041917856e-05,
      "loss": 0.6361,
      "step": 7191
    },
    {
      "epoch": 0.6430615164520744,
      "grad_norm": 0.13772598942993677,
      "learning_rate": 5.9707660084554774e-05,
      "loss": 0.6236,
      "step": 7192
    },
    {
      "epoch": 0.6431509298998569,
      "grad_norm": 0.15817183217707909,
      "learning_rate": 5.968115650646161e-05,
      "loss": 0.6293,
      "step": 7193
    },
    {
      "epoch": 0.6432403433476395,
      "grad_norm": 0.15052403000084721,
      "learning_rate": 5.9654656309861155e-05,
      "loss": 0.6477,
      "step": 7194
    },
    {
      "epoch": 0.643329756795422,
      "grad_norm": 0.16264982418124038,
      "learning_rate": 5.9628159496975935e-05,
      "loss": 0.6628,
      "step": 7195
    },
    {
      "epoch": 0.6434191702432046,
      "grad_norm": 0.15992153629582506,
      "learning_rate": 5.9601666070028194e-05,
      "loss": 0.6585,
      "step": 7196
    },
    {
      "epoch": 0.6435085836909872,
      "grad_norm": 0.16562991104200706,
      "learning_rate": 5.9575176031239964e-05,
      "loss": 0.6286,
      "step": 7197
    },
    {
      "epoch": 0.6435979971387696,
      "grad_norm": 0.14746793418044252,
      "learning_rate": 5.954868938283291e-05,
      "loss": 0.6708,
      "step": 7198
    },
    {
      "epoch": 0.6436874105865522,
      "grad_norm": 0.13871721005593354,
      "learning_rate": 5.9522206127028414e-05,
      "loss": 0.6461,
      "step": 7199
    },
    {
      "epoch": 0.6437768240343348,
      "grad_norm": 0.14759587059933646,
      "learning_rate": 5.9495726266047605e-05,
      "loss": 0.6506,
      "step": 7200
    },
    {
      "epoch": 0.6438662374821174,
      "grad_norm": 0.14910651467994954,
      "learning_rate": 5.9469249802111324e-05,
      "loss": 0.6379,
      "step": 7201
    },
    {
      "epoch": 0.6439556509298998,
      "grad_norm": 0.15296804937115258,
      "learning_rate": 5.94427767374401e-05,
      "loss": 0.6604,
      "step": 7202
    },
    {
      "epoch": 0.6440450643776824,
      "grad_norm": 0.17194518999395084,
      "learning_rate": 5.941630707425418e-05,
      "loss": 0.7155,
      "step": 7203
    },
    {
      "epoch": 0.644134477825465,
      "grad_norm": 0.16037058237560953,
      "learning_rate": 5.938984081477363e-05,
      "loss": 0.6378,
      "step": 7204
    },
    {
      "epoch": 0.6442238912732475,
      "grad_norm": 0.16816520023853035,
      "learning_rate": 5.936337796121807e-05,
      "loss": 0.7182,
      "step": 7205
    },
    {
      "epoch": 0.64431330472103,
      "grad_norm": 0.1567932066649316,
      "learning_rate": 5.9336918515806914e-05,
      "loss": 0.6411,
      "step": 7206
    },
    {
      "epoch": 0.6444027181688126,
      "grad_norm": 0.16955030570807886,
      "learning_rate": 5.931046248075931e-05,
      "loss": 0.6771,
      "step": 7207
    },
    {
      "epoch": 0.6444921316165951,
      "grad_norm": 0.13867788458881186,
      "learning_rate": 5.9284009858294076e-05,
      "loss": 0.6286,
      "step": 7208
    },
    {
      "epoch": 0.6445815450643777,
      "grad_norm": 0.14737121335506115,
      "learning_rate": 5.925756065062975e-05,
      "loss": 0.6546,
      "step": 7209
    },
    {
      "epoch": 0.6446709585121603,
      "grad_norm": 0.17889253073866163,
      "learning_rate": 5.9231114859984584e-05,
      "loss": 0.7143,
      "step": 7210
    },
    {
      "epoch": 0.6447603719599427,
      "grad_norm": 0.1521282833618457,
      "learning_rate": 5.920467248857661e-05,
      "loss": 0.6347,
      "step": 7211
    },
    {
      "epoch": 0.6448497854077253,
      "grad_norm": 0.153970893452178,
      "learning_rate": 5.9178233538623486e-05,
      "loss": 0.6515,
      "step": 7212
    },
    {
      "epoch": 0.6449391988555079,
      "grad_norm": 0.16470597248239865,
      "learning_rate": 5.9151798012342605e-05,
      "loss": 0.6611,
      "step": 7213
    },
    {
      "epoch": 0.6450286123032904,
      "grad_norm": 0.15371767869784192,
      "learning_rate": 5.91253659119511e-05,
      "loss": 0.6419,
      "step": 7214
    },
    {
      "epoch": 0.6451180257510729,
      "grad_norm": 0.16015897859736028,
      "learning_rate": 5.9098937239665796e-05,
      "loss": 0.6262,
      "step": 7215
    },
    {
      "epoch": 0.6452074391988555,
      "grad_norm": 0.16584966760384381,
      "learning_rate": 5.9072511997703226e-05,
      "loss": 0.6556,
      "step": 7216
    },
    {
      "epoch": 0.645296852646638,
      "grad_norm": 0.1518887397193282,
      "learning_rate": 5.904609018827961e-05,
      "loss": 0.6496,
      "step": 7217
    },
    {
      "epoch": 0.6453862660944206,
      "grad_norm": 0.16470584805739696,
      "learning_rate": 5.9019671813610986e-05,
      "loss": 0.6646,
      "step": 7218
    },
    {
      "epoch": 0.6454756795422032,
      "grad_norm": 0.15761602444009865,
      "learning_rate": 5.899325687591302e-05,
      "loss": 0.6379,
      "step": 7219
    },
    {
      "epoch": 0.6455650929899857,
      "grad_norm": 0.15242683386784434,
      "learning_rate": 5.896684537740103e-05,
      "loss": 0.6301,
      "step": 7220
    },
    {
      "epoch": 0.6456545064377682,
      "grad_norm": 0.16157247930906932,
      "learning_rate": 5.89404373202902e-05,
      "loss": 0.6585,
      "step": 7221
    },
    {
      "epoch": 0.6457439198855508,
      "grad_norm": 0.15582480065867904,
      "learning_rate": 5.891403270679527e-05,
      "loss": 0.6788,
      "step": 7222
    },
    {
      "epoch": 0.6458333333333334,
      "grad_norm": 0.16374128630454257,
      "learning_rate": 5.8887631539130826e-05,
      "loss": 0.6886,
      "step": 7223
    },
    {
      "epoch": 0.6459227467811158,
      "grad_norm": 0.15015575658851482,
      "learning_rate": 5.886123381951103e-05,
      "loss": 0.676,
      "step": 7224
    },
    {
      "epoch": 0.6460121602288984,
      "grad_norm": 0.17146539499991895,
      "learning_rate": 5.883483955014992e-05,
      "loss": 0.6746,
      "step": 7225
    },
    {
      "epoch": 0.646101573676681,
      "grad_norm": 0.1475692701231164,
      "learning_rate": 5.8808448733261076e-05,
      "loss": 0.6565,
      "step": 7226
    },
    {
      "epoch": 0.6461909871244635,
      "grad_norm": 0.16175527676177662,
      "learning_rate": 5.878206137105791e-05,
      "loss": 0.6825,
      "step": 7227
    },
    {
      "epoch": 0.6462804005722461,
      "grad_norm": 0.16264290427231654,
      "learning_rate": 5.875567746575348e-05,
      "loss": 0.6409,
      "step": 7228
    },
    {
      "epoch": 0.6463698140200286,
      "grad_norm": 0.15179216099221327,
      "learning_rate": 5.872929701956054e-05,
      "loss": 0.6448,
      "step": 7229
    },
    {
      "epoch": 0.6464592274678111,
      "grad_norm": 0.16704449273340735,
      "learning_rate": 5.870292003469164e-05,
      "loss": 0.6519,
      "step": 7230
    },
    {
      "epoch": 0.6465486409155937,
      "grad_norm": 0.1524707884680034,
      "learning_rate": 5.867654651335893e-05,
      "loss": 0.6295,
      "step": 7231
    },
    {
      "epoch": 0.6466380543633763,
      "grad_norm": 0.14304044494356108,
      "learning_rate": 5.86501764577744e-05,
      "loss": 0.6477,
      "step": 7232
    },
    {
      "epoch": 0.6467274678111588,
      "grad_norm": 0.1657961497352661,
      "learning_rate": 5.862380987014959e-05,
      "loss": 0.6988,
      "step": 7233
    },
    {
      "epoch": 0.6468168812589413,
      "grad_norm": 0.14329806202419126,
      "learning_rate": 5.8597446752695915e-05,
      "loss": 0.6201,
      "step": 7234
    },
    {
      "epoch": 0.6469062947067239,
      "grad_norm": 0.15503350065208715,
      "learning_rate": 5.857108710762439e-05,
      "loss": 0.6424,
      "step": 7235
    },
    {
      "epoch": 0.6469957081545065,
      "grad_norm": 0.162907549810076,
      "learning_rate": 5.854473093714572e-05,
      "loss": 0.677,
      "step": 7236
    },
    {
      "epoch": 0.647085121602289,
      "grad_norm": 0.1484534712332384,
      "learning_rate": 5.851837824347042e-05,
      "loss": 0.6413,
      "step": 7237
    },
    {
      "epoch": 0.6471745350500715,
      "grad_norm": 0.1610286752698204,
      "learning_rate": 5.8492029028808615e-05,
      "loss": 0.7032,
      "step": 7238
    },
    {
      "epoch": 0.6472639484978541,
      "grad_norm": 0.16126850442759177,
      "learning_rate": 5.846568329537023e-05,
      "loss": 0.6653,
      "step": 7239
    },
    {
      "epoch": 0.6473533619456366,
      "grad_norm": 0.14277469787310176,
      "learning_rate": 5.8439341045364815e-05,
      "loss": 0.6586,
      "step": 7240
    },
    {
      "epoch": 0.6474427753934192,
      "grad_norm": 0.15893811232502678,
      "learning_rate": 5.8413002281001686e-05,
      "loss": 0.6487,
      "step": 7241
    },
    {
      "epoch": 0.6475321888412017,
      "grad_norm": 0.16684859025479537,
      "learning_rate": 5.8386667004489835e-05,
      "loss": 0.6799,
      "step": 7242
    },
    {
      "epoch": 0.6476216022889842,
      "grad_norm": 0.1530325336523487,
      "learning_rate": 5.836033521803796e-05,
      "loss": 0.6362,
      "step": 7243
    },
    {
      "epoch": 0.6477110157367668,
      "grad_norm": 0.15404771252846172,
      "learning_rate": 5.833400692385444e-05,
      "loss": 0.664,
      "step": 7244
    },
    {
      "epoch": 0.6478004291845494,
      "grad_norm": 0.13261433503669462,
      "learning_rate": 5.8307682124147466e-05,
      "loss": 0.6489,
      "step": 7245
    },
    {
      "epoch": 0.647889842632332,
      "grad_norm": 0.16183020937233616,
      "learning_rate": 5.8281360821124884e-05,
      "loss": 0.649,
      "step": 7246
    },
    {
      "epoch": 0.6479792560801144,
      "grad_norm": 0.16254400722315346,
      "learning_rate": 5.8255043016994145e-05,
      "loss": 0.6387,
      "step": 7247
    },
    {
      "epoch": 0.648068669527897,
      "grad_norm": 0.14959813713394718,
      "learning_rate": 5.8228728713962543e-05,
      "loss": 0.6448,
      "step": 7248
    },
    {
      "epoch": 0.6481580829756796,
      "grad_norm": 0.16961892728689268,
      "learning_rate": 5.820241791423704e-05,
      "loss": 0.6641,
      "step": 7249
    },
    {
      "epoch": 0.6482474964234621,
      "grad_norm": 0.14716107259958708,
      "learning_rate": 5.8176110620024236e-05,
      "loss": 0.6422,
      "step": 7250
    },
    {
      "epoch": 0.6483369098712446,
      "grad_norm": 0.15211636410693072,
      "learning_rate": 5.814980683353053e-05,
      "loss": 0.613,
      "step": 7251
    },
    {
      "epoch": 0.6484263233190272,
      "grad_norm": 0.1396234048795595,
      "learning_rate": 5.812350655696197e-05,
      "loss": 0.6317,
      "step": 7252
    },
    {
      "epoch": 0.6485157367668097,
      "grad_norm": 0.1737445933805643,
      "learning_rate": 5.809720979252435e-05,
      "loss": 0.71,
      "step": 7253
    },
    {
      "epoch": 0.6486051502145923,
      "grad_norm": 0.16742257028775767,
      "learning_rate": 5.807091654242318e-05,
      "loss": 0.6677,
      "step": 7254
    },
    {
      "epoch": 0.6486945636623748,
      "grad_norm": 0.1900092608695902,
      "learning_rate": 5.8044626808863557e-05,
      "loss": 0.3899,
      "step": 7255
    },
    {
      "epoch": 0.6487839771101573,
      "grad_norm": 0.14537228824218304,
      "learning_rate": 5.801834059405041e-05,
      "loss": 0.6388,
      "step": 7256
    },
    {
      "epoch": 0.6488733905579399,
      "grad_norm": 0.1531525163094528,
      "learning_rate": 5.799205790018838e-05,
      "loss": 0.6468,
      "step": 7257
    },
    {
      "epoch": 0.6489628040057225,
      "grad_norm": 0.15409605666741355,
      "learning_rate": 5.796577872948165e-05,
      "loss": 0.6841,
      "step": 7258
    },
    {
      "epoch": 0.649052217453505,
      "grad_norm": 0.12882504522239502,
      "learning_rate": 5.793950308413432e-05,
      "loss": 0.6515,
      "step": 7259
    },
    {
      "epoch": 0.6491416309012875,
      "grad_norm": 0.15372268456929494,
      "learning_rate": 5.7913230966350116e-05,
      "loss": 0.6585,
      "step": 7260
    },
    {
      "epoch": 0.6492310443490701,
      "grad_norm": 0.163234020509528,
      "learning_rate": 5.788696237833237e-05,
      "loss": 0.628,
      "step": 7261
    },
    {
      "epoch": 0.6493204577968527,
      "grad_norm": 0.17444180131217107,
      "learning_rate": 5.786069732228423e-05,
      "loss": 0.6378,
      "step": 7262
    },
    {
      "epoch": 0.6494098712446352,
      "grad_norm": 0.16101566095929393,
      "learning_rate": 5.783443580040854e-05,
      "loss": 0.6538,
      "step": 7263
    },
    {
      "epoch": 0.6494992846924177,
      "grad_norm": 0.17212360155653786,
      "learning_rate": 5.780817781490777e-05,
      "loss": 0.6631,
      "step": 7264
    },
    {
      "epoch": 0.6495886981402003,
      "grad_norm": 0.1953220405148045,
      "learning_rate": 5.778192336798416e-05,
      "loss": 0.7243,
      "step": 7265
    },
    {
      "epoch": 0.6496781115879828,
      "grad_norm": 0.14168056277641236,
      "learning_rate": 5.775567246183966e-05,
      "loss": 0.679,
      "step": 7266
    },
    {
      "epoch": 0.6497675250357654,
      "grad_norm": 0.16472553345861582,
      "learning_rate": 5.772942509867588e-05,
      "loss": 0.6408,
      "step": 7267
    },
    {
      "epoch": 0.649856938483548,
      "grad_norm": 0.1583415848519975,
      "learning_rate": 5.7703181280694184e-05,
      "loss": 0.6673,
      "step": 7268
    },
    {
      "epoch": 0.6499463519313304,
      "grad_norm": 0.14583981589413952,
      "learning_rate": 5.767694101009562e-05,
      "loss": 0.6769,
      "step": 7269
    },
    {
      "epoch": 0.650035765379113,
      "grad_norm": 0.1559452133322016,
      "learning_rate": 5.765070428908086e-05,
      "loss": 0.6669,
      "step": 7270
    },
    {
      "epoch": 0.6501251788268956,
      "grad_norm": 0.15566507686455353,
      "learning_rate": 5.762447111985039e-05,
      "loss": 0.596,
      "step": 7271
    },
    {
      "epoch": 0.6502145922746781,
      "grad_norm": 0.15603777045753975,
      "learning_rate": 5.759824150460435e-05,
      "loss": 0.6723,
      "step": 7272
    },
    {
      "epoch": 0.6503040057224606,
      "grad_norm": 0.131822224464533,
      "learning_rate": 5.7572015445542594e-05,
      "loss": 0.6435,
      "step": 7273
    },
    {
      "epoch": 0.6503934191702432,
      "grad_norm": 0.16111104275805654,
      "learning_rate": 5.7545792944864696e-05,
      "loss": 0.6429,
      "step": 7274
    },
    {
      "epoch": 0.6504828326180258,
      "grad_norm": 0.13885159534917776,
      "learning_rate": 5.751957400476984e-05,
      "loss": 0.6672,
      "step": 7275
    },
    {
      "epoch": 0.6505722460658083,
      "grad_norm": 0.1423976673343939,
      "learning_rate": 5.7493358627456995e-05,
      "loss": 0.6343,
      "step": 7276
    },
    {
      "epoch": 0.6506616595135909,
      "grad_norm": 0.19338698136260468,
      "learning_rate": 5.7467146815124874e-05,
      "loss": 0.402,
      "step": 7277
    },
    {
      "epoch": 0.6507510729613734,
      "grad_norm": 0.17282658017138475,
      "learning_rate": 5.744093856997175e-05,
      "loss": 0.7018,
      "step": 7278
    },
    {
      "epoch": 0.6508404864091559,
      "grad_norm": 0.1640758849938023,
      "learning_rate": 5.741473389419565e-05,
      "loss": 0.6754,
      "step": 7279
    },
    {
      "epoch": 0.6509298998569385,
      "grad_norm": 0.17090975137099715,
      "learning_rate": 5.7388532789994476e-05,
      "loss": 0.6562,
      "step": 7280
    },
    {
      "epoch": 0.6510193133047211,
      "grad_norm": 0.1593494016077553,
      "learning_rate": 5.7362335259565556e-05,
      "loss": 0.6477,
      "step": 7281
    },
    {
      "epoch": 0.6511087267525035,
      "grad_norm": 0.14395650324160672,
      "learning_rate": 5.733614130510609e-05,
      "loss": 0.6523,
      "step": 7282
    },
    {
      "epoch": 0.6511981402002861,
      "grad_norm": 0.15622528392758217,
      "learning_rate": 5.730995092881297e-05,
      "loss": 0.6532,
      "step": 7283
    },
    {
      "epoch": 0.6512875536480687,
      "grad_norm": 0.1467928895336571,
      "learning_rate": 5.728376413288267e-05,
      "loss": 0.6896,
      "step": 7284
    },
    {
      "epoch": 0.6513769670958512,
      "grad_norm": 0.15254793660120738,
      "learning_rate": 5.725758091951148e-05,
      "loss": 0.6299,
      "step": 7285
    },
    {
      "epoch": 0.6514663805436338,
      "grad_norm": 0.15739339876157085,
      "learning_rate": 5.723140129089535e-05,
      "loss": 0.6496,
      "step": 7286
    },
    {
      "epoch": 0.6515557939914163,
      "grad_norm": 0.17853714809295423,
      "learning_rate": 5.720522524922995e-05,
      "loss": 0.3385,
      "step": 7287
    },
    {
      "epoch": 0.6516452074391988,
      "grad_norm": 0.13900383602446567,
      "learning_rate": 5.717905279671068e-05,
      "loss": 0.6308,
      "step": 7288
    },
    {
      "epoch": 0.6517346208869814,
      "grad_norm": 0.17633920856591997,
      "learning_rate": 5.715288393553247e-05,
      "loss": 0.6832,
      "step": 7289
    },
    {
      "epoch": 0.651824034334764,
      "grad_norm": 0.15606249144731238,
      "learning_rate": 5.712671866789015e-05,
      "loss": 0.6052,
      "step": 7290
    },
    {
      "epoch": 0.6519134477825465,
      "grad_norm": 0.1550985139138986,
      "learning_rate": 5.710055699597816e-05,
      "loss": 0.6556,
      "step": 7291
    },
    {
      "epoch": 0.652002861230329,
      "grad_norm": 0.16742791742493804,
      "learning_rate": 5.707439892199068e-05,
      "loss": 0.662,
      "step": 7292
    },
    {
      "epoch": 0.6520922746781116,
      "grad_norm": 0.16015249064108045,
      "learning_rate": 5.7048244448121447e-05,
      "loss": 0.695,
      "step": 7293
    },
    {
      "epoch": 0.6521816881258942,
      "grad_norm": 0.15759845106425863,
      "learning_rate": 5.7022093576564165e-05,
      "loss": 0.6891,
      "step": 7294
    },
    {
      "epoch": 0.6522711015736766,
      "grad_norm": 0.16752762395409598,
      "learning_rate": 5.6995946309511924e-05,
      "loss": 0.6489,
      "step": 7295
    },
    {
      "epoch": 0.6523605150214592,
      "grad_norm": 0.16139896512215093,
      "learning_rate": 5.696980264915777e-05,
      "loss": 0.6468,
      "step": 7296
    },
    {
      "epoch": 0.6524499284692418,
      "grad_norm": 0.17539789429756575,
      "learning_rate": 5.69436625976943e-05,
      "loss": 0.6959,
      "step": 7297
    },
    {
      "epoch": 0.6525393419170243,
      "grad_norm": 0.1591973475617672,
      "learning_rate": 5.691752615731384e-05,
      "loss": 0.6522,
      "step": 7298
    },
    {
      "epoch": 0.6526287553648069,
      "grad_norm": 0.14982837757974202,
      "learning_rate": 5.689139333020842e-05,
      "loss": 0.6362,
      "step": 7299
    },
    {
      "epoch": 0.6527181688125894,
      "grad_norm": 0.16380956889169332,
      "learning_rate": 5.686526411856978e-05,
      "loss": 0.6216,
      "step": 7300
    },
    {
      "epoch": 0.6528075822603719,
      "grad_norm": 0.14539552767935093,
      "learning_rate": 5.6839138524589344e-05,
      "loss": 0.5725,
      "step": 7301
    },
    {
      "epoch": 0.6528969957081545,
      "grad_norm": 0.1538285829427603,
      "learning_rate": 5.681301655045823e-05,
      "loss": 0.6001,
      "step": 7302
    },
    {
      "epoch": 0.6529864091559371,
      "grad_norm": 0.15051261654477452,
      "learning_rate": 5.678689819836731e-05,
      "loss": 0.6361,
      "step": 7303
    },
    {
      "epoch": 0.6530758226037195,
      "grad_norm": 0.14919453306740224,
      "learning_rate": 5.6760783470506996e-05,
      "loss": 0.6495,
      "step": 7304
    },
    {
      "epoch": 0.6531652360515021,
      "grad_norm": 0.18902242600157312,
      "learning_rate": 5.673467236906758e-05,
      "loss": 0.645,
      "step": 7305
    },
    {
      "epoch": 0.6532546494992847,
      "grad_norm": 0.16211519739939373,
      "learning_rate": 5.6708564896238944e-05,
      "loss": 0.6661,
      "step": 7306
    },
    {
      "epoch": 0.6533440629470673,
      "grad_norm": 0.13494801540217446,
      "learning_rate": 5.6682461054210635e-05,
      "loss": 0.6405,
      "step": 7307
    },
    {
      "epoch": 0.6534334763948498,
      "grad_norm": 0.15714690065751627,
      "learning_rate": 5.6656360845172076e-05,
      "loss": 0.6541,
      "step": 7308
    },
    {
      "epoch": 0.6535228898426323,
      "grad_norm": 0.15803425527930284,
      "learning_rate": 5.663026427131215e-05,
      "loss": 0.6632,
      "step": 7309
    },
    {
      "epoch": 0.6536123032904149,
      "grad_norm": 0.14752912925382608,
      "learning_rate": 5.6604171334819564e-05,
      "loss": 0.6448,
      "step": 7310
    },
    {
      "epoch": 0.6537017167381974,
      "grad_norm": 0.1733107072520675,
      "learning_rate": 5.657808203788277e-05,
      "loss": 0.6536,
      "step": 7311
    },
    {
      "epoch": 0.65379113018598,
      "grad_norm": 0.1552063396052512,
      "learning_rate": 5.6551996382689776e-05,
      "loss": 0.6224,
      "step": 7312
    },
    {
      "epoch": 0.6538805436337625,
      "grad_norm": 0.1478590163406204,
      "learning_rate": 5.6525914371428344e-05,
      "loss": 0.5878,
      "step": 7313
    },
    {
      "epoch": 0.653969957081545,
      "grad_norm": 0.14956665808410324,
      "learning_rate": 5.649983600628599e-05,
      "loss": 0.6412,
      "step": 7314
    },
    {
      "epoch": 0.6540593705293276,
      "grad_norm": 0.13560644001884975,
      "learning_rate": 5.647376128944984e-05,
      "loss": 0.6323,
      "step": 7315
    },
    {
      "epoch": 0.6541487839771102,
      "grad_norm": 0.14256711482990894,
      "learning_rate": 5.6447690223106775e-05,
      "loss": 0.6387,
      "step": 7316
    },
    {
      "epoch": 0.6542381974248928,
      "grad_norm": 0.14924367351235415,
      "learning_rate": 5.642162280944336e-05,
      "loss": 0.5961,
      "step": 7317
    },
    {
      "epoch": 0.6543276108726752,
      "grad_norm": 0.15989187252373405,
      "learning_rate": 5.6395559050645794e-05,
      "loss": 0.6825,
      "step": 7318
    },
    {
      "epoch": 0.6544170243204578,
      "grad_norm": 0.1457786592016509,
      "learning_rate": 5.6369498948900014e-05,
      "loss": 0.6427,
      "step": 7319
    },
    {
      "epoch": 0.6545064377682404,
      "grad_norm": 0.1478088658443216,
      "learning_rate": 5.63434425063917e-05,
      "loss": 0.6701,
      "step": 7320
    },
    {
      "epoch": 0.6545958512160229,
      "grad_norm": 0.17677392434395595,
      "learning_rate": 5.6317389725306066e-05,
      "loss": 0.6482,
      "step": 7321
    },
    {
      "epoch": 0.6546852646638054,
      "grad_norm": 0.1522066213488092,
      "learning_rate": 5.629134060782828e-05,
      "loss": 0.6278,
      "step": 7322
    },
    {
      "epoch": 0.654774678111588,
      "grad_norm": 0.18606802557830524,
      "learning_rate": 5.626529515614294e-05,
      "loss": 0.7102,
      "step": 7323
    },
    {
      "epoch": 0.6548640915593705,
      "grad_norm": 0.16730692432403663,
      "learning_rate": 5.6239253372434465e-05,
      "loss": 0.6887,
      "step": 7324
    },
    {
      "epoch": 0.6549535050071531,
      "grad_norm": 0.1518756449978407,
      "learning_rate": 5.621321525888697e-05,
      "loss": 0.6718,
      "step": 7325
    },
    {
      "epoch": 0.6550429184549357,
      "grad_norm": 0.1732875274901166,
      "learning_rate": 5.618718081768426e-05,
      "loss": 0.6869,
      "step": 7326
    },
    {
      "epoch": 0.6551323319027181,
      "grad_norm": 0.18150846549565028,
      "learning_rate": 5.616115005100975e-05,
      "loss": 0.3946,
      "step": 7327
    },
    {
      "epoch": 0.6552217453505007,
      "grad_norm": 0.15890706497264617,
      "learning_rate": 5.613512296104663e-05,
      "loss": 0.6473,
      "step": 7328
    },
    {
      "epoch": 0.6553111587982833,
      "grad_norm": 0.15947950202058286,
      "learning_rate": 5.6109099549977786e-05,
      "loss": 0.6608,
      "step": 7329
    },
    {
      "epoch": 0.6554005722460658,
      "grad_norm": 0.1644588914689451,
      "learning_rate": 5.608307981998574e-05,
      "loss": 0.6506,
      "step": 7330
    },
    {
      "epoch": 0.6554899856938483,
      "grad_norm": 0.16270624409073295,
      "learning_rate": 5.6057063773252794e-05,
      "loss": 0.6577,
      "step": 7331
    },
    {
      "epoch": 0.6555793991416309,
      "grad_norm": 0.17819747612219292,
      "learning_rate": 5.603105141196081e-05,
      "loss": 0.7197,
      "step": 7332
    },
    {
      "epoch": 0.6556688125894135,
      "grad_norm": 0.17163619391557774,
      "learning_rate": 5.600504273829144e-05,
      "loss": 0.6933,
      "step": 7333
    },
    {
      "epoch": 0.655758226037196,
      "grad_norm": 0.1470723172019814,
      "learning_rate": 5.5979037754426003e-05,
      "loss": 0.6169,
      "step": 7334
    },
    {
      "epoch": 0.6558476394849786,
      "grad_norm": 0.1642960417784825,
      "learning_rate": 5.5953036462545505e-05,
      "loss": 0.6136,
      "step": 7335
    },
    {
      "epoch": 0.655937052932761,
      "grad_norm": 0.1676182646793358,
      "learning_rate": 5.592703886483064e-05,
      "loss": 0.6431,
      "step": 7336
    },
    {
      "epoch": 0.6560264663805436,
      "grad_norm": 0.16844814932492616,
      "learning_rate": 5.590104496346185e-05,
      "loss": 0.665,
      "step": 7337
    },
    {
      "epoch": 0.6561158798283262,
      "grad_norm": 0.1311285701708673,
      "learning_rate": 5.5875054760619104e-05,
      "loss": 0.64,
      "step": 7338
    },
    {
      "epoch": 0.6562052932761088,
      "grad_norm": 0.16468730013990773,
      "learning_rate": 5.584906825848224e-05,
      "loss": 0.6625,
      "step": 7339
    },
    {
      "epoch": 0.6562947067238912,
      "grad_norm": 0.1639857159420988,
      "learning_rate": 5.582308545923074e-05,
      "loss": 0.6154,
      "step": 7340
    },
    {
      "epoch": 0.6563841201716738,
      "grad_norm": 0.18207391603745793,
      "learning_rate": 5.579710636504362e-05,
      "loss": 0.6853,
      "step": 7341
    },
    {
      "epoch": 0.6564735336194564,
      "grad_norm": 0.17741887633869086,
      "learning_rate": 5.577113097809989e-05,
      "loss": 0.6656,
      "step": 7342
    },
    {
      "epoch": 0.656562947067239,
      "grad_norm": 0.16342442705913485,
      "learning_rate": 5.574515930057795e-05,
      "loss": 0.6476,
      "step": 7343
    },
    {
      "epoch": 0.6566523605150214,
      "grad_norm": 0.147509356801378,
      "learning_rate": 5.571919133465605e-05,
      "loss": 0.6194,
      "step": 7344
    },
    {
      "epoch": 0.656741773962804,
      "grad_norm": 0.16554424038461754,
      "learning_rate": 5.569322708251215e-05,
      "loss": 0.6548,
      "step": 7345
    },
    {
      "epoch": 0.6568311874105865,
      "grad_norm": 0.14271803219883802,
      "learning_rate": 5.5667266546323723e-05,
      "loss": 0.639,
      "step": 7346
    },
    {
      "epoch": 0.6569206008583691,
      "grad_norm": 0.17697876752315225,
      "learning_rate": 5.564130972826813e-05,
      "loss": 0.6916,
      "step": 7347
    },
    {
      "epoch": 0.6570100143061517,
      "grad_norm": 0.16068944407886443,
      "learning_rate": 5.561535663052231e-05,
      "loss": 0.6351,
      "step": 7348
    },
    {
      "epoch": 0.6570994277539342,
      "grad_norm": 0.16976167028313113,
      "learning_rate": 5.558940725526291e-05,
      "loss": 0.6659,
      "step": 7349
    },
    {
      "epoch": 0.6571888412017167,
      "grad_norm": 0.144100026185656,
      "learning_rate": 5.5563461604666325e-05,
      "loss": 0.6706,
      "step": 7350
    },
    {
      "epoch": 0.6572782546494993,
      "grad_norm": 0.15987310441996722,
      "learning_rate": 5.553751968090857e-05,
      "loss": 0.6285,
      "step": 7351
    },
    {
      "epoch": 0.6573676680972819,
      "grad_norm": 0.15974480357666032,
      "learning_rate": 5.55115814861653e-05,
      "loss": 0.6681,
      "step": 7352
    },
    {
      "epoch": 0.6574570815450643,
      "grad_norm": 0.15106084709343068,
      "learning_rate": 5.548564702261196e-05,
      "loss": 0.642,
      "step": 7353
    },
    {
      "epoch": 0.6575464949928469,
      "grad_norm": 0.15824628235210272,
      "learning_rate": 5.545971629242369e-05,
      "loss": 0.6699,
      "step": 7354
    },
    {
      "epoch": 0.6576359084406295,
      "grad_norm": 0.16486195243667773,
      "learning_rate": 5.543378929777514e-05,
      "loss": 0.654,
      "step": 7355
    },
    {
      "epoch": 0.657725321888412,
      "grad_norm": 0.15212544247901186,
      "learning_rate": 5.540786604084091e-05,
      "loss": 0.6369,
      "step": 7356
    },
    {
      "epoch": 0.6578147353361946,
      "grad_norm": 0.16702377892087653,
      "learning_rate": 5.538194652379514e-05,
      "loss": 0.6753,
      "step": 7357
    },
    {
      "epoch": 0.6579041487839771,
      "grad_norm": 0.17592054723967673,
      "learning_rate": 5.5356030748811575e-05,
      "loss": 0.6453,
      "step": 7358
    },
    {
      "epoch": 0.6579935622317596,
      "grad_norm": 0.1391199679857344,
      "learning_rate": 5.5330118718063795e-05,
      "loss": 0.6631,
      "step": 7359
    },
    {
      "epoch": 0.6580829756795422,
      "grad_norm": 0.15864176656698784,
      "learning_rate": 5.530421043372507e-05,
      "loss": 0.642,
      "step": 7360
    },
    {
      "epoch": 0.6581723891273248,
      "grad_norm": 0.138911582245501,
      "learning_rate": 5.5278305897968185e-05,
      "loss": 0.6158,
      "step": 7361
    },
    {
      "epoch": 0.6582618025751072,
      "grad_norm": 0.18542203298198776,
      "learning_rate": 5.525240511296577e-05,
      "loss": 0.6863,
      "step": 7362
    },
    {
      "epoch": 0.6583512160228898,
      "grad_norm": 0.13625878975526354,
      "learning_rate": 5.522650808089011e-05,
      "loss": 0.6322,
      "step": 7363
    },
    {
      "epoch": 0.6584406294706724,
      "grad_norm": 0.1429475174924821,
      "learning_rate": 5.520061480391313e-05,
      "loss": 0.6304,
      "step": 7364
    },
    {
      "epoch": 0.658530042918455,
      "grad_norm": 0.16836397046874624,
      "learning_rate": 5.517472528420653e-05,
      "loss": 0.6567,
      "step": 7365
    },
    {
      "epoch": 0.6586194563662375,
      "grad_norm": 0.19245810104708141,
      "learning_rate": 5.514883952394154e-05,
      "loss": 0.6492,
      "step": 7366
    },
    {
      "epoch": 0.65870886981402,
      "grad_norm": 0.14494961319443506,
      "learning_rate": 5.512295752528922e-05,
      "loss": 0.6248,
      "step": 7367
    },
    {
      "epoch": 0.6587982832618026,
      "grad_norm": 0.14086986879211993,
      "learning_rate": 5.50970792904203e-05,
      "loss": 0.6169,
      "step": 7368
    },
    {
      "epoch": 0.6588876967095851,
      "grad_norm": 0.164934034436561,
      "learning_rate": 5.507120482150501e-05,
      "loss": 0.69,
      "step": 7369
    },
    {
      "epoch": 0.6589771101573677,
      "grad_norm": 0.14914750710380204,
      "learning_rate": 5.5045334120713565e-05,
      "loss": 0.6405,
      "step": 7370
    },
    {
      "epoch": 0.6590665236051502,
      "grad_norm": 0.14024311119675295,
      "learning_rate": 5.501946719021569e-05,
      "loss": 0.662,
      "step": 7371
    },
    {
      "epoch": 0.6591559370529327,
      "grad_norm": 0.14717796557464982,
      "learning_rate": 5.4993604032180746e-05,
      "loss": 0.649,
      "step": 7372
    },
    {
      "epoch": 0.6592453505007153,
      "grad_norm": 0.147105475638965,
      "learning_rate": 5.496774464877787e-05,
      "loss": 0.6744,
      "step": 7373
    },
    {
      "epoch": 0.6593347639484979,
      "grad_norm": 0.1550142350337352,
      "learning_rate": 5.494188904217592e-05,
      "loss": 0.6775,
      "step": 7374
    },
    {
      "epoch": 0.6594241773962805,
      "grad_norm": 0.1417939373584384,
      "learning_rate": 5.491603721454327e-05,
      "loss": 0.6585,
      "step": 7375
    },
    {
      "epoch": 0.6595135908440629,
      "grad_norm": 0.15406943418538316,
      "learning_rate": 5.489018916804813e-05,
      "loss": 0.6407,
      "step": 7376
    },
    {
      "epoch": 0.6596030042918455,
      "grad_norm": 0.17000562411124626,
      "learning_rate": 5.4864344904858345e-05,
      "loss": 0.6357,
      "step": 7377
    },
    {
      "epoch": 0.6596924177396281,
      "grad_norm": 0.14958939246355338,
      "learning_rate": 5.483850442714145e-05,
      "loss": 0.6197,
      "step": 7378
    },
    {
      "epoch": 0.6597818311874106,
      "grad_norm": 0.1609665840625941,
      "learning_rate": 5.481266773706468e-05,
      "loss": 0.6421,
      "step": 7379
    },
    {
      "epoch": 0.6598712446351931,
      "grad_norm": 0.15629850562392228,
      "learning_rate": 5.4786834836794855e-05,
      "loss": 0.6794,
      "step": 7380
    },
    {
      "epoch": 0.6599606580829757,
      "grad_norm": 0.15548710660069337,
      "learning_rate": 5.4761005728498594e-05,
      "loss": 0.6301,
      "step": 7381
    },
    {
      "epoch": 0.6600500715307582,
      "grad_norm": 0.15126207296936406,
      "learning_rate": 5.4735180414342134e-05,
      "loss": 0.6417,
      "step": 7382
    },
    {
      "epoch": 0.6601394849785408,
      "grad_norm": 0.16084001936572534,
      "learning_rate": 5.4709358896491445e-05,
      "loss": 0.6834,
      "step": 7383
    },
    {
      "epoch": 0.6602288984263234,
      "grad_norm": 0.14428684723621193,
      "learning_rate": 5.468354117711212e-05,
      "loss": 0.6364,
      "step": 7384
    },
    {
      "epoch": 0.6603183118741058,
      "grad_norm": 0.16423623151677036,
      "learning_rate": 5.465772725836951e-05,
      "loss": 0.6688,
      "step": 7385
    },
    {
      "epoch": 0.6604077253218884,
      "grad_norm": 0.14636970934596663,
      "learning_rate": 5.463191714242851e-05,
      "loss": 0.6478,
      "step": 7386
    },
    {
      "epoch": 0.660497138769671,
      "grad_norm": 0.16832224998391726,
      "learning_rate": 5.4606110831453836e-05,
      "loss": 0.6474,
      "step": 7387
    },
    {
      "epoch": 0.6605865522174535,
      "grad_norm": 0.1544094671267407,
      "learning_rate": 5.458030832760985e-05,
      "loss": 0.6198,
      "step": 7388
    },
    {
      "epoch": 0.660675965665236,
      "grad_norm": 0.17129344261249704,
      "learning_rate": 5.4554509633060524e-05,
      "loss": 0.6532,
      "step": 7389
    },
    {
      "epoch": 0.6607653791130186,
      "grad_norm": 0.1741360163110514,
      "learning_rate": 5.452871474996955e-05,
      "loss": 0.6863,
      "step": 7390
    },
    {
      "epoch": 0.6608547925608012,
      "grad_norm": 0.14966386124141615,
      "learning_rate": 5.450292368050043e-05,
      "loss": 0.6271,
      "step": 7391
    },
    {
      "epoch": 0.6609442060085837,
      "grad_norm": 0.1545997984833067,
      "learning_rate": 5.447713642681612e-05,
      "loss": 0.6624,
      "step": 7392
    },
    {
      "epoch": 0.6610336194563662,
      "grad_norm": 0.13117224860048118,
      "learning_rate": 5.44513529910794e-05,
      "loss": 0.5863,
      "step": 7393
    },
    {
      "epoch": 0.6611230329041488,
      "grad_norm": 0.1587982136980578,
      "learning_rate": 5.442557337545273e-05,
      "loss": 0.697,
      "step": 7394
    },
    {
      "epoch": 0.6612124463519313,
      "grad_norm": 0.1787967157333266,
      "learning_rate": 5.4399797582098144e-05,
      "loss": 0.658,
      "step": 7395
    },
    {
      "epoch": 0.6613018597997139,
      "grad_norm": 0.13745664566195412,
      "learning_rate": 5.437402561317746e-05,
      "loss": 0.6133,
      "step": 7396
    },
    {
      "epoch": 0.6613912732474965,
      "grad_norm": 0.1629874493031606,
      "learning_rate": 5.434825747085215e-05,
      "loss": 0.6674,
      "step": 7397
    },
    {
      "epoch": 0.6614806866952789,
      "grad_norm": 0.16130544699857283,
      "learning_rate": 5.432249315728336e-05,
      "loss": 0.6606,
      "step": 7398
    },
    {
      "epoch": 0.6615701001430615,
      "grad_norm": 0.12439377922171191,
      "learning_rate": 5.429673267463193e-05,
      "loss": 0.6285,
      "step": 7399
    },
    {
      "epoch": 0.6616595135908441,
      "grad_norm": 0.18025041123753363,
      "learning_rate": 5.427097602505831e-05,
      "loss": 0.6388,
      "step": 7400
    },
    {
      "epoch": 0.6617489270386266,
      "grad_norm": 0.14862918389162155,
      "learning_rate": 5.42452232107227e-05,
      "loss": 0.6125,
      "step": 7401
    },
    {
      "epoch": 0.6618383404864091,
      "grad_norm": 0.15355800757988036,
      "learning_rate": 5.4219474233785e-05,
      "loss": 0.6648,
      "step": 7402
    },
    {
      "epoch": 0.6619277539341917,
      "grad_norm": 0.1647935378514898,
      "learning_rate": 5.419372909640466e-05,
      "loss": 0.6689,
      "step": 7403
    },
    {
      "epoch": 0.6620171673819742,
      "grad_norm": 0.15029582278680573,
      "learning_rate": 5.416798780074091e-05,
      "loss": 0.6668,
      "step": 7404
    },
    {
      "epoch": 0.6621065808297568,
      "grad_norm": 0.15451158058986436,
      "learning_rate": 5.414225034895273e-05,
      "loss": 0.6558,
      "step": 7405
    },
    {
      "epoch": 0.6621959942775394,
      "grad_norm": 0.17518509678041053,
      "learning_rate": 5.411651674319862e-05,
      "loss": 0.6959,
      "step": 7406
    },
    {
      "epoch": 0.6622854077253219,
      "grad_norm": 0.16717622258716736,
      "learning_rate": 5.409078698563682e-05,
      "loss": 0.663,
      "step": 7407
    },
    {
      "epoch": 0.6623748211731044,
      "grad_norm": 0.15424522969812768,
      "learning_rate": 5.4065061078425315e-05,
      "loss": 0.656,
      "step": 7408
    },
    {
      "epoch": 0.662464234620887,
      "grad_norm": 0.1506521920811973,
      "learning_rate": 5.403933902372162e-05,
      "loss": 0.651,
      "step": 7409
    },
    {
      "epoch": 0.6625536480686696,
      "grad_norm": 0.15234558349703017,
      "learning_rate": 5.401362082368306e-05,
      "loss": 0.664,
      "step": 7410
    },
    {
      "epoch": 0.662643061516452,
      "grad_norm": 0.15486696922660637,
      "learning_rate": 5.3987906480466586e-05,
      "loss": 0.6495,
      "step": 7411
    },
    {
      "epoch": 0.6627324749642346,
      "grad_norm": 0.14952815601270586,
      "learning_rate": 5.3962195996228825e-05,
      "loss": 0.6372,
      "step": 7412
    },
    {
      "epoch": 0.6628218884120172,
      "grad_norm": 0.1651586446584537,
      "learning_rate": 5.3936489373126075e-05,
      "loss": 0.6324,
      "step": 7413
    },
    {
      "epoch": 0.6629113018597997,
      "grad_norm": 0.14218398743672303,
      "learning_rate": 5.391078661331439e-05,
      "loss": 0.6418,
      "step": 7414
    },
    {
      "epoch": 0.6630007153075823,
      "grad_norm": 0.1493728823613927,
      "learning_rate": 5.388508771894931e-05,
      "loss": 0.6428,
      "step": 7415
    },
    {
      "epoch": 0.6630901287553648,
      "grad_norm": 0.17715145382844572,
      "learning_rate": 5.385939269218625e-05,
      "loss": 0.6905,
      "step": 7416
    },
    {
      "epoch": 0.6631795422031473,
      "grad_norm": 0.172510088133687,
      "learning_rate": 5.383370153518019e-05,
      "loss": 0.3857,
      "step": 7417
    },
    {
      "epoch": 0.6632689556509299,
      "grad_norm": 0.14196902293704214,
      "learning_rate": 5.3808014250085836e-05,
      "loss": 0.6246,
      "step": 7418
    },
    {
      "epoch": 0.6633583690987125,
      "grad_norm": 0.15843557419372803,
      "learning_rate": 5.3782330839057573e-05,
      "loss": 0.6622,
      "step": 7419
    },
    {
      "epoch": 0.663447782546495,
      "grad_norm": 0.15191019560266444,
      "learning_rate": 5.375665130424936e-05,
      "loss": 0.6504,
      "step": 7420
    },
    {
      "epoch": 0.6635371959942775,
      "grad_norm": 0.13528293373860914,
      "learning_rate": 5.373097564781496e-05,
      "loss": 0.6226,
      "step": 7421
    },
    {
      "epoch": 0.6636266094420601,
      "grad_norm": 0.17733745531477613,
      "learning_rate": 5.3705303871907795e-05,
      "loss": 0.3557,
      "step": 7422
    },
    {
      "epoch": 0.6637160228898427,
      "grad_norm": 0.16296455475940705,
      "learning_rate": 5.3679635978680843e-05,
      "loss": 0.6624,
      "step": 7423
    },
    {
      "epoch": 0.6638054363376252,
      "grad_norm": 0.19544904734860005,
      "learning_rate": 5.365397197028685e-05,
      "loss": 0.3805,
      "step": 7424
    },
    {
      "epoch": 0.6638948497854077,
      "grad_norm": 0.13924263146901292,
      "learning_rate": 5.3628311848878333e-05,
      "loss": 0.6423,
      "step": 7425
    },
    {
      "epoch": 0.6639842632331903,
      "grad_norm": 0.17571725371586241,
      "learning_rate": 5.360265561660725e-05,
      "loss": 0.6597,
      "step": 7426
    },
    {
      "epoch": 0.6640736766809728,
      "grad_norm": 0.16727814579634726,
      "learning_rate": 5.35770032756254e-05,
      "loss": 0.621,
      "step": 7427
    },
    {
      "epoch": 0.6641630901287554,
      "grad_norm": 0.15807279145034606,
      "learning_rate": 5.3551354828084276e-05,
      "loss": 0.6455,
      "step": 7428
    },
    {
      "epoch": 0.6642525035765379,
      "grad_norm": 0.11733926097665875,
      "learning_rate": 5.352571027613489e-05,
      "loss": 0.6253,
      "step": 7429
    },
    {
      "epoch": 0.6643419170243204,
      "grad_norm": 0.15224733161646004,
      "learning_rate": 5.350006962192804e-05,
      "loss": 0.6551,
      "step": 7430
    },
    {
      "epoch": 0.664431330472103,
      "grad_norm": 0.14360007436364633,
      "learning_rate": 5.34744328676142e-05,
      "loss": 0.6393,
      "step": 7431
    },
    {
      "epoch": 0.6645207439198856,
      "grad_norm": 0.12827336529727884,
      "learning_rate": 5.344880001534349e-05,
      "loss": 0.6713,
      "step": 7432
    },
    {
      "epoch": 0.664610157367668,
      "grad_norm": 0.13587152473654873,
      "learning_rate": 5.342317106726574e-05,
      "loss": 0.6224,
      "step": 7433
    },
    {
      "epoch": 0.6646995708154506,
      "grad_norm": 0.1414592353907008,
      "learning_rate": 5.339754602553034e-05,
      "loss": 0.6218,
      "step": 7434
    },
    {
      "epoch": 0.6647889842632332,
      "grad_norm": 0.1704247247870996,
      "learning_rate": 5.3371924892286484e-05,
      "loss": 0.6494,
      "step": 7435
    },
    {
      "epoch": 0.6648783977110158,
      "grad_norm": 0.13612875208171513,
      "learning_rate": 5.3346307669683005e-05,
      "loss": 0.6185,
      "step": 7436
    },
    {
      "epoch": 0.6649678111587983,
      "grad_norm": 0.17998921527436756,
      "learning_rate": 5.332069435986832e-05,
      "loss": 0.715,
      "step": 7437
    },
    {
      "epoch": 0.6650572246065808,
      "grad_norm": 0.17050414888046891,
      "learning_rate": 5.329508496499058e-05,
      "loss": 0.6301,
      "step": 7438
    },
    {
      "epoch": 0.6651466380543634,
      "grad_norm": 0.1506906018376664,
      "learning_rate": 5.326947948719775e-05,
      "loss": 0.6666,
      "step": 7439
    },
    {
      "epoch": 0.6652360515021459,
      "grad_norm": 0.17945874080341756,
      "learning_rate": 5.324387792863719e-05,
      "loss": 0.6725,
      "step": 7440
    },
    {
      "epoch": 0.6653254649499285,
      "grad_norm": 0.13233088838097753,
      "learning_rate": 5.3218280291456126e-05,
      "loss": 0.6306,
      "step": 7441
    },
    {
      "epoch": 0.665414878397711,
      "grad_norm": 0.18045998297787538,
      "learning_rate": 5.319268657780143e-05,
      "loss": 0.3744,
      "step": 7442
    },
    {
      "epoch": 0.6655042918454935,
      "grad_norm": 0.15603807397261127,
      "learning_rate": 5.316709678981955e-05,
      "loss": 0.6763,
      "step": 7443
    },
    {
      "epoch": 0.6655937052932761,
      "grad_norm": 0.14712200932081057,
      "learning_rate": 5.314151092965669e-05,
      "loss": 0.634,
      "step": 7444
    },
    {
      "epoch": 0.6656831187410587,
      "grad_norm": 0.17159317536812865,
      "learning_rate": 5.311592899945873e-05,
      "loss": 0.6798,
      "step": 7445
    },
    {
      "epoch": 0.6657725321888412,
      "grad_norm": 0.14683440449186785,
      "learning_rate": 5.3090351001371185e-05,
      "loss": 0.5982,
      "step": 7446
    },
    {
      "epoch": 0.6658619456366237,
      "grad_norm": 0.14965404236591745,
      "learning_rate": 5.306477693753924e-05,
      "loss": 0.658,
      "step": 7447
    },
    {
      "epoch": 0.6659513590844063,
      "grad_norm": 0.177374446151378,
      "learning_rate": 5.303920681010781e-05,
      "loss": 0.4059,
      "step": 7448
    },
    {
      "epoch": 0.6660407725321889,
      "grad_norm": 0.15404580299006052,
      "learning_rate": 5.301364062122136e-05,
      "loss": 0.6414,
      "step": 7449
    },
    {
      "epoch": 0.6661301859799714,
      "grad_norm": 0.140914724450317,
      "learning_rate": 5.298807837302411e-05,
      "loss": 0.6429,
      "step": 7450
    },
    {
      "epoch": 0.6662195994277539,
      "grad_norm": 0.1577751501771057,
      "learning_rate": 5.2962520067660004e-05,
      "loss": 0.6548,
      "step": 7451
    },
    {
      "epoch": 0.6663090128755365,
      "grad_norm": 0.14927553072254673,
      "learning_rate": 5.2936965707272446e-05,
      "loss": 0.6466,
      "step": 7452
    },
    {
      "epoch": 0.666398426323319,
      "grad_norm": 0.14985734950995314,
      "learning_rate": 5.291141529400483e-05,
      "loss": 0.6291,
      "step": 7453
    },
    {
      "epoch": 0.6664878397711016,
      "grad_norm": 0.1542012111021877,
      "learning_rate": 5.288586882999989e-05,
      "loss": 0.658,
      "step": 7454
    },
    {
      "epoch": 0.6665772532188842,
      "grad_norm": 0.15480064769326127,
      "learning_rate": 5.286032631740023e-05,
      "loss": 0.6332,
      "step": 7455
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 0.15378646163589477,
      "learning_rate": 5.283478775834811e-05,
      "loss": 0.6695,
      "step": 7456
    },
    {
      "epoch": 0.6667560801144492,
      "grad_norm": 0.14115683466604784,
      "learning_rate": 5.280925315498536e-05,
      "loss": 0.6492,
      "step": 7457
    },
    {
      "epoch": 0.6668454935622318,
      "grad_norm": 0.13477311248659793,
      "learning_rate": 5.278372250945354e-05,
      "loss": 0.6314,
      "step": 7458
    },
    {
      "epoch": 0.6669349070100143,
      "grad_norm": 0.17548780862465346,
      "learning_rate": 5.2758195823893896e-05,
      "loss": 0.6301,
      "step": 7459
    },
    {
      "epoch": 0.6670243204577968,
      "grad_norm": 0.17380332873369983,
      "learning_rate": 5.273267310044732e-05,
      "loss": 0.663,
      "step": 7460
    },
    {
      "epoch": 0.6671137339055794,
      "grad_norm": 0.15875665688354396,
      "learning_rate": 5.270715434125435e-05,
      "loss": 0.6502,
      "step": 7461
    },
    {
      "epoch": 0.667203147353362,
      "grad_norm": 0.17181697437441873,
      "learning_rate": 5.2681639548455284e-05,
      "loss": 0.6658,
      "step": 7462
    },
    {
      "epoch": 0.6672925608011445,
      "grad_norm": 0.1623448882584177,
      "learning_rate": 5.2656128724189916e-05,
      "loss": 0.6755,
      "step": 7463
    },
    {
      "epoch": 0.6673819742489271,
      "grad_norm": 0.149447435298481,
      "learning_rate": 5.263062187059785e-05,
      "loss": 0.6084,
      "step": 7464
    },
    {
      "epoch": 0.6674713876967096,
      "grad_norm": 0.18222550201500307,
      "learning_rate": 5.260511898981837e-05,
      "loss": 0.6683,
      "step": 7465
    },
    {
      "epoch": 0.6675608011444921,
      "grad_norm": 0.1452081716988644,
      "learning_rate": 5.2579620083990244e-05,
      "loss": 0.6019,
      "step": 7466
    },
    {
      "epoch": 0.6676502145922747,
      "grad_norm": 0.1475407936588908,
      "learning_rate": 5.2554125155252175e-05,
      "loss": 0.6696,
      "step": 7467
    },
    {
      "epoch": 0.6677396280400573,
      "grad_norm": 0.1576549717082644,
      "learning_rate": 5.25286342057423e-05,
      "loss": 0.6645,
      "step": 7468
    },
    {
      "epoch": 0.6678290414878397,
      "grad_norm": 0.16470681422202868,
      "learning_rate": 5.2503147237598546e-05,
      "loss": 0.6546,
      "step": 7469
    },
    {
      "epoch": 0.6679184549356223,
      "grad_norm": 0.15771910364408506,
      "learning_rate": 5.247766425295848e-05,
      "loss": 0.6252,
      "step": 7470
    },
    {
      "epoch": 0.6680078683834049,
      "grad_norm": 0.15014236207579737,
      "learning_rate": 5.245218525395934e-05,
      "loss": 0.6382,
      "step": 7471
    },
    {
      "epoch": 0.6680972818311874,
      "grad_norm": 0.16761239698426808,
      "learning_rate": 5.242671024273798e-05,
      "loss": 0.6936,
      "step": 7472
    },
    {
      "epoch": 0.66818669527897,
      "grad_norm": 0.15656408889365683,
      "learning_rate": 5.240123922143096e-05,
      "loss": 0.641,
      "step": 7473
    },
    {
      "epoch": 0.6682761087267525,
      "grad_norm": 0.16825753195861806,
      "learning_rate": 5.2375772192174534e-05,
      "loss": 0.6583,
      "step": 7474
    },
    {
      "epoch": 0.668365522174535,
      "grad_norm": 0.14662681641675218,
      "learning_rate": 5.235030915710457e-05,
      "loss": 0.637,
      "step": 7475
    },
    {
      "epoch": 0.6684549356223176,
      "grad_norm": 0.1415821132858685,
      "learning_rate": 5.2324850118356674e-05,
      "loss": 0.6225,
      "step": 7476
    },
    {
      "epoch": 0.6685443490701002,
      "grad_norm": 0.15550938396614947,
      "learning_rate": 5.229939507806598e-05,
      "loss": 0.6276,
      "step": 7477
    },
    {
      "epoch": 0.6686337625178826,
      "grad_norm": 0.1671245721026899,
      "learning_rate": 5.2273944038367416e-05,
      "loss": 0.643,
      "step": 7478
    },
    {
      "epoch": 0.6687231759656652,
      "grad_norm": 0.15159970466038833,
      "learning_rate": 5.224849700139557e-05,
      "loss": 0.6194,
      "step": 7479
    },
    {
      "epoch": 0.6688125894134478,
      "grad_norm": 0.16609110180794373,
      "learning_rate": 5.222305396928453e-05,
      "loss": 0.6263,
      "step": 7480
    },
    {
      "epoch": 0.6689020028612304,
      "grad_norm": 0.17817137881409315,
      "learning_rate": 5.219761494416828e-05,
      "loss": 0.691,
      "step": 7481
    },
    {
      "epoch": 0.6689914163090128,
      "grad_norm": 0.16098289268183835,
      "learning_rate": 5.2172179928180395e-05,
      "loss": 0.6697,
      "step": 7482
    },
    {
      "epoch": 0.6690808297567954,
      "grad_norm": 0.15371818593439984,
      "learning_rate": 5.214674892345397e-05,
      "loss": 0.6682,
      "step": 7483
    },
    {
      "epoch": 0.669170243204578,
      "grad_norm": 0.14654549131122255,
      "learning_rate": 5.2121321932121916e-05,
      "loss": 0.6295,
      "step": 7484
    },
    {
      "epoch": 0.6692596566523605,
      "grad_norm": 0.13399264863941424,
      "learning_rate": 5.209589895631681e-05,
      "loss": 0.6592,
      "step": 7485
    },
    {
      "epoch": 0.6693490701001431,
      "grad_norm": 0.14989080877624938,
      "learning_rate": 5.207047999817076e-05,
      "loss": 0.6665,
      "step": 7486
    },
    {
      "epoch": 0.6694384835479256,
      "grad_norm": 0.1386987733268404,
      "learning_rate": 5.2045065059815676e-05,
      "loss": 0.6298,
      "step": 7487
    },
    {
      "epoch": 0.6695278969957081,
      "grad_norm": 0.1580123501307196,
      "learning_rate": 5.201965414338308e-05,
      "loss": 0.6991,
      "step": 7488
    },
    {
      "epoch": 0.6696173104434907,
      "grad_norm": 0.15805221038748374,
      "learning_rate": 5.199424725100413e-05,
      "loss": 0.6328,
      "step": 7489
    },
    {
      "epoch": 0.6697067238912733,
      "grad_norm": 0.15994460328516558,
      "learning_rate": 5.1968844384809734e-05,
      "loss": 0.6747,
      "step": 7490
    },
    {
      "epoch": 0.6697961373390557,
      "grad_norm": 0.15173550581823395,
      "learning_rate": 5.194344554693032e-05,
      "loss": 0.6533,
      "step": 7491
    },
    {
      "epoch": 0.6698855507868383,
      "grad_norm": 0.16666183140649884,
      "learning_rate": 5.1918050739496074e-05,
      "loss": 0.6543,
      "step": 7492
    },
    {
      "epoch": 0.6699749642346209,
      "grad_norm": 0.15743777978027962,
      "learning_rate": 5.189265996463689e-05,
      "loss": 0.6376,
      "step": 7493
    },
    {
      "epoch": 0.6700643776824035,
      "grad_norm": 0.18450928507671802,
      "learning_rate": 5.186727322448214e-05,
      "loss": 0.6538,
      "step": 7494
    },
    {
      "epoch": 0.670153791130186,
      "grad_norm": 0.17380462195915294,
      "learning_rate": 5.1841890521161085e-05,
      "loss": 0.679,
      "step": 7495
    },
    {
      "epoch": 0.6702432045779685,
      "grad_norm": 0.15471468792034979,
      "learning_rate": 5.181651185680256e-05,
      "loss": 0.6507,
      "step": 7496
    },
    {
      "epoch": 0.6703326180257511,
      "grad_norm": 0.1489819985489758,
      "learning_rate": 5.1791137233534946e-05,
      "loss": 0.6229,
      "step": 7497
    },
    {
      "epoch": 0.6704220314735336,
      "grad_norm": 0.16310180568735388,
      "learning_rate": 5.1765766653486446e-05,
      "loss": 0.674,
      "step": 7498
    },
    {
      "epoch": 0.6705114449213162,
      "grad_norm": 0.16023628212341837,
      "learning_rate": 5.174040011878487e-05,
      "loss": 0.6352,
      "step": 7499
    },
    {
      "epoch": 0.6706008583690987,
      "grad_norm": 0.1628446273927905,
      "learning_rate": 5.171503763155758e-05,
      "loss": 0.6648,
      "step": 7500
    },
    {
      "epoch": 0.6706902718168812,
      "grad_norm": 0.15881218303906294,
      "learning_rate": 5.168967919393186e-05,
      "loss": 0.6584,
      "step": 7501
    },
    {
      "epoch": 0.6707796852646638,
      "grad_norm": 0.15412260464567756,
      "learning_rate": 5.166432480803435e-05,
      "loss": 0.6429,
      "step": 7502
    },
    {
      "epoch": 0.6708690987124464,
      "grad_norm": 0.17499376813430817,
      "learning_rate": 5.1638974475991554e-05,
      "loss": 0.6867,
      "step": 7503
    },
    {
      "epoch": 0.670958512160229,
      "grad_norm": 0.1993193120306967,
      "learning_rate": 5.1613628199929544e-05,
      "loss": 0.6725,
      "step": 7504
    },
    {
      "epoch": 0.6710479256080114,
      "grad_norm": 0.17157630427418197,
      "learning_rate": 5.158828598197416e-05,
      "loss": 0.6515,
      "step": 7505
    },
    {
      "epoch": 0.671137339055794,
      "grad_norm": 0.15605358114878556,
      "learning_rate": 5.1562947824250704e-05,
      "loss": 0.6486,
      "step": 7506
    },
    {
      "epoch": 0.6712267525035766,
      "grad_norm": 0.164594322769013,
      "learning_rate": 5.1537613728884335e-05,
      "loss": 0.6563,
      "step": 7507
    },
    {
      "epoch": 0.6713161659513591,
      "grad_norm": 0.16030717318094623,
      "learning_rate": 5.151228369799976e-05,
      "loss": 0.6418,
      "step": 7508
    },
    {
      "epoch": 0.6714055793991416,
      "grad_norm": 0.16744038142702644,
      "learning_rate": 5.1486957733721405e-05,
      "loss": 0.7,
      "step": 7509
    },
    {
      "epoch": 0.6714949928469242,
      "grad_norm": 0.14921778551171064,
      "learning_rate": 5.146163583817336e-05,
      "loss": 0.6244,
      "step": 7510
    },
    {
      "epoch": 0.6715844062947067,
      "grad_norm": 0.14382443725038704,
      "learning_rate": 5.143631801347926e-05,
      "loss": 0.6211,
      "step": 7511
    },
    {
      "epoch": 0.6716738197424893,
      "grad_norm": 0.16175207097384764,
      "learning_rate": 5.14110042617625e-05,
      "loss": 0.6638,
      "step": 7512
    },
    {
      "epoch": 0.6717632331902719,
      "grad_norm": 0.1526755152295451,
      "learning_rate": 5.138569458514617e-05,
      "loss": 0.6869,
      "step": 7513
    },
    {
      "epoch": 0.6718526466380543,
      "grad_norm": 0.15375896185831234,
      "learning_rate": 5.136038898575286e-05,
      "loss": 0.6623,
      "step": 7514
    },
    {
      "epoch": 0.6719420600858369,
      "grad_norm": 0.1823108610848069,
      "learning_rate": 5.133508746570502e-05,
      "loss": 0.6782,
      "step": 7515
    },
    {
      "epoch": 0.6720314735336195,
      "grad_norm": 0.17428706966771146,
      "learning_rate": 5.130979002712466e-05,
      "loss": 0.6706,
      "step": 7516
    },
    {
      "epoch": 0.672120886981402,
      "grad_norm": 0.15585084430284635,
      "learning_rate": 5.128449667213337e-05,
      "loss": 0.6487,
      "step": 7517
    },
    {
      "epoch": 0.6722103004291845,
      "grad_norm": 0.1571596534350335,
      "learning_rate": 5.1259207402852506e-05,
      "loss": 0.6469,
      "step": 7518
    },
    {
      "epoch": 0.6722997138769671,
      "grad_norm": 0.1503142077534453,
      "learning_rate": 5.1233922221403094e-05,
      "loss": 0.6486,
      "step": 7519
    },
    {
      "epoch": 0.6723891273247496,
      "grad_norm": 0.15284423444850603,
      "learning_rate": 5.120864112990569e-05,
      "loss": 0.6215,
      "step": 7520
    },
    {
      "epoch": 0.6724785407725322,
      "grad_norm": 0.17056663990156992,
      "learning_rate": 5.118336413048064e-05,
      "loss": 0.6699,
      "step": 7521
    },
    {
      "epoch": 0.6725679542203148,
      "grad_norm": 0.15974683749847693,
      "learning_rate": 5.115809122524787e-05,
      "loss": 0.6518,
      "step": 7522
    },
    {
      "epoch": 0.6726573676680973,
      "grad_norm": 0.15820357019088113,
      "learning_rate": 5.113282241632702e-05,
      "loss": 0.6653,
      "step": 7523
    },
    {
      "epoch": 0.6727467811158798,
      "grad_norm": 0.14448586484409562,
      "learning_rate": 5.110755770583736e-05,
      "loss": 0.6848,
      "step": 7524
    },
    {
      "epoch": 0.6728361945636624,
      "grad_norm": 0.1676553705558416,
      "learning_rate": 5.108229709589776e-05,
      "loss": 0.6526,
      "step": 7525
    },
    {
      "epoch": 0.672925608011445,
      "grad_norm": 0.18855167525111044,
      "learning_rate": 5.1057040588626816e-05,
      "loss": 0.6658,
      "step": 7526
    },
    {
      "epoch": 0.6730150214592274,
      "grad_norm": 0.14649171905158145,
      "learning_rate": 5.103178818614277e-05,
      "loss": 0.6483,
      "step": 7527
    },
    {
      "epoch": 0.67310443490701,
      "grad_norm": 0.17574418123829824,
      "learning_rate": 5.100653989056352e-05,
      "loss": 0.6464,
      "step": 7528
    },
    {
      "epoch": 0.6731938483547926,
      "grad_norm": 0.1519553031147619,
      "learning_rate": 5.098129570400658e-05,
      "loss": 0.6113,
      "step": 7529
    },
    {
      "epoch": 0.6732832618025751,
      "grad_norm": 0.15957167199813668,
      "learning_rate": 5.095605562858923e-05,
      "loss": 0.6832,
      "step": 7530
    },
    {
      "epoch": 0.6733726752503576,
      "grad_norm": 0.14248858462811678,
      "learning_rate": 5.093081966642822e-05,
      "loss": 0.6313,
      "step": 7531
    },
    {
      "epoch": 0.6734620886981402,
      "grad_norm": 0.14934861472932665,
      "learning_rate": 5.09055878196401e-05,
      "loss": 0.652,
      "step": 7532
    },
    {
      "epoch": 0.6735515021459227,
      "grad_norm": 0.15721808097219697,
      "learning_rate": 5.0880360090341084e-05,
      "loss": 0.6758,
      "step": 7533
    },
    {
      "epoch": 0.6736409155937053,
      "grad_norm": 0.1563069380300321,
      "learning_rate": 5.08551364806469e-05,
      "loss": 0.6899,
      "step": 7534
    },
    {
      "epoch": 0.6737303290414879,
      "grad_norm": 0.1685884541483336,
      "learning_rate": 5.0829916992673035e-05,
      "loss": 0.6354,
      "step": 7535
    },
    {
      "epoch": 0.6738197424892703,
      "grad_norm": 0.13376374042190348,
      "learning_rate": 5.080470162853472e-05,
      "loss": 0.6378,
      "step": 7536
    },
    {
      "epoch": 0.6739091559370529,
      "grad_norm": 0.13055444696029345,
      "learning_rate": 5.0779490390346626e-05,
      "loss": 0.5894,
      "step": 7537
    },
    {
      "epoch": 0.6739985693848355,
      "grad_norm": 0.1501153525967508,
      "learning_rate": 5.075428328022325e-05,
      "loss": 0.6577,
      "step": 7538
    },
    {
      "epoch": 0.6740879828326181,
      "grad_norm": 0.14842544297345409,
      "learning_rate": 5.0729080300278676e-05,
      "loss": 0.6414,
      "step": 7539
    },
    {
      "epoch": 0.6741773962804005,
      "grad_norm": 0.16099512275905442,
      "learning_rate": 5.07038814526266e-05,
      "loss": 0.6152,
      "step": 7540
    },
    {
      "epoch": 0.6742668097281831,
      "grad_norm": 0.1638283321673885,
      "learning_rate": 5.0678686739380455e-05,
      "loss": 0.6669,
      "step": 7541
    },
    {
      "epoch": 0.6743562231759657,
      "grad_norm": 0.13863309777942573,
      "learning_rate": 5.065349616265329e-05,
      "loss": 0.614,
      "step": 7542
    },
    {
      "epoch": 0.6744456366237482,
      "grad_norm": 0.16454444044422956,
      "learning_rate": 5.062830972455781e-05,
      "loss": 0.6542,
      "step": 7543
    },
    {
      "epoch": 0.6745350500715308,
      "grad_norm": 0.1758492352547341,
      "learning_rate": 5.060312742720639e-05,
      "loss": 0.3908,
      "step": 7544
    },
    {
      "epoch": 0.6746244635193133,
      "grad_norm": 0.17321903399848326,
      "learning_rate": 5.0577949272711e-05,
      "loss": 0.6537,
      "step": 7545
    },
    {
      "epoch": 0.6747138769670958,
      "grad_norm": 0.14698822084490396,
      "learning_rate": 5.0552775263183294e-05,
      "loss": 0.6654,
      "step": 7546
    },
    {
      "epoch": 0.6748032904148784,
      "grad_norm": 0.1407765403811609,
      "learning_rate": 5.052760540073467e-05,
      "loss": 0.6075,
      "step": 7547
    },
    {
      "epoch": 0.674892703862661,
      "grad_norm": 0.1462162466059146,
      "learning_rate": 5.050243968747599e-05,
      "loss": 0.6464,
      "step": 7548
    },
    {
      "epoch": 0.6749821173104434,
      "grad_norm": 0.15061336245582765,
      "learning_rate": 5.047727812551786e-05,
      "loss": 0.6262,
      "step": 7549
    },
    {
      "epoch": 0.675071530758226,
      "grad_norm": 0.14979980174391125,
      "learning_rate": 5.04521207169707e-05,
      "loss": 0.6585,
      "step": 7550
    },
    {
      "epoch": 0.6751609442060086,
      "grad_norm": 0.1675232904374696,
      "learning_rate": 5.0426967463944285e-05,
      "loss": 0.6276,
      "step": 7551
    },
    {
      "epoch": 0.6752503576537912,
      "grad_norm": 0.19400722440996693,
      "learning_rate": 5.040181836854825e-05,
      "loss": 0.7259,
      "step": 7552
    },
    {
      "epoch": 0.6753397711015737,
      "grad_norm": 0.1564634215836135,
      "learning_rate": 5.037667343289185e-05,
      "loss": 0.6275,
      "step": 7553
    },
    {
      "epoch": 0.6754291845493562,
      "grad_norm": 0.16328160576265335,
      "learning_rate": 5.035153265908388e-05,
      "loss": 0.6723,
      "step": 7554
    },
    {
      "epoch": 0.6755185979971388,
      "grad_norm": 0.14099886036373202,
      "learning_rate": 5.032639604923289e-05,
      "loss": 0.6432,
      "step": 7555
    },
    {
      "epoch": 0.6756080114449213,
      "grad_norm": 0.15184645530655344,
      "learning_rate": 5.0301263605447093e-05,
      "loss": 0.6787,
      "step": 7556
    },
    {
      "epoch": 0.6756974248927039,
      "grad_norm": 0.14122066971098665,
      "learning_rate": 5.0276135329834284e-05,
      "loss": 0.6256,
      "step": 7557
    },
    {
      "epoch": 0.6757868383404864,
      "grad_norm": 0.13387130366181726,
      "learning_rate": 5.0251011224502e-05,
      "loss": 0.6257,
      "step": 7558
    },
    {
      "epoch": 0.6758762517882689,
      "grad_norm": 0.1717137871999529,
      "learning_rate": 5.0225891291557284e-05,
      "loss": 0.6969,
      "step": 7559
    },
    {
      "epoch": 0.6759656652360515,
      "grad_norm": 0.16056096573633344,
      "learning_rate": 5.020077553310694e-05,
      "loss": 0.6605,
      "step": 7560
    },
    {
      "epoch": 0.6760550786838341,
      "grad_norm": 0.16155856459204576,
      "learning_rate": 5.0175663951257424e-05,
      "loss": 0.6565,
      "step": 7561
    },
    {
      "epoch": 0.6761444921316166,
      "grad_norm": 0.13682372554047734,
      "learning_rate": 5.015055654811484e-05,
      "loss": 0.6234,
      "step": 7562
    },
    {
      "epoch": 0.6762339055793991,
      "grad_norm": 0.13892454496941042,
      "learning_rate": 5.012545332578479e-05,
      "loss": 0.6294,
      "step": 7563
    },
    {
      "epoch": 0.6763233190271817,
      "grad_norm": 0.15813979491095376,
      "learning_rate": 5.0100354286372806e-05,
      "loss": 0.6404,
      "step": 7564
    },
    {
      "epoch": 0.6764127324749643,
      "grad_norm": 0.16140629702339251,
      "learning_rate": 5.007525943198382e-05,
      "loss": 0.6652,
      "step": 7565
    },
    {
      "epoch": 0.6765021459227468,
      "grad_norm": 0.16933543945225685,
      "learning_rate": 5.0050168764722524e-05,
      "loss": 0.654,
      "step": 7566
    },
    {
      "epoch": 0.6765915593705293,
      "grad_norm": 0.16375133044088316,
      "learning_rate": 5.002508228669329e-05,
      "loss": 0.6126,
      "step": 7567
    },
    {
      "epoch": 0.6766809728183119,
      "grad_norm": 0.16876334828492992,
      "learning_rate": 5.000000000000002e-05,
      "loss": 0.6548,
      "step": 7568
    },
    {
      "epoch": 0.6767703862660944,
      "grad_norm": 0.14283052737925037,
      "learning_rate": 4.9974921906746363e-05,
      "loss": 0.6532,
      "step": 7569
    },
    {
      "epoch": 0.676859799713877,
      "grad_norm": 0.1751195132070284,
      "learning_rate": 4.9949848009035584e-05,
      "loss": 0.6697,
      "step": 7570
    },
    {
      "epoch": 0.6769492131616596,
      "grad_norm": 0.1656826942998364,
      "learning_rate": 4.992477830897061e-05,
      "loss": 0.6922,
      "step": 7571
    },
    {
      "epoch": 0.677038626609442,
      "grad_norm": 0.16250480048812221,
      "learning_rate": 4.989971280865401e-05,
      "loss": 0.6337,
      "step": 7572
    },
    {
      "epoch": 0.6771280400572246,
      "grad_norm": 0.14482407345711495,
      "learning_rate": 4.987465151018802e-05,
      "loss": 0.6624,
      "step": 7573
    },
    {
      "epoch": 0.6772174535050072,
      "grad_norm": 0.14801695280773558,
      "learning_rate": 4.984959441567443e-05,
      "loss": 0.6529,
      "step": 7574
    },
    {
      "epoch": 0.6773068669527897,
      "grad_norm": 0.13529251163044095,
      "learning_rate": 4.9824541527214797e-05,
      "loss": 0.587,
      "step": 7575
    },
    {
      "epoch": 0.6773962804005722,
      "grad_norm": 0.15018103924569634,
      "learning_rate": 4.979949284691031e-05,
      "loss": 0.6451,
      "step": 7576
    },
    {
      "epoch": 0.6774856938483548,
      "grad_norm": 0.1379580640954402,
      "learning_rate": 4.977444837686165e-05,
      "loss": 0.6684,
      "step": 7577
    },
    {
      "epoch": 0.6775751072961373,
      "grad_norm": 0.1592479315860015,
      "learning_rate": 4.974940811916943e-05,
      "loss": 0.6501,
      "step": 7578
    },
    {
      "epoch": 0.6776645207439199,
      "grad_norm": 0.14166997639868564,
      "learning_rate": 4.9724372075933615e-05,
      "loss": 0.6442,
      "step": 7579
    },
    {
      "epoch": 0.6777539341917024,
      "grad_norm": 0.15814217703201147,
      "learning_rate": 4.9699340249254e-05,
      "loss": 0.6367,
      "step": 7580
    },
    {
      "epoch": 0.677843347639485,
      "grad_norm": 0.1540296430200705,
      "learning_rate": 4.9674312641230015e-05,
      "loss": 0.6344,
      "step": 7581
    },
    {
      "epoch": 0.6779327610872675,
      "grad_norm": 0.14941691634083043,
      "learning_rate": 4.9649289253960606e-05,
      "loss": 0.6634,
      "step": 7582
    },
    {
      "epoch": 0.6780221745350501,
      "grad_norm": 0.167973392111924,
      "learning_rate": 4.9624270089544464e-05,
      "loss": 0.6736,
      "step": 7583
    },
    {
      "epoch": 0.6781115879828327,
      "grad_norm": 0.14818056128307308,
      "learning_rate": 4.959925515008002e-05,
      "loss": 0.6222,
      "step": 7584
    },
    {
      "epoch": 0.6782010014306151,
      "grad_norm": 0.1744441451038741,
      "learning_rate": 4.9574244437665154e-05,
      "loss": 0.6927,
      "step": 7585
    },
    {
      "epoch": 0.6782904148783977,
      "grad_norm": 0.16247711283510036,
      "learning_rate": 4.9549237954397495e-05,
      "loss": 0.6388,
      "step": 7586
    },
    {
      "epoch": 0.6783798283261803,
      "grad_norm": 0.15048868062432744,
      "learning_rate": 4.952423570237437e-05,
      "loss": 0.618,
      "step": 7587
    },
    {
      "epoch": 0.6784692417739628,
      "grad_norm": 0.16643089802167188,
      "learning_rate": 4.949923768369259e-05,
      "loss": 0.7092,
      "step": 7588
    },
    {
      "epoch": 0.6785586552217453,
      "grad_norm": 0.14856213640536808,
      "learning_rate": 4.9474243900448755e-05,
      "loss": 0.6552,
      "step": 7589
    },
    {
      "epoch": 0.6786480686695279,
      "grad_norm": 0.15269561717331367,
      "learning_rate": 4.9449254354739074e-05,
      "loss": 0.645,
      "step": 7590
    },
    {
      "epoch": 0.6787374821173104,
      "grad_norm": 0.1460340782680476,
      "learning_rate": 4.9424269048659375e-05,
      "loss": 0.5951,
      "step": 7591
    },
    {
      "epoch": 0.678826895565093,
      "grad_norm": 0.16255506434740036,
      "learning_rate": 4.939928798430515e-05,
      "loss": 0.6709,
      "step": 7592
    },
    {
      "epoch": 0.6789163090128756,
      "grad_norm": 0.16665519286650016,
      "learning_rate": 4.9374311163771567e-05,
      "loss": 0.6826,
      "step": 7593
    },
    {
      "epoch": 0.679005722460658,
      "grad_norm": 0.15518143333389242,
      "learning_rate": 4.9349338589153335e-05,
      "loss": 0.6556,
      "step": 7594
    },
    {
      "epoch": 0.6790951359084406,
      "grad_norm": 0.1467471101394154,
      "learning_rate": 4.9324370262544905e-05,
      "loss": 0.6365,
      "step": 7595
    },
    {
      "epoch": 0.6791845493562232,
      "grad_norm": 0.12989041879533758,
      "learning_rate": 4.929940618604037e-05,
      "loss": 0.6453,
      "step": 7596
    },
    {
      "epoch": 0.6792739628040058,
      "grad_norm": 0.16266872912134847,
      "learning_rate": 4.927444636173334e-05,
      "loss": 0.7002,
      "step": 7597
    },
    {
      "epoch": 0.6793633762517882,
      "grad_norm": 0.1387194461485351,
      "learning_rate": 4.92494907917173e-05,
      "loss": 0.6361,
      "step": 7598
    },
    {
      "epoch": 0.6794527896995708,
      "grad_norm": 0.18858787971274177,
      "learning_rate": 4.9224539478085144e-05,
      "loss": 0.3627,
      "step": 7599
    },
    {
      "epoch": 0.6795422031473534,
      "grad_norm": 0.1533257204685479,
      "learning_rate": 4.919959242292954e-05,
      "loss": 0.6665,
      "step": 7600
    },
    {
      "epoch": 0.6796316165951359,
      "grad_norm": 0.1800373554195663,
      "learning_rate": 4.9174649628342805e-05,
      "loss": 0.3863,
      "step": 7601
    },
    {
      "epoch": 0.6797210300429185,
      "grad_norm": 0.15940956595436365,
      "learning_rate": 4.914971109641678e-05,
      "loss": 0.6769,
      "step": 7602
    },
    {
      "epoch": 0.679810443490701,
      "grad_norm": 0.1359452626241094,
      "learning_rate": 4.912477682924309e-05,
      "loss": 0.6264,
      "step": 7603
    },
    {
      "epoch": 0.6798998569384835,
      "grad_norm": 0.1615032841731518,
      "learning_rate": 4.909984682891291e-05,
      "loss": 0.6688,
      "step": 7604
    },
    {
      "epoch": 0.6799892703862661,
      "grad_norm": 0.16166374815446558,
      "learning_rate": 4.907492109751711e-05,
      "loss": 0.6656,
      "step": 7605
    },
    {
      "epoch": 0.6800786838340487,
      "grad_norm": 0.15173574775090562,
      "learning_rate": 4.904999963714618e-05,
      "loss": 0.6406,
      "step": 7606
    },
    {
      "epoch": 0.6801680972818311,
      "grad_norm": 0.1466246250845133,
      "learning_rate": 4.902508244989028e-05,
      "loss": 0.6536,
      "step": 7607
    },
    {
      "epoch": 0.6802575107296137,
      "grad_norm": 0.1671172382889626,
      "learning_rate": 4.900016953783912e-05,
      "loss": 0.6624,
      "step": 7608
    },
    {
      "epoch": 0.6803469241773963,
      "grad_norm": 0.15664816134208148,
      "learning_rate": 4.8975260903082157e-05,
      "loss": 0.6482,
      "step": 7609
    },
    {
      "epoch": 0.6804363376251789,
      "grad_norm": 0.16234416392671966,
      "learning_rate": 4.895035654770846e-05,
      "loss": 0.6585,
      "step": 7610
    },
    {
      "epoch": 0.6805257510729614,
      "grad_norm": 0.16182508216936686,
      "learning_rate": 4.892545647380664e-05,
      "loss": 0.6362,
      "step": 7611
    },
    {
      "epoch": 0.6806151645207439,
      "grad_norm": 0.17159727488227547,
      "learning_rate": 4.890056068346518e-05,
      "loss": 0.6547,
      "step": 7612
    },
    {
      "epoch": 0.6807045779685265,
      "grad_norm": 0.15133902932975746,
      "learning_rate": 4.887566917877194e-05,
      "loss": 0.6589,
      "step": 7613
    },
    {
      "epoch": 0.680793991416309,
      "grad_norm": 0.1743037715927526,
      "learning_rate": 4.885078196181458e-05,
      "loss": 0.6684,
      "step": 7614
    },
    {
      "epoch": 0.6808834048640916,
      "grad_norm": 0.1776493688807853,
      "learning_rate": 4.882589903468041e-05,
      "loss": 0.6537,
      "step": 7615
    },
    {
      "epoch": 0.6809728183118741,
      "grad_norm": 0.18282471021467572,
      "learning_rate": 4.880102039945624e-05,
      "loss": 0.6741,
      "step": 7616
    },
    {
      "epoch": 0.6810622317596566,
      "grad_norm": 0.14347717725055292,
      "learning_rate": 4.8776146058228665e-05,
      "loss": 0.6239,
      "step": 7617
    },
    {
      "epoch": 0.6811516452074392,
      "grad_norm": 0.14979655153719487,
      "learning_rate": 4.875127601308386e-05,
      "loss": 0.6493,
      "step": 7618
    },
    {
      "epoch": 0.6812410586552218,
      "grad_norm": 0.14632303072304592,
      "learning_rate": 4.8726410266107634e-05,
      "loss": 0.6198,
      "step": 7619
    },
    {
      "epoch": 0.6813304721030042,
      "grad_norm": 0.14584474406136802,
      "learning_rate": 4.870154881938546e-05,
      "loss": 0.633,
      "step": 7620
    },
    {
      "epoch": 0.6814198855507868,
      "grad_norm": 0.17766509619102513,
      "learning_rate": 4.867669167500247e-05,
      "loss": 0.6432,
      "step": 7621
    },
    {
      "epoch": 0.6815092989985694,
      "grad_norm": 0.16463229308289887,
      "learning_rate": 4.865183883504333e-05,
      "loss": 0.6306,
      "step": 7622
    },
    {
      "epoch": 0.681598712446352,
      "grad_norm": 0.16692938868620733,
      "learning_rate": 4.862699030159246e-05,
      "loss": 0.6644,
      "step": 7623
    },
    {
      "epoch": 0.6816881258941345,
      "grad_norm": 0.16736132435389423,
      "learning_rate": 4.86021460767339e-05,
      "loss": 0.6675,
      "step": 7624
    },
    {
      "epoch": 0.681777539341917,
      "grad_norm": 0.1501330558698494,
      "learning_rate": 4.8577306162551196e-05,
      "loss": 0.636,
      "step": 7625
    },
    {
      "epoch": 0.6818669527896996,
      "grad_norm": 0.14287275526966478,
      "learning_rate": 4.8552470561127775e-05,
      "loss": 0.6261,
      "step": 7626
    },
    {
      "epoch": 0.6819563662374821,
      "grad_norm": 0.15037659955405686,
      "learning_rate": 4.852763927454653e-05,
      "loss": 0.6593,
      "step": 7627
    },
    {
      "epoch": 0.6820457796852647,
      "grad_norm": 0.17483263923927655,
      "learning_rate": 4.850281230489e-05,
      "loss": 0.6318,
      "step": 7628
    },
    {
      "epoch": 0.6821351931330472,
      "grad_norm": 0.1367595104929997,
      "learning_rate": 4.84779896542404e-05,
      "loss": 0.6294,
      "step": 7629
    },
    {
      "epoch": 0.6822246065808297,
      "grad_norm": 0.15873704976153968,
      "learning_rate": 4.845317132467963e-05,
      "loss": 0.6581,
      "step": 7630
    },
    {
      "epoch": 0.6823140200286123,
      "grad_norm": 0.17037572200794662,
      "learning_rate": 4.842835731828908e-05,
      "loss": 0.6287,
      "step": 7631
    },
    {
      "epoch": 0.6824034334763949,
      "grad_norm": 0.16782282262330409,
      "learning_rate": 4.840354763714991e-05,
      "loss": 0.6629,
      "step": 7632
    },
    {
      "epoch": 0.6824928469241774,
      "grad_norm": 0.17227106585722218,
      "learning_rate": 4.83787422833429e-05,
      "loss": 0.6916,
      "step": 7633
    },
    {
      "epoch": 0.6825822603719599,
      "grad_norm": 0.14089978310213314,
      "learning_rate": 4.835394125894843e-05,
      "loss": 0.6081,
      "step": 7634
    },
    {
      "epoch": 0.6826716738197425,
      "grad_norm": 0.169719486136236,
      "learning_rate": 4.832914456604658e-05,
      "loss": 0.6432,
      "step": 7635
    },
    {
      "epoch": 0.682761087267525,
      "grad_norm": 0.1641093558912807,
      "learning_rate": 4.830435220671693e-05,
      "loss": 0.6143,
      "step": 7636
    },
    {
      "epoch": 0.6828505007153076,
      "grad_norm": 0.14940584626591816,
      "learning_rate": 4.8279564183038825e-05,
      "loss": 0.6193,
      "step": 7637
    },
    {
      "epoch": 0.6829399141630901,
      "grad_norm": 0.17346985410115157,
      "learning_rate": 4.825478049709124e-05,
      "loss": 0.6857,
      "step": 7638
    },
    {
      "epoch": 0.6830293276108726,
      "grad_norm": 0.1508331313290555,
      "learning_rate": 4.823000115095266e-05,
      "loss": 0.6374,
      "step": 7639
    },
    {
      "epoch": 0.6831187410586552,
      "grad_norm": 0.16049654817805764,
      "learning_rate": 4.82052261467014e-05,
      "loss": 0.6554,
      "step": 7640
    },
    {
      "epoch": 0.6832081545064378,
      "grad_norm": 0.15206434203986155,
      "learning_rate": 4.81804554864153e-05,
      "loss": 0.6538,
      "step": 7641
    },
    {
      "epoch": 0.6832975679542204,
      "grad_norm": 0.144226262798159,
      "learning_rate": 4.815568917217178e-05,
      "loss": 0.6272,
      "step": 7642
    },
    {
      "epoch": 0.6833869814020028,
      "grad_norm": 0.17088452172593804,
      "learning_rate": 4.813092720604799e-05,
      "loss": 0.6231,
      "step": 7643
    },
    {
      "epoch": 0.6834763948497854,
      "grad_norm": 0.19081974753432088,
      "learning_rate": 4.8106169590120745e-05,
      "loss": 0.3923,
      "step": 7644
    },
    {
      "epoch": 0.683565808297568,
      "grad_norm": 0.19454515544685433,
      "learning_rate": 4.8081416326466346e-05,
      "loss": 0.6841,
      "step": 7645
    },
    {
      "epoch": 0.6836552217453505,
      "grad_norm": 0.16728969848253944,
      "learning_rate": 4.805666741716085e-05,
      "loss": 0.6514,
      "step": 7646
    },
    {
      "epoch": 0.683744635193133,
      "grad_norm": 0.15566475816230524,
      "learning_rate": 4.8031922864279924e-05,
      "loss": 0.6826,
      "step": 7647
    },
    {
      "epoch": 0.6838340486409156,
      "grad_norm": 0.15217715407461196,
      "learning_rate": 4.800718266989888e-05,
      "loss": 0.6722,
      "step": 7648
    },
    {
      "epoch": 0.6839234620886981,
      "grad_norm": 0.15876148134715398,
      "learning_rate": 4.798244683609262e-05,
      "loss": 0.642,
      "step": 7649
    },
    {
      "epoch": 0.6840128755364807,
      "grad_norm": 0.1585210340736084,
      "learning_rate": 4.795771536493576e-05,
      "loss": 0.6764,
      "step": 7650
    },
    {
      "epoch": 0.6841022889842633,
      "grad_norm": 0.16310483939213938,
      "learning_rate": 4.793298825850243e-05,
      "loss": 0.6605,
      "step": 7651
    },
    {
      "epoch": 0.6841917024320457,
      "grad_norm": 0.14348278926813213,
      "learning_rate": 4.790826551886649e-05,
      "loss": 0.6368,
      "step": 7652
    },
    {
      "epoch": 0.6842811158798283,
      "grad_norm": 0.14780114861655913,
      "learning_rate": 4.788354714810141e-05,
      "loss": 0.6581,
      "step": 7653
    },
    {
      "epoch": 0.6843705293276109,
      "grad_norm": 0.16192689830581794,
      "learning_rate": 4.7858833148280294e-05,
      "loss": 0.6566,
      "step": 7654
    },
    {
      "epoch": 0.6844599427753935,
      "grad_norm": 0.15444384355781146,
      "learning_rate": 4.78341235214759e-05,
      "loss": 0.6424,
      "step": 7655
    },
    {
      "epoch": 0.6845493562231759,
      "grad_norm": 0.14818436154403403,
      "learning_rate": 4.7809418269760545e-05,
      "loss": 0.6463,
      "step": 7656
    },
    {
      "epoch": 0.6846387696709585,
      "grad_norm": 0.15284441527290502,
      "learning_rate": 4.778471739520624e-05,
      "loss": 0.6483,
      "step": 7657
    },
    {
      "epoch": 0.6847281831187411,
      "grad_norm": 0.17702845908925727,
      "learning_rate": 4.7760020899884664e-05,
      "loss": 0.6312,
      "step": 7658
    },
    {
      "epoch": 0.6848175965665236,
      "grad_norm": 0.16378201592461808,
      "learning_rate": 4.7735328785867004e-05,
      "loss": 0.6423,
      "step": 7659
    },
    {
      "epoch": 0.6849070100143062,
      "grad_norm": 0.16210598189285635,
      "learning_rate": 4.771064105522417e-05,
      "loss": 0.644,
      "step": 7660
    },
    {
      "epoch": 0.6849964234620887,
      "grad_norm": 0.1434836352734216,
      "learning_rate": 4.7685957710026784e-05,
      "loss": 0.6351,
      "step": 7661
    },
    {
      "epoch": 0.6850858369098712,
      "grad_norm": 0.1554347611743012,
      "learning_rate": 4.766127875234492e-05,
      "loss": 0.6193,
      "step": 7662
    },
    {
      "epoch": 0.6851752503576538,
      "grad_norm": 0.16319753309989993,
      "learning_rate": 4.763660418424839e-05,
      "loss": 0.6766,
      "step": 7663
    },
    {
      "epoch": 0.6852646638054364,
      "grad_norm": 0.1763870236524665,
      "learning_rate": 4.7611934007806666e-05,
      "loss": 0.6892,
      "step": 7664
    },
    {
      "epoch": 0.6853540772532188,
      "grad_norm": 0.1856384563312507,
      "learning_rate": 4.758726822508874e-05,
      "loss": 0.6687,
      "step": 7665
    },
    {
      "epoch": 0.6854434907010014,
      "grad_norm": 0.14944104834973865,
      "learning_rate": 4.756260683816333e-05,
      "loss": 0.6273,
      "step": 7666
    },
    {
      "epoch": 0.685532904148784,
      "grad_norm": 0.1703291761483932,
      "learning_rate": 4.753794984909874e-05,
      "loss": 0.6518,
      "step": 7667
    },
    {
      "epoch": 0.6856223175965666,
      "grad_norm": 0.1559674089231629,
      "learning_rate": 4.751329725996295e-05,
      "loss": 0.6419,
      "step": 7668
    },
    {
      "epoch": 0.685711731044349,
      "grad_norm": 0.14742033675771568,
      "learning_rate": 4.748864907282357e-05,
      "loss": 0.6367,
      "step": 7669
    },
    {
      "epoch": 0.6858011444921316,
      "grad_norm": 0.18331243195586933,
      "learning_rate": 4.746400528974772e-05,
      "loss": 0.6557,
      "step": 7670
    },
    {
      "epoch": 0.6858905579399142,
      "grad_norm": 0.1558697011858476,
      "learning_rate": 4.7439365912802314e-05,
      "loss": 0.6223,
      "step": 7671
    },
    {
      "epoch": 0.6859799713876967,
      "grad_norm": 0.1451463974337362,
      "learning_rate": 4.741473094405386e-05,
      "loss": 0.6569,
      "step": 7672
    },
    {
      "epoch": 0.6860693848354793,
      "grad_norm": 0.1475684839878435,
      "learning_rate": 4.739010038556831e-05,
      "loss": 0.6462,
      "step": 7673
    },
    {
      "epoch": 0.6861587982832618,
      "grad_norm": 0.172634964748001,
      "learning_rate": 4.736547423941157e-05,
      "loss": 0.6476,
      "step": 7674
    },
    {
      "epoch": 0.6862482117310443,
      "grad_norm": 0.1619316971933746,
      "learning_rate": 4.734085250764896e-05,
      "loss": 0.6843,
      "step": 7675
    },
    {
      "epoch": 0.6863376251788269,
      "grad_norm": 0.12611033225403023,
      "learning_rate": 4.7316235192345416e-05,
      "loss": 0.5822,
      "step": 7676
    },
    {
      "epoch": 0.6864270386266095,
      "grad_norm": 0.1521770544133057,
      "learning_rate": 4.729162229556561e-05,
      "loss": 0.6495,
      "step": 7677
    },
    {
      "epoch": 0.6865164520743919,
      "grad_norm": 0.17017626634324307,
      "learning_rate": 4.726701381937382e-05,
      "loss": 0.3865,
      "step": 7678
    },
    {
      "epoch": 0.6866058655221745,
      "grad_norm": 0.1837447701164544,
      "learning_rate": 4.724240976583386e-05,
      "loss": 0.6451,
      "step": 7679
    },
    {
      "epoch": 0.6866952789699571,
      "grad_norm": 0.16267309265040816,
      "learning_rate": 4.7217810137009274e-05,
      "loss": 0.6581,
      "step": 7680
    },
    {
      "epoch": 0.6867846924177397,
      "grad_norm": 0.16551412723400902,
      "learning_rate": 4.7193214934963206e-05,
      "loss": 0.6722,
      "step": 7681
    },
    {
      "epoch": 0.6868741058655222,
      "grad_norm": 0.16894553498223916,
      "learning_rate": 4.716862416175844e-05,
      "loss": 0.636,
      "step": 7682
    },
    {
      "epoch": 0.6869635193133047,
      "grad_norm": 0.1615796640546758,
      "learning_rate": 4.7144037819457345e-05,
      "loss": 0.6729,
      "step": 7683
    },
    {
      "epoch": 0.6870529327610873,
      "grad_norm": 0.17524390311232987,
      "learning_rate": 4.7119455910122e-05,
      "loss": 0.6841,
      "step": 7684
    },
    {
      "epoch": 0.6871423462088698,
      "grad_norm": 0.1353713412322648,
      "learning_rate": 4.709487843581399e-05,
      "loss": 0.6265,
      "step": 7685
    },
    {
      "epoch": 0.6872317596566524,
      "grad_norm": 0.1465279498605069,
      "learning_rate": 4.707030539859465e-05,
      "loss": 0.6298,
      "step": 7686
    },
    {
      "epoch": 0.6873211731044349,
      "grad_norm": 0.16020236555074596,
      "learning_rate": 4.7045736800524856e-05,
      "loss": 0.621,
      "step": 7687
    },
    {
      "epoch": 0.6874105865522174,
      "grad_norm": 0.13631930652556656,
      "learning_rate": 4.702117264366517e-05,
      "loss": 0.6217,
      "step": 7688
    },
    {
      "epoch": 0.6875,
      "grad_norm": 0.15976486096320464,
      "learning_rate": 4.699661293007579e-05,
      "loss": 0.6516,
      "step": 7689
    },
    {
      "epoch": 0.6875894134477826,
      "grad_norm": 0.16442183758550674,
      "learning_rate": 4.6972057661816426e-05,
      "loss": 0.7007,
      "step": 7690
    },
    {
      "epoch": 0.6876788268955651,
      "grad_norm": 0.15791020273598586,
      "learning_rate": 4.6947506840946555e-05,
      "loss": 0.6089,
      "step": 7691
    },
    {
      "epoch": 0.6877682403433476,
      "grad_norm": 0.15063372659148463,
      "learning_rate": 4.6922960469525245e-05,
      "loss": 0.6389,
      "step": 7692
    },
    {
      "epoch": 0.6878576537911302,
      "grad_norm": 0.15088383452703877,
      "learning_rate": 4.68984185496111e-05,
      "loss": 0.6722,
      "step": 7693
    },
    {
      "epoch": 0.6879470672389127,
      "grad_norm": 0.14457289423893988,
      "learning_rate": 4.687388108326243e-05,
      "loss": 0.6443,
      "step": 7694
    },
    {
      "epoch": 0.6880364806866953,
      "grad_norm": 0.15092863799183023,
      "learning_rate": 4.684934807253727e-05,
      "loss": 0.6302,
      "step": 7695
    },
    {
      "epoch": 0.6881258941344778,
      "grad_norm": 0.16610810880273125,
      "learning_rate": 4.6824819519493057e-05,
      "loss": 0.6793,
      "step": 7696
    },
    {
      "epoch": 0.6882153075822603,
      "grad_norm": 0.1478848230515688,
      "learning_rate": 4.6800295426187e-05,
      "loss": 0.6682,
      "step": 7697
    },
    {
      "epoch": 0.6883047210300429,
      "grad_norm": 0.16229200109782235,
      "learning_rate": 4.677577579467597e-05,
      "loss": 0.6809,
      "step": 7698
    },
    {
      "epoch": 0.6883941344778255,
      "grad_norm": 0.17622422676757438,
      "learning_rate": 4.67512606270163e-05,
      "loss": 0.6545,
      "step": 7699
    },
    {
      "epoch": 0.6884835479256081,
      "grad_norm": 0.17111147319833178,
      "learning_rate": 4.67267499252641e-05,
      "loss": 0.6366,
      "step": 7700
    },
    {
      "epoch": 0.6885729613733905,
      "grad_norm": 0.17253758835797492,
      "learning_rate": 4.670224369147505e-05,
      "loss": 0.6937,
      "step": 7701
    },
    {
      "epoch": 0.6886623748211731,
      "grad_norm": 0.1398084778319829,
      "learning_rate": 4.6677741927704434e-05,
      "loss": 0.6251,
      "step": 7702
    },
    {
      "epoch": 0.6887517882689557,
      "grad_norm": 0.18210832116842082,
      "learning_rate": 4.6653244636007255e-05,
      "loss": 0.6771,
      "step": 7703
    },
    {
      "epoch": 0.6888412017167382,
      "grad_norm": 0.15526503678426176,
      "learning_rate": 4.6628751818437985e-05,
      "loss": 0.654,
      "step": 7704
    },
    {
      "epoch": 0.6889306151645207,
      "grad_norm": 0.16563818176211623,
      "learning_rate": 4.660426347705085e-05,
      "loss": 0.6185,
      "step": 7705
    },
    {
      "epoch": 0.6890200286123033,
      "grad_norm": 0.17798393839522256,
      "learning_rate": 4.6579779613899644e-05,
      "loss": 0.6562,
      "step": 7706
    },
    {
      "epoch": 0.6891094420600858,
      "grad_norm": 0.15280695185569104,
      "learning_rate": 4.6555300231037836e-05,
      "loss": 0.6648,
      "step": 7707
    },
    {
      "epoch": 0.6891988555078684,
      "grad_norm": 0.16426727330301827,
      "learning_rate": 4.653082533051839e-05,
      "loss": 0.67,
      "step": 7708
    },
    {
      "epoch": 0.689288268955651,
      "grad_norm": 0.15489833815946571,
      "learning_rate": 4.650635491439412e-05,
      "loss": 0.6446,
      "step": 7709
    },
    {
      "epoch": 0.6893776824034334,
      "grad_norm": 0.1745585969828138,
      "learning_rate": 4.6481888984717225e-05,
      "loss": 0.6726,
      "step": 7710
    },
    {
      "epoch": 0.689467095851216,
      "grad_norm": 0.16662322646296285,
      "learning_rate": 4.6457427543539654e-05,
      "loss": 0.6716,
      "step": 7711
    },
    {
      "epoch": 0.6895565092989986,
      "grad_norm": 0.16202938848087653,
      "learning_rate": 4.6432970592913026e-05,
      "loss": 0.6733,
      "step": 7712
    },
    {
      "epoch": 0.6896459227467812,
      "grad_norm": 0.1646166593323084,
      "learning_rate": 4.640851813488842e-05,
      "loss": 0.6244,
      "step": 7713
    },
    {
      "epoch": 0.6897353361945636,
      "grad_norm": 0.15568228319075736,
      "learning_rate": 4.638407017151667e-05,
      "loss": 0.6493,
      "step": 7714
    },
    {
      "epoch": 0.6898247496423462,
      "grad_norm": 0.15867215051159017,
      "learning_rate": 4.6359626704848215e-05,
      "loss": 0.6633,
      "step": 7715
    },
    {
      "epoch": 0.6899141630901288,
      "grad_norm": 0.15978466226145116,
      "learning_rate": 4.633518773693307e-05,
      "loss": 0.6399,
      "step": 7716
    },
    {
      "epoch": 0.6900035765379113,
      "grad_norm": 0.16876327702837102,
      "learning_rate": 4.631075326982093e-05,
      "loss": 0.6832,
      "step": 7717
    },
    {
      "epoch": 0.6900929899856938,
      "grad_norm": 0.17659120905573997,
      "learning_rate": 4.6286323305561105e-05,
      "loss": 0.6565,
      "step": 7718
    },
    {
      "epoch": 0.6901824034334764,
      "grad_norm": 0.15208936078071608,
      "learning_rate": 4.626189784620245e-05,
      "loss": 0.6486,
      "step": 7719
    },
    {
      "epoch": 0.6902718168812589,
      "grad_norm": 0.16583677468746608,
      "learning_rate": 4.623747689379351e-05,
      "loss": 0.6506,
      "step": 7720
    },
    {
      "epoch": 0.6903612303290415,
      "grad_norm": 0.15441168255801802,
      "learning_rate": 4.621306045038249e-05,
      "loss": 0.7026,
      "step": 7721
    },
    {
      "epoch": 0.6904506437768241,
      "grad_norm": 0.15970327174032678,
      "learning_rate": 4.618864851801707e-05,
      "loss": 0.6724,
      "step": 7722
    },
    {
      "epoch": 0.6905400572246065,
      "grad_norm": 0.16377851512434352,
      "learning_rate": 4.6164241098744776e-05,
      "loss": 0.6503,
      "step": 7723
    },
    {
      "epoch": 0.6906294706723891,
      "grad_norm": 0.16285803787264663,
      "learning_rate": 4.613983819461253e-05,
      "loss": 0.6851,
      "step": 7724
    },
    {
      "epoch": 0.6907188841201717,
      "grad_norm": 0.16459049093523961,
      "learning_rate": 4.6115439807667005e-05,
      "loss": 0.673,
      "step": 7725
    },
    {
      "epoch": 0.6908082975679543,
      "grad_norm": 0.1693160498421862,
      "learning_rate": 4.6091045939954514e-05,
      "loss": 0.6396,
      "step": 7726
    },
    {
      "epoch": 0.6908977110157367,
      "grad_norm": 0.15750955399486666,
      "learning_rate": 4.606665659352085e-05,
      "loss": 0.6388,
      "step": 7727
    },
    {
      "epoch": 0.6909871244635193,
      "grad_norm": 0.1624148022970941,
      "learning_rate": 4.604227177041156e-05,
      "loss": 0.6338,
      "step": 7728
    },
    {
      "epoch": 0.6910765379113019,
      "grad_norm": 0.15916303213346897,
      "learning_rate": 4.601789147267177e-05,
      "loss": 0.6464,
      "step": 7729
    },
    {
      "epoch": 0.6911659513590844,
      "grad_norm": 0.15214946345092126,
      "learning_rate": 4.5993515702346235e-05,
      "loss": 0.6609,
      "step": 7730
    },
    {
      "epoch": 0.691255364806867,
      "grad_norm": 0.15316721388596619,
      "learning_rate": 4.596914446147932e-05,
      "loss": 0.6561,
      "step": 7731
    },
    {
      "epoch": 0.6913447782546495,
      "grad_norm": 0.1842250669573512,
      "learning_rate": 4.594477775211503e-05,
      "loss": 0.3685,
      "step": 7732
    },
    {
      "epoch": 0.691434191702432,
      "grad_norm": 0.18320771286664062,
      "learning_rate": 4.5920415576296914e-05,
      "loss": 0.656,
      "step": 7733
    },
    {
      "epoch": 0.6915236051502146,
      "grad_norm": 0.16657775070055333,
      "learning_rate": 4.589605793606824e-05,
      "loss": 0.6311,
      "step": 7734
    },
    {
      "epoch": 0.6916130185979972,
      "grad_norm": 0.14349189083564637,
      "learning_rate": 4.5871704833471876e-05,
      "loss": 0.6599,
      "step": 7735
    },
    {
      "epoch": 0.6917024320457796,
      "grad_norm": 0.13964531751587722,
      "learning_rate": 4.584735627055019e-05,
      "loss": 0.6143,
      "step": 7736
    },
    {
      "epoch": 0.6917918454935622,
      "grad_norm": 0.1618860786130276,
      "learning_rate": 4.5823012249345396e-05,
      "loss": 0.6507,
      "step": 7737
    },
    {
      "epoch": 0.6918812589413448,
      "grad_norm": 0.16534277694959243,
      "learning_rate": 4.579867277189911e-05,
      "loss": 0.6757,
      "step": 7738
    },
    {
      "epoch": 0.6919706723891274,
      "grad_norm": 0.16824968399268433,
      "learning_rate": 4.5774337840252666e-05,
      "loss": 0.6844,
      "step": 7739
    },
    {
      "epoch": 0.6920600858369099,
      "grad_norm": 0.16908202536565523,
      "learning_rate": 4.575000745644703e-05,
      "loss": 0.6734,
      "step": 7740
    },
    {
      "epoch": 0.6921494992846924,
      "grad_norm": 0.16469724462957208,
      "learning_rate": 4.5725681622522795e-05,
      "loss": 0.6829,
      "step": 7741
    },
    {
      "epoch": 0.692238912732475,
      "grad_norm": 0.13969881533393003,
      "learning_rate": 4.570136034052005e-05,
      "loss": 0.6132,
      "step": 7742
    },
    {
      "epoch": 0.6923283261802575,
      "grad_norm": 0.17440717425569957,
      "learning_rate": 4.567704361247863e-05,
      "loss": 0.6801,
      "step": 7743
    },
    {
      "epoch": 0.6924177396280401,
      "grad_norm": 0.15839877951634146,
      "learning_rate": 4.5652731440437965e-05,
      "loss": 0.6433,
      "step": 7744
    },
    {
      "epoch": 0.6925071530758226,
      "grad_norm": 0.16298554835280507,
      "learning_rate": 4.5628423826437085e-05,
      "loss": 0.6412,
      "step": 7745
    },
    {
      "epoch": 0.6925965665236051,
      "grad_norm": 0.16030524126523182,
      "learning_rate": 4.5604120772514655e-05,
      "loss": 0.6501,
      "step": 7746
    },
    {
      "epoch": 0.6926859799713877,
      "grad_norm": 0.16715914118028175,
      "learning_rate": 4.557982228070891e-05,
      "loss": 0.6769,
      "step": 7747
    },
    {
      "epoch": 0.6927753934191703,
      "grad_norm": 0.1928420825973098,
      "learning_rate": 4.5555528353057716e-05,
      "loss": 0.7264,
      "step": 7748
    },
    {
      "epoch": 0.6928648068669528,
      "grad_norm": 0.16717263221246056,
      "learning_rate": 4.553123899159867e-05,
      "loss": 0.4112,
      "step": 7749
    },
    {
      "epoch": 0.6929542203147353,
      "grad_norm": 0.1641556865208799,
      "learning_rate": 4.5506954198368744e-05,
      "loss": 0.6521,
      "step": 7750
    },
    {
      "epoch": 0.6930436337625179,
      "grad_norm": 0.16142762355406826,
      "learning_rate": 4.54826739754048e-05,
      "loss": 0.6729,
      "step": 7751
    },
    {
      "epoch": 0.6931330472103004,
      "grad_norm": 0.15258855870910618,
      "learning_rate": 4.545839832474318e-05,
      "loss": 0.6384,
      "step": 7752
    },
    {
      "epoch": 0.693222460658083,
      "grad_norm": 0.16866466899592342,
      "learning_rate": 4.543412724841979e-05,
      "loss": 0.66,
      "step": 7753
    },
    {
      "epoch": 0.6933118741058655,
      "grad_norm": 0.1379988102371824,
      "learning_rate": 4.5409860748470246e-05,
      "loss": 0.6009,
      "step": 7754
    },
    {
      "epoch": 0.693401287553648,
      "grad_norm": 0.14744594419311557,
      "learning_rate": 4.538559882692979e-05,
      "loss": 0.6511,
      "step": 7755
    },
    {
      "epoch": 0.6934907010014306,
      "grad_norm": 0.163938148324155,
      "learning_rate": 4.536134148583313e-05,
      "loss": 0.6522,
      "step": 7756
    },
    {
      "epoch": 0.6935801144492132,
      "grad_norm": 0.15956893710009687,
      "learning_rate": 4.5337088727214835e-05,
      "loss": 0.6459,
      "step": 7757
    },
    {
      "epoch": 0.6936695278969958,
      "grad_norm": 0.15935218142454924,
      "learning_rate": 4.531284055310887e-05,
      "loss": 0.664,
      "step": 7758
    },
    {
      "epoch": 0.6937589413447782,
      "grad_norm": 0.16666035576140503,
      "learning_rate": 4.5288596965548924e-05,
      "loss": 0.6672,
      "step": 7759
    },
    {
      "epoch": 0.6938483547925608,
      "grad_norm": 0.14862683368860213,
      "learning_rate": 4.5264357966568306e-05,
      "loss": 0.6442,
      "step": 7760
    },
    {
      "epoch": 0.6939377682403434,
      "grad_norm": 0.15692571528967103,
      "learning_rate": 4.5240123558199846e-05,
      "loss": 0.6753,
      "step": 7761
    },
    {
      "epoch": 0.6940271816881259,
      "grad_norm": 0.15816332355740553,
      "learning_rate": 4.521589374247609e-05,
      "loss": 0.6461,
      "step": 7762
    },
    {
      "epoch": 0.6941165951359084,
      "grad_norm": 0.16268087980965965,
      "learning_rate": 4.519166852142917e-05,
      "loss": 0.6752,
      "step": 7763
    },
    {
      "epoch": 0.694206008583691,
      "grad_norm": 0.1497415069909483,
      "learning_rate": 4.516744789709081e-05,
      "loss": 0.6116,
      "step": 7764
    },
    {
      "epoch": 0.6942954220314735,
      "grad_norm": 0.1497018183169731,
      "learning_rate": 4.5143231871492375e-05,
      "loss": 0.6727,
      "step": 7765
    },
    {
      "epoch": 0.6943848354792561,
      "grad_norm": 0.1487407860096016,
      "learning_rate": 4.5119020446664875e-05,
      "loss": 0.6467,
      "step": 7766
    },
    {
      "epoch": 0.6944742489270386,
      "grad_norm": 0.16254901449876505,
      "learning_rate": 4.509481362463881e-05,
      "loss": 0.64,
      "step": 7767
    },
    {
      "epoch": 0.6945636623748211,
      "grad_norm": 0.16125531747488628,
      "learning_rate": 4.507061140744442e-05,
      "loss": 0.6464,
      "step": 7768
    },
    {
      "epoch": 0.6946530758226037,
      "grad_norm": 0.15511496057217544,
      "learning_rate": 4.504641379711154e-05,
      "loss": 0.6326,
      "step": 7769
    },
    {
      "epoch": 0.6947424892703863,
      "grad_norm": 0.1444165378570904,
      "learning_rate": 4.502222079566951e-05,
      "loss": 0.6432,
      "step": 7770
    },
    {
      "epoch": 0.6948319027181689,
      "grad_norm": 0.15998880835169932,
      "learning_rate": 4.499803240514745e-05,
      "loss": 0.6515,
      "step": 7771
    },
    {
      "epoch": 0.6949213161659513,
      "grad_norm": 0.15723652052515283,
      "learning_rate": 4.497384862757403e-05,
      "loss": 0.6499,
      "step": 7772
    },
    {
      "epoch": 0.6950107296137339,
      "grad_norm": 0.136605518272814,
      "learning_rate": 4.494966946497743e-05,
      "loss": 0.6659,
      "step": 7773
    },
    {
      "epoch": 0.6951001430615165,
      "grad_norm": 0.15071750810190399,
      "learning_rate": 4.492549491938557e-05,
      "loss": 0.6375,
      "step": 7774
    },
    {
      "epoch": 0.695189556509299,
      "grad_norm": 0.17929891742224033,
      "learning_rate": 4.4901324992825975e-05,
      "loss": 0.7111,
      "step": 7775
    },
    {
      "epoch": 0.6952789699570815,
      "grad_norm": 0.15795503486675,
      "learning_rate": 4.487715968732568e-05,
      "loss": 0.6739,
      "step": 7776
    },
    {
      "epoch": 0.6953683834048641,
      "grad_norm": 0.15402920342363904,
      "learning_rate": 4.4852999004911425e-05,
      "loss": 0.6462,
      "step": 7777
    },
    {
      "epoch": 0.6954577968526466,
      "grad_norm": 0.16488819809840052,
      "learning_rate": 4.482884294760954e-05,
      "loss": 0.6782,
      "step": 7778
    },
    {
      "epoch": 0.6955472103004292,
      "grad_norm": 0.15133553748863782,
      "learning_rate": 4.480469151744596e-05,
      "loss": 0.6415,
      "step": 7779
    },
    {
      "epoch": 0.6956366237482118,
      "grad_norm": 0.15916709540767904,
      "learning_rate": 4.4780544716446294e-05,
      "loss": 0.6294,
      "step": 7780
    },
    {
      "epoch": 0.6957260371959942,
      "grad_norm": 0.15001404730528337,
      "learning_rate": 4.475640254663561e-05,
      "loss": 0.6478,
      "step": 7781
    },
    {
      "epoch": 0.6958154506437768,
      "grad_norm": 0.1412420742644898,
      "learning_rate": 4.473226501003873e-05,
      "loss": 0.6053,
      "step": 7782
    },
    {
      "epoch": 0.6959048640915594,
      "grad_norm": 0.16431450050434937,
      "learning_rate": 4.470813210868008e-05,
      "loss": 0.6459,
      "step": 7783
    },
    {
      "epoch": 0.695994277539342,
      "grad_norm": 0.17360016558316665,
      "learning_rate": 4.4684003844583534e-05,
      "loss": 0.3669,
      "step": 7784
    },
    {
      "epoch": 0.6960836909871244,
      "grad_norm": 0.13545717035824723,
      "learning_rate": 4.465988021977282e-05,
      "loss": 0.6244,
      "step": 7785
    },
    {
      "epoch": 0.696173104434907,
      "grad_norm": 0.17370830760746653,
      "learning_rate": 4.4635761236271144e-05,
      "loss": 0.6497,
      "step": 7786
    },
    {
      "epoch": 0.6962625178826896,
      "grad_norm": 0.18134724923900938,
      "learning_rate": 4.461164689610129e-05,
      "loss": 0.6569,
      "step": 7787
    },
    {
      "epoch": 0.6963519313304721,
      "grad_norm": 0.15321815855640578,
      "learning_rate": 4.458753720128571e-05,
      "loss": 0.6185,
      "step": 7788
    },
    {
      "epoch": 0.6964413447782547,
      "grad_norm": 0.1599247160230556,
      "learning_rate": 4.4563432153846494e-05,
      "loss": 0.6783,
      "step": 7789
    },
    {
      "epoch": 0.6965307582260372,
      "grad_norm": 0.1663075144419654,
      "learning_rate": 4.453933175580525e-05,
      "loss": 0.6567,
      "step": 7790
    },
    {
      "epoch": 0.6966201716738197,
      "grad_norm": 0.17726696367634123,
      "learning_rate": 4.451523600918327e-05,
      "loss": 0.685,
      "step": 7791
    },
    {
      "epoch": 0.6967095851216023,
      "grad_norm": 0.18794550738847182,
      "learning_rate": 4.4491144916001425e-05,
      "loss": 0.693,
      "step": 7792
    },
    {
      "epoch": 0.6967989985693849,
      "grad_norm": 0.16629750442989305,
      "learning_rate": 4.4467058478280235e-05,
      "loss": 0.668,
      "step": 7793
    },
    {
      "epoch": 0.6968884120171673,
      "grad_norm": 0.15913827691801097,
      "learning_rate": 4.444297669803981e-05,
      "loss": 0.6819,
      "step": 7794
    },
    {
      "epoch": 0.6969778254649499,
      "grad_norm": 0.13377804297153706,
      "learning_rate": 4.441889957729979e-05,
      "loss": 0.6346,
      "step": 7795
    },
    {
      "epoch": 0.6970672389127325,
      "grad_norm": 0.17132998985346257,
      "learning_rate": 4.439482711807955e-05,
      "loss": 0.673,
      "step": 7796
    },
    {
      "epoch": 0.697156652360515,
      "grad_norm": 0.1399812301724474,
      "learning_rate": 4.4370759322398006e-05,
      "loss": 0.6116,
      "step": 7797
    },
    {
      "epoch": 0.6972460658082976,
      "grad_norm": 0.1850936032033835,
      "learning_rate": 4.434669619227368e-05,
      "loss": 0.3404,
      "step": 7798
    },
    {
      "epoch": 0.6973354792560801,
      "grad_norm": 0.16207017822284456,
      "learning_rate": 4.432263772972475e-05,
      "loss": 0.6241,
      "step": 7799
    },
    {
      "epoch": 0.6974248927038627,
      "grad_norm": 0.18611318382900738,
      "learning_rate": 4.4298583936768976e-05,
      "loss": 0.3621,
      "step": 7800
    },
    {
      "epoch": 0.6975143061516452,
      "grad_norm": 0.15516829490608455,
      "learning_rate": 4.427453481542366e-05,
      "loss": 0.6749,
      "step": 7801
    },
    {
      "epoch": 0.6976037195994278,
      "grad_norm": 0.15586657339428073,
      "learning_rate": 4.4250490367705824e-05,
      "loss": 0.6415,
      "step": 7802
    },
    {
      "epoch": 0.6976931330472103,
      "grad_norm": 0.14825833832557914,
      "learning_rate": 4.4226450595632055e-05,
      "loss": 0.6515,
      "step": 7803
    },
    {
      "epoch": 0.6977825464949928,
      "grad_norm": 0.1640681370037909,
      "learning_rate": 4.420241550121849e-05,
      "loss": 0.5942,
      "step": 7804
    },
    {
      "epoch": 0.6978719599427754,
      "grad_norm": 0.13370408513549367,
      "learning_rate": 4.41783850864809e-05,
      "loss": 0.662,
      "step": 7805
    },
    {
      "epoch": 0.697961373390558,
      "grad_norm": 0.17248373292963948,
      "learning_rate": 4.4154359353434824e-05,
      "loss": 0.6876,
      "step": 7806
    },
    {
      "epoch": 0.6980507868383404,
      "grad_norm": 0.16243437760623058,
      "learning_rate": 4.4130338304095146e-05,
      "loss": 0.6591,
      "step": 7807
    },
    {
      "epoch": 0.698140200286123,
      "grad_norm": 0.14843966942152867,
      "learning_rate": 4.4106321940476516e-05,
      "loss": 0.6657,
      "step": 7808
    },
    {
      "epoch": 0.6982296137339056,
      "grad_norm": 0.13942624710007587,
      "learning_rate": 4.408231026459321e-05,
      "loss": 0.606,
      "step": 7809
    },
    {
      "epoch": 0.6983190271816881,
      "grad_norm": 0.15193082873782524,
      "learning_rate": 4.405830327845896e-05,
      "loss": 0.6591,
      "step": 7810
    },
    {
      "epoch": 0.6984084406294707,
      "grad_norm": 0.1583191440299485,
      "learning_rate": 4.403430098408726e-05,
      "loss": 0.6626,
      "step": 7811
    },
    {
      "epoch": 0.6984978540772532,
      "grad_norm": 0.15062350516184078,
      "learning_rate": 4.401030338349115e-05,
      "loss": 0.6211,
      "step": 7812
    },
    {
      "epoch": 0.6985872675250357,
      "grad_norm": 0.1627476544354769,
      "learning_rate": 4.3986310478683265e-05,
      "loss": 0.6604,
      "step": 7813
    },
    {
      "epoch": 0.6986766809728183,
      "grad_norm": 0.15830182231220535,
      "learning_rate": 4.3962322271675915e-05,
      "loss": 0.6587,
      "step": 7814
    },
    {
      "epoch": 0.6987660944206009,
      "grad_norm": 0.1604604781240377,
      "learning_rate": 4.393833876448089e-05,
      "loss": 0.6487,
      "step": 7815
    },
    {
      "epoch": 0.6988555078683834,
      "grad_norm": 0.15670671754929627,
      "learning_rate": 4.3914359959109686e-05,
      "loss": 0.6335,
      "step": 7816
    },
    {
      "epoch": 0.6989449213161659,
      "grad_norm": 0.14389335412412493,
      "learning_rate": 4.389038585757341e-05,
      "loss": 0.6429,
      "step": 7817
    },
    {
      "epoch": 0.6990343347639485,
      "grad_norm": 0.1459196094850136,
      "learning_rate": 4.3866416461882676e-05,
      "loss": 0.6254,
      "step": 7818
    },
    {
      "epoch": 0.6991237482117311,
      "grad_norm": 0.16333145613399055,
      "learning_rate": 4.3842451774047755e-05,
      "loss": 0.605,
      "step": 7819
    },
    {
      "epoch": 0.6992131616595136,
      "grad_norm": 0.15857997767960277,
      "learning_rate": 4.381849179607867e-05,
      "loss": 0.6619,
      "step": 7820
    },
    {
      "epoch": 0.6993025751072961,
      "grad_norm": 0.18911033278630393,
      "learning_rate": 4.379453652998479e-05,
      "loss": 0.374,
      "step": 7821
    },
    {
      "epoch": 0.6993919885550787,
      "grad_norm": 0.15566649161697405,
      "learning_rate": 4.377058597777524e-05,
      "loss": 0.6651,
      "step": 7822
    },
    {
      "epoch": 0.6994814020028612,
      "grad_norm": 0.14976164992981097,
      "learning_rate": 4.3746640141458786e-05,
      "loss": 0.6326,
      "step": 7823
    },
    {
      "epoch": 0.6995708154506438,
      "grad_norm": 0.14932400664708037,
      "learning_rate": 4.372269902304363e-05,
      "loss": 0.6413,
      "step": 7824
    },
    {
      "epoch": 0.6996602288984263,
      "grad_norm": 0.1596395352606119,
      "learning_rate": 4.369876262453776e-05,
      "loss": 0.6466,
      "step": 7825
    },
    {
      "epoch": 0.6997496423462088,
      "grad_norm": 0.13411890311664912,
      "learning_rate": 4.367483094794866e-05,
      "loss": 0.6157,
      "step": 7826
    },
    {
      "epoch": 0.6998390557939914,
      "grad_norm": 0.18074787651677457,
      "learning_rate": 4.365090399528349e-05,
      "loss": 0.6639,
      "step": 7827
    },
    {
      "epoch": 0.699928469241774,
      "grad_norm": 0.16545739869860482,
      "learning_rate": 4.362698176854892e-05,
      "loss": 0.6824,
      "step": 7828
    },
    {
      "epoch": 0.7000178826895566,
      "grad_norm": 0.1521019326703392,
      "learning_rate": 4.360306426975136e-05,
      "loss": 0.6525,
      "step": 7829
    },
    {
      "epoch": 0.700107296137339,
      "grad_norm": 0.16542001356137723,
      "learning_rate": 4.357915150089665e-05,
      "loss": 0.6248,
      "step": 7830
    },
    {
      "epoch": 0.7001967095851216,
      "grad_norm": 0.15529502793740196,
      "learning_rate": 4.355524346399037e-05,
      "loss": 0.6838,
      "step": 7831
    },
    {
      "epoch": 0.7002861230329042,
      "grad_norm": 0.1565333690780114,
      "learning_rate": 4.3531340161037684e-05,
      "loss": 0.6228,
      "step": 7832
    },
    {
      "epoch": 0.7003755364806867,
      "grad_norm": 0.15929448539682844,
      "learning_rate": 4.350744159404323e-05,
      "loss": 0.6595,
      "step": 7833
    },
    {
      "epoch": 0.7004649499284692,
      "grad_norm": 0.14358081677034235,
      "learning_rate": 4.348354776501149e-05,
      "loss": 0.621,
      "step": 7834
    },
    {
      "epoch": 0.7005543633762518,
      "grad_norm": 0.15273001865399433,
      "learning_rate": 4.345965867594631e-05,
      "loss": 0.668,
      "step": 7835
    },
    {
      "epoch": 0.7006437768240343,
      "grad_norm": 0.16092380663846054,
      "learning_rate": 4.3435774328851276e-05,
      "loss": 0.6682,
      "step": 7836
    },
    {
      "epoch": 0.7007331902718169,
      "grad_norm": 0.14757172762580745,
      "learning_rate": 4.3411894725729576e-05,
      "loss": 0.6434,
      "step": 7837
    },
    {
      "epoch": 0.7008226037195995,
      "grad_norm": 0.1781455678137899,
      "learning_rate": 4.338801986858388e-05,
      "loss": 0.4064,
      "step": 7838
    },
    {
      "epoch": 0.7009120171673819,
      "grad_norm": 0.14990141085977085,
      "learning_rate": 4.336414975941656e-05,
      "loss": 0.6167,
      "step": 7839
    },
    {
      "epoch": 0.7010014306151645,
      "grad_norm": 0.1530657809705885,
      "learning_rate": 4.3340284400229666e-05,
      "loss": 0.6434,
      "step": 7840
    },
    {
      "epoch": 0.7010908440629471,
      "grad_norm": 0.1545394422575906,
      "learning_rate": 4.331642379302466e-05,
      "loss": 0.6324,
      "step": 7841
    },
    {
      "epoch": 0.7011802575107297,
      "grad_norm": 0.1678358366737077,
      "learning_rate": 4.329256793980274e-05,
      "loss": 0.6978,
      "step": 7842
    },
    {
      "epoch": 0.7012696709585121,
      "grad_norm": 0.16655404176690206,
      "learning_rate": 4.326871684256469e-05,
      "loss": 0.6664,
      "step": 7843
    },
    {
      "epoch": 0.7013590844062947,
      "grad_norm": 0.15345607619246565,
      "learning_rate": 4.324487050331082e-05,
      "loss": 0.6321,
      "step": 7844
    },
    {
      "epoch": 0.7014484978540773,
      "grad_norm": 0.16152183609889087,
      "learning_rate": 4.3221028924041105e-05,
      "loss": 0.6628,
      "step": 7845
    },
    {
      "epoch": 0.7015379113018598,
      "grad_norm": 0.17518900510622068,
      "learning_rate": 4.3197192106755125e-05,
      "loss": 0.6662,
      "step": 7846
    },
    {
      "epoch": 0.7016273247496424,
      "grad_norm": 0.16456959494182516,
      "learning_rate": 4.317336005345204e-05,
      "loss": 0.3867,
      "step": 7847
    },
    {
      "epoch": 0.7017167381974249,
      "grad_norm": 0.13807688352910924,
      "learning_rate": 4.314953276613066e-05,
      "loss": 0.6077,
      "step": 7848
    },
    {
      "epoch": 0.7018061516452074,
      "grad_norm": 0.15663302824751782,
      "learning_rate": 4.312571024678926e-05,
      "loss": 0.6088,
      "step": 7849
    },
    {
      "epoch": 0.70189556509299,
      "grad_norm": 0.16177597079778175,
      "learning_rate": 4.310189249742588e-05,
      "loss": 0.6702,
      "step": 7850
    },
    {
      "epoch": 0.7019849785407726,
      "grad_norm": 0.1595825486060163,
      "learning_rate": 4.307807952003804e-05,
      "loss": 0.6486,
      "step": 7851
    },
    {
      "epoch": 0.702074391988555,
      "grad_norm": 0.16044183372234325,
      "learning_rate": 4.305427131662296e-05,
      "loss": 0.6684,
      "step": 7852
    },
    {
      "epoch": 0.7021638054363376,
      "grad_norm": 0.15447102823337375,
      "learning_rate": 4.303046788917732e-05,
      "loss": 0.6633,
      "step": 7853
    },
    {
      "epoch": 0.7022532188841202,
      "grad_norm": 0.15053979129210343,
      "learning_rate": 4.3006669239697596e-05,
      "loss": 0.5937,
      "step": 7854
    },
    {
      "epoch": 0.7023426323319027,
      "grad_norm": 0.1605801097435217,
      "learning_rate": 4.298287537017965e-05,
      "loss": 0.6384,
      "step": 7855
    },
    {
      "epoch": 0.7024320457796852,
      "grad_norm": 0.16241956901862897,
      "learning_rate": 4.29590862826191e-05,
      "loss": 0.6388,
      "step": 7856
    },
    {
      "epoch": 0.7025214592274678,
      "grad_norm": 0.13698722734118074,
      "learning_rate": 4.293530197901112e-05,
      "loss": 0.615,
      "step": 7857
    },
    {
      "epoch": 0.7026108726752504,
      "grad_norm": 0.16918137212526496,
      "learning_rate": 4.291152246135042e-05,
      "loss": 0.6301,
      "step": 7858
    },
    {
      "epoch": 0.7027002861230329,
      "grad_norm": 0.1607523190934879,
      "learning_rate": 4.288774773163138e-05,
      "loss": 0.6021,
      "step": 7859
    },
    {
      "epoch": 0.7027896995708155,
      "grad_norm": 0.1532046804529666,
      "learning_rate": 4.286397779184796e-05,
      "loss": 0.6216,
      "step": 7860
    },
    {
      "epoch": 0.702879113018598,
      "grad_norm": 0.15234893171771086,
      "learning_rate": 4.2840212643993725e-05,
      "loss": 0.6596,
      "step": 7861
    },
    {
      "epoch": 0.7029685264663805,
      "grad_norm": 0.1657448686002503,
      "learning_rate": 4.2816452290061826e-05,
      "loss": 0.3582,
      "step": 7862
    },
    {
      "epoch": 0.7030579399141631,
      "grad_norm": 0.18339396646723816,
      "learning_rate": 4.279269673204504e-05,
      "loss": 0.7101,
      "step": 7863
    },
    {
      "epoch": 0.7031473533619457,
      "grad_norm": 0.16609408670880166,
      "learning_rate": 4.276894597193567e-05,
      "loss": 0.6338,
      "step": 7864
    },
    {
      "epoch": 0.7032367668097281,
      "grad_norm": 0.15129774851295189,
      "learning_rate": 4.274520001172567e-05,
      "loss": 0.6224,
      "step": 7865
    },
    {
      "epoch": 0.7033261802575107,
      "grad_norm": 0.14919276451148517,
      "learning_rate": 4.2721458853406646e-05,
      "loss": 0.6284,
      "step": 7866
    },
    {
      "epoch": 0.7034155937052933,
      "grad_norm": 0.16345884113750456,
      "learning_rate": 4.2697722498969616e-05,
      "loss": 0.6409,
      "step": 7867
    },
    {
      "epoch": 0.7035050071530758,
      "grad_norm": 0.16646758621194882,
      "learning_rate": 4.267399095040546e-05,
      "loss": 0.6622,
      "step": 7868
    },
    {
      "epoch": 0.7035944206008584,
      "grad_norm": 0.13228754329281397,
      "learning_rate": 4.265026420970443e-05,
      "loss": 0.5941,
      "step": 7869
    },
    {
      "epoch": 0.7036838340486409,
      "grad_norm": 0.15243541072420738,
      "learning_rate": 4.2626542278856464e-05,
      "loss": 0.6657,
      "step": 7870
    },
    {
      "epoch": 0.7037732474964234,
      "grad_norm": 0.15708369454951793,
      "learning_rate": 4.2602825159851156e-05,
      "loss": 0.6481,
      "step": 7871
    },
    {
      "epoch": 0.703862660944206,
      "grad_norm": 0.1632451343201185,
      "learning_rate": 4.257911285467754e-05,
      "loss": 0.6438,
      "step": 7872
    },
    {
      "epoch": 0.7039520743919886,
      "grad_norm": 0.1522573842574241,
      "learning_rate": 4.2555405365324385e-05,
      "loss": 0.6547,
      "step": 7873
    },
    {
      "epoch": 0.704041487839771,
      "grad_norm": 0.15043265850955712,
      "learning_rate": 4.2531702693780005e-05,
      "loss": 0.629,
      "step": 7874
    },
    {
      "epoch": 0.7041309012875536,
      "grad_norm": 0.154132763493458,
      "learning_rate": 4.250800484203232e-05,
      "loss": 0.6098,
      "step": 7875
    },
    {
      "epoch": 0.7042203147353362,
      "grad_norm": 0.15692450153132187,
      "learning_rate": 4.2484311812068836e-05,
      "loss": 0.6803,
      "step": 7876
    },
    {
      "epoch": 0.7043097281831188,
      "grad_norm": 0.18404424542162465,
      "learning_rate": 4.246062360587669e-05,
      "loss": 0.6756,
      "step": 7877
    },
    {
      "epoch": 0.7043991416309013,
      "grad_norm": 0.19838526527424835,
      "learning_rate": 4.243694022544251e-05,
      "loss": 0.6299,
      "step": 7878
    },
    {
      "epoch": 0.7044885550786838,
      "grad_norm": 0.15996228239136787,
      "learning_rate": 4.241326167275265e-05,
      "loss": 0.6447,
      "step": 7879
    },
    {
      "epoch": 0.7045779685264664,
      "grad_norm": 0.16177639754088444,
      "learning_rate": 4.238958794979302e-05,
      "loss": 0.6468,
      "step": 7880
    },
    {
      "epoch": 0.7046673819742489,
      "grad_norm": 0.15608345720506592,
      "learning_rate": 4.236591905854898e-05,
      "loss": 0.6636,
      "step": 7881
    },
    {
      "epoch": 0.7047567954220315,
      "grad_norm": 0.17050847675050246,
      "learning_rate": 4.23422550010058e-05,
      "loss": 0.636,
      "step": 7882
    },
    {
      "epoch": 0.704846208869814,
      "grad_norm": 0.15656488155878945,
      "learning_rate": 4.231859577914802e-05,
      "loss": 0.6118,
      "step": 7883
    },
    {
      "epoch": 0.7049356223175965,
      "grad_norm": 0.156692031991758,
      "learning_rate": 4.229494139495995e-05,
      "loss": 0.6432,
      "step": 7884
    },
    {
      "epoch": 0.7050250357653791,
      "grad_norm": 0.17258445990361757,
      "learning_rate": 4.2271291850425455e-05,
      "loss": 0.6779,
      "step": 7885
    },
    {
      "epoch": 0.7051144492131617,
      "grad_norm": 0.15165939206582918,
      "learning_rate": 4.224764714752803e-05,
      "loss": 0.6372,
      "step": 7886
    },
    {
      "epoch": 0.7052038626609443,
      "grad_norm": 0.17161250595692237,
      "learning_rate": 4.2224007288250645e-05,
      "loss": 0.6762,
      "step": 7887
    },
    {
      "epoch": 0.7052932761087267,
      "grad_norm": 0.1668160953840843,
      "learning_rate": 4.2200372274576e-05,
      "loss": 0.6711,
      "step": 7888
    },
    {
      "epoch": 0.7053826895565093,
      "grad_norm": 0.1541502529885879,
      "learning_rate": 4.2176742108486334e-05,
      "loss": 0.6162,
      "step": 7889
    },
    {
      "epoch": 0.7054721030042919,
      "grad_norm": 0.1367984602618887,
      "learning_rate": 4.2153116791963465e-05,
      "loss": 0.6221,
      "step": 7890
    },
    {
      "epoch": 0.7055615164520744,
      "grad_norm": 0.15398987990449708,
      "learning_rate": 4.212949632698887e-05,
      "loss": 0.6432,
      "step": 7891
    },
    {
      "epoch": 0.7056509298998569,
      "grad_norm": 0.1416092340414237,
      "learning_rate": 4.210588071554349e-05,
      "loss": 0.6302,
      "step": 7892
    },
    {
      "epoch": 0.7057403433476395,
      "grad_norm": 0.15442020289036604,
      "learning_rate": 4.208226995960798e-05,
      "loss": 0.6496,
      "step": 7893
    },
    {
      "epoch": 0.705829756795422,
      "grad_norm": 0.15823915444319794,
      "learning_rate": 4.205866406116258e-05,
      "loss": 0.625,
      "step": 7894
    },
    {
      "epoch": 0.7059191702432046,
      "grad_norm": 0.1778035424153899,
      "learning_rate": 4.203506302218697e-05,
      "loss": 0.7051,
      "step": 7895
    },
    {
      "epoch": 0.7060085836909872,
      "grad_norm": 0.1734121876493159,
      "learning_rate": 4.2011466844660655e-05,
      "loss": 0.6381,
      "step": 7896
    },
    {
      "epoch": 0.7060979971387696,
      "grad_norm": 0.1695396039206096,
      "learning_rate": 4.1987875530562624e-05,
      "loss": 0.6463,
      "step": 7897
    },
    {
      "epoch": 0.7061874105865522,
      "grad_norm": 0.17584238548419542,
      "learning_rate": 4.1964289081871376e-05,
      "loss": 0.6687,
      "step": 7898
    },
    {
      "epoch": 0.7062768240343348,
      "grad_norm": 0.16140930768072567,
      "learning_rate": 4.1940707500565114e-05,
      "loss": 0.6653,
      "step": 7899
    },
    {
      "epoch": 0.7063662374821174,
      "grad_norm": 0.15737818509112733,
      "learning_rate": 4.191713078862163e-05,
      "loss": 0.6778,
      "step": 7900
    },
    {
      "epoch": 0.7064556509298998,
      "grad_norm": 0.14978906648394197,
      "learning_rate": 4.189355894801821e-05,
      "loss": 0.6337,
      "step": 7901
    },
    {
      "epoch": 0.7065450643776824,
      "grad_norm": 0.16204275521816897,
      "learning_rate": 4.186999198073182e-05,
      "loss": 0.6179,
      "step": 7902
    },
    {
      "epoch": 0.706634477825465,
      "grad_norm": 0.13449136720688873,
      "learning_rate": 4.1846429888739005e-05,
      "loss": 0.6395,
      "step": 7903
    },
    {
      "epoch": 0.7067238912732475,
      "grad_norm": 0.1543879256344875,
      "learning_rate": 4.182287267401587e-05,
      "loss": 0.6843,
      "step": 7904
    },
    {
      "epoch": 0.70681330472103,
      "grad_norm": 0.13459732738310007,
      "learning_rate": 4.17993203385382e-05,
      "loss": 0.6197,
      "step": 7905
    },
    {
      "epoch": 0.7069027181688126,
      "grad_norm": 0.14765314742190386,
      "learning_rate": 4.1775772884281185e-05,
      "loss": 0.6106,
      "step": 7906
    },
    {
      "epoch": 0.7069921316165951,
      "grad_norm": 0.1658097843390479,
      "learning_rate": 4.17522303132198e-05,
      "loss": 0.6636,
      "step": 7907
    },
    {
      "epoch": 0.7070815450643777,
      "grad_norm": 0.17079025506397064,
      "learning_rate": 4.17286926273285e-05,
      "loss": 0.6422,
      "step": 7908
    },
    {
      "epoch": 0.7071709585121603,
      "grad_norm": 0.15240764178011829,
      "learning_rate": 4.170515982858139e-05,
      "loss": 0.6452,
      "step": 7909
    },
    {
      "epoch": 0.7072603719599427,
      "grad_norm": 0.15603104498425344,
      "learning_rate": 4.168163191895211e-05,
      "loss": 0.6596,
      "step": 7910
    },
    {
      "epoch": 0.7073497854077253,
      "grad_norm": 0.155813747067348,
      "learning_rate": 4.1658108900413975e-05,
      "loss": 0.6122,
      "step": 7911
    },
    {
      "epoch": 0.7074391988555079,
      "grad_norm": 0.15051425425411755,
      "learning_rate": 4.163459077493974e-05,
      "loss": 0.6093,
      "step": 7912
    },
    {
      "epoch": 0.7075286123032904,
      "grad_norm": 0.1470061679516007,
      "learning_rate": 4.16110775445019e-05,
      "loss": 0.6092,
      "step": 7913
    },
    {
      "epoch": 0.7076180257510729,
      "grad_norm": 0.17150726697863122,
      "learning_rate": 4.158756921107251e-05,
      "loss": 0.3785,
      "step": 7914
    },
    {
      "epoch": 0.7077074391988555,
      "grad_norm": 0.1517464136582086,
      "learning_rate": 4.15640657766231e-05,
      "loss": 0.6499,
      "step": 7915
    },
    {
      "epoch": 0.707796852646638,
      "grad_norm": 0.15703555761584378,
      "learning_rate": 4.1540567243124886e-05,
      "loss": 0.6316,
      "step": 7916
    },
    {
      "epoch": 0.7078862660944206,
      "grad_norm": 0.1306309273147412,
      "learning_rate": 4.1517073612548764e-05,
      "loss": 0.6437,
      "step": 7917
    },
    {
      "epoch": 0.7079756795422032,
      "grad_norm": 0.15907941575099674,
      "learning_rate": 4.1493584886865026e-05,
      "loss": 0.6837,
      "step": 7918
    },
    {
      "epoch": 0.7080650929899857,
      "grad_norm": 0.161143066548784,
      "learning_rate": 4.147010106804365e-05,
      "loss": 0.655,
      "step": 7919
    },
    {
      "epoch": 0.7081545064377682,
      "grad_norm": 0.1543448311206541,
      "learning_rate": 4.144662215805426e-05,
      "loss": 0.6673,
      "step": 7920
    },
    {
      "epoch": 0.7082439198855508,
      "grad_norm": 0.15182048824810454,
      "learning_rate": 4.142314815886591e-05,
      "loss": 0.6294,
      "step": 7921
    },
    {
      "epoch": 0.7083333333333334,
      "grad_norm": 0.17463032565176337,
      "learning_rate": 4.1399679072447384e-05,
      "loss": 0.6922,
      "step": 7922
    },
    {
      "epoch": 0.7084227467811158,
      "grad_norm": 0.14449014221241507,
      "learning_rate": 4.137621490076701e-05,
      "loss": 0.6336,
      "step": 7923
    },
    {
      "epoch": 0.7085121602288984,
      "grad_norm": 0.16579258647180634,
      "learning_rate": 4.135275564579268e-05,
      "loss": 0.6621,
      "step": 7924
    },
    {
      "epoch": 0.708601573676681,
      "grad_norm": 0.15837124715572262,
      "learning_rate": 4.1329301309491955e-05,
      "loss": 0.6618,
      "step": 7925
    },
    {
      "epoch": 0.7086909871244635,
      "grad_norm": 0.1538702227733489,
      "learning_rate": 4.130585189383183e-05,
      "loss": 0.6511,
      "step": 7926
    },
    {
      "epoch": 0.7087804005722461,
      "grad_norm": 0.15828730662377538,
      "learning_rate": 4.128240740077902e-05,
      "loss": 0.6487,
      "step": 7927
    },
    {
      "epoch": 0.7088698140200286,
      "grad_norm": 0.1686640228093828,
      "learning_rate": 4.1258967832299835e-05,
      "loss": 0.6713,
      "step": 7928
    },
    {
      "epoch": 0.7089592274678111,
      "grad_norm": 0.13572280394987357,
      "learning_rate": 4.123553319035999e-05,
      "loss": 0.6356,
      "step": 7929
    },
    {
      "epoch": 0.7090486409155937,
      "grad_norm": 0.15415316585110495,
      "learning_rate": 4.121210347692506e-05,
      "loss": 0.6663,
      "step": 7930
    },
    {
      "epoch": 0.7091380543633763,
      "grad_norm": 0.14908542922430784,
      "learning_rate": 4.1188678693960034e-05,
      "loss": 0.6353,
      "step": 7931
    },
    {
      "epoch": 0.7092274678111588,
      "grad_norm": 0.1593871652315684,
      "learning_rate": 4.116525884342947e-05,
      "loss": 0.647,
      "step": 7932
    },
    {
      "epoch": 0.7093168812589413,
      "grad_norm": 0.15313573887063,
      "learning_rate": 4.114184392729758e-05,
      "loss": 0.6399,
      "step": 7933
    },
    {
      "epoch": 0.7094062947067239,
      "grad_norm": 0.1596014203034635,
      "learning_rate": 4.1118433947528215e-05,
      "loss": 0.5842,
      "step": 7934
    },
    {
      "epoch": 0.7094957081545065,
      "grad_norm": 0.16348543281440128,
      "learning_rate": 4.109502890608463e-05,
      "loss": 0.6455,
      "step": 7935
    },
    {
      "epoch": 0.709585121602289,
      "grad_norm": 0.17650913024257348,
      "learning_rate": 4.107162880492984e-05,
      "loss": 0.6504,
      "step": 7936
    },
    {
      "epoch": 0.7096745350500715,
      "grad_norm": 0.16540266120734026,
      "learning_rate": 4.104823364602638e-05,
      "loss": 0.6452,
      "step": 7937
    },
    {
      "epoch": 0.7097639484978541,
      "grad_norm": 0.15014717068852562,
      "learning_rate": 4.1024843431336355e-05,
      "loss": 0.6654,
      "step": 7938
    },
    {
      "epoch": 0.7098533619456366,
      "grad_norm": 0.16196711551163367,
      "learning_rate": 4.100145816282154e-05,
      "loss": 0.6734,
      "step": 7939
    },
    {
      "epoch": 0.7099427753934192,
      "grad_norm": 0.13986701738024782,
      "learning_rate": 4.097807784244313e-05,
      "loss": 0.6318,
      "step": 7940
    },
    {
      "epoch": 0.7100321888412017,
      "grad_norm": 0.15278990075402,
      "learning_rate": 4.095470247216205e-05,
      "loss": 0.6467,
      "step": 7941
    },
    {
      "epoch": 0.7101216022889842,
      "grad_norm": 0.154628574187658,
      "learning_rate": 4.0931332053938766e-05,
      "loss": 0.6821,
      "step": 7942
    },
    {
      "epoch": 0.7102110157367668,
      "grad_norm": 0.1599835369272049,
      "learning_rate": 4.090796658973333e-05,
      "loss": 0.6915,
      "step": 7943
    },
    {
      "epoch": 0.7103004291845494,
      "grad_norm": 0.17754466205865457,
      "learning_rate": 4.0884606081505374e-05,
      "loss": 0.6789,
      "step": 7944
    },
    {
      "epoch": 0.710389842632332,
      "grad_norm": 0.1468025144293837,
      "learning_rate": 4.0861250531214136e-05,
      "loss": 0.6479,
      "step": 7945
    },
    {
      "epoch": 0.7104792560801144,
      "grad_norm": 0.1413198270619886,
      "learning_rate": 4.083789994081837e-05,
      "loss": 0.6396,
      "step": 7946
    },
    {
      "epoch": 0.710568669527897,
      "grad_norm": 0.17377831275368996,
      "learning_rate": 4.081455431227648e-05,
      "loss": 0.6865,
      "step": 7947
    },
    {
      "epoch": 0.7106580829756796,
      "grad_norm": 0.15129980464905793,
      "learning_rate": 4.0791213647546475e-05,
      "loss": 0.6967,
      "step": 7948
    },
    {
      "epoch": 0.7107474964234621,
      "grad_norm": 0.1599957369941266,
      "learning_rate": 4.0767877948585845e-05,
      "loss": 0.6518,
      "step": 7949
    },
    {
      "epoch": 0.7108369098712446,
      "grad_norm": 0.15022212590724787,
      "learning_rate": 4.0744547217351715e-05,
      "loss": 0.6484,
      "step": 7950
    },
    {
      "epoch": 0.7109263233190272,
      "grad_norm": 0.1533670527694685,
      "learning_rate": 4.072122145580093e-05,
      "loss": 0.6391,
      "step": 7951
    },
    {
      "epoch": 0.7110157367668097,
      "grad_norm": 0.1868224931562759,
      "learning_rate": 4.069790066588967e-05,
      "loss": 0.3754,
      "step": 7952
    },
    {
      "epoch": 0.7111051502145923,
      "grad_norm": 0.17157675906037373,
      "learning_rate": 4.067458484957386e-05,
      "loss": 0.6406,
      "step": 7953
    },
    {
      "epoch": 0.7111945636623748,
      "grad_norm": 0.17819235509662848,
      "learning_rate": 4.0651274008809004e-05,
      "loss": 0.3788,
      "step": 7954
    },
    {
      "epoch": 0.7112839771101573,
      "grad_norm": 0.14568493092863802,
      "learning_rate": 4.06279681455501e-05,
      "loss": 0.6396,
      "step": 7955
    },
    {
      "epoch": 0.7113733905579399,
      "grad_norm": 0.16071458932755955,
      "learning_rate": 4.060466726175179e-05,
      "loss": 0.6423,
      "step": 7956
    },
    {
      "epoch": 0.7114628040057225,
      "grad_norm": 0.13051076462642716,
      "learning_rate": 4.0581371359368315e-05,
      "loss": 0.6152,
      "step": 7957
    },
    {
      "epoch": 0.711552217453505,
      "grad_norm": 0.1425491125463729,
      "learning_rate": 4.0558080440353455e-05,
      "loss": 0.5966,
      "step": 7958
    },
    {
      "epoch": 0.7116416309012875,
      "grad_norm": 0.15494294931110106,
      "learning_rate": 4.0534794506660645e-05,
      "loss": 0.6346,
      "step": 7959
    },
    {
      "epoch": 0.7117310443490701,
      "grad_norm": 0.14166352289090348,
      "learning_rate": 4.0511513560242766e-05,
      "loss": 0.6519,
      "step": 7960
    },
    {
      "epoch": 0.7118204577968527,
      "grad_norm": 0.1567479419216292,
      "learning_rate": 4.0488237603052396e-05,
      "loss": 0.6645,
      "step": 7961
    },
    {
      "epoch": 0.7119098712446352,
      "grad_norm": 0.162895705594762,
      "learning_rate": 4.04649666370417e-05,
      "loss": 0.6875,
      "step": 7962
    },
    {
      "epoch": 0.7119992846924177,
      "grad_norm": 0.15009963683401836,
      "learning_rate": 4.044170066416233e-05,
      "loss": 0.6426,
      "step": 7963
    },
    {
      "epoch": 0.7120886981402003,
      "grad_norm": 0.14469558999141394,
      "learning_rate": 4.041843968636555e-05,
      "loss": 0.6226,
      "step": 7964
    },
    {
      "epoch": 0.7121781115879828,
      "grad_norm": 0.14786564811654068,
      "learning_rate": 4.0395183705602354e-05,
      "loss": 0.6083,
      "step": 7965
    },
    {
      "epoch": 0.7122675250357654,
      "grad_norm": 0.15247913356828507,
      "learning_rate": 4.037193272382308e-05,
      "loss": 0.6322,
      "step": 7966
    },
    {
      "epoch": 0.712356938483548,
      "grad_norm": 0.16414988921667303,
      "learning_rate": 4.034868674297779e-05,
      "loss": 0.6519,
      "step": 7967
    },
    {
      "epoch": 0.7124463519313304,
      "grad_norm": 0.17085585657607671,
      "learning_rate": 4.0325445765016145e-05,
      "loss": 0.6837,
      "step": 7968
    },
    {
      "epoch": 0.712535765379113,
      "grad_norm": 0.14218396295242341,
      "learning_rate": 4.030220979188726e-05,
      "loss": 0.6397,
      "step": 7969
    },
    {
      "epoch": 0.7126251788268956,
      "grad_norm": 0.21088056601531727,
      "learning_rate": 4.027897882553994e-05,
      "loss": 0.6513,
      "step": 7970
    },
    {
      "epoch": 0.7127145922746781,
      "grad_norm": 0.1450456020823325,
      "learning_rate": 4.025575286792254e-05,
      "loss": 0.6412,
      "step": 7971
    },
    {
      "epoch": 0.7128040057224606,
      "grad_norm": 0.14196164750522877,
      "learning_rate": 4.0232531920983e-05,
      "loss": 0.6322,
      "step": 7972
    },
    {
      "epoch": 0.7128934191702432,
      "grad_norm": 0.16825338191443312,
      "learning_rate": 4.020931598666882e-05,
      "loss": 0.6298,
      "step": 7973
    },
    {
      "epoch": 0.7129828326180258,
      "grad_norm": 0.14790728456414262,
      "learning_rate": 4.018610506692713e-05,
      "loss": 0.6274,
      "step": 7974
    },
    {
      "epoch": 0.7130722460658083,
      "grad_norm": 0.16517235048592496,
      "learning_rate": 4.0162899163704545e-05,
      "loss": 0.6825,
      "step": 7975
    },
    {
      "epoch": 0.7131616595135909,
      "grad_norm": 0.16343125065274156,
      "learning_rate": 4.0139698278947336e-05,
      "loss": 0.6023,
      "step": 7976
    },
    {
      "epoch": 0.7132510729613734,
      "grad_norm": 0.161790076738776,
      "learning_rate": 4.0116502414601384e-05,
      "loss": 0.6427,
      "step": 7977
    },
    {
      "epoch": 0.7133404864091559,
      "grad_norm": 0.15998213172860953,
      "learning_rate": 4.009331157261198e-05,
      "loss": 0.6269,
      "step": 7978
    },
    {
      "epoch": 0.7134298998569385,
      "grad_norm": 0.16002985693378882,
      "learning_rate": 4.007012575492425e-05,
      "loss": 0.6925,
      "step": 7979
    },
    {
      "epoch": 0.7135193133047211,
      "grad_norm": 0.16628749191055275,
      "learning_rate": 4.004694496348267e-05,
      "loss": 0.6499,
      "step": 7980
    },
    {
      "epoch": 0.7136087267525035,
      "grad_norm": 0.19658449747804757,
      "learning_rate": 4.0023769200231395e-05,
      "loss": 0.7096,
      "step": 7981
    },
    {
      "epoch": 0.7136981402002861,
      "grad_norm": 0.1752008710834928,
      "learning_rate": 4.0000598467114214e-05,
      "loss": 0.656,
      "step": 7982
    },
    {
      "epoch": 0.7137875536480687,
      "grad_norm": 0.14828389638115091,
      "learning_rate": 3.997743276607434e-05,
      "loss": 0.6295,
      "step": 7983
    },
    {
      "epoch": 0.7138769670958512,
      "grad_norm": 0.18683480382976214,
      "learning_rate": 3.995427209905469e-05,
      "loss": 0.6537,
      "step": 7984
    },
    {
      "epoch": 0.7139663805436338,
      "grad_norm": 0.17310857537250085,
      "learning_rate": 3.993111646799772e-05,
      "loss": 0.6572,
      "step": 7985
    },
    {
      "epoch": 0.7140557939914163,
      "grad_norm": 0.1691735427799881,
      "learning_rate": 3.990796587484548e-05,
      "loss": 0.6418,
      "step": 7986
    },
    {
      "epoch": 0.7141452074391988,
      "grad_norm": 0.1515740861394152,
      "learning_rate": 3.988482032153955e-05,
      "loss": 0.6461,
      "step": 7987
    },
    {
      "epoch": 0.7142346208869814,
      "grad_norm": 0.19550318659072297,
      "learning_rate": 3.986167981002118e-05,
      "loss": 0.6796,
      "step": 7988
    },
    {
      "epoch": 0.714324034334764,
      "grad_norm": 0.18604606335272672,
      "learning_rate": 3.983854434223107e-05,
      "loss": 0.4074,
      "step": 7989
    },
    {
      "epoch": 0.7144134477825465,
      "grad_norm": 0.13095752202622926,
      "learning_rate": 3.981541392010958e-05,
      "loss": 0.6137,
      "step": 7990
    },
    {
      "epoch": 0.714502861230329,
      "grad_norm": 0.15055844709871768,
      "learning_rate": 3.979228854559668e-05,
      "loss": 0.6381,
      "step": 7991
    },
    {
      "epoch": 0.7145922746781116,
      "grad_norm": 0.15462896706261917,
      "learning_rate": 3.9769168220631745e-05,
      "loss": 0.6199,
      "step": 7992
    },
    {
      "epoch": 0.7146816881258942,
      "grad_norm": 0.16357222753088685,
      "learning_rate": 3.974605294715402e-05,
      "loss": 0.6772,
      "step": 7993
    },
    {
      "epoch": 0.7147711015736766,
      "grad_norm": 0.1557079207966369,
      "learning_rate": 3.972294272710202e-05,
      "loss": 0.6525,
      "step": 7994
    },
    {
      "epoch": 0.7148605150214592,
      "grad_norm": 0.14555458418623005,
      "learning_rate": 3.9699837562414024e-05,
      "loss": 0.6319,
      "step": 7995
    },
    {
      "epoch": 0.7149499284692418,
      "grad_norm": 0.160566317913957,
      "learning_rate": 3.967673745502785e-05,
      "loss": 0.6455,
      "step": 7996
    },
    {
      "epoch": 0.7150393419170243,
      "grad_norm": 0.1384908020925043,
      "learning_rate": 3.965364240688083e-05,
      "loss": 0.6092,
      "step": 7997
    },
    {
      "epoch": 0.7151287553648069,
      "grad_norm": 0.16602249932738378,
      "learning_rate": 3.963055241990994e-05,
      "loss": 0.6771,
      "step": 7998
    },
    {
      "epoch": 0.7152181688125894,
      "grad_norm": 0.13680594445897568,
      "learning_rate": 3.96074674960517e-05,
      "loss": 0.6322,
      "step": 7999
    },
    {
      "epoch": 0.7153075822603719,
      "grad_norm": 0.14655046065647015,
      "learning_rate": 3.958438763724224e-05,
      "loss": 0.6048,
      "step": 8000
    },
    {
      "epoch": 0.7153969957081545,
      "grad_norm": 0.15901256620205598,
      "learning_rate": 3.956131284541722e-05,
      "loss": 0.6599,
      "step": 8001
    },
    {
      "epoch": 0.7154864091559371,
      "grad_norm": 0.16130359244761516,
      "learning_rate": 3.953824312251193e-05,
      "loss": 0.6595,
      "step": 8002
    },
    {
      "epoch": 0.7155758226037195,
      "grad_norm": 0.16651146842271422,
      "learning_rate": 3.951517847046113e-05,
      "loss": 0.6588,
      "step": 8003
    },
    {
      "epoch": 0.7156652360515021,
      "grad_norm": 0.15632565640069634,
      "learning_rate": 3.949211889119928e-05,
      "loss": 0.6513,
      "step": 8004
    },
    {
      "epoch": 0.7157546494992847,
      "grad_norm": 0.17454995427060174,
      "learning_rate": 3.946906438666037e-05,
      "loss": 0.6745,
      "step": 8005
    },
    {
      "epoch": 0.7158440629470673,
      "grad_norm": 0.14992175952406725,
      "learning_rate": 3.9446014958777863e-05,
      "loss": 0.6487,
      "step": 8006
    },
    {
      "epoch": 0.7159334763948498,
      "grad_norm": 0.1641007812963694,
      "learning_rate": 3.942297060948498e-05,
      "loss": 0.6256,
      "step": 8007
    },
    {
      "epoch": 0.7160228898426323,
      "grad_norm": 0.14823465578996245,
      "learning_rate": 3.9399931340714436e-05,
      "loss": 0.6158,
      "step": 8008
    },
    {
      "epoch": 0.7161123032904149,
      "grad_norm": 0.1683611739978576,
      "learning_rate": 3.937689715439842e-05,
      "loss": 0.3767,
      "step": 8009
    },
    {
      "epoch": 0.7162017167381974,
      "grad_norm": 0.15971901385350679,
      "learning_rate": 3.935386805246882e-05,
      "loss": 0.6579,
      "step": 8010
    },
    {
      "epoch": 0.71629113018598,
      "grad_norm": 0.16311004597997408,
      "learning_rate": 3.933084403685712e-05,
      "loss": 0.6697,
      "step": 8011
    },
    {
      "epoch": 0.7163805436337625,
      "grad_norm": 0.14340969429082526,
      "learning_rate": 3.930782510949418e-05,
      "loss": 0.6633,
      "step": 8012
    },
    {
      "epoch": 0.716469957081545,
      "grad_norm": 0.13852756820092552,
      "learning_rate": 3.9284811272310715e-05,
      "loss": 0.6323,
      "step": 8013
    },
    {
      "epoch": 0.7165593705293276,
      "grad_norm": 0.1573647516906072,
      "learning_rate": 3.9261802527236765e-05,
      "loss": 0.6372,
      "step": 8014
    },
    {
      "epoch": 0.7166487839771102,
      "grad_norm": 0.1611342101531088,
      "learning_rate": 3.92387988762021e-05,
      "loss": 0.6777,
      "step": 8015
    },
    {
      "epoch": 0.7167381974248928,
      "grad_norm": 0.15161683668941764,
      "learning_rate": 3.921580032113602e-05,
      "loss": 0.6153,
      "step": 8016
    },
    {
      "epoch": 0.7168276108726752,
      "grad_norm": 0.14640122537410902,
      "learning_rate": 3.919280686396732e-05,
      "loss": 0.606,
      "step": 8017
    },
    {
      "epoch": 0.7169170243204578,
      "grad_norm": 0.14578731249708193,
      "learning_rate": 3.916981850662448e-05,
      "loss": 0.5889,
      "step": 8018
    },
    {
      "epoch": 0.7170064377682404,
      "grad_norm": 0.1563178981795846,
      "learning_rate": 3.9146835251035485e-05,
      "loss": 0.6359,
      "step": 8019
    },
    {
      "epoch": 0.7170958512160229,
      "grad_norm": 0.15167215269291598,
      "learning_rate": 3.9123857099127936e-05,
      "loss": 0.6453,
      "step": 8020
    },
    {
      "epoch": 0.7171852646638054,
      "grad_norm": 0.1564574267509187,
      "learning_rate": 3.910088405282897e-05,
      "loss": 0.657,
      "step": 8021
    },
    {
      "epoch": 0.717274678111588,
      "grad_norm": 0.15993180217552247,
      "learning_rate": 3.907791611406534e-05,
      "loss": 0.6609,
      "step": 8022
    },
    {
      "epoch": 0.7173640915593705,
      "grad_norm": 0.1620224062618321,
      "learning_rate": 3.9054953284763284e-05,
      "loss": 0.5931,
      "step": 8023
    },
    {
      "epoch": 0.7174535050071531,
      "grad_norm": 0.16509563713726563,
      "learning_rate": 3.9031995566848687e-05,
      "loss": 0.6383,
      "step": 8024
    },
    {
      "epoch": 0.7175429184549357,
      "grad_norm": 0.1739022745840725,
      "learning_rate": 3.900904296224702e-05,
      "loss": 0.6141,
      "step": 8025
    },
    {
      "epoch": 0.7176323319027181,
      "grad_norm": 0.15445423282709614,
      "learning_rate": 3.89860954728832e-05,
      "loss": 0.6614,
      "step": 8026
    },
    {
      "epoch": 0.7177217453505007,
      "grad_norm": 0.15405406420776052,
      "learning_rate": 3.896315310068194e-05,
      "loss": 0.6642,
      "step": 8027
    },
    {
      "epoch": 0.7178111587982833,
      "grad_norm": 0.1257462946659644,
      "learning_rate": 3.8940215847567274e-05,
      "loss": 0.5663,
      "step": 8028
    },
    {
      "epoch": 0.7179005722460658,
      "grad_norm": 0.15695417330866665,
      "learning_rate": 3.891728371546297e-05,
      "loss": 0.6154,
      "step": 8029
    },
    {
      "epoch": 0.7179899856938483,
      "grad_norm": 0.15848690290548648,
      "learning_rate": 3.88943567062923e-05,
      "loss": 0.591,
      "step": 8030
    },
    {
      "epoch": 0.7180793991416309,
      "grad_norm": 0.1595806704733964,
      "learning_rate": 3.887143482197818e-05,
      "loss": 0.6364,
      "step": 8031
    },
    {
      "epoch": 0.7181688125894135,
      "grad_norm": 0.1379805923565997,
      "learning_rate": 3.884851806444296e-05,
      "loss": 0.6079,
      "step": 8032
    },
    {
      "epoch": 0.718258226037196,
      "grad_norm": 0.15539878603488058,
      "learning_rate": 3.882560643560869e-05,
      "loss": 0.6476,
      "step": 8033
    },
    {
      "epoch": 0.7183476394849786,
      "grad_norm": 0.1511811445479791,
      "learning_rate": 3.880269993739691e-05,
      "loss": 0.674,
      "step": 8034
    },
    {
      "epoch": 0.718437052932761,
      "grad_norm": 0.1487761776342269,
      "learning_rate": 3.8779798571728786e-05,
      "loss": 0.6303,
      "step": 8035
    },
    {
      "epoch": 0.7185264663805436,
      "grad_norm": 0.1690348448289253,
      "learning_rate": 3.8756902340525046e-05,
      "loss": 0.3677,
      "step": 8036
    },
    {
      "epoch": 0.7186158798283262,
      "grad_norm": 0.15809237829417283,
      "learning_rate": 3.8734011245705924e-05,
      "loss": 0.6477,
      "step": 8037
    },
    {
      "epoch": 0.7187052932761088,
      "grad_norm": 0.16118901670186309,
      "learning_rate": 3.871112528919128e-05,
      "loss": 0.6626,
      "step": 8038
    },
    {
      "epoch": 0.7187947067238912,
      "grad_norm": 0.13838258837727405,
      "learning_rate": 3.868824447290058e-05,
      "loss": 0.6102,
      "step": 8039
    },
    {
      "epoch": 0.7188841201716738,
      "grad_norm": 0.17889437127810476,
      "learning_rate": 3.866536879875269e-05,
      "loss": 0.3585,
      "step": 8040
    },
    {
      "epoch": 0.7189735336194564,
      "grad_norm": 0.17557561925458096,
      "learning_rate": 3.86424982686663e-05,
      "loss": 0.4258,
      "step": 8041
    },
    {
      "epoch": 0.719062947067239,
      "grad_norm": 0.152970156703111,
      "learning_rate": 3.861963288455949e-05,
      "loss": 0.6454,
      "step": 8042
    },
    {
      "epoch": 0.7191523605150214,
      "grad_norm": 0.15834556156257656,
      "learning_rate": 3.8596772648349924e-05,
      "loss": 0.6371,
      "step": 8043
    },
    {
      "epoch": 0.719241773962804,
      "grad_norm": 0.16233996370789522,
      "learning_rate": 3.857391756195487e-05,
      "loss": 0.6878,
      "step": 8044
    },
    {
      "epoch": 0.7193311874105865,
      "grad_norm": 0.14914441135615378,
      "learning_rate": 3.85510676272912e-05,
      "loss": 0.6427,
      "step": 8045
    },
    {
      "epoch": 0.7194206008583691,
      "grad_norm": 0.15839661182886683,
      "learning_rate": 3.852822284627524e-05,
      "loss": 0.6298,
      "step": 8046
    },
    {
      "epoch": 0.7195100143061517,
      "grad_norm": 0.16513623343755202,
      "learning_rate": 3.8505383220823e-05,
      "loss": 0.6309,
      "step": 8047
    },
    {
      "epoch": 0.7195994277539342,
      "grad_norm": 0.1619784716617631,
      "learning_rate": 3.848254875285e-05,
      "loss": 0.646,
      "step": 8048
    },
    {
      "epoch": 0.7196888412017167,
      "grad_norm": 0.1781108555854581,
      "learning_rate": 3.845971944427135e-05,
      "loss": 0.6661,
      "step": 8049
    },
    {
      "epoch": 0.7197782546494993,
      "grad_norm": 0.17613368348218458,
      "learning_rate": 3.8436895297001726e-05,
      "loss": 0.6013,
      "step": 8050
    },
    {
      "epoch": 0.7198676680972819,
      "grad_norm": 0.1574289102014185,
      "learning_rate": 3.841407631295532e-05,
      "loss": 0.6568,
      "step": 8051
    },
    {
      "epoch": 0.7199570815450643,
      "grad_norm": 0.15741087004814006,
      "learning_rate": 3.8391262494045955e-05,
      "loss": 0.6367,
      "step": 8052
    },
    {
      "epoch": 0.7200464949928469,
      "grad_norm": 0.1801765850611437,
      "learning_rate": 3.8368453842187026e-05,
      "loss": 0.3799,
      "step": 8053
    },
    {
      "epoch": 0.7201359084406295,
      "grad_norm": 0.1713735314871262,
      "learning_rate": 3.8345650359291384e-05,
      "loss": 0.6609,
      "step": 8054
    },
    {
      "epoch": 0.720225321888412,
      "grad_norm": 0.178369251602209,
      "learning_rate": 3.8322852047271615e-05,
      "loss": 0.6749,
      "step": 8055
    },
    {
      "epoch": 0.7203147353361946,
      "grad_norm": 0.16211691195415628,
      "learning_rate": 3.830005890803979e-05,
      "loss": 0.6452,
      "step": 8056
    },
    {
      "epoch": 0.7204041487839771,
      "grad_norm": 0.15856872278106177,
      "learning_rate": 3.8277270943507484e-05,
      "loss": 0.653,
      "step": 8057
    },
    {
      "epoch": 0.7204935622317596,
      "grad_norm": 0.18922216146688361,
      "learning_rate": 3.8254488155585924e-05,
      "loss": 0.6196,
      "step": 8058
    },
    {
      "epoch": 0.7205829756795422,
      "grad_norm": 0.15636897251067147,
      "learning_rate": 3.8231710546185895e-05,
      "loss": 0.6787,
      "step": 8059
    },
    {
      "epoch": 0.7206723891273248,
      "grad_norm": 0.15462578348476583,
      "learning_rate": 3.8208938117217674e-05,
      "loss": 0.6553,
      "step": 8060
    },
    {
      "epoch": 0.7207618025751072,
      "grad_norm": 0.15550558865229,
      "learning_rate": 3.8186170870591185e-05,
      "loss": 0.6465,
      "step": 8061
    },
    {
      "epoch": 0.7208512160228898,
      "grad_norm": 0.14022633044509628,
      "learning_rate": 3.8163408808215904e-05,
      "loss": 0.5729,
      "step": 8062
    },
    {
      "epoch": 0.7209406294706724,
      "grad_norm": 0.18176514268902796,
      "learning_rate": 3.814065193200084e-05,
      "loss": 0.675,
      "step": 8063
    },
    {
      "epoch": 0.721030042918455,
      "grad_norm": 0.15798869509963742,
      "learning_rate": 3.8117900243854595e-05,
      "loss": 0.6428,
      "step": 8064
    },
    {
      "epoch": 0.7211194563662375,
      "grad_norm": 0.1507408871855143,
      "learning_rate": 3.809515374568535e-05,
      "loss": 0.6633,
      "step": 8065
    },
    {
      "epoch": 0.72120886981402,
      "grad_norm": 0.1404570112942711,
      "learning_rate": 3.807241243940077e-05,
      "loss": 0.6092,
      "step": 8066
    },
    {
      "epoch": 0.7212982832618026,
      "grad_norm": 0.15193249108829027,
      "learning_rate": 3.804967632690817e-05,
      "loss": 0.6682,
      "step": 8067
    },
    {
      "epoch": 0.7213876967095851,
      "grad_norm": 0.16912464047394288,
      "learning_rate": 3.802694541011439e-05,
      "loss": 0.6809,
      "step": 8068
    },
    {
      "epoch": 0.7214771101573677,
      "grad_norm": 0.15385632644130345,
      "learning_rate": 3.8004219690925856e-05,
      "loss": 0.6157,
      "step": 8069
    },
    {
      "epoch": 0.7215665236051502,
      "grad_norm": 0.14904424198175345,
      "learning_rate": 3.7981499171248594e-05,
      "loss": 0.6397,
      "step": 8070
    },
    {
      "epoch": 0.7216559370529327,
      "grad_norm": 0.17178873976120704,
      "learning_rate": 3.795878385298804e-05,
      "loss": 0.6635,
      "step": 8071
    },
    {
      "epoch": 0.7217453505007153,
      "grad_norm": 0.15175121992670712,
      "learning_rate": 3.793607373804937e-05,
      "loss": 0.6373,
      "step": 8072
    },
    {
      "epoch": 0.7218347639484979,
      "grad_norm": 0.14262095952404377,
      "learning_rate": 3.7913368828337285e-05,
      "loss": 0.6166,
      "step": 8073
    },
    {
      "epoch": 0.7219241773962805,
      "grad_norm": 0.1432219088850121,
      "learning_rate": 3.789066912575593e-05,
      "loss": 0.6322,
      "step": 8074
    },
    {
      "epoch": 0.7220135908440629,
      "grad_norm": 0.13771282529084147,
      "learning_rate": 3.78679746322091e-05,
      "loss": 0.6309,
      "step": 8075
    },
    {
      "epoch": 0.7221030042918455,
      "grad_norm": 0.16744962253473586,
      "learning_rate": 3.784528534960029e-05,
      "loss": 0.6259,
      "step": 8076
    },
    {
      "epoch": 0.7221924177396281,
      "grad_norm": 0.17196318748643352,
      "learning_rate": 3.782260127983229e-05,
      "loss": 0.652,
      "step": 8077
    },
    {
      "epoch": 0.7222818311874106,
      "grad_norm": 0.17394998909965062,
      "learning_rate": 3.7799922424807634e-05,
      "loss": 0.6585,
      "step": 8078
    },
    {
      "epoch": 0.7223712446351931,
      "grad_norm": 0.15187642206906024,
      "learning_rate": 3.777724878642839e-05,
      "loss": 0.6361,
      "step": 8079
    },
    {
      "epoch": 0.7224606580829757,
      "grad_norm": 0.14282070858229765,
      "learning_rate": 3.7754580366596115e-05,
      "loss": 0.6399,
      "step": 8080
    },
    {
      "epoch": 0.7225500715307582,
      "grad_norm": 0.15388596718068048,
      "learning_rate": 3.773191716721202e-05,
      "loss": 0.686,
      "step": 8081
    },
    {
      "epoch": 0.7226394849785408,
      "grad_norm": 0.1794692822069206,
      "learning_rate": 3.7709259190176816e-05,
      "loss": 0.7112,
      "step": 8082
    },
    {
      "epoch": 0.7227288984263234,
      "grad_norm": 0.18592230869668727,
      "learning_rate": 3.768660643739083e-05,
      "loss": 0.6258,
      "step": 8083
    },
    {
      "epoch": 0.7228183118741058,
      "grad_norm": 0.13486941606511985,
      "learning_rate": 3.766395891075394e-05,
      "loss": 0.6271,
      "step": 8084
    },
    {
      "epoch": 0.7229077253218884,
      "grad_norm": 0.1605406094268267,
      "learning_rate": 3.764131661216549e-05,
      "loss": 0.6562,
      "step": 8085
    },
    {
      "epoch": 0.722997138769671,
      "grad_norm": 0.1796919517428339,
      "learning_rate": 3.7618679543524503e-05,
      "loss": 0.3702,
      "step": 8086
    },
    {
      "epoch": 0.7230865522174535,
      "grad_norm": 0.15722388566343318,
      "learning_rate": 3.759604770672953e-05,
      "loss": 0.6163,
      "step": 8087
    },
    {
      "epoch": 0.723175965665236,
      "grad_norm": 0.17359056867601302,
      "learning_rate": 3.757342110367871e-05,
      "loss": 0.6439,
      "step": 8088
    },
    {
      "epoch": 0.7232653791130186,
      "grad_norm": 0.14989010168921543,
      "learning_rate": 3.755079973626959e-05,
      "loss": 0.625,
      "step": 8089
    },
    {
      "epoch": 0.7233547925608012,
      "grad_norm": 0.1431280887027175,
      "learning_rate": 3.752818360639956e-05,
      "loss": 0.6152,
      "step": 8090
    },
    {
      "epoch": 0.7234442060085837,
      "grad_norm": 0.1709046903876459,
      "learning_rate": 3.7505572715965284e-05,
      "loss": 0.6812,
      "step": 8091
    },
    {
      "epoch": 0.7235336194563662,
      "grad_norm": 0.15405957448725596,
      "learning_rate": 3.748296706686315e-05,
      "loss": 0.6421,
      "step": 8092
    },
    {
      "epoch": 0.7236230329041488,
      "grad_norm": 0.16404895462496846,
      "learning_rate": 3.74603666609891e-05,
      "loss": 0.6629,
      "step": 8093
    },
    {
      "epoch": 0.7237124463519313,
      "grad_norm": 0.17810975618261263,
      "learning_rate": 3.7437771500238526e-05,
      "loss": 0.6796,
      "step": 8094
    },
    {
      "epoch": 0.7238018597997139,
      "grad_norm": 0.1504409199041355,
      "learning_rate": 3.741518158650648e-05,
      "loss": 0.641,
      "step": 8095
    },
    {
      "epoch": 0.7238912732474965,
      "grad_norm": 0.1613383115618441,
      "learning_rate": 3.739259692168764e-05,
      "loss": 0.6337,
      "step": 8096
    },
    {
      "epoch": 0.7239806866952789,
      "grad_norm": 0.13923082397712372,
      "learning_rate": 3.737001750767604e-05,
      "loss": 0.6327,
      "step": 8097
    },
    {
      "epoch": 0.7240701001430615,
      "grad_norm": 0.15815421647056546,
      "learning_rate": 3.734744334636544e-05,
      "loss": 0.6726,
      "step": 8098
    },
    {
      "epoch": 0.7241595135908441,
      "grad_norm": 0.15509973274608438,
      "learning_rate": 3.732487443964914e-05,
      "loss": 0.6406,
      "step": 8099
    },
    {
      "epoch": 0.7242489270386266,
      "grad_norm": 0.17402720397484578,
      "learning_rate": 3.730231078941988e-05,
      "loss": 0.682,
      "step": 8100
    },
    {
      "epoch": 0.7243383404864091,
      "grad_norm": 0.15828261457834042,
      "learning_rate": 3.727975239757011e-05,
      "loss": 0.6548,
      "step": 8101
    },
    {
      "epoch": 0.7244277539341917,
      "grad_norm": 0.13166145198934387,
      "learning_rate": 3.725719926599175e-05,
      "loss": 0.6599,
      "step": 8102
    },
    {
      "epoch": 0.7245171673819742,
      "grad_norm": 0.1437359862968944,
      "learning_rate": 3.723465139657632e-05,
      "loss": 0.6372,
      "step": 8103
    },
    {
      "epoch": 0.7246065808297568,
      "grad_norm": 0.15094686960133458,
      "learning_rate": 3.72121087912149e-05,
      "loss": 0.6452,
      "step": 8104
    },
    {
      "epoch": 0.7246959942775394,
      "grad_norm": 0.14379839853113435,
      "learning_rate": 3.7189571451798065e-05,
      "loss": 0.6017,
      "step": 8105
    },
    {
      "epoch": 0.7247854077253219,
      "grad_norm": 0.18712123900796634,
      "learning_rate": 3.7167039380216005e-05,
      "loss": 0.6925,
      "step": 8106
    },
    {
      "epoch": 0.7248748211731044,
      "grad_norm": 0.15211047445433884,
      "learning_rate": 3.714451257835852e-05,
      "loss": 0.657,
      "step": 8107
    },
    {
      "epoch": 0.724964234620887,
      "grad_norm": 0.1625364240241205,
      "learning_rate": 3.71219910481148e-05,
      "loss": 0.6724,
      "step": 8108
    },
    {
      "epoch": 0.7250536480686696,
      "grad_norm": 0.17515421575548695,
      "learning_rate": 3.7099474791373736e-05,
      "loss": 0.6955,
      "step": 8109
    },
    {
      "epoch": 0.725143061516452,
      "grad_norm": 0.1732028053953482,
      "learning_rate": 3.707696381002381e-05,
      "loss": 0.6741,
      "step": 8110
    },
    {
      "epoch": 0.7252324749642346,
      "grad_norm": 0.18386852867083422,
      "learning_rate": 3.705445810595291e-05,
      "loss": 0.6691,
      "step": 8111
    },
    {
      "epoch": 0.7253218884120172,
      "grad_norm": 0.14371174673337472,
      "learning_rate": 3.7031957681048604e-05,
      "loss": 0.6645,
      "step": 8112
    },
    {
      "epoch": 0.7254113018597997,
      "grad_norm": 0.2049711691512586,
      "learning_rate": 3.700946253719798e-05,
      "loss": 0.6745,
      "step": 8113
    },
    {
      "epoch": 0.7255007153075823,
      "grad_norm": 0.13701894586093954,
      "learning_rate": 3.6986972676287626e-05,
      "loss": 0.6093,
      "step": 8114
    },
    {
      "epoch": 0.7255901287553648,
      "grad_norm": 0.15291812989261505,
      "learning_rate": 3.6964488100203776e-05,
      "loss": 0.6826,
      "step": 8115
    },
    {
      "epoch": 0.7256795422031473,
      "grad_norm": 0.15558759275297457,
      "learning_rate": 3.6942008810832184e-05,
      "loss": 0.6154,
      "step": 8116
    },
    {
      "epoch": 0.7257689556509299,
      "grad_norm": 0.14230513170282394,
      "learning_rate": 3.6919534810058154e-05,
      "loss": 0.6153,
      "step": 8117
    },
    {
      "epoch": 0.7258583690987125,
      "grad_norm": 0.13617798931822592,
      "learning_rate": 3.68970660997666e-05,
      "loss": 0.6341,
      "step": 8118
    },
    {
      "epoch": 0.725947782546495,
      "grad_norm": 0.1472130632868453,
      "learning_rate": 3.687460268184185e-05,
      "loss": 0.6301,
      "step": 8119
    },
    {
      "epoch": 0.7260371959942775,
      "grad_norm": 0.16252838306521156,
      "learning_rate": 3.685214455816796e-05,
      "loss": 0.6409,
      "step": 8120
    },
    {
      "epoch": 0.7261266094420601,
      "grad_norm": 0.16137093340856473,
      "learning_rate": 3.682969173062842e-05,
      "loss": 0.6244,
      "step": 8121
    },
    {
      "epoch": 0.7262160228898427,
      "grad_norm": 0.15506158197223152,
      "learning_rate": 3.6807244201106394e-05,
      "loss": 0.6371,
      "step": 8122
    },
    {
      "epoch": 0.7263054363376252,
      "grad_norm": 0.16321723576022876,
      "learning_rate": 3.6784801971484405e-05,
      "loss": 0.6181,
      "step": 8123
    },
    {
      "epoch": 0.7263948497854077,
      "grad_norm": 0.15139814693159678,
      "learning_rate": 3.6762365043644806e-05,
      "loss": 0.656,
      "step": 8124
    },
    {
      "epoch": 0.7264842632331903,
      "grad_norm": 0.1396335599284074,
      "learning_rate": 3.673993341946924e-05,
      "loss": 0.6141,
      "step": 8125
    },
    {
      "epoch": 0.7265736766809728,
      "grad_norm": 0.15891042243149203,
      "learning_rate": 3.671750710083906e-05,
      "loss": 0.653,
      "step": 8126
    },
    {
      "epoch": 0.7266630901287554,
      "grad_norm": 0.168425364338267,
      "learning_rate": 3.6695086089635156e-05,
      "loss": 0.6675,
      "step": 8127
    },
    {
      "epoch": 0.7267525035765379,
      "grad_norm": 0.14915164385690433,
      "learning_rate": 3.667267038773791e-05,
      "loss": 0.6258,
      "step": 8128
    },
    {
      "epoch": 0.7268419170243204,
      "grad_norm": 0.1608082353698753,
      "learning_rate": 3.6650259997027315e-05,
      "loss": 0.6799,
      "step": 8129
    },
    {
      "epoch": 0.726931330472103,
      "grad_norm": 0.19204884555592053,
      "learning_rate": 3.66278549193829e-05,
      "loss": 0.6816,
      "step": 8130
    },
    {
      "epoch": 0.7270207439198856,
      "grad_norm": 0.15450593078705221,
      "learning_rate": 3.6605455156683766e-05,
      "loss": 0.6808,
      "step": 8131
    },
    {
      "epoch": 0.727110157367668,
      "grad_norm": 0.1589164876524375,
      "learning_rate": 3.658306071080855e-05,
      "loss": 0.6549,
      "step": 8132
    },
    {
      "epoch": 0.7271995708154506,
      "grad_norm": 0.16768481388039658,
      "learning_rate": 3.6560671583635467e-05,
      "loss": 0.6647,
      "step": 8133
    },
    {
      "epoch": 0.7272889842632332,
      "grad_norm": 0.16502223147586606,
      "learning_rate": 3.6538287777042215e-05,
      "loss": 0.6504,
      "step": 8134
    },
    {
      "epoch": 0.7273783977110158,
      "grad_norm": 0.1485130232000352,
      "learning_rate": 3.6515909292906126e-05,
      "loss": 0.6377,
      "step": 8135
    },
    {
      "epoch": 0.7274678111587983,
      "grad_norm": 0.14499433520418648,
      "learning_rate": 3.649353613310409e-05,
      "loss": 0.6345,
      "step": 8136
    },
    {
      "epoch": 0.7275572246065808,
      "grad_norm": 0.15418235020629284,
      "learning_rate": 3.6471168299512405e-05,
      "loss": 0.6277,
      "step": 8137
    },
    {
      "epoch": 0.7276466380543634,
      "grad_norm": 0.16669034324069393,
      "learning_rate": 3.644880579400719e-05,
      "loss": 0.6759,
      "step": 8138
    },
    {
      "epoch": 0.7277360515021459,
      "grad_norm": 0.16664593269848973,
      "learning_rate": 3.6426448618463836e-05,
      "loss": 0.6807,
      "step": 8139
    },
    {
      "epoch": 0.7278254649499285,
      "grad_norm": 0.12926247305321403,
      "learning_rate": 3.640409677475748e-05,
      "loss": 0.6416,
      "step": 8140
    },
    {
      "epoch": 0.727914878397711,
      "grad_norm": 0.14408017133465142,
      "learning_rate": 3.6381750264762734e-05,
      "loss": 0.628,
      "step": 8141
    },
    {
      "epoch": 0.7280042918454935,
      "grad_norm": 0.14391061967870677,
      "learning_rate": 3.6359409090353744e-05,
      "loss": 0.638,
      "step": 8142
    },
    {
      "epoch": 0.7280937052932761,
      "grad_norm": 0.15197522646031772,
      "learning_rate": 3.633707325340425e-05,
      "loss": 0.6688,
      "step": 8143
    },
    {
      "epoch": 0.7281831187410587,
      "grad_norm": 0.16462460550697708,
      "learning_rate": 3.631474275578754e-05,
      "loss": 0.6285,
      "step": 8144
    },
    {
      "epoch": 0.7282725321888412,
      "grad_norm": 0.14620362290242153,
      "learning_rate": 3.6292417599376436e-05,
      "loss": 0.6028,
      "step": 8145
    },
    {
      "epoch": 0.7283619456366237,
      "grad_norm": 0.13490855960313933,
      "learning_rate": 3.627009778604333e-05,
      "loss": 0.645,
      "step": 8146
    },
    {
      "epoch": 0.7284513590844063,
      "grad_norm": 0.14652594584432277,
      "learning_rate": 3.624778331766019e-05,
      "loss": 0.6389,
      "step": 8147
    },
    {
      "epoch": 0.7285407725321889,
      "grad_norm": 0.15642438645678147,
      "learning_rate": 3.6225474196098444e-05,
      "loss": 0.6676,
      "step": 8148
    },
    {
      "epoch": 0.7286301859799714,
      "grad_norm": 0.17590200412047013,
      "learning_rate": 3.620317042322915e-05,
      "loss": 0.398,
      "step": 8149
    },
    {
      "epoch": 0.7287195994277539,
      "grad_norm": 0.15049971366138115,
      "learning_rate": 3.6180872000922935e-05,
      "loss": 0.6485,
      "step": 8150
    },
    {
      "epoch": 0.7288090128755365,
      "grad_norm": 0.15557136807367955,
      "learning_rate": 3.615857893104986e-05,
      "loss": 0.6454,
      "step": 8151
    },
    {
      "epoch": 0.728898426323319,
      "grad_norm": 0.17207261444158878,
      "learning_rate": 3.613629121547969e-05,
      "loss": 0.6547,
      "step": 8152
    },
    {
      "epoch": 0.7289878397711016,
      "grad_norm": 0.14331123003834775,
      "learning_rate": 3.611400885608168e-05,
      "loss": 0.6423,
      "step": 8153
    },
    {
      "epoch": 0.7290772532188842,
      "grad_norm": 0.17408001364929365,
      "learning_rate": 3.6091731854724566e-05,
      "loss": 0.6538,
      "step": 8154
    },
    {
      "epoch": 0.7291666666666666,
      "grad_norm": 0.149400608018516,
      "learning_rate": 3.606946021327672e-05,
      "loss": 0.6046,
      "step": 8155
    },
    {
      "epoch": 0.7292560801144492,
      "grad_norm": 0.16153026105508472,
      "learning_rate": 3.604719393360606e-05,
      "loss": 0.6379,
      "step": 8156
    },
    {
      "epoch": 0.7293454935622318,
      "grad_norm": 0.18979790359482293,
      "learning_rate": 3.6024933017579984e-05,
      "loss": 0.6457,
      "step": 8157
    },
    {
      "epoch": 0.7294349070100143,
      "grad_norm": 0.15862715861600266,
      "learning_rate": 3.600267746706552e-05,
      "loss": 0.6709,
      "step": 8158
    },
    {
      "epoch": 0.7295243204577968,
      "grad_norm": 0.14732702050811736,
      "learning_rate": 3.59804272839292e-05,
      "loss": 0.618,
      "step": 8159
    },
    {
      "epoch": 0.7296137339055794,
      "grad_norm": 0.16383716733285156,
      "learning_rate": 3.595818247003713e-05,
      "loss": 0.6293,
      "step": 8160
    },
    {
      "epoch": 0.729703147353362,
      "grad_norm": 0.17504278011326607,
      "learning_rate": 3.593594302725498e-05,
      "loss": 0.6506,
      "step": 8161
    },
    {
      "epoch": 0.7297925608011445,
      "grad_norm": 0.14379321810234422,
      "learning_rate": 3.591370895744789e-05,
      "loss": 0.6326,
      "step": 8162
    },
    {
      "epoch": 0.7298819742489271,
      "grad_norm": 0.1396613649467618,
      "learning_rate": 3.5891480262480635e-05,
      "loss": 0.6308,
      "step": 8163
    },
    {
      "epoch": 0.7299713876967096,
      "grad_norm": 0.14934011899480454,
      "learning_rate": 3.5869256944217535e-05,
      "loss": 0.5938,
      "step": 8164
    },
    {
      "epoch": 0.7300608011444921,
      "grad_norm": 0.14634154017699044,
      "learning_rate": 3.584703900452234e-05,
      "loss": 0.5834,
      "step": 8165
    },
    {
      "epoch": 0.7301502145922747,
      "grad_norm": 0.14029104246902743,
      "learning_rate": 3.582482644525854e-05,
      "loss": 0.6157,
      "step": 8166
    },
    {
      "epoch": 0.7302396280400573,
      "grad_norm": 0.17832260048885298,
      "learning_rate": 3.580261926828908e-05,
      "loss": 0.3688,
      "step": 8167
    },
    {
      "epoch": 0.7303290414878397,
      "grad_norm": 0.16339013440415243,
      "learning_rate": 3.578041747547638e-05,
      "loss": 0.6343,
      "step": 8168
    },
    {
      "epoch": 0.7304184549356223,
      "grad_norm": 0.15608789092997424,
      "learning_rate": 3.57582210686825e-05,
      "loss": 0.6816,
      "step": 8169
    },
    {
      "epoch": 0.7305078683834049,
      "grad_norm": 0.16400419003711367,
      "learning_rate": 3.5736030049769074e-05,
      "loss": 0.6754,
      "step": 8170
    },
    {
      "epoch": 0.7305972818311874,
      "grad_norm": 0.1531688590972862,
      "learning_rate": 3.5713844420597155e-05,
      "loss": 0.6594,
      "step": 8171
    },
    {
      "epoch": 0.73068669527897,
      "grad_norm": 0.14858561730133138,
      "learning_rate": 3.569166418302747e-05,
      "loss": 0.6009,
      "step": 8172
    },
    {
      "epoch": 0.7307761087267525,
      "grad_norm": 0.13650486751064672,
      "learning_rate": 3.566948933892025e-05,
      "loss": 0.6544,
      "step": 8173
    },
    {
      "epoch": 0.730865522174535,
      "grad_norm": 0.1621134125419909,
      "learning_rate": 3.564731989013527e-05,
      "loss": 0.6256,
      "step": 8174
    },
    {
      "epoch": 0.7309549356223176,
      "grad_norm": 0.16070057763604387,
      "learning_rate": 3.5625155838531877e-05,
      "loss": 0.6288,
      "step": 8175
    },
    {
      "epoch": 0.7310443490701002,
      "grad_norm": 0.14212556580388652,
      "learning_rate": 3.560299718596889e-05,
      "loss": 0.6303,
      "step": 8176
    },
    {
      "epoch": 0.7311337625178826,
      "grad_norm": 0.1844979408804667,
      "learning_rate": 3.558084393430475e-05,
      "loss": 0.6798,
      "step": 8177
    },
    {
      "epoch": 0.7312231759656652,
      "grad_norm": 0.16034035685970593,
      "learning_rate": 3.555869608539743e-05,
      "loss": 0.6703,
      "step": 8178
    },
    {
      "epoch": 0.7313125894134478,
      "grad_norm": 0.1488224726532525,
      "learning_rate": 3.5536553641104465e-05,
      "loss": 0.6033,
      "step": 8179
    },
    {
      "epoch": 0.7314020028612304,
      "grad_norm": 0.17312241972154013,
      "learning_rate": 3.5514416603282876e-05,
      "loss": 0.6485,
      "step": 8180
    },
    {
      "epoch": 0.7314914163090128,
      "grad_norm": 0.16636857564077234,
      "learning_rate": 3.549228497378932e-05,
      "loss": 0.6458,
      "step": 8181
    },
    {
      "epoch": 0.7315808297567954,
      "grad_norm": 0.15625964074059154,
      "learning_rate": 3.547015875447989e-05,
      "loss": 0.6587,
      "step": 8182
    },
    {
      "epoch": 0.731670243204578,
      "grad_norm": 0.14221849661607205,
      "learning_rate": 3.544803794721031e-05,
      "loss": 0.6427,
      "step": 8183
    },
    {
      "epoch": 0.7317596566523605,
      "grad_norm": 0.17767519335780396,
      "learning_rate": 3.542592255383586e-05,
      "loss": 0.6677,
      "step": 8184
    },
    {
      "epoch": 0.7318490701001431,
      "grad_norm": 0.16213068723454266,
      "learning_rate": 3.5403812576211246e-05,
      "loss": 0.6317,
      "step": 8185
    },
    {
      "epoch": 0.7319384835479256,
      "grad_norm": 0.15959168357756343,
      "learning_rate": 3.538170801619088e-05,
      "loss": 0.6232,
      "step": 8186
    },
    {
      "epoch": 0.7320278969957081,
      "grad_norm": 0.1403290934956418,
      "learning_rate": 3.535960887562866e-05,
      "loss": 0.5902,
      "step": 8187
    },
    {
      "epoch": 0.7321173104434907,
      "grad_norm": 0.14638156275915984,
      "learning_rate": 3.533751515637794e-05,
      "loss": 0.6161,
      "step": 8188
    },
    {
      "epoch": 0.7322067238912733,
      "grad_norm": 0.15757547755027873,
      "learning_rate": 3.531542686029173e-05,
      "loss": 0.6225,
      "step": 8189
    },
    {
      "epoch": 0.7322961373390557,
      "grad_norm": 0.1665867501029156,
      "learning_rate": 3.5293343989222593e-05,
      "loss": 0.6291,
      "step": 8190
    },
    {
      "epoch": 0.7323855507868383,
      "grad_norm": 0.1479477715436586,
      "learning_rate": 3.527126654502252e-05,
      "loss": 0.6254,
      "step": 8191
    },
    {
      "epoch": 0.7324749642346209,
      "grad_norm": 0.16241897299481733,
      "learning_rate": 3.5249194529543137e-05,
      "loss": 0.6282,
      "step": 8192
    },
    {
      "epoch": 0.7325643776824035,
      "grad_norm": 0.175096956926952,
      "learning_rate": 3.5227127944635606e-05,
      "loss": 0.7124,
      "step": 8193
    },
    {
      "epoch": 0.732653791130186,
      "grad_norm": 0.1518876162626128,
      "learning_rate": 3.520506679215064e-05,
      "loss": 0.6567,
      "step": 8194
    },
    {
      "epoch": 0.7327432045779685,
      "grad_norm": 0.1665717631007988,
      "learning_rate": 3.51830110739385e-05,
      "loss": 0.6297,
      "step": 8195
    },
    {
      "epoch": 0.7328326180257511,
      "grad_norm": 0.14729159357607446,
      "learning_rate": 3.516096079184891e-05,
      "loss": 0.6327,
      "step": 8196
    },
    {
      "epoch": 0.7329220314735336,
      "grad_norm": 0.15598550302785238,
      "learning_rate": 3.513891594773123e-05,
      "loss": 0.6203,
      "step": 8197
    },
    {
      "epoch": 0.7330114449213162,
      "grad_norm": 0.13871659844276757,
      "learning_rate": 3.5116876543434374e-05,
      "loss": 0.6056,
      "step": 8198
    },
    {
      "epoch": 0.7331008583690987,
      "grad_norm": 0.15360866903354511,
      "learning_rate": 3.509484258080665e-05,
      "loss": 0.6045,
      "step": 8199
    },
    {
      "epoch": 0.7331902718168812,
      "grad_norm": 0.1605975208457456,
      "learning_rate": 3.507281406169614e-05,
      "loss": 0.6862,
      "step": 8200
    },
    {
      "epoch": 0.7332796852646638,
      "grad_norm": 0.14324235136046692,
      "learning_rate": 3.505079098795032e-05,
      "loss": 0.6205,
      "step": 8201
    },
    {
      "epoch": 0.7333690987124464,
      "grad_norm": 0.15355771488235342,
      "learning_rate": 3.502877336141619e-05,
      "loss": 0.6785,
      "step": 8202
    },
    {
      "epoch": 0.733458512160229,
      "grad_norm": 0.16999487716892228,
      "learning_rate": 3.5006761183940386e-05,
      "loss": 0.6304,
      "step": 8203
    },
    {
      "epoch": 0.7335479256080114,
      "grad_norm": 0.16172224937634588,
      "learning_rate": 3.498475445736905e-05,
      "loss": 0.6734,
      "step": 8204
    },
    {
      "epoch": 0.733637339055794,
      "grad_norm": 0.15731115648489746,
      "learning_rate": 3.4962753183547806e-05,
      "loss": 0.6346,
      "step": 8205
    },
    {
      "epoch": 0.7337267525035766,
      "grad_norm": 0.16299239312721658,
      "learning_rate": 3.4940757364321906e-05,
      "loss": 0.6743,
      "step": 8206
    },
    {
      "epoch": 0.7338161659513591,
      "grad_norm": 0.16599394523071007,
      "learning_rate": 3.491876700153612e-05,
      "loss": 0.6442,
      "step": 8207
    },
    {
      "epoch": 0.7339055793991416,
      "grad_norm": 0.15937485028326517,
      "learning_rate": 3.489678209703475e-05,
      "loss": 0.6067,
      "step": 8208
    },
    {
      "epoch": 0.7339949928469242,
      "grad_norm": 0.1587859548947493,
      "learning_rate": 3.487480265266164e-05,
      "loss": 0.6683,
      "step": 8209
    },
    {
      "epoch": 0.7340844062947067,
      "grad_norm": 0.15713246981838097,
      "learning_rate": 3.485282867026021e-05,
      "loss": 0.6339,
      "step": 8210
    },
    {
      "epoch": 0.7341738197424893,
      "grad_norm": 0.1574133457260417,
      "learning_rate": 3.483086015167333e-05,
      "loss": 0.6654,
      "step": 8211
    },
    {
      "epoch": 0.7342632331902719,
      "grad_norm": 0.15396020691901677,
      "learning_rate": 3.48088970987435e-05,
      "loss": 0.6512,
      "step": 8212
    },
    {
      "epoch": 0.7343526466380543,
      "grad_norm": 0.14413221612367724,
      "learning_rate": 3.4786939513312744e-05,
      "loss": 0.6647,
      "step": 8213
    },
    {
      "epoch": 0.7344420600858369,
      "grad_norm": 0.14933565657107278,
      "learning_rate": 3.4764987397222614e-05,
      "loss": 0.5988,
      "step": 8214
    },
    {
      "epoch": 0.7345314735336195,
      "grad_norm": 0.17833740853969457,
      "learning_rate": 3.474304075231424e-05,
      "loss": 0.6748,
      "step": 8215
    },
    {
      "epoch": 0.734620886981402,
      "grad_norm": 0.18166295759471027,
      "learning_rate": 3.472109958042819e-05,
      "loss": 0.7262,
      "step": 8216
    },
    {
      "epoch": 0.7347103004291845,
      "grad_norm": 0.1417529299271747,
      "learning_rate": 3.4699163883404685e-05,
      "loss": 0.5837,
      "step": 8217
    },
    {
      "epoch": 0.7347997138769671,
      "grad_norm": 0.16572277829930435,
      "learning_rate": 3.467723366308348e-05,
      "loss": 0.7012,
      "step": 8218
    },
    {
      "epoch": 0.7348891273247496,
      "grad_norm": 0.17118960182347756,
      "learning_rate": 3.465530892130375e-05,
      "loss": 0.6862,
      "step": 8219
    },
    {
      "epoch": 0.7349785407725322,
      "grad_norm": 0.14199085328807104,
      "learning_rate": 3.4633389659904324e-05,
      "loss": 0.6358,
      "step": 8220
    },
    {
      "epoch": 0.7350679542203148,
      "grad_norm": 0.16697972300237557,
      "learning_rate": 3.461147588072362e-05,
      "loss": 0.6439,
      "step": 8221
    },
    {
      "epoch": 0.7351573676680973,
      "grad_norm": 0.15380169709245098,
      "learning_rate": 3.458956758559945e-05,
      "loss": 0.6253,
      "step": 8222
    },
    {
      "epoch": 0.7352467811158798,
      "grad_norm": 0.15538184285901238,
      "learning_rate": 3.4567664776369236e-05,
      "loss": 0.649,
      "step": 8223
    },
    {
      "epoch": 0.7353361945636624,
      "grad_norm": 0.16695954486065295,
      "learning_rate": 3.4545767454869995e-05,
      "loss": 0.6397,
      "step": 8224
    },
    {
      "epoch": 0.735425608011445,
      "grad_norm": 0.17857794166658922,
      "learning_rate": 3.452387562293814e-05,
      "loss": 0.6629,
      "step": 8225
    },
    {
      "epoch": 0.7355150214592274,
      "grad_norm": 0.15666717715662554,
      "learning_rate": 3.4501989282409776e-05,
      "loss": 0.6372,
      "step": 8226
    },
    {
      "epoch": 0.73560443490701,
      "grad_norm": 0.17126632803589598,
      "learning_rate": 3.448010843512046e-05,
      "loss": 0.6504,
      "step": 8227
    },
    {
      "epoch": 0.7356938483547926,
      "grad_norm": 0.15463964566735178,
      "learning_rate": 3.4458233082905334e-05,
      "loss": 0.6474,
      "step": 8228
    },
    {
      "epoch": 0.7357832618025751,
      "grad_norm": 0.15828648266164191,
      "learning_rate": 3.443636322759908e-05,
      "loss": 0.6252,
      "step": 8229
    },
    {
      "epoch": 0.7358726752503576,
      "grad_norm": 0.14994346716716667,
      "learning_rate": 3.4414498871035816e-05,
      "loss": 0.5758,
      "step": 8230
    },
    {
      "epoch": 0.7359620886981402,
      "grad_norm": 0.15313166035049133,
      "learning_rate": 3.4392640015049325e-05,
      "loss": 0.685,
      "step": 8231
    },
    {
      "epoch": 0.7360515021459227,
      "grad_norm": 0.1793985026753346,
      "learning_rate": 3.437078666147292e-05,
      "loss": 0.6277,
      "step": 8232
    },
    {
      "epoch": 0.7361409155937053,
      "grad_norm": 0.15254026252974995,
      "learning_rate": 3.434893881213934e-05,
      "loss": 0.5826,
      "step": 8233
    },
    {
      "epoch": 0.7362303290414879,
      "grad_norm": 0.1538585334529617,
      "learning_rate": 3.432709646888095e-05,
      "loss": 0.6527,
      "step": 8234
    },
    {
      "epoch": 0.7363197424892703,
      "grad_norm": 0.1495803409457343,
      "learning_rate": 3.430525963352973e-05,
      "loss": 0.6645,
      "step": 8235
    },
    {
      "epoch": 0.7364091559370529,
      "grad_norm": 0.16419723346183074,
      "learning_rate": 3.428342830791701e-05,
      "loss": 0.63,
      "step": 8236
    },
    {
      "epoch": 0.7364985693848355,
      "grad_norm": 0.14166681157920036,
      "learning_rate": 3.426160249387379e-05,
      "loss": 0.6054,
      "step": 8237
    },
    {
      "epoch": 0.7365879828326181,
      "grad_norm": 0.15731656785948855,
      "learning_rate": 3.423978219323062e-05,
      "loss": 0.6787,
      "step": 8238
    },
    {
      "epoch": 0.7366773962804005,
      "grad_norm": 0.14284181317674902,
      "learning_rate": 3.421796740781745e-05,
      "loss": 0.6359,
      "step": 8239
    },
    {
      "epoch": 0.7367668097281831,
      "grad_norm": 0.1506587797996635,
      "learning_rate": 3.4196158139463915e-05,
      "loss": 0.6227,
      "step": 8240
    },
    {
      "epoch": 0.7368562231759657,
      "grad_norm": 0.17345084325369148,
      "learning_rate": 3.417435438999914e-05,
      "loss": 0.6672,
      "step": 8241
    },
    {
      "epoch": 0.7369456366237482,
      "grad_norm": 0.1813514599751101,
      "learning_rate": 3.4152556161251744e-05,
      "loss": 0.6094,
      "step": 8242
    },
    {
      "epoch": 0.7370350500715308,
      "grad_norm": 0.16502146197171982,
      "learning_rate": 3.413076345504995e-05,
      "loss": 0.6099,
      "step": 8243
    },
    {
      "epoch": 0.7371244635193133,
      "grad_norm": 0.15927508540239846,
      "learning_rate": 3.410897627322152e-05,
      "loss": 0.6587,
      "step": 8244
    },
    {
      "epoch": 0.7372138769670958,
      "grad_norm": 0.15921965118341097,
      "learning_rate": 3.408719461759362e-05,
      "loss": 0.6558,
      "step": 8245
    },
    {
      "epoch": 0.7373032904148784,
      "grad_norm": 0.1675415166143671,
      "learning_rate": 3.406541848999312e-05,
      "loss": 0.6348,
      "step": 8246
    },
    {
      "epoch": 0.737392703862661,
      "grad_norm": 0.14984611272078321,
      "learning_rate": 3.404364789224637e-05,
      "loss": 0.6027,
      "step": 8247
    },
    {
      "epoch": 0.7374821173104434,
      "grad_norm": 0.16829171611360974,
      "learning_rate": 3.402188282617914e-05,
      "loss": 0.6915,
      "step": 8248
    },
    {
      "epoch": 0.737571530758226,
      "grad_norm": 0.15303913910684244,
      "learning_rate": 3.4000123293616995e-05,
      "loss": 0.6509,
      "step": 8249
    },
    {
      "epoch": 0.7376609442060086,
      "grad_norm": 0.161515796912941,
      "learning_rate": 3.397836929638476e-05,
      "loss": 0.6695,
      "step": 8250
    },
    {
      "epoch": 0.7377503576537912,
      "grad_norm": 0.14757286562634236,
      "learning_rate": 3.395662083630696e-05,
      "loss": 0.5944,
      "step": 8251
    },
    {
      "epoch": 0.7378397711015737,
      "grad_norm": 0.15175548596984248,
      "learning_rate": 3.393487791520765e-05,
      "loss": 0.6204,
      "step": 8252
    },
    {
      "epoch": 0.7379291845493562,
      "grad_norm": 0.1551372927515642,
      "learning_rate": 3.391314053491031e-05,
      "loss": 0.6113,
      "step": 8253
    },
    {
      "epoch": 0.7380185979971388,
      "grad_norm": 0.16577086884481182,
      "learning_rate": 3.389140869723806e-05,
      "loss": 0.6221,
      "step": 8254
    },
    {
      "epoch": 0.7381080114449213,
      "grad_norm": 0.17579407109831677,
      "learning_rate": 3.3869682404013516e-05,
      "loss": 0.6378,
      "step": 8255
    },
    {
      "epoch": 0.7381974248927039,
      "grad_norm": 0.14548222911542552,
      "learning_rate": 3.3847961657058845e-05,
      "loss": 0.6385,
      "step": 8256
    },
    {
      "epoch": 0.7382868383404864,
      "grad_norm": 0.16400223320503424,
      "learning_rate": 3.382624645819574e-05,
      "loss": 0.6627,
      "step": 8257
    },
    {
      "epoch": 0.7383762517882689,
      "grad_norm": 0.1862328276730062,
      "learning_rate": 3.3804536809245455e-05,
      "loss": 0.6242,
      "step": 8258
    },
    {
      "epoch": 0.7384656652360515,
      "grad_norm": 0.15174639684366376,
      "learning_rate": 3.37828327120287e-05,
      "loss": 0.6114,
      "step": 8259
    },
    {
      "epoch": 0.7385550786838341,
      "grad_norm": 0.1594445524462399,
      "learning_rate": 3.376113416836579e-05,
      "loss": 0.6436,
      "step": 8260
    },
    {
      "epoch": 0.7386444921316166,
      "grad_norm": 0.15300878616220337,
      "learning_rate": 3.373944118007657e-05,
      "loss": 0.6569,
      "step": 8261
    },
    {
      "epoch": 0.7387339055793991,
      "grad_norm": 0.1622670140999343,
      "learning_rate": 3.371775374898038e-05,
      "loss": 0.6688,
      "step": 8262
    },
    {
      "epoch": 0.7388233190271817,
      "grad_norm": 0.17056808209224386,
      "learning_rate": 3.369607187689618e-05,
      "loss": 0.6433,
      "step": 8263
    },
    {
      "epoch": 0.7389127324749643,
      "grad_norm": 0.1501220906006184,
      "learning_rate": 3.3674395565642324e-05,
      "loss": 0.648,
      "step": 8264
    },
    {
      "epoch": 0.7390021459227468,
      "grad_norm": 0.1609949120462858,
      "learning_rate": 3.365272481703681e-05,
      "loss": 0.6154,
      "step": 8265
    },
    {
      "epoch": 0.7390915593705293,
      "grad_norm": 0.15504275235908377,
      "learning_rate": 3.3631059632897135e-05,
      "loss": 0.675,
      "step": 8266
    },
    {
      "epoch": 0.7391809728183119,
      "grad_norm": 0.14549749109229537,
      "learning_rate": 3.360940001504037e-05,
      "loss": 0.6294,
      "step": 8267
    },
    {
      "epoch": 0.7392703862660944,
      "grad_norm": 0.1448761635718634,
      "learning_rate": 3.358774596528298e-05,
      "loss": 0.6273,
      "step": 8268
    },
    {
      "epoch": 0.739359799713877,
      "grad_norm": 0.18265318589141621,
      "learning_rate": 3.35660974854412e-05,
      "loss": 0.4222,
      "step": 8269
    },
    {
      "epoch": 0.7394492131616596,
      "grad_norm": 0.16142035714060354,
      "learning_rate": 3.354445457733054e-05,
      "loss": 0.6469,
      "step": 8270
    },
    {
      "epoch": 0.739538626609442,
      "grad_norm": 0.1724175992703685,
      "learning_rate": 3.352281724276623e-05,
      "loss": 0.6848,
      "step": 8271
    },
    {
      "epoch": 0.7396280400572246,
      "grad_norm": 0.19062723459111192,
      "learning_rate": 3.3501185483562994e-05,
      "loss": 0.6807,
      "step": 8272
    },
    {
      "epoch": 0.7397174535050072,
      "grad_norm": 0.15704813274144255,
      "learning_rate": 3.347955930153498e-05,
      "loss": 0.6366,
      "step": 8273
    },
    {
      "epoch": 0.7398068669527897,
      "grad_norm": 0.1677745793646298,
      "learning_rate": 3.3457938698496e-05,
      "loss": 0.643,
      "step": 8274
    },
    {
      "epoch": 0.7398962804005722,
      "grad_norm": 0.1479525592242989,
      "learning_rate": 3.343632367625932e-05,
      "loss": 0.6311,
      "step": 8275
    },
    {
      "epoch": 0.7399856938483548,
      "grad_norm": 0.13849716732499145,
      "learning_rate": 3.34147142366378e-05,
      "loss": 0.6378,
      "step": 8276
    },
    {
      "epoch": 0.7400751072961373,
      "grad_norm": 0.17535111299957315,
      "learning_rate": 3.339311038144378e-05,
      "loss": 0.6808,
      "step": 8277
    },
    {
      "epoch": 0.7401645207439199,
      "grad_norm": 0.16989402532740716,
      "learning_rate": 3.337151211248918e-05,
      "loss": 0.6194,
      "step": 8278
    },
    {
      "epoch": 0.7402539341917024,
      "grad_norm": 0.14299995156905665,
      "learning_rate": 3.3349919431585366e-05,
      "loss": 0.6091,
      "step": 8279
    },
    {
      "epoch": 0.740343347639485,
      "grad_norm": 0.16941526465718046,
      "learning_rate": 3.332833234054331e-05,
      "loss": 0.6931,
      "step": 8280
    },
    {
      "epoch": 0.7404327610872675,
      "grad_norm": 0.16701764419232562,
      "learning_rate": 3.330675084117354e-05,
      "loss": 0.5945,
      "step": 8281
    },
    {
      "epoch": 0.7405221745350501,
      "grad_norm": 0.16015314139357315,
      "learning_rate": 3.3285174935285954e-05,
      "loss": 0.6494,
      "step": 8282
    },
    {
      "epoch": 0.7406115879828327,
      "grad_norm": 0.16811532984464686,
      "learning_rate": 3.3263604624690257e-05,
      "loss": 0.6684,
      "step": 8283
    },
    {
      "epoch": 0.7407010014306151,
      "grad_norm": 0.14698701190679928,
      "learning_rate": 3.32420399111954e-05,
      "loss": 0.6623,
      "step": 8284
    },
    {
      "epoch": 0.7407904148783977,
      "grad_norm": 0.16717422606406235,
      "learning_rate": 3.322048079661004e-05,
      "loss": 0.6417,
      "step": 8285
    },
    {
      "epoch": 0.7408798283261803,
      "grad_norm": 0.1481469582801285,
      "learning_rate": 3.3198927282742334e-05,
      "loss": 0.6406,
      "step": 8286
    },
    {
      "epoch": 0.7409692417739628,
      "grad_norm": 0.15585033211241914,
      "learning_rate": 3.3177379371399886e-05,
      "loss": 0.6378,
      "step": 8287
    },
    {
      "epoch": 0.7410586552217453,
      "grad_norm": 0.14387846723012063,
      "learning_rate": 3.315583706438994e-05,
      "loss": 0.6315,
      "step": 8288
    },
    {
      "epoch": 0.7411480686695279,
      "grad_norm": 0.17191501465084913,
      "learning_rate": 3.31343003635192e-05,
      "loss": 0.6478,
      "step": 8289
    },
    {
      "epoch": 0.7412374821173104,
      "grad_norm": 0.16381975632895068,
      "learning_rate": 3.311276927059393e-05,
      "loss": 0.6588,
      "step": 8290
    },
    {
      "epoch": 0.741326895565093,
      "grad_norm": 0.16244072303608476,
      "learning_rate": 3.3091243787419944e-05,
      "loss": 0.6199,
      "step": 8291
    },
    {
      "epoch": 0.7414163090128756,
      "grad_norm": 0.15645734314861456,
      "learning_rate": 3.306972391580255e-05,
      "loss": 0.6344,
      "step": 8292
    },
    {
      "epoch": 0.741505722460658,
      "grad_norm": 0.161365709151379,
      "learning_rate": 3.304820965754656e-05,
      "loss": 0.6309,
      "step": 8293
    },
    {
      "epoch": 0.7415951359084406,
      "grad_norm": 0.1589007362804074,
      "learning_rate": 3.302670101445636e-05,
      "loss": 0.6671,
      "step": 8294
    },
    {
      "epoch": 0.7416845493562232,
      "grad_norm": 0.15893879763782282,
      "learning_rate": 3.3005197988335904e-05,
      "loss": 0.6805,
      "step": 8295
    },
    {
      "epoch": 0.7417739628040058,
      "grad_norm": 0.1578914612863572,
      "learning_rate": 3.2983700580988505e-05,
      "loss": 0.6214,
      "step": 8296
    },
    {
      "epoch": 0.7418633762517882,
      "grad_norm": 0.15573278110688454,
      "learning_rate": 3.2962208794217275e-05,
      "loss": 0.6313,
      "step": 8297
    },
    {
      "epoch": 0.7419527896995708,
      "grad_norm": 0.16575008252799964,
      "learning_rate": 3.2940722629824604e-05,
      "loss": 0.6591,
      "step": 8298
    },
    {
      "epoch": 0.7420422031473534,
      "grad_norm": 0.16092140446312572,
      "learning_rate": 3.291924208961253e-05,
      "loss": 0.6343,
      "step": 8299
    },
    {
      "epoch": 0.7421316165951359,
      "grad_norm": 0.1904231823062563,
      "learning_rate": 3.289776717538262e-05,
      "loss": 0.7051,
      "step": 8300
    },
    {
      "epoch": 0.7422210300429185,
      "grad_norm": 0.1500091300039426,
      "learning_rate": 3.287629788893596e-05,
      "loss": 0.6427,
      "step": 8301
    },
    {
      "epoch": 0.742310443490701,
      "grad_norm": 0.17454085119717114,
      "learning_rate": 3.2854834232073105e-05,
      "loss": 0.675,
      "step": 8302
    },
    {
      "epoch": 0.7423998569384835,
      "grad_norm": 0.15160721009287034,
      "learning_rate": 3.283337620659421e-05,
      "loss": 0.6646,
      "step": 8303
    },
    {
      "epoch": 0.7424892703862661,
      "grad_norm": 0.17498868933690234,
      "learning_rate": 3.281192381429894e-05,
      "loss": 0.6414,
      "step": 8304
    },
    {
      "epoch": 0.7425786838340487,
      "grad_norm": 0.17083989181162584,
      "learning_rate": 3.279047705698647e-05,
      "loss": 0.6776,
      "step": 8305
    },
    {
      "epoch": 0.7426680972818311,
      "grad_norm": 0.15193804898143032,
      "learning_rate": 3.276903593645555e-05,
      "loss": 0.6741,
      "step": 8306
    },
    {
      "epoch": 0.7427575107296137,
      "grad_norm": 0.14439430828146912,
      "learning_rate": 3.2747600454504366e-05,
      "loss": 0.6271,
      "step": 8307
    },
    {
      "epoch": 0.7428469241773963,
      "grad_norm": 0.15406210305752244,
      "learning_rate": 3.2726170612930716e-05,
      "loss": 0.6173,
      "step": 8308
    },
    {
      "epoch": 0.7429363376251789,
      "grad_norm": 0.18204454078640628,
      "learning_rate": 3.270474641353192e-05,
      "loss": 0.6758,
      "step": 8309
    },
    {
      "epoch": 0.7430257510729614,
      "grad_norm": 0.14061829812224017,
      "learning_rate": 3.26833278581047e-05,
      "loss": 0.6433,
      "step": 8310
    },
    {
      "epoch": 0.7431151645207439,
      "grad_norm": 0.17212408459791825,
      "learning_rate": 3.266191494844552e-05,
      "loss": 0.676,
      "step": 8311
    },
    {
      "epoch": 0.7432045779685265,
      "grad_norm": 0.1587410061651612,
      "learning_rate": 3.264050768635022e-05,
      "loss": 0.675,
      "step": 8312
    },
    {
      "epoch": 0.743293991416309,
      "grad_norm": 0.18008932150891768,
      "learning_rate": 3.261910607361417e-05,
      "loss": 0.6851,
      "step": 8313
    },
    {
      "epoch": 0.7433834048640916,
      "grad_norm": 0.1728336579187888,
      "learning_rate": 3.259771011203232e-05,
      "loss": 0.6018,
      "step": 8314
    },
    {
      "epoch": 0.7434728183118741,
      "grad_norm": 0.16564940936197364,
      "learning_rate": 3.257631980339916e-05,
      "loss": 0.6986,
      "step": 8315
    },
    {
      "epoch": 0.7435622317596566,
      "grad_norm": 0.15812437959845682,
      "learning_rate": 3.2554935149508584e-05,
      "loss": 0.6222,
      "step": 8316
    },
    {
      "epoch": 0.7436516452074392,
      "grad_norm": 0.15679299341314054,
      "learning_rate": 3.253355615215416e-05,
      "loss": 0.6477,
      "step": 8317
    },
    {
      "epoch": 0.7437410586552218,
      "grad_norm": 0.17553984487455027,
      "learning_rate": 3.251218281312889e-05,
      "loss": 0.6745,
      "step": 8318
    },
    {
      "epoch": 0.7438304721030042,
      "grad_norm": 0.15107488927570395,
      "learning_rate": 3.249081513422534e-05,
      "loss": 0.6732,
      "step": 8319
    },
    {
      "epoch": 0.7439198855507868,
      "grad_norm": 0.15337126202436013,
      "learning_rate": 3.246945311723564e-05,
      "loss": 0.6376,
      "step": 8320
    },
    {
      "epoch": 0.7440092989985694,
      "grad_norm": 0.1878196584981343,
      "learning_rate": 3.244809676395131e-05,
      "loss": 0.6714,
      "step": 8321
    },
    {
      "epoch": 0.744098712446352,
      "grad_norm": 0.15180998317950192,
      "learning_rate": 3.2426746076163514e-05,
      "loss": 0.6352,
      "step": 8322
    },
    {
      "epoch": 0.7441881258941345,
      "grad_norm": 0.15551341511875774,
      "learning_rate": 3.240540105566293e-05,
      "loss": 0.6234,
      "step": 8323
    },
    {
      "epoch": 0.744277539341917,
      "grad_norm": 0.15294568742033945,
      "learning_rate": 3.238406170423972e-05,
      "loss": 0.6335,
      "step": 8324
    },
    {
      "epoch": 0.7443669527896996,
      "grad_norm": 0.18080019981233048,
      "learning_rate": 3.2362728023683594e-05,
      "loss": 0.6832,
      "step": 8325
    },
    {
      "epoch": 0.7444563662374821,
      "grad_norm": 0.15243632424718698,
      "learning_rate": 3.234140001578383e-05,
      "loss": 0.6224,
      "step": 8326
    },
    {
      "epoch": 0.7445457796852647,
      "grad_norm": 0.14904637406936488,
      "learning_rate": 3.23200776823291e-05,
      "loss": 0.6484,
      "step": 8327
    },
    {
      "epoch": 0.7446351931330472,
      "grad_norm": 0.1466488150705806,
      "learning_rate": 3.2298761025107706e-05,
      "loss": 0.6386,
      "step": 8328
    },
    {
      "epoch": 0.7447246065808297,
      "grad_norm": 0.16687146646689255,
      "learning_rate": 3.22774500459075e-05,
      "loss": 0.6793,
      "step": 8329
    },
    {
      "epoch": 0.7448140200286123,
      "grad_norm": 0.16129515966717212,
      "learning_rate": 3.2256144746515735e-05,
      "loss": 0.6491,
      "step": 8330
    },
    {
      "epoch": 0.7449034334763949,
      "grad_norm": 0.16582923789808418,
      "learning_rate": 3.223484512871927e-05,
      "loss": 0.6428,
      "step": 8331
    },
    {
      "epoch": 0.7449928469241774,
      "grad_norm": 0.17179187477319366,
      "learning_rate": 3.221355119430456e-05,
      "loss": 0.6286,
      "step": 8332
    },
    {
      "epoch": 0.7450822603719599,
      "grad_norm": 0.1420041599603083,
      "learning_rate": 3.219226294505743e-05,
      "loss": 0.652,
      "step": 8333
    },
    {
      "epoch": 0.7451716738197425,
      "grad_norm": 0.14746023865375008,
      "learning_rate": 3.2170980382763306e-05,
      "loss": 0.6181,
      "step": 8334
    },
    {
      "epoch": 0.745261087267525,
      "grad_norm": 0.15945014159828946,
      "learning_rate": 3.214970350920716e-05,
      "loss": 0.6565,
      "step": 8335
    },
    {
      "epoch": 0.7453505007153076,
      "grad_norm": 0.16327831668610157,
      "learning_rate": 3.212843232617343e-05,
      "loss": 0.6281,
      "step": 8336
    },
    {
      "epoch": 0.7454399141630901,
      "grad_norm": 0.15716110972295458,
      "learning_rate": 3.21071668354461e-05,
      "loss": 0.6591,
      "step": 8337
    },
    {
      "epoch": 0.7455293276108726,
      "grad_norm": 0.15090202561994087,
      "learning_rate": 3.2085907038808695e-05,
      "loss": 0.646,
      "step": 8338
    },
    {
      "epoch": 0.7456187410586552,
      "grad_norm": 0.17667207372393506,
      "learning_rate": 3.2064652938044246e-05,
      "loss": 0.6901,
      "step": 8339
    },
    {
      "epoch": 0.7457081545064378,
      "grad_norm": 0.15539981576941578,
      "learning_rate": 3.204340453493534e-05,
      "loss": 0.6945,
      "step": 8340
    },
    {
      "epoch": 0.7457975679542204,
      "grad_norm": 0.16334062941194621,
      "learning_rate": 3.2022161831264e-05,
      "loss": 0.6506,
      "step": 8341
    },
    {
      "epoch": 0.7458869814020028,
      "grad_norm": 0.16676305335164432,
      "learning_rate": 3.200092482881184e-05,
      "loss": 0.6591,
      "step": 8342
    },
    {
      "epoch": 0.7459763948497854,
      "grad_norm": 0.16632642993788527,
      "learning_rate": 3.197969352936003e-05,
      "loss": 0.6314,
      "step": 8343
    },
    {
      "epoch": 0.746065808297568,
      "grad_norm": 0.14737135898718567,
      "learning_rate": 3.1958467934689153e-05,
      "loss": 0.6646,
      "step": 8344
    },
    {
      "epoch": 0.7461552217453505,
      "grad_norm": 0.15979265499510928,
      "learning_rate": 3.193724804657936e-05,
      "loss": 0.623,
      "step": 8345
    },
    {
      "epoch": 0.746244635193133,
      "grad_norm": 0.17497296169318013,
      "learning_rate": 3.1916033866810436e-05,
      "loss": 0.7013,
      "step": 8346
    },
    {
      "epoch": 0.7463340486409156,
      "grad_norm": 0.15209757571263188,
      "learning_rate": 3.189482539716149e-05,
      "loss": 0.6163,
      "step": 8347
    },
    {
      "epoch": 0.7464234620886981,
      "grad_norm": 0.16193621018547236,
      "learning_rate": 3.1873622639411293e-05,
      "loss": 0.6594,
      "step": 8348
    },
    {
      "epoch": 0.7465128755364807,
      "grad_norm": 0.1421120507055272,
      "learning_rate": 3.185242559533812e-05,
      "loss": 0.655,
      "step": 8349
    },
    {
      "epoch": 0.7466022889842633,
      "grad_norm": 0.16581366643499681,
      "learning_rate": 3.183123426671968e-05,
      "loss": 0.362,
      "step": 8350
    },
    {
      "epoch": 0.7466917024320457,
      "grad_norm": 0.1497571159926878,
      "learning_rate": 3.181004865533329e-05,
      "loss": 0.6159,
      "step": 8351
    },
    {
      "epoch": 0.7467811158798283,
      "grad_norm": 0.17183408875846518,
      "learning_rate": 3.178886876295578e-05,
      "loss": 0.6921,
      "step": 8352
    },
    {
      "epoch": 0.7468705293276109,
      "grad_norm": 0.15108082245254834,
      "learning_rate": 3.176769459136346e-05,
      "loss": 0.6321,
      "step": 8353
    },
    {
      "epoch": 0.7469599427753935,
      "grad_norm": 0.15815246084312973,
      "learning_rate": 3.174652614233222e-05,
      "loss": 0.6626,
      "step": 8354
    },
    {
      "epoch": 0.7470493562231759,
      "grad_norm": 0.1504452310974208,
      "learning_rate": 3.172536341763738e-05,
      "loss": 0.6585,
      "step": 8355
    },
    {
      "epoch": 0.7471387696709585,
      "grad_norm": 0.16369105320710792,
      "learning_rate": 3.170420641905384e-05,
      "loss": 0.6554,
      "step": 8356
    },
    {
      "epoch": 0.7472281831187411,
      "grad_norm": 0.16534075438933474,
      "learning_rate": 3.1683055148356044e-05,
      "loss": 0.6828,
      "step": 8357
    },
    {
      "epoch": 0.7473175965665236,
      "grad_norm": 0.15280223915215663,
      "learning_rate": 3.1661909607317894e-05,
      "loss": 0.617,
      "step": 8358
    },
    {
      "epoch": 0.7474070100143062,
      "grad_norm": 0.1572681682457927,
      "learning_rate": 3.164076979771287e-05,
      "loss": 0.6524,
      "step": 8359
    },
    {
      "epoch": 0.7474964234620887,
      "grad_norm": 0.15212621989235683,
      "learning_rate": 3.161963572131393e-05,
      "loss": 0.6148,
      "step": 8360
    },
    {
      "epoch": 0.7475858369098712,
      "grad_norm": 0.1573594312043399,
      "learning_rate": 3.159850737989355e-05,
      "loss": 0.6399,
      "step": 8361
    },
    {
      "epoch": 0.7476752503576538,
      "grad_norm": 0.15907470193950124,
      "learning_rate": 3.1577384775223754e-05,
      "loss": 0.6701,
      "step": 8362
    },
    {
      "epoch": 0.7477646638054364,
      "grad_norm": 0.16531709931453561,
      "learning_rate": 3.1556267909076076e-05,
      "loss": 0.6372,
      "step": 8363
    },
    {
      "epoch": 0.7478540772532188,
      "grad_norm": 0.15867275296930017,
      "learning_rate": 3.153515678322152e-05,
      "loss": 0.6659,
      "step": 8364
    },
    {
      "epoch": 0.7479434907010014,
      "grad_norm": 0.16140893291604175,
      "learning_rate": 3.1514051399430654e-05,
      "loss": 0.6525,
      "step": 8365
    },
    {
      "epoch": 0.748032904148784,
      "grad_norm": 0.15482074669827806,
      "learning_rate": 3.149295175947365e-05,
      "loss": 0.6544,
      "step": 8366
    },
    {
      "epoch": 0.7481223175965666,
      "grad_norm": 0.15096506322185752,
      "learning_rate": 3.1471857865120016e-05,
      "loss": 0.5995,
      "step": 8367
    },
    {
      "epoch": 0.748211731044349,
      "grad_norm": 0.16360464709110564,
      "learning_rate": 3.145076971813891e-05,
      "loss": 0.6258,
      "step": 8368
    },
    {
      "epoch": 0.7483011444921316,
      "grad_norm": 0.1570692118385133,
      "learning_rate": 3.1429687320298976e-05,
      "loss": 0.6353,
      "step": 8369
    },
    {
      "epoch": 0.7483905579399142,
      "grad_norm": 0.1603967392515253,
      "learning_rate": 3.1408610673368333e-05,
      "loss": 0.6684,
      "step": 8370
    },
    {
      "epoch": 0.7484799713876967,
      "grad_norm": 0.1509604249923729,
      "learning_rate": 3.138753977911467e-05,
      "loss": 0.6486,
      "step": 8371
    },
    {
      "epoch": 0.7485693848354793,
      "grad_norm": 0.1370169327664703,
      "learning_rate": 3.1366474639305185e-05,
      "loss": 0.6304,
      "step": 8372
    },
    {
      "epoch": 0.7486587982832618,
      "grad_norm": 0.15156881356801186,
      "learning_rate": 3.134541525570659e-05,
      "loss": 0.6294,
      "step": 8373
    },
    {
      "epoch": 0.7487482117310443,
      "grad_norm": 0.17692949472851469,
      "learning_rate": 3.132436163008512e-05,
      "loss": 0.6366,
      "step": 8374
    },
    {
      "epoch": 0.7488376251788269,
      "grad_norm": 0.14084067788179974,
      "learning_rate": 3.1303313764206486e-05,
      "loss": 0.6727,
      "step": 8375
    },
    {
      "epoch": 0.7489270386266095,
      "grad_norm": 0.1665198888632795,
      "learning_rate": 3.1282271659835946e-05,
      "loss": 0.6773,
      "step": 8376
    },
    {
      "epoch": 0.7490164520743919,
      "grad_norm": 0.16098737163310672,
      "learning_rate": 3.1261235318738336e-05,
      "loss": 0.6446,
      "step": 8377
    },
    {
      "epoch": 0.7491058655221745,
      "grad_norm": 0.1504033198750931,
      "learning_rate": 3.124020474267787e-05,
      "loss": 0.6741,
      "step": 8378
    },
    {
      "epoch": 0.7491952789699571,
      "grad_norm": 0.1665697266687884,
      "learning_rate": 3.1219179933418365e-05,
      "loss": 0.6571,
      "step": 8379
    },
    {
      "epoch": 0.7492846924177397,
      "grad_norm": 0.14080977009434245,
      "learning_rate": 3.1198160892723225e-05,
      "loss": 0.6132,
      "step": 8380
    },
    {
      "epoch": 0.7493741058655222,
      "grad_norm": 0.14768387909647743,
      "learning_rate": 3.117714762235522e-05,
      "loss": 0.6215,
      "step": 8381
    },
    {
      "epoch": 0.7494635193133047,
      "grad_norm": 0.1705820071339456,
      "learning_rate": 3.1156140124076714e-05,
      "loss": 0.6752,
      "step": 8382
    },
    {
      "epoch": 0.7495529327610873,
      "grad_norm": 0.14344819834389977,
      "learning_rate": 3.113513839964963e-05,
      "loss": 0.6551,
      "step": 8383
    },
    {
      "epoch": 0.7496423462088698,
      "grad_norm": 0.1531156914075605,
      "learning_rate": 3.1114142450835294e-05,
      "loss": 0.63,
      "step": 8384
    },
    {
      "epoch": 0.7497317596566524,
      "grad_norm": 0.16380015527214498,
      "learning_rate": 3.1093152279394635e-05,
      "loss": 0.6684,
      "step": 8385
    },
    {
      "epoch": 0.7498211731044349,
      "grad_norm": 0.15314104506600054,
      "learning_rate": 3.1072167887088065e-05,
      "loss": 0.6497,
      "step": 8386
    },
    {
      "epoch": 0.7499105865522174,
      "grad_norm": 0.1486436158184625,
      "learning_rate": 3.105118927567554e-05,
      "loss": 0.6189,
      "step": 8387
    },
    {
      "epoch": 0.75,
      "grad_norm": 0.16759353146485534,
      "learning_rate": 3.103021644691651e-05,
      "loss": 0.6387,
      "step": 8388
    },
    {
      "epoch": 0.7500894134477826,
      "grad_norm": 0.1656966056010548,
      "learning_rate": 3.1009249402569954e-05,
      "loss": 0.6887,
      "step": 8389
    },
    {
      "epoch": 0.7501788268955651,
      "grad_norm": 0.16244536070763957,
      "learning_rate": 3.098828814439429e-05,
      "loss": 0.6508,
      "step": 8390
    },
    {
      "epoch": 0.7502682403433476,
      "grad_norm": 0.17237690439405065,
      "learning_rate": 3.096733267414758e-05,
      "loss": 0.6324,
      "step": 8391
    },
    {
      "epoch": 0.7503576537911302,
      "grad_norm": 0.15428884527616038,
      "learning_rate": 3.094638299358732e-05,
      "loss": 0.67,
      "step": 8392
    },
    {
      "epoch": 0.7504470672389127,
      "grad_norm": 0.16918751279463537,
      "learning_rate": 3.0925439104470456e-05,
      "loss": 0.6781,
      "step": 8393
    },
    {
      "epoch": 0.7505364806866953,
      "grad_norm": 0.16454647635980096,
      "learning_rate": 3.090450100855367e-05,
      "loss": 0.6753,
      "step": 8394
    },
    {
      "epoch": 0.7506258941344778,
      "grad_norm": 0.14380293408622455,
      "learning_rate": 3.08835687075929e-05,
      "loss": 0.6156,
      "step": 8395
    },
    {
      "epoch": 0.7507153075822603,
      "grad_norm": 0.16121695504056746,
      "learning_rate": 3.086264220334375e-05,
      "loss": 0.6368,
      "step": 8396
    },
    {
      "epoch": 0.7508047210300429,
      "grad_norm": 0.16216670843718445,
      "learning_rate": 3.084172149756134e-05,
      "loss": 0.6082,
      "step": 8397
    },
    {
      "epoch": 0.7508941344778255,
      "grad_norm": 0.143494308957586,
      "learning_rate": 3.082080659200018e-05,
      "loss": 0.6432,
      "step": 8398
    },
    {
      "epoch": 0.7509835479256081,
      "grad_norm": 0.1621937012053476,
      "learning_rate": 3.079989748841444e-05,
      "loss": 0.63,
      "step": 8399
    },
    {
      "epoch": 0.7510729613733905,
      "grad_norm": 0.14859415268667336,
      "learning_rate": 3.077899418855772e-05,
      "loss": 0.6226,
      "step": 8400
    },
    {
      "epoch": 0.7511623748211731,
      "grad_norm": 0.15248524195675672,
      "learning_rate": 3.075809669418316e-05,
      "loss": 0.6006,
      "step": 8401
    },
    {
      "epoch": 0.7512517882689557,
      "grad_norm": 0.17275905213893206,
      "learning_rate": 3.07372050070434e-05,
      "loss": 0.6647,
      "step": 8402
    },
    {
      "epoch": 0.7513412017167382,
      "grad_norm": 0.18147340558967603,
      "learning_rate": 3.071631912889065e-05,
      "loss": 0.6269,
      "step": 8403
    },
    {
      "epoch": 0.7514306151645207,
      "grad_norm": 0.16595002876197323,
      "learning_rate": 3.0695439061476504e-05,
      "loss": 0.6577,
      "step": 8404
    },
    {
      "epoch": 0.7515200286123033,
      "grad_norm": 0.14882486801764386,
      "learning_rate": 3.0674564806552187e-05,
      "loss": 0.6093,
      "step": 8405
    },
    {
      "epoch": 0.7516094420600858,
      "grad_norm": 0.15227748705272623,
      "learning_rate": 3.0653696365868424e-05,
      "loss": 0.6287,
      "step": 8406
    },
    {
      "epoch": 0.7516988555078684,
      "grad_norm": 0.14314753337557215,
      "learning_rate": 3.0632833741175336e-05,
      "loss": 0.6505,
      "step": 8407
    },
    {
      "epoch": 0.751788268955651,
      "grad_norm": 0.17054003538908916,
      "learning_rate": 3.061197693422278e-05,
      "loss": 0.6558,
      "step": 8408
    },
    {
      "epoch": 0.7518776824034334,
      "grad_norm": 0.18610687312336605,
      "learning_rate": 3.059112594675987e-05,
      "loss": 0.6919,
      "step": 8409
    },
    {
      "epoch": 0.751967095851216,
      "grad_norm": 0.164588767411992,
      "learning_rate": 3.057028078053541e-05,
      "loss": 0.6973,
      "step": 8410
    },
    {
      "epoch": 0.7520565092989986,
      "grad_norm": 0.1603562107418169,
      "learning_rate": 3.054944143729769e-05,
      "loss": 0.6386,
      "step": 8411
    },
    {
      "epoch": 0.7521459227467812,
      "grad_norm": 0.15641655025225193,
      "learning_rate": 3.0528607918794395e-05,
      "loss": 0.6743,
      "step": 8412
    },
    {
      "epoch": 0.7522353361945636,
      "grad_norm": 0.18626671904525174,
      "learning_rate": 3.0507780226772863e-05,
      "loss": 0.3674,
      "step": 8413
    },
    {
      "epoch": 0.7523247496423462,
      "grad_norm": 0.1678065392477169,
      "learning_rate": 3.048695836297988e-05,
      "loss": 0.6153,
      "step": 8414
    },
    {
      "epoch": 0.7524141630901288,
      "grad_norm": 0.18007094982746363,
      "learning_rate": 3.046614232916174e-05,
      "loss": 0.6473,
      "step": 8415
    },
    {
      "epoch": 0.7525035765379113,
      "grad_norm": 0.1626563020726614,
      "learning_rate": 3.0445332127064275e-05,
      "loss": 0.6541,
      "step": 8416
    },
    {
      "epoch": 0.7525929899856938,
      "grad_norm": 0.1672116083263857,
      "learning_rate": 3.042452775843284e-05,
      "loss": 0.65,
      "step": 8417
    },
    {
      "epoch": 0.7526824034334764,
      "grad_norm": 0.15958718670985214,
      "learning_rate": 3.040372922501219e-05,
      "loss": 0.6464,
      "step": 8418
    },
    {
      "epoch": 0.7527718168812589,
      "grad_norm": 0.1425033975489206,
      "learning_rate": 3.0382936528546735e-05,
      "loss": 0.6389,
      "step": 8419
    },
    {
      "epoch": 0.7528612303290415,
      "grad_norm": 0.16962787696018933,
      "learning_rate": 3.036214967078034e-05,
      "loss": 0.6452,
      "step": 8420
    },
    {
      "epoch": 0.7529506437768241,
      "grad_norm": 0.16026455100009218,
      "learning_rate": 3.0341368653456283e-05,
      "loss": 0.5891,
      "step": 8421
    },
    {
      "epoch": 0.7530400572246065,
      "grad_norm": 0.16679792644069755,
      "learning_rate": 3.032059347831755e-05,
      "loss": 0.6508,
      "step": 8422
    },
    {
      "epoch": 0.7531294706723891,
      "grad_norm": 0.16946786535458178,
      "learning_rate": 3.0299824147106516e-05,
      "loss": 0.6513,
      "step": 8423
    },
    {
      "epoch": 0.7532188841201717,
      "grad_norm": 0.13659394451095405,
      "learning_rate": 3.0279060661565028e-05,
      "loss": 0.6174,
      "step": 8424
    },
    {
      "epoch": 0.7533082975679543,
      "grad_norm": 0.13897613727107308,
      "learning_rate": 3.025830302343452e-05,
      "loss": 0.6265,
      "step": 8425
    },
    {
      "epoch": 0.7533977110157367,
      "grad_norm": 0.16620934363153192,
      "learning_rate": 3.023755123445594e-05,
      "loss": 0.6629,
      "step": 8426
    },
    {
      "epoch": 0.7534871244635193,
      "grad_norm": 0.1843833952454965,
      "learning_rate": 3.0216805296369654e-05,
      "loss": 0.4097,
      "step": 8427
    },
    {
      "epoch": 0.7535765379113019,
      "grad_norm": 0.17133550208803686,
      "learning_rate": 3.0196065210915637e-05,
      "loss": 0.681,
      "step": 8428
    },
    {
      "epoch": 0.7536659513590844,
      "grad_norm": 0.1661221805260811,
      "learning_rate": 3.017533097983333e-05,
      "loss": 0.6431,
      "step": 8429
    },
    {
      "epoch": 0.753755364806867,
      "grad_norm": 0.1831271995954749,
      "learning_rate": 3.015460260486168e-05,
      "loss": 0.7011,
      "step": 8430
    },
    {
      "epoch": 0.7538447782546495,
      "grad_norm": 0.1486023025760698,
      "learning_rate": 3.0133880087739184e-05,
      "loss": 0.6543,
      "step": 8431
    },
    {
      "epoch": 0.753934191702432,
      "grad_norm": 0.1538065346024975,
      "learning_rate": 3.0113163430203772e-05,
      "loss": 0.6516,
      "step": 8432
    },
    {
      "epoch": 0.7540236051502146,
      "grad_norm": 0.1478531178071845,
      "learning_rate": 3.009245263399293e-05,
      "loss": 0.635,
      "step": 8433
    },
    {
      "epoch": 0.7541130185979972,
      "grad_norm": 0.16717285422878214,
      "learning_rate": 3.0071747700843667e-05,
      "loss": 0.6502,
      "step": 8434
    },
    {
      "epoch": 0.7542024320457796,
      "grad_norm": 0.1535856946851355,
      "learning_rate": 3.0051048632492463e-05,
      "loss": 0.6435,
      "step": 8435
    },
    {
      "epoch": 0.7542918454935622,
      "grad_norm": 0.16788344971835067,
      "learning_rate": 3.003035543067534e-05,
      "loss": 0.689,
      "step": 8436
    },
    {
      "epoch": 0.7543812589413448,
      "grad_norm": 0.16158794495167453,
      "learning_rate": 3.000966809712783e-05,
      "loss": 0.6451,
      "step": 8437
    },
    {
      "epoch": 0.7544706723891274,
      "grad_norm": 0.17579202374664482,
      "learning_rate": 2.9988986633584902e-05,
      "loss": 0.6696,
      "step": 8438
    },
    {
      "epoch": 0.7545600858369099,
      "grad_norm": 0.14149143592471508,
      "learning_rate": 2.9968311041781116e-05,
      "loss": 0.6168,
      "step": 8439
    },
    {
      "epoch": 0.7546494992846924,
      "grad_norm": 0.15034637605569906,
      "learning_rate": 2.9947641323450535e-05,
      "loss": 0.6477,
      "step": 8440
    },
    {
      "epoch": 0.754738912732475,
      "grad_norm": 0.15493979718556494,
      "learning_rate": 2.992697748032661e-05,
      "loss": 0.6159,
      "step": 8441
    },
    {
      "epoch": 0.7548283261802575,
      "grad_norm": 0.14467423085883413,
      "learning_rate": 2.990631951414252e-05,
      "loss": 0.6198,
      "step": 8442
    },
    {
      "epoch": 0.7549177396280401,
      "grad_norm": 0.16956660242653712,
      "learning_rate": 2.9885667426630737e-05,
      "loss": 0.6687,
      "step": 8443
    },
    {
      "epoch": 0.7550071530758226,
      "grad_norm": 0.14779761547301998,
      "learning_rate": 2.9865021219523337e-05,
      "loss": 0.6559,
      "step": 8444
    },
    {
      "epoch": 0.7550965665236051,
      "grad_norm": 0.18002667592695556,
      "learning_rate": 2.9844380894551916e-05,
      "loss": 0.6847,
      "step": 8445
    },
    {
      "epoch": 0.7551859799713877,
      "grad_norm": 0.1604715285434875,
      "learning_rate": 2.9823746453447565e-05,
      "loss": 0.6336,
      "step": 8446
    },
    {
      "epoch": 0.7552753934191703,
      "grad_norm": 0.16888349436830913,
      "learning_rate": 2.9803117897940826e-05,
      "loss": 0.6671,
      "step": 8447
    },
    {
      "epoch": 0.7553648068669528,
      "grad_norm": 0.14211347054773768,
      "learning_rate": 2.9782495229761808e-05,
      "loss": 0.6482,
      "step": 8448
    },
    {
      "epoch": 0.7554542203147353,
      "grad_norm": 0.16380229224279913,
      "learning_rate": 2.9761878450640112e-05,
      "loss": 0.6285,
      "step": 8449
    },
    {
      "epoch": 0.7555436337625179,
      "grad_norm": 0.1793659584707665,
      "learning_rate": 2.9741267562304854e-05,
      "loss": 0.6655,
      "step": 8450
    },
    {
      "epoch": 0.7556330472103004,
      "grad_norm": 0.15378972188725012,
      "learning_rate": 2.972066256648465e-05,
      "loss": 0.6306,
      "step": 8451
    },
    {
      "epoch": 0.755722460658083,
      "grad_norm": 0.1709346831122723,
      "learning_rate": 2.9700063464907578e-05,
      "loss": 0.6567,
      "step": 8452
    },
    {
      "epoch": 0.7558118741058655,
      "grad_norm": 0.14663975187506745,
      "learning_rate": 2.967947025930128e-05,
      "loss": 0.6074,
      "step": 8453
    },
    {
      "epoch": 0.755901287553648,
      "grad_norm": 0.1643234379928507,
      "learning_rate": 2.9658882951392918e-05,
      "loss": 0.6344,
      "step": 8454
    },
    {
      "epoch": 0.7559907010014306,
      "grad_norm": 0.16991618126014038,
      "learning_rate": 2.963830154290903e-05,
      "loss": 0.6626,
      "step": 8455
    },
    {
      "epoch": 0.7560801144492132,
      "grad_norm": 0.15469203973204834,
      "learning_rate": 2.9617726035575855e-05,
      "loss": 0.6667,
      "step": 8456
    },
    {
      "epoch": 0.7561695278969958,
      "grad_norm": 0.14975006454419973,
      "learning_rate": 2.9597156431119023e-05,
      "loss": 0.6258,
      "step": 8457
    },
    {
      "epoch": 0.7562589413447782,
      "grad_norm": 0.16884192058798678,
      "learning_rate": 2.957659273126362e-05,
      "loss": 0.6401,
      "step": 8458
    },
    {
      "epoch": 0.7563483547925608,
      "grad_norm": 0.15570609260837906,
      "learning_rate": 2.9556034937734332e-05,
      "loss": 0.64,
      "step": 8459
    },
    {
      "epoch": 0.7564377682403434,
      "grad_norm": 0.15533868613953844,
      "learning_rate": 2.9535483052255365e-05,
      "loss": 0.6316,
      "step": 8460
    },
    {
      "epoch": 0.7565271816881259,
      "grad_norm": 0.17186166487722437,
      "learning_rate": 2.9514937076550286e-05,
      "loss": 0.6302,
      "step": 8461
    },
    {
      "epoch": 0.7566165951359084,
      "grad_norm": 0.16391709014754,
      "learning_rate": 2.9494397012342322e-05,
      "loss": 0.6413,
      "step": 8462
    },
    {
      "epoch": 0.756706008583691,
      "grad_norm": 0.15525209452757807,
      "learning_rate": 2.9473862861354128e-05,
      "loss": 0.6529,
      "step": 8463
    },
    {
      "epoch": 0.7567954220314735,
      "grad_norm": 0.14757508678268869,
      "learning_rate": 2.945333462530788e-05,
      "loss": 0.634,
      "step": 8464
    },
    {
      "epoch": 0.7568848354792561,
      "grad_norm": 0.17838090873045662,
      "learning_rate": 2.9432812305925295e-05,
      "loss": 0.7095,
      "step": 8465
    },
    {
      "epoch": 0.7569742489270386,
      "grad_norm": 0.16122270023917257,
      "learning_rate": 2.941229590492748e-05,
      "loss": 0.655,
      "step": 8466
    },
    {
      "epoch": 0.7570636623748211,
      "grad_norm": 0.18192086276243397,
      "learning_rate": 2.9391785424035167e-05,
      "loss": 0.6615,
      "step": 8467
    },
    {
      "epoch": 0.7571530758226037,
      "grad_norm": 0.15783698555764575,
      "learning_rate": 2.9371280864968565e-05,
      "loss": 0.6107,
      "step": 8468
    },
    {
      "epoch": 0.7572424892703863,
      "grad_norm": 0.15193481068142203,
      "learning_rate": 2.935078222944727e-05,
      "loss": 0.6299,
      "step": 8469
    },
    {
      "epoch": 0.7573319027181689,
      "grad_norm": 0.16443086465480108,
      "learning_rate": 2.933028951919058e-05,
      "loss": 0.6304,
      "step": 8470
    },
    {
      "epoch": 0.7574213161659513,
      "grad_norm": 0.1688771874462315,
      "learning_rate": 2.93098027359172e-05,
      "loss": 0.6579,
      "step": 8471
    },
    {
      "epoch": 0.7575107296137339,
      "grad_norm": 0.16218381930316123,
      "learning_rate": 2.9289321881345254e-05,
      "loss": 0.6082,
      "step": 8472
    },
    {
      "epoch": 0.7576001430615165,
      "grad_norm": 0.16000756550513417,
      "learning_rate": 2.9268846957192485e-05,
      "loss": 0.6571,
      "step": 8473
    },
    {
      "epoch": 0.757689556509299,
      "grad_norm": 0.13939973082464072,
      "learning_rate": 2.9248377965176134e-05,
      "loss": 0.616,
      "step": 8474
    },
    {
      "epoch": 0.7577789699570815,
      "grad_norm": 0.1763403766656239,
      "learning_rate": 2.9227914907012845e-05,
      "loss": 0.6229,
      "step": 8475
    },
    {
      "epoch": 0.7578683834048641,
      "grad_norm": 0.14659972438521385,
      "learning_rate": 2.9207457784418835e-05,
      "loss": 0.5787,
      "step": 8476
    },
    {
      "epoch": 0.7579577968526466,
      "grad_norm": 0.15041343660540518,
      "learning_rate": 2.91870065991099e-05,
      "loss": 0.6304,
      "step": 8477
    },
    {
      "epoch": 0.7580472103004292,
      "grad_norm": 0.16627941256675974,
      "learning_rate": 2.9166561352801182e-05,
      "loss": 0.6454,
      "step": 8478
    },
    {
      "epoch": 0.7581366237482118,
      "grad_norm": 0.13826174060331894,
      "learning_rate": 2.91461220472074e-05,
      "loss": 0.6175,
      "step": 8479
    },
    {
      "epoch": 0.7582260371959942,
      "grad_norm": 0.1625453241266907,
      "learning_rate": 2.912568868404284e-05,
      "loss": 0.6588,
      "step": 8480
    },
    {
      "epoch": 0.7583154506437768,
      "grad_norm": 0.1528138090375483,
      "learning_rate": 2.9105261265021133e-05,
      "loss": 0.6728,
      "step": 8481
    },
    {
      "epoch": 0.7584048640915594,
      "grad_norm": 0.1590157071340275,
      "learning_rate": 2.9084839791855544e-05,
      "loss": 0.6661,
      "step": 8482
    },
    {
      "epoch": 0.758494277539342,
      "grad_norm": 0.18270002747824837,
      "learning_rate": 2.9064424266258805e-05,
      "loss": 0.6555,
      "step": 8483
    },
    {
      "epoch": 0.7585836909871244,
      "grad_norm": 0.1727318842587024,
      "learning_rate": 2.9044014689943132e-05,
      "loss": 0.6661,
      "step": 8484
    },
    {
      "epoch": 0.758673104434907,
      "grad_norm": 0.17082907894532184,
      "learning_rate": 2.902361106462028e-05,
      "loss": 0.6639,
      "step": 8485
    },
    {
      "epoch": 0.7587625178826896,
      "grad_norm": 0.14786858552587318,
      "learning_rate": 2.9003213392001426e-05,
      "loss": 0.6029,
      "step": 8486
    },
    {
      "epoch": 0.7588519313304721,
      "grad_norm": 0.1586369434250639,
      "learning_rate": 2.8982821673797322e-05,
      "loss": 0.638,
      "step": 8487
    },
    {
      "epoch": 0.7589413447782547,
      "grad_norm": 0.1635703064885046,
      "learning_rate": 2.8962435911718222e-05,
      "loss": 0.6466,
      "step": 8488
    },
    {
      "epoch": 0.7590307582260372,
      "grad_norm": 0.15529626957873482,
      "learning_rate": 2.8942056107473802e-05,
      "loss": 0.6646,
      "step": 8489
    },
    {
      "epoch": 0.7591201716738197,
      "grad_norm": 0.14607294556064065,
      "learning_rate": 2.89216822627733e-05,
      "loss": 0.6329,
      "step": 8490
    },
    {
      "epoch": 0.7592095851216023,
      "grad_norm": 0.16875984114367135,
      "learning_rate": 2.8901314379325517e-05,
      "loss": 0.6413,
      "step": 8491
    },
    {
      "epoch": 0.7592989985693849,
      "grad_norm": 0.17254578962204575,
      "learning_rate": 2.8880952458838593e-05,
      "loss": 0.6587,
      "step": 8492
    },
    {
      "epoch": 0.7593884120171673,
      "grad_norm": 0.14814448743224262,
      "learning_rate": 2.886059650302031e-05,
      "loss": 0.6468,
      "step": 8493
    },
    {
      "epoch": 0.7594778254649499,
      "grad_norm": 0.1535484314746067,
      "learning_rate": 2.8840246513577907e-05,
      "loss": 0.6329,
      "step": 8494
    },
    {
      "epoch": 0.7595672389127325,
      "grad_norm": 0.17427595464377701,
      "learning_rate": 2.8819902492218066e-05,
      "loss": 0.6823,
      "step": 8495
    },
    {
      "epoch": 0.759656652360515,
      "grad_norm": 0.14639964229462815,
      "learning_rate": 2.879956444064703e-05,
      "loss": 0.6304,
      "step": 8496
    },
    {
      "epoch": 0.7597460658082976,
      "grad_norm": 0.15858131597164002,
      "learning_rate": 2.877923236057054e-05,
      "loss": 0.5953,
      "step": 8497
    },
    {
      "epoch": 0.7598354792560801,
      "grad_norm": 0.1642744613410193,
      "learning_rate": 2.8758906253693818e-05,
      "loss": 0.6399,
      "step": 8498
    },
    {
      "epoch": 0.7599248927038627,
      "grad_norm": 0.18483352571011047,
      "learning_rate": 2.8738586121721634e-05,
      "loss": 0.3568,
      "step": 8499
    },
    {
      "epoch": 0.7600143061516452,
      "grad_norm": 0.1500866434649398,
      "learning_rate": 2.8718271966358124e-05,
      "loss": 0.6067,
      "step": 8500
    },
    {
      "epoch": 0.7601037195994278,
      "grad_norm": 0.1521055626277793,
      "learning_rate": 2.869796378930706e-05,
      "loss": 0.6646,
      "step": 8501
    },
    {
      "epoch": 0.7601931330472103,
      "grad_norm": 0.1594244706769378,
      "learning_rate": 2.8677661592271666e-05,
      "loss": 0.6431,
      "step": 8502
    },
    {
      "epoch": 0.7602825464949928,
      "grad_norm": 0.17355879449589376,
      "learning_rate": 2.8657365376954692e-05,
      "loss": 0.6347,
      "step": 8503
    },
    {
      "epoch": 0.7603719599427754,
      "grad_norm": 0.185605460522497,
      "learning_rate": 2.8637075145058257e-05,
      "loss": 0.681,
      "step": 8504
    },
    {
      "epoch": 0.760461373390558,
      "grad_norm": 0.17141055225890026,
      "learning_rate": 2.8616790898284207e-05,
      "loss": 0.6723,
      "step": 8505
    },
    {
      "epoch": 0.7605507868383404,
      "grad_norm": 0.16028863810552743,
      "learning_rate": 2.859651263833366e-05,
      "loss": 0.6566,
      "step": 8506
    },
    {
      "epoch": 0.760640200286123,
      "grad_norm": 0.15597178423561336,
      "learning_rate": 2.857624036690737e-05,
      "loss": 0.6611,
      "step": 8507
    },
    {
      "epoch": 0.7607296137339056,
      "grad_norm": 0.1574200110742235,
      "learning_rate": 2.8555974085705573e-05,
      "loss": 0.6374,
      "step": 8508
    },
    {
      "epoch": 0.7608190271816881,
      "grad_norm": 0.14953667753338543,
      "learning_rate": 2.853571379642792e-05,
      "loss": 0.6572,
      "step": 8509
    },
    {
      "epoch": 0.7609084406294707,
      "grad_norm": 0.15708706777702672,
      "learning_rate": 2.8515459500773633e-05,
      "loss": 0.6302,
      "step": 8510
    },
    {
      "epoch": 0.7609978540772532,
      "grad_norm": 0.13815261757661385,
      "learning_rate": 2.849521120044144e-05,
      "loss": 0.6137,
      "step": 8511
    },
    {
      "epoch": 0.7610872675250357,
      "grad_norm": 0.1649657592351612,
      "learning_rate": 2.847496889712952e-05,
      "loss": 0.6514,
      "step": 8512
    },
    {
      "epoch": 0.7611766809728183,
      "grad_norm": 0.15402384915681916,
      "learning_rate": 2.845473259253557e-05,
      "loss": 0.6237,
      "step": 8513
    },
    {
      "epoch": 0.7612660944206009,
      "grad_norm": 0.17378308847853446,
      "learning_rate": 2.8434502288356835e-05,
      "loss": 0.676,
      "step": 8514
    },
    {
      "epoch": 0.7613555078683834,
      "grad_norm": 0.16310455422312292,
      "learning_rate": 2.8414277986289928e-05,
      "loss": 0.6549,
      "step": 8515
    },
    {
      "epoch": 0.7614449213161659,
      "grad_norm": 0.18539576171356878,
      "learning_rate": 2.839405968803108e-05,
      "loss": 0.637,
      "step": 8516
    },
    {
      "epoch": 0.7615343347639485,
      "grad_norm": 0.16149685090337249,
      "learning_rate": 2.8373847395275966e-05,
      "loss": 0.6332,
      "step": 8517
    },
    {
      "epoch": 0.7616237482117311,
      "grad_norm": 0.16780559099249712,
      "learning_rate": 2.8353641109719764e-05,
      "loss": 0.6528,
      "step": 8518
    },
    {
      "epoch": 0.7617131616595136,
      "grad_norm": 0.1466220760324847,
      "learning_rate": 2.833344083305719e-05,
      "loss": 0.6677,
      "step": 8519
    },
    {
      "epoch": 0.7618025751072961,
      "grad_norm": 0.16057587541220436,
      "learning_rate": 2.8313246566982345e-05,
      "loss": 0.6776,
      "step": 8520
    },
    {
      "epoch": 0.7618919885550787,
      "grad_norm": 0.16922559576235283,
      "learning_rate": 2.8293058313188935e-05,
      "loss": 0.6024,
      "step": 8521
    },
    {
      "epoch": 0.7619814020028612,
      "grad_norm": 0.17375581806121967,
      "learning_rate": 2.827287607337016e-05,
      "loss": 0.3553,
      "step": 8522
    },
    {
      "epoch": 0.7620708154506438,
      "grad_norm": 0.16932977284574555,
      "learning_rate": 2.8252699849218613e-05,
      "loss": 0.6052,
      "step": 8523
    },
    {
      "epoch": 0.7621602288984263,
      "grad_norm": 0.16313788136961088,
      "learning_rate": 2.823252964242644e-05,
      "loss": 0.6322,
      "step": 8524
    },
    {
      "epoch": 0.7622496423462088,
      "grad_norm": 0.1703058967768774,
      "learning_rate": 2.8212365454685408e-05,
      "loss": 0.6521,
      "step": 8525
    },
    {
      "epoch": 0.7623390557939914,
      "grad_norm": 0.17228020891102766,
      "learning_rate": 2.8192207287686555e-05,
      "loss": 0.6553,
      "step": 8526
    },
    {
      "epoch": 0.762428469241774,
      "grad_norm": 0.15689089161290473,
      "learning_rate": 2.8172055143120546e-05,
      "loss": 0.6289,
      "step": 8527
    },
    {
      "epoch": 0.7625178826895566,
      "grad_norm": 0.15787559937378132,
      "learning_rate": 2.815190902267757e-05,
      "loss": 0.6127,
      "step": 8528
    },
    {
      "epoch": 0.762607296137339,
      "grad_norm": 0.1492256318849056,
      "learning_rate": 2.8131768928047176e-05,
      "loss": 0.6399,
      "step": 8529
    },
    {
      "epoch": 0.7626967095851216,
      "grad_norm": 0.15783156892685563,
      "learning_rate": 2.8111634860918524e-05,
      "loss": 0.6351,
      "step": 8530
    },
    {
      "epoch": 0.7627861230329042,
      "grad_norm": 0.1622058135622249,
      "learning_rate": 2.809150682298024e-05,
      "loss": 0.6396,
      "step": 8531
    },
    {
      "epoch": 0.7628755364806867,
      "grad_norm": 0.1542180736233491,
      "learning_rate": 2.807138481592043e-05,
      "loss": 0.6078,
      "step": 8532
    },
    {
      "epoch": 0.7629649499284692,
      "grad_norm": 0.1768801143113586,
      "learning_rate": 2.8051268841426713e-05,
      "loss": 0.6664,
      "step": 8533
    },
    {
      "epoch": 0.7630543633762518,
      "grad_norm": 0.15641772412829988,
      "learning_rate": 2.803115890118623e-05,
      "loss": 0.6444,
      "step": 8534
    },
    {
      "epoch": 0.7631437768240343,
      "grad_norm": 0.15618643889669673,
      "learning_rate": 2.8011054996885477e-05,
      "loss": 0.6415,
      "step": 8535
    },
    {
      "epoch": 0.7632331902718169,
      "grad_norm": 0.16522030782477648,
      "learning_rate": 2.7990957130210617e-05,
      "loss": 0.6256,
      "step": 8536
    },
    {
      "epoch": 0.7633226037195995,
      "grad_norm": 0.14637528392925994,
      "learning_rate": 2.797086530284725e-05,
      "loss": 0.654,
      "step": 8537
    },
    {
      "epoch": 0.7634120171673819,
      "grad_norm": 0.16392138539006143,
      "learning_rate": 2.795077951648035e-05,
      "loss": 0.6321,
      "step": 8538
    },
    {
      "epoch": 0.7635014306151645,
      "grad_norm": 0.15658651718263578,
      "learning_rate": 2.7930699772794623e-05,
      "loss": 0.6565,
      "step": 8539
    },
    {
      "epoch": 0.7635908440629471,
      "grad_norm": 0.16353534552834276,
      "learning_rate": 2.7910626073474045e-05,
      "loss": 0.662,
      "step": 8540
    },
    {
      "epoch": 0.7636802575107297,
      "grad_norm": 0.16561121445310492,
      "learning_rate": 2.7890558420202185e-05,
      "loss": 0.6527,
      "step": 8541
    },
    {
      "epoch": 0.7637696709585121,
      "grad_norm": 0.15744690311475992,
      "learning_rate": 2.787049681466214e-05,
      "loss": 0.6426,
      "step": 8542
    },
    {
      "epoch": 0.7638590844062947,
      "grad_norm": 0.14774668771351918,
      "learning_rate": 2.7850441258536386e-05,
      "loss": 0.6072,
      "step": 8543
    },
    {
      "epoch": 0.7639484978540773,
      "grad_norm": 0.1754267044420749,
      "learning_rate": 2.783039175350699e-05,
      "loss": 0.6809,
      "step": 8544
    },
    {
      "epoch": 0.7640379113018598,
      "grad_norm": 0.14408840896607597,
      "learning_rate": 2.7810348301255486e-05,
      "loss": 0.6171,
      "step": 8545
    },
    {
      "epoch": 0.7641273247496424,
      "grad_norm": 0.1491264025854974,
      "learning_rate": 2.779031090346287e-05,
      "loss": 0.6252,
      "step": 8546
    },
    {
      "epoch": 0.7642167381974249,
      "grad_norm": 0.17780134484675872,
      "learning_rate": 2.7770279561809686e-05,
      "loss": 0.6434,
      "step": 8547
    },
    {
      "epoch": 0.7643061516452074,
      "grad_norm": 0.1697591285452829,
      "learning_rate": 2.775025427797594e-05,
      "loss": 0.6323,
      "step": 8548
    },
    {
      "epoch": 0.76439556509299,
      "grad_norm": 0.1528855857320281,
      "learning_rate": 2.7730235053641096e-05,
      "loss": 0.608,
      "step": 8549
    },
    {
      "epoch": 0.7644849785407726,
      "grad_norm": 0.18547130307277218,
      "learning_rate": 2.7710221890484157e-05,
      "loss": 0.6652,
      "step": 8550
    },
    {
      "epoch": 0.764574391988555,
      "grad_norm": 0.1513027115056403,
      "learning_rate": 2.7690214790183622e-05,
      "loss": 0.5945,
      "step": 8551
    },
    {
      "epoch": 0.7646638054363376,
      "grad_norm": 0.15634722092954714,
      "learning_rate": 2.7670213754417396e-05,
      "loss": 0.6567,
      "step": 8552
    },
    {
      "epoch": 0.7647532188841202,
      "grad_norm": 0.15542479547126778,
      "learning_rate": 2.7650218784863047e-05,
      "loss": 0.6487,
      "step": 8553
    },
    {
      "epoch": 0.7648426323319027,
      "grad_norm": 0.21538146245539389,
      "learning_rate": 2.7630229883197433e-05,
      "loss": 0.7247,
      "step": 8554
    },
    {
      "epoch": 0.7649320457796852,
      "grad_norm": 0.1686630866115725,
      "learning_rate": 2.761024705109705e-05,
      "loss": 0.5941,
      "step": 8555
    },
    {
      "epoch": 0.7650214592274678,
      "grad_norm": 0.16694623247684975,
      "learning_rate": 2.7590270290237852e-05,
      "loss": 0.6447,
      "step": 8556
    },
    {
      "epoch": 0.7651108726752504,
      "grad_norm": 0.16913755662232294,
      "learning_rate": 2.75702996022952e-05,
      "loss": 0.669,
      "step": 8557
    },
    {
      "epoch": 0.7652002861230329,
      "grad_norm": 0.16925033658949487,
      "learning_rate": 2.755033498894405e-05,
      "loss": 0.6158,
      "step": 8558
    },
    {
      "epoch": 0.7652896995708155,
      "grad_norm": 0.15559110465462272,
      "learning_rate": 2.7530376451858807e-05,
      "loss": 0.5994,
      "step": 8559
    },
    {
      "epoch": 0.765379113018598,
      "grad_norm": 0.14555113487674076,
      "learning_rate": 2.7510423992713374e-05,
      "loss": 0.6529,
      "step": 8560
    },
    {
      "epoch": 0.7654685264663805,
      "grad_norm": 0.15709192928640014,
      "learning_rate": 2.749047761318113e-05,
      "loss": 0.6235,
      "step": 8561
    },
    {
      "epoch": 0.7655579399141631,
      "grad_norm": 0.137428920430898,
      "learning_rate": 2.7470537314934997e-05,
      "loss": 0.6126,
      "step": 8562
    },
    {
      "epoch": 0.7656473533619457,
      "grad_norm": 0.15161353627188318,
      "learning_rate": 2.7450603099647266e-05,
      "loss": 0.6022,
      "step": 8563
    },
    {
      "epoch": 0.7657367668097281,
      "grad_norm": 0.18661061943566112,
      "learning_rate": 2.7430674968989832e-05,
      "loss": 0.67,
      "step": 8564
    },
    {
      "epoch": 0.7658261802575107,
      "grad_norm": 0.14272545816168575,
      "learning_rate": 2.7410752924634088e-05,
      "loss": 0.6408,
      "step": 8565
    },
    {
      "epoch": 0.7659155937052933,
      "grad_norm": 0.16358914693986026,
      "learning_rate": 2.7390836968250766e-05,
      "loss": 0.6952,
      "step": 8566
    },
    {
      "epoch": 0.7660050071530758,
      "grad_norm": 0.159844324524595,
      "learning_rate": 2.737092710151029e-05,
      "loss": 0.6837,
      "step": 8567
    },
    {
      "epoch": 0.7660944206008584,
      "grad_norm": 0.1714142777401397,
      "learning_rate": 2.735102332608247e-05,
      "loss": 0.658,
      "step": 8568
    },
    {
      "epoch": 0.7661838340486409,
      "grad_norm": 0.1611375500556213,
      "learning_rate": 2.7331125643636567e-05,
      "loss": 0.6591,
      "step": 8569
    },
    {
      "epoch": 0.7662732474964234,
      "grad_norm": 0.16912139846354637,
      "learning_rate": 2.7311234055841382e-05,
      "loss": 0.6683,
      "step": 8570
    },
    {
      "epoch": 0.766362660944206,
      "grad_norm": 0.1457470847619191,
      "learning_rate": 2.7291348564365248e-05,
      "loss": 0.6557,
      "step": 8571
    },
    {
      "epoch": 0.7664520743919886,
      "grad_norm": 0.15645953439975077,
      "learning_rate": 2.7271469170875863e-05,
      "loss": 0.6301,
      "step": 8572
    },
    {
      "epoch": 0.766541487839771,
      "grad_norm": 0.1528955655484353,
      "learning_rate": 2.7251595877040538e-05,
      "loss": 0.625,
      "step": 8573
    },
    {
      "epoch": 0.7666309012875536,
      "grad_norm": 0.16013321822397356,
      "learning_rate": 2.7231728684525992e-05,
      "loss": 0.6178,
      "step": 8574
    },
    {
      "epoch": 0.7667203147353362,
      "grad_norm": 0.16210502361525664,
      "learning_rate": 2.7211867594998486e-05,
      "loss": 0.6854,
      "step": 8575
    },
    {
      "epoch": 0.7668097281831188,
      "grad_norm": 0.1553825983170706,
      "learning_rate": 2.7192012610123774e-05,
      "loss": 0.5815,
      "step": 8576
    },
    {
      "epoch": 0.7668991416309013,
      "grad_norm": 0.15141239396026016,
      "learning_rate": 2.7172163731567e-05,
      "loss": 0.6305,
      "step": 8577
    },
    {
      "epoch": 0.7669885550786838,
      "grad_norm": 0.15726862695369756,
      "learning_rate": 2.7152320960992905e-05,
      "loss": 0.6561,
      "step": 8578
    },
    {
      "epoch": 0.7670779685264664,
      "grad_norm": 0.14934801220306046,
      "learning_rate": 2.71324843000657e-05,
      "loss": 0.6495,
      "step": 8579
    },
    {
      "epoch": 0.7671673819742489,
      "grad_norm": 0.1674824911511953,
      "learning_rate": 2.711265375044897e-05,
      "loss": 0.6375,
      "step": 8580
    },
    {
      "epoch": 0.7672567954220315,
      "grad_norm": 0.16440472894975633,
      "learning_rate": 2.709282931380598e-05,
      "loss": 0.6658,
      "step": 8581
    },
    {
      "epoch": 0.767346208869814,
      "grad_norm": 0.16877238563185876,
      "learning_rate": 2.7073010991799376e-05,
      "loss": 0.6602,
      "step": 8582
    },
    {
      "epoch": 0.7674356223175965,
      "grad_norm": 0.16294336891620656,
      "learning_rate": 2.705319878609124e-05,
      "loss": 0.6438,
      "step": 8583
    },
    {
      "epoch": 0.7675250357653791,
      "grad_norm": 0.15845820240548592,
      "learning_rate": 2.7033392698343218e-05,
      "loss": 0.6706,
      "step": 8584
    },
    {
      "epoch": 0.7676144492131617,
      "grad_norm": 0.1628262369499424,
      "learning_rate": 2.7013592730216465e-05,
      "loss": 0.6379,
      "step": 8585
    },
    {
      "epoch": 0.7677038626609443,
      "grad_norm": 0.16200580637597134,
      "learning_rate": 2.69937988833715e-05,
      "loss": 0.663,
      "step": 8586
    },
    {
      "epoch": 0.7677932761087267,
      "grad_norm": 0.183793913874026,
      "learning_rate": 2.697401115946847e-05,
      "loss": 0.69,
      "step": 8587
    },
    {
      "epoch": 0.7678826895565093,
      "grad_norm": 0.1633839549641916,
      "learning_rate": 2.6954229560166923e-05,
      "loss": 0.6718,
      "step": 8588
    },
    {
      "epoch": 0.7679721030042919,
      "grad_norm": 0.14357719491172774,
      "learning_rate": 2.6934454087125926e-05,
      "loss": 0.6537,
      "step": 8589
    },
    {
      "epoch": 0.7680615164520744,
      "grad_norm": 0.16993598259323148,
      "learning_rate": 2.6914684742004028e-05,
      "loss": 0.6991,
      "step": 8590
    },
    {
      "epoch": 0.7681509298998569,
      "grad_norm": 0.16062308431400113,
      "learning_rate": 2.689492152645928e-05,
      "loss": 0.6651,
      "step": 8591
    },
    {
      "epoch": 0.7682403433476395,
      "grad_norm": 0.19766162918754746,
      "learning_rate": 2.6875164442149147e-05,
      "loss": 0.6545,
      "step": 8592
    },
    {
      "epoch": 0.768329756795422,
      "grad_norm": 0.16369905689283965,
      "learning_rate": 2.685541349073066e-05,
      "loss": 0.6526,
      "step": 8593
    },
    {
      "epoch": 0.7684191702432046,
      "grad_norm": 0.1416151299385409,
      "learning_rate": 2.6835668673860314e-05,
      "loss": 0.6555,
      "step": 8594
    },
    {
      "epoch": 0.7685085836909872,
      "grad_norm": 0.1672418348221549,
      "learning_rate": 2.6815929993194067e-05,
      "loss": 0.6424,
      "step": 8595
    },
    {
      "epoch": 0.7685979971387696,
      "grad_norm": 0.2174886799465148,
      "learning_rate": 2.679619745038743e-05,
      "loss": 0.6231,
      "step": 8596
    },
    {
      "epoch": 0.7686874105865522,
      "grad_norm": 0.1596990976445894,
      "learning_rate": 2.6776471047095263e-05,
      "loss": 0.6434,
      "step": 8597
    },
    {
      "epoch": 0.7687768240343348,
      "grad_norm": 0.15399246324485522,
      "learning_rate": 2.675675078497204e-05,
      "loss": 0.66,
      "step": 8598
    },
    {
      "epoch": 0.7688662374821174,
      "grad_norm": 0.14459666812643396,
      "learning_rate": 2.67370366656717e-05,
      "loss": 0.6183,
      "step": 8599
    },
    {
      "epoch": 0.7689556509298998,
      "grad_norm": 0.16212909142441378,
      "learning_rate": 2.6717328690847565e-05,
      "loss": 0.6191,
      "step": 8600
    },
    {
      "epoch": 0.7690450643776824,
      "grad_norm": 0.1534209131558005,
      "learning_rate": 2.669762686215259e-05,
      "loss": 0.653,
      "step": 8601
    },
    {
      "epoch": 0.769134477825465,
      "grad_norm": 0.17710377815488448,
      "learning_rate": 2.6677931181239158e-05,
      "loss": 0.3691,
      "step": 8602
    },
    {
      "epoch": 0.7692238912732475,
      "grad_norm": 0.18330987948097519,
      "learning_rate": 2.6658241649759062e-05,
      "loss": 0.3907,
      "step": 8603
    },
    {
      "epoch": 0.76931330472103,
      "grad_norm": 0.16258929488231022,
      "learning_rate": 2.6638558269363654e-05,
      "loss": 0.6467,
      "step": 8604
    },
    {
      "epoch": 0.7694027181688126,
      "grad_norm": 0.16402627174027887,
      "learning_rate": 2.6618881041703804e-05,
      "loss": 0.6449,
      "step": 8605
    },
    {
      "epoch": 0.7694921316165951,
      "grad_norm": 0.17968696409353016,
      "learning_rate": 2.659920996842975e-05,
      "loss": 0.4148,
      "step": 8606
    },
    {
      "epoch": 0.7695815450643777,
      "grad_norm": 0.16161821228048137,
      "learning_rate": 2.6579545051191302e-05,
      "loss": 0.656,
      "step": 8607
    },
    {
      "epoch": 0.7696709585121603,
      "grad_norm": 0.1525489026080038,
      "learning_rate": 2.6559886291637748e-05,
      "loss": 0.6467,
      "step": 8608
    },
    {
      "epoch": 0.7697603719599427,
      "grad_norm": 0.18478872857981882,
      "learning_rate": 2.6540233691417837e-05,
      "loss": 0.6334,
      "step": 8609
    },
    {
      "epoch": 0.7698497854077253,
      "grad_norm": 0.15876143095487363,
      "learning_rate": 2.652058725217983e-05,
      "loss": 0.6688,
      "step": 8610
    },
    {
      "epoch": 0.7699391988555079,
      "grad_norm": 0.16646746696309736,
      "learning_rate": 2.6500946975571405e-05,
      "loss": 0.6239,
      "step": 8611
    },
    {
      "epoch": 0.7700286123032904,
      "grad_norm": 0.16713445716742484,
      "learning_rate": 2.6481312863239804e-05,
      "loss": 0.646,
      "step": 8612
    },
    {
      "epoch": 0.7701180257510729,
      "grad_norm": 0.1595117052008537,
      "learning_rate": 2.646168491683172e-05,
      "loss": 0.6272,
      "step": 8613
    },
    {
      "epoch": 0.7702074391988555,
      "grad_norm": 0.16054730920205273,
      "learning_rate": 2.6442063137993255e-05,
      "loss": 0.6337,
      "step": 8614
    },
    {
      "epoch": 0.770296852646638,
      "grad_norm": 0.15599841365314102,
      "learning_rate": 2.6422447528370152e-05,
      "loss": 0.6395,
      "step": 8615
    },
    {
      "epoch": 0.7703862660944206,
      "grad_norm": 0.14581067680247398,
      "learning_rate": 2.640283808960754e-05,
      "loss": 0.6451,
      "step": 8616
    },
    {
      "epoch": 0.7704756795422032,
      "grad_norm": 0.17099401855699392,
      "learning_rate": 2.638323482334999e-05,
      "loss": 0.6846,
      "step": 8617
    },
    {
      "epoch": 0.7705650929899857,
      "grad_norm": 0.16946783526939802,
      "learning_rate": 2.636363773124163e-05,
      "loss": 0.6671,
      "step": 8618
    },
    {
      "epoch": 0.7706545064377682,
      "grad_norm": 0.15450967463358564,
      "learning_rate": 2.634404681492607e-05,
      "loss": 0.644,
      "step": 8619
    },
    {
      "epoch": 0.7707439198855508,
      "grad_norm": 0.15689553888023114,
      "learning_rate": 2.6324462076046318e-05,
      "loss": 0.6355,
      "step": 8620
    },
    {
      "epoch": 0.7708333333333334,
      "grad_norm": 0.14572432417443756,
      "learning_rate": 2.630488351624496e-05,
      "loss": 0.6376,
      "step": 8621
    },
    {
      "epoch": 0.7709227467811158,
      "grad_norm": 0.17388914592893148,
      "learning_rate": 2.6285311137164013e-05,
      "loss": 0.6431,
      "step": 8622
    },
    {
      "epoch": 0.7710121602288984,
      "grad_norm": 0.1498716534093433,
      "learning_rate": 2.6265744940445003e-05,
      "loss": 0.6423,
      "step": 8623
    },
    {
      "epoch": 0.771101573676681,
      "grad_norm": 0.172109805227499,
      "learning_rate": 2.624618492772891e-05,
      "loss": 0.6449,
      "step": 8624
    },
    {
      "epoch": 0.7711909871244635,
      "grad_norm": 0.141482172588073,
      "learning_rate": 2.622663110065625e-05,
      "loss": 0.6555,
      "step": 8625
    },
    {
      "epoch": 0.7712804005722461,
      "grad_norm": 0.14998647951304658,
      "learning_rate": 2.6207083460866912e-05,
      "loss": 0.6017,
      "step": 8626
    },
    {
      "epoch": 0.7713698140200286,
      "grad_norm": 0.16421356780977528,
      "learning_rate": 2.6187542010000367e-05,
      "loss": 0.648,
      "step": 8627
    },
    {
      "epoch": 0.7714592274678111,
      "grad_norm": 0.16064325862240575,
      "learning_rate": 2.616800674969553e-05,
      "loss": 0.6218,
      "step": 8628
    },
    {
      "epoch": 0.7715486409155937,
      "grad_norm": 0.15749947685505566,
      "learning_rate": 2.61484776815908e-05,
      "loss": 0.6575,
      "step": 8629
    },
    {
      "epoch": 0.7716380543633763,
      "grad_norm": 0.16775833283562527,
      "learning_rate": 2.612895480732408e-05,
      "loss": 0.6566,
      "step": 8630
    },
    {
      "epoch": 0.7717274678111588,
      "grad_norm": 0.17490309447572902,
      "learning_rate": 2.610943812853268e-05,
      "loss": 0.6738,
      "step": 8631
    },
    {
      "epoch": 0.7718168812589413,
      "grad_norm": 0.1814244106490481,
      "learning_rate": 2.6089927646853474e-05,
      "loss": 0.6829,
      "step": 8632
    },
    {
      "epoch": 0.7719062947067239,
      "grad_norm": 0.1577527380294224,
      "learning_rate": 2.6070423363922803e-05,
      "loss": 0.6261,
      "step": 8633
    },
    {
      "epoch": 0.7719957081545065,
      "grad_norm": 0.18120360017392534,
      "learning_rate": 2.6050925281376403e-05,
      "loss": 0.6775,
      "step": 8634
    },
    {
      "epoch": 0.772085121602289,
      "grad_norm": 0.1472219556146123,
      "learning_rate": 2.603143340084957e-05,
      "loss": 0.6458,
      "step": 8635
    },
    {
      "epoch": 0.7721745350500715,
      "grad_norm": 0.16110556882047677,
      "learning_rate": 2.601194772397715e-05,
      "loss": 0.6663,
      "step": 8636
    },
    {
      "epoch": 0.7722639484978541,
      "grad_norm": 0.1827155133651948,
      "learning_rate": 2.5992468252393275e-05,
      "loss": 0.3962,
      "step": 8637
    },
    {
      "epoch": 0.7723533619456366,
      "grad_norm": 0.15569417897258797,
      "learning_rate": 2.5972994987731714e-05,
      "loss": 0.6697,
      "step": 8638
    },
    {
      "epoch": 0.7724427753934192,
      "grad_norm": 0.15003889055763434,
      "learning_rate": 2.59535279316257e-05,
      "loss": 0.6221,
      "step": 8639
    },
    {
      "epoch": 0.7725321888412017,
      "grad_norm": 0.1544152092635783,
      "learning_rate": 2.5934067085707834e-05,
      "loss": 0.6307,
      "step": 8640
    },
    {
      "epoch": 0.7726216022889842,
      "grad_norm": 0.17270408663914985,
      "learning_rate": 2.591461245161032e-05,
      "loss": 0.6407,
      "step": 8641
    },
    {
      "epoch": 0.7727110157367668,
      "grad_norm": 0.15802739280970846,
      "learning_rate": 2.589516403096478e-05,
      "loss": 0.6248,
      "step": 8642
    },
    {
      "epoch": 0.7728004291845494,
      "grad_norm": 0.14695107166459634,
      "learning_rate": 2.5875721825402342e-05,
      "loss": 0.6248,
      "step": 8643
    },
    {
      "epoch": 0.772889842632332,
      "grad_norm": 0.15975703348207895,
      "learning_rate": 2.585628583655362e-05,
      "loss": 0.6355,
      "step": 8644
    },
    {
      "epoch": 0.7729792560801144,
      "grad_norm": 0.1777090673947866,
      "learning_rate": 2.583685606604863e-05,
      "loss": 0.678,
      "step": 8645
    },
    {
      "epoch": 0.773068669527897,
      "grad_norm": 0.1478535466139106,
      "learning_rate": 2.581743251551697e-05,
      "loss": 0.6204,
      "step": 8646
    },
    {
      "epoch": 0.7731580829756796,
      "grad_norm": 0.1591833348853112,
      "learning_rate": 2.5798015186587643e-05,
      "loss": 0.6394,
      "step": 8647
    },
    {
      "epoch": 0.7732474964234621,
      "grad_norm": 0.15623925916071052,
      "learning_rate": 2.5778604080889202e-05,
      "loss": 0.6358,
      "step": 8648
    },
    {
      "epoch": 0.7733369098712446,
      "grad_norm": 0.17958818086513273,
      "learning_rate": 2.5759199200049534e-05,
      "loss": 0.3813,
      "step": 8649
    },
    {
      "epoch": 0.7734263233190272,
      "grad_norm": 0.16253839499240164,
      "learning_rate": 2.5739800545696237e-05,
      "loss": 0.6635,
      "step": 8650
    },
    {
      "epoch": 0.7735157367668097,
      "grad_norm": 0.15779855229322848,
      "learning_rate": 2.5720408119456152e-05,
      "loss": 0.6351,
      "step": 8651
    },
    {
      "epoch": 0.7736051502145923,
      "grad_norm": 0.1611766216202898,
      "learning_rate": 2.5701021922955727e-05,
      "loss": 0.652,
      "step": 8652
    },
    {
      "epoch": 0.7736945636623748,
      "grad_norm": 0.16028288396130674,
      "learning_rate": 2.56816419578209e-05,
      "loss": 0.6357,
      "step": 8653
    },
    {
      "epoch": 0.7737839771101573,
      "grad_norm": 0.15909461517924847,
      "learning_rate": 2.5662268225676976e-05,
      "loss": 0.6207,
      "step": 8654
    },
    {
      "epoch": 0.7738733905579399,
      "grad_norm": 0.15531713629371618,
      "learning_rate": 2.5642900728148832e-05,
      "loss": 0.6474,
      "step": 8655
    },
    {
      "epoch": 0.7739628040057225,
      "grad_norm": 0.1602397572827394,
      "learning_rate": 2.5623539466860813e-05,
      "loss": 0.648,
      "step": 8656
    },
    {
      "epoch": 0.774052217453505,
      "grad_norm": 0.16608856516402568,
      "learning_rate": 2.5604184443436707e-05,
      "loss": 0.6511,
      "step": 8657
    },
    {
      "epoch": 0.7741416309012875,
      "grad_norm": 0.15261304058563022,
      "learning_rate": 2.5584835659499807e-05,
      "loss": 0.6253,
      "step": 8658
    },
    {
      "epoch": 0.7742310443490701,
      "grad_norm": 0.15582065006053308,
      "learning_rate": 2.5565493116672902e-05,
      "loss": 0.6511,
      "step": 8659
    },
    {
      "epoch": 0.7743204577968527,
      "grad_norm": 0.14569402832053224,
      "learning_rate": 2.5546156816578158e-05,
      "loss": 0.6687,
      "step": 8660
    },
    {
      "epoch": 0.7744098712446352,
      "grad_norm": 0.1684356161752261,
      "learning_rate": 2.552682676083733e-05,
      "loss": 0.6427,
      "step": 8661
    },
    {
      "epoch": 0.7744992846924177,
      "grad_norm": 0.15970849472556228,
      "learning_rate": 2.5507502951071637e-05,
      "loss": 0.6626,
      "step": 8662
    },
    {
      "epoch": 0.7745886981402003,
      "grad_norm": 0.16361106511281576,
      "learning_rate": 2.5488185388901642e-05,
      "loss": 0.6326,
      "step": 8663
    },
    {
      "epoch": 0.7746781115879828,
      "grad_norm": 0.15211596469767574,
      "learning_rate": 2.54688740759476e-05,
      "loss": 0.6212,
      "step": 8664
    },
    {
      "epoch": 0.7747675250357654,
      "grad_norm": 0.1648406074961734,
      "learning_rate": 2.5449569013829066e-05,
      "loss": 0.6533,
      "step": 8665
    },
    {
      "epoch": 0.774856938483548,
      "grad_norm": 0.1686172567601702,
      "learning_rate": 2.543027020416514e-05,
      "loss": 0.653,
      "step": 8666
    },
    {
      "epoch": 0.7749463519313304,
      "grad_norm": 0.1669897424114391,
      "learning_rate": 2.541097764857442e-05,
      "loss": 0.6797,
      "step": 8667
    },
    {
      "epoch": 0.775035765379113,
      "grad_norm": 0.16521249373079347,
      "learning_rate": 2.5391691348674894e-05,
      "loss": 0.365,
      "step": 8668
    },
    {
      "epoch": 0.7751251788268956,
      "grad_norm": 0.152799196514688,
      "learning_rate": 2.537241130608411e-05,
      "loss": 0.6219,
      "step": 8669
    },
    {
      "epoch": 0.7752145922746781,
      "grad_norm": 0.15979233596325296,
      "learning_rate": 2.5353137522419067e-05,
      "loss": 0.6414,
      "step": 8670
    },
    {
      "epoch": 0.7753040057224606,
      "grad_norm": 0.14148503343481852,
      "learning_rate": 2.5333869999296223e-05,
      "loss": 0.6385,
      "step": 8671
    },
    {
      "epoch": 0.7753934191702432,
      "grad_norm": 0.1566581125461257,
      "learning_rate": 2.5314608738331537e-05,
      "loss": 0.6413,
      "step": 8672
    },
    {
      "epoch": 0.7754828326180258,
      "grad_norm": 0.162541985505034,
      "learning_rate": 2.529535374114044e-05,
      "loss": 0.6821,
      "step": 8673
    },
    {
      "epoch": 0.7755722460658083,
      "grad_norm": 0.17746234243420503,
      "learning_rate": 2.527610500933778e-05,
      "loss": 0.6412,
      "step": 8674
    },
    {
      "epoch": 0.7756616595135909,
      "grad_norm": 0.17472850692500522,
      "learning_rate": 2.525686254453795e-05,
      "loss": 0.6485,
      "step": 8675
    },
    {
      "epoch": 0.7757510729613734,
      "grad_norm": 0.16573946815184365,
      "learning_rate": 2.5237626348354813e-05,
      "loss": 0.6104,
      "step": 8676
    },
    {
      "epoch": 0.7758404864091559,
      "grad_norm": 0.15380506161994495,
      "learning_rate": 2.5218396422401614e-05,
      "loss": 0.63,
      "step": 8677
    },
    {
      "epoch": 0.7759298998569385,
      "grad_norm": 0.1764223163375959,
      "learning_rate": 2.5199172768291248e-05,
      "loss": 0.6969,
      "step": 8678
    },
    {
      "epoch": 0.7760193133047211,
      "grad_norm": 0.1474469945274781,
      "learning_rate": 2.51799553876359e-05,
      "loss": 0.6388,
      "step": 8679
    },
    {
      "epoch": 0.7761087267525035,
      "grad_norm": 0.18201571153582569,
      "learning_rate": 2.5160744282047333e-05,
      "loss": 0.6826,
      "step": 8680
    },
    {
      "epoch": 0.7761981402002861,
      "grad_norm": 0.17046304613390012,
      "learning_rate": 2.5141539453136755e-05,
      "loss": 0.6154,
      "step": 8681
    },
    {
      "epoch": 0.7762875536480687,
      "grad_norm": 0.1605864025528493,
      "learning_rate": 2.5122340902514897e-05,
      "loss": 0.6214,
      "step": 8682
    },
    {
      "epoch": 0.7763769670958512,
      "grad_norm": 0.14987754609143408,
      "learning_rate": 2.510314863179184e-05,
      "loss": 0.6071,
      "step": 8683
    },
    {
      "epoch": 0.7764663805436338,
      "grad_norm": 0.17365660043015602,
      "learning_rate": 2.508396264257725e-05,
      "loss": 0.6265,
      "step": 8684
    },
    {
      "epoch": 0.7765557939914163,
      "grad_norm": 0.15594188143132667,
      "learning_rate": 2.5064782936480248e-05,
      "loss": 0.6631,
      "step": 8685
    },
    {
      "epoch": 0.7766452074391988,
      "grad_norm": 0.16580581414816722,
      "learning_rate": 2.5045609515109403e-05,
      "loss": 0.65,
      "step": 8686
    },
    {
      "epoch": 0.7767346208869814,
      "grad_norm": 0.166394401236153,
      "learning_rate": 2.502644238007279e-05,
      "loss": 0.6417,
      "step": 8687
    },
    {
      "epoch": 0.776824034334764,
      "grad_norm": 0.14739384741617828,
      "learning_rate": 2.500728153297788e-05,
      "loss": 0.6117,
      "step": 8688
    },
    {
      "epoch": 0.7769134477825465,
      "grad_norm": 0.14493212310943593,
      "learning_rate": 2.498812697543169e-05,
      "loss": 0.6628,
      "step": 8689
    },
    {
      "epoch": 0.777002861230329,
      "grad_norm": 0.1738930587246002,
      "learning_rate": 2.4968978709040713e-05,
      "loss": 0.6468,
      "step": 8690
    },
    {
      "epoch": 0.7770922746781116,
      "grad_norm": 0.15761153026649144,
      "learning_rate": 2.4949836735410882e-05,
      "loss": 0.6169,
      "step": 8691
    },
    {
      "epoch": 0.7771816881258942,
      "grad_norm": 0.15236336666738537,
      "learning_rate": 2.4930701056147586e-05,
      "loss": 0.6179,
      "step": 8692
    },
    {
      "epoch": 0.7772711015736766,
      "grad_norm": 0.1627879597400873,
      "learning_rate": 2.491157167285578e-05,
      "loss": 0.6111,
      "step": 8693
    },
    {
      "epoch": 0.7773605150214592,
      "grad_norm": 0.15036001376046507,
      "learning_rate": 2.489244858713974e-05,
      "loss": 0.609,
      "step": 8694
    },
    {
      "epoch": 0.7774499284692418,
      "grad_norm": 0.1422588817373828,
      "learning_rate": 2.4873331800603327e-05,
      "loss": 0.6062,
      "step": 8695
    },
    {
      "epoch": 0.7775393419170243,
      "grad_norm": 0.1537680297225194,
      "learning_rate": 2.485422131484987e-05,
      "loss": 0.6393,
      "step": 8696
    },
    {
      "epoch": 0.7776287553648069,
      "grad_norm": 0.16646629166878774,
      "learning_rate": 2.4835117131482067e-05,
      "loss": 0.6718,
      "step": 8697
    },
    {
      "epoch": 0.7777181688125894,
      "grad_norm": 0.13179220522339435,
      "learning_rate": 2.4816019252102273e-05,
      "loss": 0.6416,
      "step": 8698
    },
    {
      "epoch": 0.7778075822603719,
      "grad_norm": 0.15617345401557736,
      "learning_rate": 2.479692767831211e-05,
      "loss": 0.647,
      "step": 8699
    },
    {
      "epoch": 0.7778969957081545,
      "grad_norm": 0.1805551557759135,
      "learning_rate": 2.4777842411712805e-05,
      "loss": 0.6589,
      "step": 8700
    },
    {
      "epoch": 0.7779864091559371,
      "grad_norm": 0.161833321563237,
      "learning_rate": 2.4758763453905044e-05,
      "loss": 0.6173,
      "step": 8701
    },
    {
      "epoch": 0.7780758226037195,
      "grad_norm": 0.15677735928720388,
      "learning_rate": 2.473969080648889e-05,
      "loss": 0.6597,
      "step": 8702
    },
    {
      "epoch": 0.7781652360515021,
      "grad_norm": 0.15070670379639192,
      "learning_rate": 2.472062447106398e-05,
      "loss": 0.6399,
      "step": 8703
    },
    {
      "epoch": 0.7782546494992847,
      "grad_norm": 0.15321489232731886,
      "learning_rate": 2.4701564449229374e-05,
      "loss": 0.6149,
      "step": 8704
    },
    {
      "epoch": 0.7783440629470673,
      "grad_norm": 0.15030665484808833,
      "learning_rate": 2.468251074258362e-05,
      "loss": 0.6338,
      "step": 8705
    },
    {
      "epoch": 0.7784334763948498,
      "grad_norm": 0.1572714433757976,
      "learning_rate": 2.4663463352724737e-05,
      "loss": 0.6404,
      "step": 8706
    },
    {
      "epoch": 0.7785228898426323,
      "grad_norm": 0.1654651475476128,
      "learning_rate": 2.4644422281250223e-05,
      "loss": 0.6108,
      "step": 8707
    },
    {
      "epoch": 0.7786123032904149,
      "grad_norm": 0.16567234510535475,
      "learning_rate": 2.462538752975698e-05,
      "loss": 0.6527,
      "step": 8708
    },
    {
      "epoch": 0.7787017167381974,
      "grad_norm": 0.16260679019792093,
      "learning_rate": 2.4606359099841457e-05,
      "loss": 0.6492,
      "step": 8709
    },
    {
      "epoch": 0.77879113018598,
      "grad_norm": 0.16209027861474357,
      "learning_rate": 2.4587336993099574e-05,
      "loss": 0.6858,
      "step": 8710
    },
    {
      "epoch": 0.7788805436337625,
      "grad_norm": 0.16640733845918218,
      "learning_rate": 2.4568321211126598e-05,
      "loss": 0.6559,
      "step": 8711
    },
    {
      "epoch": 0.778969957081545,
      "grad_norm": 0.16848623583844463,
      "learning_rate": 2.4549311755517457e-05,
      "loss": 0.6589,
      "step": 8712
    },
    {
      "epoch": 0.7790593705293276,
      "grad_norm": 0.14819809382161866,
      "learning_rate": 2.4530308627866438e-05,
      "loss": 0.6505,
      "step": 8713
    },
    {
      "epoch": 0.7791487839771102,
      "grad_norm": 0.13792669098011603,
      "learning_rate": 2.451131182976727e-05,
      "loss": 0.6291,
      "step": 8714
    },
    {
      "epoch": 0.7792381974248928,
      "grad_norm": 0.16576809550322585,
      "learning_rate": 2.4492321362813207e-05,
      "loss": 0.6361,
      "step": 8715
    },
    {
      "epoch": 0.7793276108726752,
      "grad_norm": 0.14736491252691641,
      "learning_rate": 2.4473337228596994e-05,
      "loss": 0.6487,
      "step": 8716
    },
    {
      "epoch": 0.7794170243204578,
      "grad_norm": 0.16208527131019687,
      "learning_rate": 2.445435942871074e-05,
      "loss": 0.6398,
      "step": 8717
    },
    {
      "epoch": 0.7795064377682404,
      "grad_norm": 0.17464089211101105,
      "learning_rate": 2.4435387964746127e-05,
      "loss": 0.6953,
      "step": 8718
    },
    {
      "epoch": 0.7795958512160229,
      "grad_norm": 0.1814523631582961,
      "learning_rate": 2.4416422838294273e-05,
      "loss": 0.6962,
      "step": 8719
    },
    {
      "epoch": 0.7796852646638054,
      "grad_norm": 0.1528388155482932,
      "learning_rate": 2.439746405094575e-05,
      "loss": 0.6315,
      "step": 8720
    },
    {
      "epoch": 0.779774678111588,
      "grad_norm": 0.16711018935852592,
      "learning_rate": 2.4378511604290632e-05,
      "loss": 0.6545,
      "step": 8721
    },
    {
      "epoch": 0.7798640915593705,
      "grad_norm": 0.17127841019008147,
      "learning_rate": 2.4359565499918402e-05,
      "loss": 0.6959,
      "step": 8722
    },
    {
      "epoch": 0.7799535050071531,
      "grad_norm": 0.19582879205476283,
      "learning_rate": 2.4340625739418055e-05,
      "loss": 0.6433,
      "step": 8723
    },
    {
      "epoch": 0.7800429184549357,
      "grad_norm": 0.1796727484977759,
      "learning_rate": 2.4321692324378087e-05,
      "loss": 0.6454,
      "step": 8724
    },
    {
      "epoch": 0.7801323319027181,
      "grad_norm": 0.1587945606744661,
      "learning_rate": 2.4302765256386327e-05,
      "loss": 0.6491,
      "step": 8725
    },
    {
      "epoch": 0.7802217453505007,
      "grad_norm": 0.16496326344349546,
      "learning_rate": 2.4283844537030252e-05,
      "loss": 0.676,
      "step": 8726
    },
    {
      "epoch": 0.7803111587982833,
      "grad_norm": 0.16003224183627,
      "learning_rate": 2.4264930167896727e-05,
      "loss": 0.641,
      "step": 8727
    },
    {
      "epoch": 0.7804005722460658,
      "grad_norm": 0.1861469969951361,
      "learning_rate": 2.4246022150572024e-05,
      "loss": 0.6483,
      "step": 8728
    },
    {
      "epoch": 0.7804899856938483,
      "grad_norm": 0.13943109551274194,
      "learning_rate": 2.422712048664194e-05,
      "loss": 0.6354,
      "step": 8729
    },
    {
      "epoch": 0.7805793991416309,
      "grad_norm": 0.17000410773005378,
      "learning_rate": 2.420822517769179e-05,
      "loss": 0.6358,
      "step": 8730
    },
    {
      "epoch": 0.7806688125894135,
      "grad_norm": 0.15688605439940767,
      "learning_rate": 2.4189336225306225e-05,
      "loss": 0.6674,
      "step": 8731
    },
    {
      "epoch": 0.780758226037196,
      "grad_norm": 0.16272711243467614,
      "learning_rate": 2.417045363106948e-05,
      "loss": 0.6668,
      "step": 8732
    },
    {
      "epoch": 0.7808476394849786,
      "grad_norm": 0.1532469873968499,
      "learning_rate": 2.4151577396565205e-05,
      "loss": 0.6313,
      "step": 8733
    },
    {
      "epoch": 0.780937052932761,
      "grad_norm": 0.14257449733392774,
      "learning_rate": 2.413270752337653e-05,
      "loss": 0.6371,
      "step": 8734
    },
    {
      "epoch": 0.7810264663805436,
      "grad_norm": 0.15969763442863527,
      "learning_rate": 2.4113844013086083e-05,
      "loss": 0.6208,
      "step": 8735
    },
    {
      "epoch": 0.7811158798283262,
      "grad_norm": 0.14197997459850617,
      "learning_rate": 2.409498686727587e-05,
      "loss": 0.6355,
      "step": 8736
    },
    {
      "epoch": 0.7812052932761088,
      "grad_norm": 0.14831868539601437,
      "learning_rate": 2.4076136087527435e-05,
      "loss": 0.6218,
      "step": 8737
    },
    {
      "epoch": 0.7812947067238912,
      "grad_norm": 0.15215638823033628,
      "learning_rate": 2.4057291675421768e-05,
      "loss": 0.6227,
      "step": 8738
    },
    {
      "epoch": 0.7813841201716738,
      "grad_norm": 0.1626850780613697,
      "learning_rate": 2.4038453632539338e-05,
      "loss": 0.673,
      "step": 8739
    },
    {
      "epoch": 0.7814735336194564,
      "grad_norm": 0.1569212533031255,
      "learning_rate": 2.4019621960460058e-05,
      "loss": 0.6537,
      "step": 8740
    },
    {
      "epoch": 0.781562947067239,
      "grad_norm": 0.1628752853810012,
      "learning_rate": 2.4000796660763346e-05,
      "loss": 0.6806,
      "step": 8741
    },
    {
      "epoch": 0.7816523605150214,
      "grad_norm": 0.15754309522953075,
      "learning_rate": 2.3981977735028018e-05,
      "loss": 0.6364,
      "step": 8742
    },
    {
      "epoch": 0.781741773962804,
      "grad_norm": 0.15461040237488374,
      "learning_rate": 2.3963165184832403e-05,
      "loss": 0.6432,
      "step": 8743
    },
    {
      "epoch": 0.7818311874105865,
      "grad_norm": 0.15695046259546622,
      "learning_rate": 2.3944359011754336e-05,
      "loss": 0.686,
      "step": 8744
    },
    {
      "epoch": 0.7819206008583691,
      "grad_norm": 0.15803192446291614,
      "learning_rate": 2.3925559217370987e-05,
      "loss": 0.6532,
      "step": 8745
    },
    {
      "epoch": 0.7820100143061517,
      "grad_norm": 0.17825750233090723,
      "learning_rate": 2.3906765803259078e-05,
      "loss": 0.6382,
      "step": 8746
    },
    {
      "epoch": 0.7820994277539342,
      "grad_norm": 0.1545459457430273,
      "learning_rate": 2.388797877099489e-05,
      "loss": 0.6618,
      "step": 8747
    },
    {
      "epoch": 0.7821888412017167,
      "grad_norm": 0.15847320416354996,
      "learning_rate": 2.386919812215398e-05,
      "loss": 0.6442,
      "step": 8748
    },
    {
      "epoch": 0.7822782546494993,
      "grad_norm": 0.15644179800353641,
      "learning_rate": 2.3850423858311466e-05,
      "loss": 0.6508,
      "step": 8749
    },
    {
      "epoch": 0.7823676680972819,
      "grad_norm": 0.14386278847520273,
      "learning_rate": 2.3831655981041977e-05,
      "loss": 0.6272,
      "step": 8750
    },
    {
      "epoch": 0.7824570815450643,
      "grad_norm": 0.17169277321627197,
      "learning_rate": 2.381289449191948e-05,
      "loss": 0.6544,
      "step": 8751
    },
    {
      "epoch": 0.7825464949928469,
      "grad_norm": 0.15994173998552708,
      "learning_rate": 2.379413939251751e-05,
      "loss": 0.6737,
      "step": 8752
    },
    {
      "epoch": 0.7826359084406295,
      "grad_norm": 0.15908086493346732,
      "learning_rate": 2.3775390684409037e-05,
      "loss": 0.6564,
      "step": 8753
    },
    {
      "epoch": 0.782725321888412,
      "grad_norm": 0.1546575932369098,
      "learning_rate": 2.375664836916649e-05,
      "loss": 0.5774,
      "step": 8754
    },
    {
      "epoch": 0.7828147353361946,
      "grad_norm": 0.16911142587033345,
      "learning_rate": 2.3737912448361798e-05,
      "loss": 0.6948,
      "step": 8755
    },
    {
      "epoch": 0.7829041487839771,
      "grad_norm": 0.16006934251882493,
      "learning_rate": 2.3719182923566263e-05,
      "loss": 0.6754,
      "step": 8756
    },
    {
      "epoch": 0.7829935622317596,
      "grad_norm": 0.184544618145652,
      "learning_rate": 2.3700459796350726e-05,
      "loss": 0.6435,
      "step": 8757
    },
    {
      "epoch": 0.7830829756795422,
      "grad_norm": 0.13747761065664146,
      "learning_rate": 2.36817430682855e-05,
      "loss": 0.6184,
      "step": 8758
    },
    {
      "epoch": 0.7831723891273248,
      "grad_norm": 0.155248650019245,
      "learning_rate": 2.3663032740940293e-05,
      "loss": 0.5963,
      "step": 8759
    },
    {
      "epoch": 0.7832618025751072,
      "grad_norm": 0.17877301375416885,
      "learning_rate": 2.364432881588431e-05,
      "loss": 0.6684,
      "step": 8760
    },
    {
      "epoch": 0.7833512160228898,
      "grad_norm": 0.17908722904793528,
      "learning_rate": 2.362563129468631e-05,
      "loss": 0.6014,
      "step": 8761
    },
    {
      "epoch": 0.7834406294706724,
      "grad_norm": 0.16914437899088616,
      "learning_rate": 2.360694017891436e-05,
      "loss": 0.6963,
      "step": 8762
    },
    {
      "epoch": 0.783530042918455,
      "grad_norm": 0.1591918428948004,
      "learning_rate": 2.358825547013607e-05,
      "loss": 0.6674,
      "step": 8763
    },
    {
      "epoch": 0.7836194563662375,
      "grad_norm": 0.1580531681051575,
      "learning_rate": 2.3569577169918532e-05,
      "loss": 0.6322,
      "step": 8764
    },
    {
      "epoch": 0.78370886981402,
      "grad_norm": 0.18740790878605565,
      "learning_rate": 2.355090527982823e-05,
      "loss": 0.6952,
      "step": 8765
    },
    {
      "epoch": 0.7837982832618026,
      "grad_norm": 0.14509034213468666,
      "learning_rate": 2.353223980143118e-05,
      "loss": 0.6503,
      "step": 8766
    },
    {
      "epoch": 0.7838876967095851,
      "grad_norm": 0.17327935797872762,
      "learning_rate": 2.351358073629282e-05,
      "loss": 0.6434,
      "step": 8767
    },
    {
      "epoch": 0.7839771101573677,
      "grad_norm": 0.16778092388906665,
      "learning_rate": 2.3494928085978073e-05,
      "loss": 0.6612,
      "step": 8768
    },
    {
      "epoch": 0.7840665236051502,
      "grad_norm": 0.1494928040831609,
      "learning_rate": 2.3476281852051308e-05,
      "loss": 0.5972,
      "step": 8769
    },
    {
      "epoch": 0.7841559370529327,
      "grad_norm": 0.15990504399917319,
      "learning_rate": 2.345764203607641e-05,
      "loss": 0.639,
      "step": 8770
    },
    {
      "epoch": 0.7842453505007153,
      "grad_norm": 0.15801341226274795,
      "learning_rate": 2.343900863961659e-05,
      "loss": 0.6142,
      "step": 8771
    },
    {
      "epoch": 0.7843347639484979,
      "grad_norm": 0.14224672096590837,
      "learning_rate": 2.342038166423466e-05,
      "loss": 0.6369,
      "step": 8772
    },
    {
      "epoch": 0.7844241773962805,
      "grad_norm": 0.1574702859724382,
      "learning_rate": 2.3401761111492836e-05,
      "loss": 0.6168,
      "step": 8773
    },
    {
      "epoch": 0.7845135908440629,
      "grad_norm": 0.16112430494740995,
      "learning_rate": 2.338314698295281e-05,
      "loss": 0.6394,
      "step": 8774
    },
    {
      "epoch": 0.7846030042918455,
      "grad_norm": 0.14927479085860562,
      "learning_rate": 2.3364539280175734e-05,
      "loss": 0.6314,
      "step": 8775
    },
    {
      "epoch": 0.7846924177396281,
      "grad_norm": 0.13245772547240553,
      "learning_rate": 2.3345938004722168e-05,
      "loss": 0.6063,
      "step": 8776
    },
    {
      "epoch": 0.7847818311874106,
      "grad_norm": 0.15653830809408767,
      "learning_rate": 2.3327343158152205e-05,
      "loss": 0.6178,
      "step": 8777
    },
    {
      "epoch": 0.7848712446351931,
      "grad_norm": 0.16106033469368766,
      "learning_rate": 2.3308754742025406e-05,
      "loss": 0.6809,
      "step": 8778
    },
    {
      "epoch": 0.7849606580829757,
      "grad_norm": 0.15790232411428407,
      "learning_rate": 2.3290172757900696e-05,
      "loss": 0.678,
      "step": 8779
    },
    {
      "epoch": 0.7850500715307582,
      "grad_norm": 0.16087881627967165,
      "learning_rate": 2.3271597207336526e-05,
      "loss": 0.6355,
      "step": 8780
    },
    {
      "epoch": 0.7851394849785408,
      "grad_norm": 0.1449828334764069,
      "learning_rate": 2.3253028091890893e-05,
      "loss": 0.6278,
      "step": 8781
    },
    {
      "epoch": 0.7852288984263234,
      "grad_norm": 0.18466402493374853,
      "learning_rate": 2.3234465413121086e-05,
      "loss": 0.7166,
      "step": 8782
    },
    {
      "epoch": 0.7853183118741058,
      "grad_norm": 0.14859366143825384,
      "learning_rate": 2.321590917258395e-05,
      "loss": 0.6134,
      "step": 8783
    },
    {
      "epoch": 0.7854077253218884,
      "grad_norm": 0.18355157576003553,
      "learning_rate": 2.3197359371835802e-05,
      "loss": 0.6467,
      "step": 8784
    },
    {
      "epoch": 0.785497138769671,
      "grad_norm": 0.15093130768037966,
      "learning_rate": 2.3178816012432346e-05,
      "loss": 0.668,
      "step": 8785
    },
    {
      "epoch": 0.7855865522174535,
      "grad_norm": 0.1492641489832995,
      "learning_rate": 2.3160279095928817e-05,
      "loss": 0.6162,
      "step": 8786
    },
    {
      "epoch": 0.785675965665236,
      "grad_norm": 0.15083451632017603,
      "learning_rate": 2.3141748623879878e-05,
      "loss": 0.5941,
      "step": 8787
    },
    {
      "epoch": 0.7857653791130186,
      "grad_norm": 0.14185392467133756,
      "learning_rate": 2.3123224597839664e-05,
      "loss": 0.6842,
      "step": 8788
    },
    {
      "epoch": 0.7858547925608012,
      "grad_norm": 0.1722064515397913,
      "learning_rate": 2.3104707019361782e-05,
      "loss": 0.7001,
      "step": 8789
    },
    {
      "epoch": 0.7859442060085837,
      "grad_norm": 0.15908169881940715,
      "learning_rate": 2.3086195889999228e-05,
      "loss": 0.6628,
      "step": 8790
    },
    {
      "epoch": 0.7860336194563662,
      "grad_norm": 0.17191316627875416,
      "learning_rate": 2.3067691211304544e-05,
      "loss": 0.6537,
      "step": 8791
    },
    {
      "epoch": 0.7861230329041488,
      "grad_norm": 0.1486952488060409,
      "learning_rate": 2.3049192984829715e-05,
      "loss": 0.6275,
      "step": 8792
    },
    {
      "epoch": 0.7862124463519313,
      "grad_norm": 0.1519261719424936,
      "learning_rate": 2.3030701212126106e-05,
      "loss": 0.6283,
      "step": 8793
    },
    {
      "epoch": 0.7863018597997139,
      "grad_norm": 0.1619259303210547,
      "learning_rate": 2.3012215894744593e-05,
      "loss": 0.6549,
      "step": 8794
    },
    {
      "epoch": 0.7863912732474965,
      "grad_norm": 0.16159439419895102,
      "learning_rate": 2.299373703423563e-05,
      "loss": 0.6753,
      "step": 8795
    },
    {
      "epoch": 0.7864806866952789,
      "grad_norm": 0.1466221036396059,
      "learning_rate": 2.2975264632148896e-05,
      "loss": 0.6281,
      "step": 8796
    },
    {
      "epoch": 0.7865701001430615,
      "grad_norm": 0.15630322066735589,
      "learning_rate": 2.2956798690033708e-05,
      "loss": 0.5876,
      "step": 8797
    },
    {
      "epoch": 0.7866595135908441,
      "grad_norm": 0.16145669694154166,
      "learning_rate": 2.2938339209438797e-05,
      "loss": 0.6099,
      "step": 8798
    },
    {
      "epoch": 0.7867489270386266,
      "grad_norm": 0.14405538937950657,
      "learning_rate": 2.2919886191912277e-05,
      "loss": 0.6078,
      "step": 8799
    },
    {
      "epoch": 0.7868383404864091,
      "grad_norm": 0.18336170634416085,
      "learning_rate": 2.290143963900181e-05,
      "loss": 0.6873,
      "step": 8800
    },
    {
      "epoch": 0.7869277539341917,
      "grad_norm": 0.15101545336261285,
      "learning_rate": 2.2882999552254492e-05,
      "loss": 0.6287,
      "step": 8801
    },
    {
      "epoch": 0.7870171673819742,
      "grad_norm": 0.1640153501663695,
      "learning_rate": 2.2864565933216865e-05,
      "loss": 0.6999,
      "step": 8802
    },
    {
      "epoch": 0.7871065808297568,
      "grad_norm": 0.15862018693601335,
      "learning_rate": 2.2846138783434944e-05,
      "loss": 0.648,
      "step": 8803
    },
    {
      "epoch": 0.7871959942775394,
      "grad_norm": 0.17460045766356078,
      "learning_rate": 2.282771810445421e-05,
      "loss": 0.6618,
      "step": 8804
    },
    {
      "epoch": 0.7872854077253219,
      "grad_norm": 0.16040276514942395,
      "learning_rate": 2.280930389781952e-05,
      "loss": 0.6677,
      "step": 8805
    },
    {
      "epoch": 0.7873748211731044,
      "grad_norm": 0.14819733536976545,
      "learning_rate": 2.2790896165075305e-05,
      "loss": 0.6432,
      "step": 8806
    },
    {
      "epoch": 0.787464234620887,
      "grad_norm": 0.1623864945867077,
      "learning_rate": 2.2772494907765406e-05,
      "loss": 0.619,
      "step": 8807
    },
    {
      "epoch": 0.7875536480686696,
      "grad_norm": 0.1674084804457524,
      "learning_rate": 2.275410012743303e-05,
      "loss": 0.6323,
      "step": 8808
    },
    {
      "epoch": 0.787643061516452,
      "grad_norm": 0.17133674495909024,
      "learning_rate": 2.2735711825621052e-05,
      "loss": 0.6603,
      "step": 8809
    },
    {
      "epoch": 0.7877324749642346,
      "grad_norm": 0.16267420954148848,
      "learning_rate": 2.2717330003871573e-05,
      "loss": 0.6262,
      "step": 8810
    },
    {
      "epoch": 0.7878218884120172,
      "grad_norm": 0.16322193579520733,
      "learning_rate": 2.26989546637263e-05,
      "loss": 0.6072,
      "step": 8811
    },
    {
      "epoch": 0.7879113018597997,
      "grad_norm": 0.14017568346137954,
      "learning_rate": 2.2680585806726373e-05,
      "loss": 0.6244,
      "step": 8812
    },
    {
      "epoch": 0.7880007153075823,
      "grad_norm": 0.15206567706315396,
      "learning_rate": 2.266222343441231e-05,
      "loss": 0.6247,
      "step": 8813
    },
    {
      "epoch": 0.7880901287553648,
      "grad_norm": 0.1550217437049678,
      "learning_rate": 2.264386754832416e-05,
      "loss": 0.638,
      "step": 8814
    },
    {
      "epoch": 0.7881795422031473,
      "grad_norm": 0.15402231966997915,
      "learning_rate": 2.2625518150001425e-05,
      "loss": 0.6409,
      "step": 8815
    },
    {
      "epoch": 0.7882689556509299,
      "grad_norm": 0.15881040468700675,
      "learning_rate": 2.2607175240983026e-05,
      "loss": 0.6357,
      "step": 8816
    },
    {
      "epoch": 0.7883583690987125,
      "grad_norm": 0.16586387767452254,
      "learning_rate": 2.2588838822807378e-05,
      "loss": 0.6509,
      "step": 8817
    },
    {
      "epoch": 0.788447782546495,
      "grad_norm": 0.14257263195731606,
      "learning_rate": 2.2570508897012355e-05,
      "loss": 0.6132,
      "step": 8818
    },
    {
      "epoch": 0.7885371959942775,
      "grad_norm": 0.1801366259138565,
      "learning_rate": 2.2552185465135224e-05,
      "loss": 0.6655,
      "step": 8819
    },
    {
      "epoch": 0.7886266094420601,
      "grad_norm": 0.15835253724387935,
      "learning_rate": 2.2533868528712755e-05,
      "loss": 0.6344,
      "step": 8820
    },
    {
      "epoch": 0.7887160228898427,
      "grad_norm": 0.15243066803353025,
      "learning_rate": 2.2515558089281196e-05,
      "loss": 0.652,
      "step": 8821
    },
    {
      "epoch": 0.7888054363376252,
      "grad_norm": 0.16409449088145117,
      "learning_rate": 2.2497254148376157e-05,
      "loss": 0.6665,
      "step": 8822
    },
    {
      "epoch": 0.7888948497854077,
      "grad_norm": 0.16356170162442626,
      "learning_rate": 2.247895670753287e-05,
      "loss": 0.6487,
      "step": 8823
    },
    {
      "epoch": 0.7889842632331903,
      "grad_norm": 0.16396955923396725,
      "learning_rate": 2.2460665768285826e-05,
      "loss": 0.6478,
      "step": 8824
    },
    {
      "epoch": 0.7890736766809728,
      "grad_norm": 0.17066179674918106,
      "learning_rate": 2.2442381332169115e-05,
      "loss": 0.6701,
      "step": 8825
    },
    {
      "epoch": 0.7891630901287554,
      "grad_norm": 0.16462303679673812,
      "learning_rate": 2.2424103400716203e-05,
      "loss": 0.6579,
      "step": 8826
    },
    {
      "epoch": 0.7892525035765379,
      "grad_norm": 0.154759292015825,
      "learning_rate": 2.240583197546008e-05,
      "loss": 0.6199,
      "step": 8827
    },
    {
      "epoch": 0.7893419170243204,
      "grad_norm": 0.1743533638802536,
      "learning_rate": 2.23875670579331e-05,
      "loss": 0.6442,
      "step": 8828
    },
    {
      "epoch": 0.789431330472103,
      "grad_norm": 0.16249714893872816,
      "learning_rate": 2.236930864966713e-05,
      "loss": 0.6454,
      "step": 8829
    },
    {
      "epoch": 0.7895207439198856,
      "grad_norm": 0.17620479986187368,
      "learning_rate": 2.235105675219349e-05,
      "loss": 0.6783,
      "step": 8830
    },
    {
      "epoch": 0.789610157367668,
      "grad_norm": 0.18252812181818104,
      "learning_rate": 2.2332811367042948e-05,
      "loss": 0.612,
      "step": 8831
    },
    {
      "epoch": 0.7896995708154506,
      "grad_norm": 0.15644107825845413,
      "learning_rate": 2.2314572495745746e-05,
      "loss": 0.6445,
      "step": 8832
    },
    {
      "epoch": 0.7897889842632332,
      "grad_norm": 0.1713431645581847,
      "learning_rate": 2.2296340139831494e-05,
      "loss": 0.6678,
      "step": 8833
    },
    {
      "epoch": 0.7898783977110158,
      "grad_norm": 0.17454683833179807,
      "learning_rate": 2.2278114300829356e-05,
      "loss": 0.652,
      "step": 8834
    },
    {
      "epoch": 0.7899678111587983,
      "grad_norm": 0.16048783837784866,
      "learning_rate": 2.2259894980267937e-05,
      "loss": 0.6437,
      "step": 8835
    },
    {
      "epoch": 0.7900572246065808,
      "grad_norm": 0.1637851810130765,
      "learning_rate": 2.224168217967518e-05,
      "loss": 0.6312,
      "step": 8836
    },
    {
      "epoch": 0.7901466380543634,
      "grad_norm": 0.16121544830409726,
      "learning_rate": 2.2223475900578674e-05,
      "loss": 0.6522,
      "step": 8837
    },
    {
      "epoch": 0.7902360515021459,
      "grad_norm": 0.15871478467652667,
      "learning_rate": 2.220527614450533e-05,
      "loss": 0.617,
      "step": 8838
    },
    {
      "epoch": 0.7903254649499285,
      "grad_norm": 0.14801404419150246,
      "learning_rate": 2.2187082912981493e-05,
      "loss": 0.6243,
      "step": 8839
    },
    {
      "epoch": 0.790414878397711,
      "grad_norm": 0.15344995080537965,
      "learning_rate": 2.216889620753304e-05,
      "loss": 0.6472,
      "step": 8840
    },
    {
      "epoch": 0.7905042918454935,
      "grad_norm": 0.15404650520448024,
      "learning_rate": 2.215071602968529e-05,
      "loss": 0.6349,
      "step": 8841
    },
    {
      "epoch": 0.7905937052932761,
      "grad_norm": 0.15085227444140023,
      "learning_rate": 2.213254238096295e-05,
      "loss": 0.6113,
      "step": 8842
    },
    {
      "epoch": 0.7906831187410587,
      "grad_norm": 0.14320794773839385,
      "learning_rate": 2.211437526289023e-05,
      "loss": 0.6234,
      "step": 8843
    },
    {
      "epoch": 0.7907725321888412,
      "grad_norm": 0.1795213117096643,
      "learning_rate": 2.20962146769908e-05,
      "loss": 0.38,
      "step": 8844
    },
    {
      "epoch": 0.7908619456366237,
      "grad_norm": 0.16026221206077412,
      "learning_rate": 2.2078060624787757e-05,
      "loss": 0.6515,
      "step": 8845
    },
    {
      "epoch": 0.7909513590844063,
      "grad_norm": 0.15567026740843995,
      "learning_rate": 2.2059913107803697e-05,
      "loss": 0.6253,
      "step": 8846
    },
    {
      "epoch": 0.7910407725321889,
      "grad_norm": 0.14162415989949048,
      "learning_rate": 2.2041772127560566e-05,
      "loss": 0.6159,
      "step": 8847
    },
    {
      "epoch": 0.7911301859799714,
      "grad_norm": 0.15121744161080974,
      "learning_rate": 2.2023637685579856e-05,
      "loss": 0.6666,
      "step": 8848
    },
    {
      "epoch": 0.7912195994277539,
      "grad_norm": 0.15398697996122884,
      "learning_rate": 2.2005509783382517e-05,
      "loss": 0.6055,
      "step": 8849
    },
    {
      "epoch": 0.7913090128755365,
      "grad_norm": 0.17072246592408624,
      "learning_rate": 2.198738842248882e-05,
      "loss": 0.657,
      "step": 8850
    },
    {
      "epoch": 0.791398426323319,
      "grad_norm": 0.15061867910283322,
      "learning_rate": 2.196927360441866e-05,
      "loss": 0.6169,
      "step": 8851
    },
    {
      "epoch": 0.7914878397711016,
      "grad_norm": 0.15967207429530209,
      "learning_rate": 2.1951165330691324e-05,
      "loss": 0.6619,
      "step": 8852
    },
    {
      "epoch": 0.7915772532188842,
      "grad_norm": 0.16486333454516608,
      "learning_rate": 2.1933063602825455e-05,
      "loss": 0.6681,
      "step": 8853
    },
    {
      "epoch": 0.7916666666666666,
      "grad_norm": 0.167834035600991,
      "learning_rate": 2.1914968422339266e-05,
      "loss": 0.6795,
      "step": 8854
    },
    {
      "epoch": 0.7917560801144492,
      "grad_norm": 0.17277261753615422,
      "learning_rate": 2.1896879790750403e-05,
      "loss": 0.651,
      "step": 8855
    },
    {
      "epoch": 0.7918454935622318,
      "grad_norm": 0.15466273601152894,
      "learning_rate": 2.1878797709575847e-05,
      "loss": 0.6516,
      "step": 8856
    },
    {
      "epoch": 0.7919349070100143,
      "grad_norm": 0.15559178450292163,
      "learning_rate": 2.186072218033224e-05,
      "loss": 0.6644,
      "step": 8857
    },
    {
      "epoch": 0.7920243204577968,
      "grad_norm": 0.16501711504232436,
      "learning_rate": 2.1842653204535466e-05,
      "loss": 0.61,
      "step": 8858
    },
    {
      "epoch": 0.7921137339055794,
      "grad_norm": 0.1741166582945073,
      "learning_rate": 2.1824590783700982e-05,
      "loss": 0.6405,
      "step": 8859
    },
    {
      "epoch": 0.792203147353362,
      "grad_norm": 0.15160877487301494,
      "learning_rate": 2.1806534919343647e-05,
      "loss": 0.6432,
      "step": 8860
    },
    {
      "epoch": 0.7922925608011445,
      "grad_norm": 0.16416933397436126,
      "learning_rate": 2.1788485612977827e-05,
      "loss": 0.6506,
      "step": 8861
    },
    {
      "epoch": 0.7923819742489271,
      "grad_norm": 0.15878750339038092,
      "learning_rate": 2.1770442866117236e-05,
      "loss": 0.6442,
      "step": 8862
    },
    {
      "epoch": 0.7924713876967096,
      "grad_norm": 0.15721386944090796,
      "learning_rate": 2.1752406680275126e-05,
      "loss": 0.6135,
      "step": 8863
    },
    {
      "epoch": 0.7925608011444921,
      "grad_norm": 0.15376253633359227,
      "learning_rate": 2.1734377056964172e-05,
      "loss": 0.6131,
      "step": 8864
    },
    {
      "epoch": 0.7926502145922747,
      "grad_norm": 0.15672495473101353,
      "learning_rate": 2.1716353997696482e-05,
      "loss": 0.641,
      "step": 8865
    },
    {
      "epoch": 0.7927396280400573,
      "grad_norm": 0.16353280124240208,
      "learning_rate": 2.169833750398368e-05,
      "loss": 0.6337,
      "step": 8866
    },
    {
      "epoch": 0.7928290414878397,
      "grad_norm": 0.17047817844188834,
      "learning_rate": 2.1680327577336712e-05,
      "loss": 0.3897,
      "step": 8867
    },
    {
      "epoch": 0.7929184549356223,
      "grad_norm": 0.16981207190186293,
      "learning_rate": 2.1662324219266083e-05,
      "loss": 0.6466,
      "step": 8868
    },
    {
      "epoch": 0.7930078683834049,
      "grad_norm": 0.1696111220067512,
      "learning_rate": 2.1644327431281742e-05,
      "loss": 0.6485,
      "step": 8869
    },
    {
      "epoch": 0.7930972818311874,
      "grad_norm": 0.16968279441089107,
      "learning_rate": 2.1626337214892978e-05,
      "loss": 0.6664,
      "step": 8870
    },
    {
      "epoch": 0.79318669527897,
      "grad_norm": 0.14470211888664733,
      "learning_rate": 2.1608353571608685e-05,
      "loss": 0.6207,
      "step": 8871
    },
    {
      "epoch": 0.7932761087267525,
      "grad_norm": 0.14979714182948023,
      "learning_rate": 2.1590376502937136e-05,
      "loss": 0.6512,
      "step": 8872
    },
    {
      "epoch": 0.793365522174535,
      "grad_norm": 0.1645394546052364,
      "learning_rate": 2.1572406010385983e-05,
      "loss": 0.6702,
      "step": 8873
    },
    {
      "epoch": 0.7934549356223176,
      "grad_norm": 0.16767169502365914,
      "learning_rate": 2.1554442095462422e-05,
      "loss": 0.6511,
      "step": 8874
    },
    {
      "epoch": 0.7935443490701002,
      "grad_norm": 0.15919718176196748,
      "learning_rate": 2.1536484759673092e-05,
      "loss": 0.6368,
      "step": 8875
    },
    {
      "epoch": 0.7936337625178826,
      "grad_norm": 0.1513683316141127,
      "learning_rate": 2.1518534004523993e-05,
      "loss": 0.6222,
      "step": 8876
    },
    {
      "epoch": 0.7937231759656652,
      "grad_norm": 0.15451861923395,
      "learning_rate": 2.150058983152068e-05,
      "loss": 0.6818,
      "step": 8877
    },
    {
      "epoch": 0.7938125894134478,
      "grad_norm": 0.15640365666851552,
      "learning_rate": 2.1482652242168077e-05,
      "loss": 0.6372,
      "step": 8878
    },
    {
      "epoch": 0.7939020028612304,
      "grad_norm": 0.1610145879927292,
      "learning_rate": 2.146472123797062e-05,
      "loss": 0.6845,
      "step": 8879
    },
    {
      "epoch": 0.7939914163090128,
      "grad_norm": 0.14979004345563188,
      "learning_rate": 2.1446796820432167e-05,
      "loss": 0.6229,
      "step": 8880
    },
    {
      "epoch": 0.7940808297567954,
      "grad_norm": 0.1847616189384055,
      "learning_rate": 2.1428878991055966e-05,
      "loss": 0.7008,
      "step": 8881
    },
    {
      "epoch": 0.794170243204578,
      "grad_norm": 0.15551345373015324,
      "learning_rate": 2.1410967751344803e-05,
      "loss": 0.6022,
      "step": 8882
    },
    {
      "epoch": 0.7942596566523605,
      "grad_norm": 0.1512996691505279,
      "learning_rate": 2.1393063102800847e-05,
      "loss": 0.6301,
      "step": 8883
    },
    {
      "epoch": 0.7943490701001431,
      "grad_norm": 0.15292788828254775,
      "learning_rate": 2.137516504692577e-05,
      "loss": 0.6193,
      "step": 8884
    },
    {
      "epoch": 0.7944384835479256,
      "grad_norm": 0.14053231476321554,
      "learning_rate": 2.135727358522064e-05,
      "loss": 0.6195,
      "step": 8885
    },
    {
      "epoch": 0.7945278969957081,
      "grad_norm": 0.16779046128169506,
      "learning_rate": 2.1339388719186028e-05,
      "loss": 0.6731,
      "step": 8886
    },
    {
      "epoch": 0.7946173104434907,
      "grad_norm": 0.175852476729302,
      "learning_rate": 2.1321510450321858e-05,
      "loss": 0.6579,
      "step": 8887
    },
    {
      "epoch": 0.7947067238912733,
      "grad_norm": 0.17416262234374366,
      "learning_rate": 2.1303638780127588e-05,
      "loss": 0.7178,
      "step": 8888
    },
    {
      "epoch": 0.7947961373390557,
      "grad_norm": 0.16257572023767955,
      "learning_rate": 2.128577371010212e-05,
      "loss": 0.6499,
      "step": 8889
    },
    {
      "epoch": 0.7948855507868383,
      "grad_norm": 0.17812042904824793,
      "learning_rate": 2.126791524174372e-05,
      "loss": 0.3893,
      "step": 8890
    },
    {
      "epoch": 0.7949749642346209,
      "grad_norm": 0.16341807835487251,
      "learning_rate": 2.1250063376550154e-05,
      "loss": 0.6593,
      "step": 8891
    },
    {
      "epoch": 0.7950643776824035,
      "grad_norm": 0.16411299730452725,
      "learning_rate": 2.1232218116018722e-05,
      "loss": 0.644,
      "step": 8892
    },
    {
      "epoch": 0.795153791130186,
      "grad_norm": 0.16765174033367045,
      "learning_rate": 2.1214379461646005e-05,
      "loss": 0.6774,
      "step": 8893
    },
    {
      "epoch": 0.7952432045779685,
      "grad_norm": 0.17008973872447405,
      "learning_rate": 2.1196547414928137e-05,
      "loss": 0.6726,
      "step": 8894
    },
    {
      "epoch": 0.7953326180257511,
      "grad_norm": 0.16260345851282687,
      "learning_rate": 2.1178721977360684e-05,
      "loss": 0.6362,
      "step": 8895
    },
    {
      "epoch": 0.7954220314735336,
      "grad_norm": 0.16995428151411565,
      "learning_rate": 2.1160903150438605e-05,
      "loss": 0.6267,
      "step": 8896
    },
    {
      "epoch": 0.7955114449213162,
      "grad_norm": 0.18115342728210473,
      "learning_rate": 2.114309093565637e-05,
      "loss": 0.6636,
      "step": 8897
    },
    {
      "epoch": 0.7956008583690987,
      "grad_norm": 0.15785098558160032,
      "learning_rate": 2.112528533450786e-05,
      "loss": 0.6244,
      "step": 8898
    },
    {
      "epoch": 0.7956902718168812,
      "grad_norm": 0.17569885039871846,
      "learning_rate": 2.1107486348486406e-05,
      "loss": 0.6117,
      "step": 8899
    },
    {
      "epoch": 0.7957796852646638,
      "grad_norm": 0.1688355186131056,
      "learning_rate": 2.1089693979084825e-05,
      "loss": 0.3707,
      "step": 8900
    },
    {
      "epoch": 0.7958690987124464,
      "grad_norm": 0.18768522216239916,
      "learning_rate": 2.107190822779529e-05,
      "loss": 0.6757,
      "step": 8901
    },
    {
      "epoch": 0.795958512160229,
      "grad_norm": 0.15788701244459882,
      "learning_rate": 2.1054129096109486e-05,
      "loss": 0.6386,
      "step": 8902
    },
    {
      "epoch": 0.7960479256080114,
      "grad_norm": 0.15484215698285117,
      "learning_rate": 2.103635658551856e-05,
      "loss": 0.6387,
      "step": 8903
    },
    {
      "epoch": 0.796137339055794,
      "grad_norm": 0.16677240230472876,
      "learning_rate": 2.101859069751301e-05,
      "loss": 0.6835,
      "step": 8904
    },
    {
      "epoch": 0.7962267525035766,
      "grad_norm": 0.13827042533399975,
      "learning_rate": 2.1000831433582856e-05,
      "loss": 0.6248,
      "step": 8905
    },
    {
      "epoch": 0.7963161659513591,
      "grad_norm": 0.15181420616568372,
      "learning_rate": 2.0983078795217603e-05,
      "loss": 0.6426,
      "step": 8906
    },
    {
      "epoch": 0.7964055793991416,
      "grad_norm": 0.14950039361470388,
      "learning_rate": 2.0965332783906087e-05,
      "loss": 0.6353,
      "step": 8907
    },
    {
      "epoch": 0.7964949928469242,
      "grad_norm": 0.1477845503986821,
      "learning_rate": 2.0947593401136657e-05,
      "loss": 0.6345,
      "step": 8908
    },
    {
      "epoch": 0.7965844062947067,
      "grad_norm": 0.1598060990821273,
      "learning_rate": 2.0929860648397126e-05,
      "loss": 0.6464,
      "step": 8909
    },
    {
      "epoch": 0.7966738197424893,
      "grad_norm": 0.14646316001289578,
      "learning_rate": 2.0912134527174664e-05,
      "loss": 0.6166,
      "step": 8910
    },
    {
      "epoch": 0.7967632331902719,
      "grad_norm": 0.145690782384842,
      "learning_rate": 2.0894415038955962e-05,
      "loss": 0.6625,
      "step": 8911
    },
    {
      "epoch": 0.7968526466380543,
      "grad_norm": 0.17770371023475157,
      "learning_rate": 2.0876702185227137e-05,
      "loss": 0.6812,
      "step": 8912
    },
    {
      "epoch": 0.7969420600858369,
      "grad_norm": 0.16416815638399276,
      "learning_rate": 2.085899596747375e-05,
      "loss": 0.6514,
      "step": 8913
    },
    {
      "epoch": 0.7970314735336195,
      "grad_norm": 0.14929443282650434,
      "learning_rate": 2.084129638718081e-05,
      "loss": 0.6302,
      "step": 8914
    },
    {
      "epoch": 0.797120886981402,
      "grad_norm": 0.14331836958133912,
      "learning_rate": 2.082360344583272e-05,
      "loss": 0.6123,
      "step": 8915
    },
    {
      "epoch": 0.7972103004291845,
      "grad_norm": 0.15669269455235632,
      "learning_rate": 2.080591714491339e-05,
      "loss": 0.605,
      "step": 8916
    },
    {
      "epoch": 0.7972997138769671,
      "grad_norm": 0.15562113977341946,
      "learning_rate": 2.0788237485906135e-05,
      "loss": 0.6523,
      "step": 8917
    },
    {
      "epoch": 0.7973891273247496,
      "grad_norm": 0.1553899815325605,
      "learning_rate": 2.0770564470293775e-05,
      "loss": 0.5983,
      "step": 8918
    },
    {
      "epoch": 0.7974785407725322,
      "grad_norm": 0.15648419904584468,
      "learning_rate": 2.0752898099558437e-05,
      "loss": 0.6417,
      "step": 8919
    },
    {
      "epoch": 0.7975679542203148,
      "grad_norm": 0.1480167764182581,
      "learning_rate": 2.0735238375181875e-05,
      "loss": 0.6386,
      "step": 8920
    },
    {
      "epoch": 0.7976573676680973,
      "grad_norm": 0.16935703715486317,
      "learning_rate": 2.0717585298645127e-05,
      "loss": 0.6699,
      "step": 8921
    },
    {
      "epoch": 0.7977467811158798,
      "grad_norm": 0.14524801013880823,
      "learning_rate": 2.069993887142874e-05,
      "loss": 0.5956,
      "step": 8922
    },
    {
      "epoch": 0.7978361945636624,
      "grad_norm": 0.14615518842239636,
      "learning_rate": 2.0682299095012747e-05,
      "loss": 0.6376,
      "step": 8923
    },
    {
      "epoch": 0.797925608011445,
      "grad_norm": 0.16368889415740293,
      "learning_rate": 2.0664665970876496e-05,
      "loss": 0.6862,
      "step": 8924
    },
    {
      "epoch": 0.7980150214592274,
      "grad_norm": 0.15822955631036287,
      "learning_rate": 2.064703950049891e-05,
      "loss": 0.6299,
      "step": 8925
    },
    {
      "epoch": 0.79810443490701,
      "grad_norm": 0.1601444560895906,
      "learning_rate": 2.0629419685358286e-05,
      "loss": 0.6491,
      "step": 8926
    },
    {
      "epoch": 0.7981938483547926,
      "grad_norm": 0.1486461807789737,
      "learning_rate": 2.0611806526932364e-05,
      "loss": 0.6297,
      "step": 8927
    },
    {
      "epoch": 0.7982832618025751,
      "grad_norm": 0.16335003471113108,
      "learning_rate": 2.0594200026698363e-05,
      "loss": 0.6503,
      "step": 8928
    },
    {
      "epoch": 0.7983726752503576,
      "grad_norm": 0.1641898182402899,
      "learning_rate": 2.0576600186132934e-05,
      "loss": 0.669,
      "step": 8929
    },
    {
      "epoch": 0.7984620886981402,
      "grad_norm": 0.15901290624638315,
      "learning_rate": 2.0559007006712106e-05,
      "loss": 0.6381,
      "step": 8930
    },
    {
      "epoch": 0.7985515021459227,
      "grad_norm": 0.16214611870231044,
      "learning_rate": 2.0541420489911413e-05,
      "loss": 0.6362,
      "step": 8931
    },
    {
      "epoch": 0.7986409155937053,
      "grad_norm": 0.15080352583954704,
      "learning_rate": 2.052384063720585e-05,
      "loss": 0.6354,
      "step": 8932
    },
    {
      "epoch": 0.7987303290414879,
      "grad_norm": 0.16443425283764887,
      "learning_rate": 2.0506267450069737e-05,
      "loss": 0.6353,
      "step": 8933
    },
    {
      "epoch": 0.7988197424892703,
      "grad_norm": 0.1374881172338854,
      "learning_rate": 2.048870092997702e-05,
      "loss": 0.6251,
      "step": 8934
    },
    {
      "epoch": 0.7989091559370529,
      "grad_norm": 0.1812905680235768,
      "learning_rate": 2.0471141078400912e-05,
      "loss": 0.6594,
      "step": 8935
    },
    {
      "epoch": 0.7989985693848355,
      "grad_norm": 0.1573430258659933,
      "learning_rate": 2.0453587896814142e-05,
      "loss": 0.6377,
      "step": 8936
    },
    {
      "epoch": 0.7990879828326181,
      "grad_norm": 0.17376769171228043,
      "learning_rate": 2.0436041386688932e-05,
      "loss": 0.6416,
      "step": 8937
    },
    {
      "epoch": 0.7991773962804005,
      "grad_norm": 0.1601620153010939,
      "learning_rate": 2.0418501549496792e-05,
      "loss": 0.6605,
      "step": 8938
    },
    {
      "epoch": 0.7992668097281831,
      "grad_norm": 0.1488246141505633,
      "learning_rate": 2.040096838670881e-05,
      "loss": 0.637,
      "step": 8939
    },
    {
      "epoch": 0.7993562231759657,
      "grad_norm": 0.18398897260344804,
      "learning_rate": 2.0383441899795518e-05,
      "loss": 0.6643,
      "step": 8940
    },
    {
      "epoch": 0.7994456366237482,
      "grad_norm": 0.14404006232904984,
      "learning_rate": 2.0365922090226784e-05,
      "loss": 0.646,
      "step": 8941
    },
    {
      "epoch": 0.7995350500715308,
      "grad_norm": 0.1788926259287501,
      "learning_rate": 2.034840895947199e-05,
      "loss": 0.685,
      "step": 8942
    },
    {
      "epoch": 0.7996244635193133,
      "grad_norm": 0.1629055418754766,
      "learning_rate": 2.033090250899997e-05,
      "loss": 0.6483,
      "step": 8943
    },
    {
      "epoch": 0.7997138769670958,
      "grad_norm": 0.15489410092434835,
      "learning_rate": 2.0313402740278908e-05,
      "loss": 0.6581,
      "step": 8944
    },
    {
      "epoch": 0.7998032904148784,
      "grad_norm": 0.16340235902767605,
      "learning_rate": 2.0295909654776524e-05,
      "loss": 0.639,
      "step": 8945
    },
    {
      "epoch": 0.799892703862661,
      "grad_norm": 0.16504417466167873,
      "learning_rate": 2.0278423253959934e-05,
      "loss": 0.692,
      "step": 8946
    },
    {
      "epoch": 0.7999821173104434,
      "grad_norm": 0.16400508892957125,
      "learning_rate": 2.026094353929572e-05,
      "loss": 0.642,
      "step": 8947
    },
    {
      "epoch": 0.800071530758226,
      "grad_norm": 0.1550751197521263,
      "learning_rate": 2.024347051224985e-05,
      "loss": 0.6235,
      "step": 8948
    },
    {
      "epoch": 0.8001609442060086,
      "grad_norm": 0.16520980653634618,
      "learning_rate": 2.0226004174287827e-05,
      "loss": 0.6493,
      "step": 8949
    },
    {
      "epoch": 0.8002503576537912,
      "grad_norm": 0.16920980902529123,
      "learning_rate": 2.0208544526874475e-05,
      "loss": 0.6465,
      "step": 8950
    },
    {
      "epoch": 0.8003397711015737,
      "grad_norm": 0.1502675332812525,
      "learning_rate": 2.0191091571474108e-05,
      "loss": 0.611,
      "step": 8951
    },
    {
      "epoch": 0.8004291845493562,
      "grad_norm": 0.14152859241166865,
      "learning_rate": 2.0173645309550548e-05,
      "loss": 0.641,
      "step": 8952
    },
    {
      "epoch": 0.8005185979971388,
      "grad_norm": 0.1484782806857852,
      "learning_rate": 2.0156205742566892e-05,
      "loss": 0.6198,
      "step": 8953
    },
    {
      "epoch": 0.8006080114449213,
      "grad_norm": 0.15799623026550977,
      "learning_rate": 2.013877287198588e-05,
      "loss": 0.6511,
      "step": 8954
    },
    {
      "epoch": 0.8006974248927039,
      "grad_norm": 0.1570994489419194,
      "learning_rate": 2.0121346699269516e-05,
      "loss": 0.6456,
      "step": 8955
    },
    {
      "epoch": 0.8007868383404864,
      "grad_norm": 0.1593924731572863,
      "learning_rate": 2.0103927225879336e-05,
      "loss": 0.657,
      "step": 8956
    },
    {
      "epoch": 0.8008762517882689,
      "grad_norm": 0.15830542302575665,
      "learning_rate": 2.008651445327633e-05,
      "loss": 0.6626,
      "step": 8957
    },
    {
      "epoch": 0.8009656652360515,
      "grad_norm": 0.15666880695402602,
      "learning_rate": 2.00691083829208e-05,
      "loss": 0.6496,
      "step": 8958
    },
    {
      "epoch": 0.8010550786838341,
      "grad_norm": 0.1773117245078302,
      "learning_rate": 2.0051709016272625e-05,
      "loss": 0.6402,
      "step": 8959
    },
    {
      "epoch": 0.8011444921316166,
      "grad_norm": 0.16465319580151802,
      "learning_rate": 2.0034316354791062e-05,
      "loss": 0.6161,
      "step": 8960
    },
    {
      "epoch": 0.8012339055793991,
      "grad_norm": 0.18635112674405685,
      "learning_rate": 2.001693039993482e-05,
      "loss": 0.6656,
      "step": 8961
    },
    {
      "epoch": 0.8013233190271817,
      "grad_norm": 0.15528078230637984,
      "learning_rate": 1.9999551153162022e-05,
      "loss": 0.6327,
      "step": 8962
    },
    {
      "epoch": 0.8014127324749643,
      "grad_norm": 0.1629521483777779,
      "learning_rate": 1.998217861593028e-05,
      "loss": 0.6842,
      "step": 8963
    },
    {
      "epoch": 0.8015021459227468,
      "grad_norm": 0.15020415451020683,
      "learning_rate": 1.996481278969655e-05,
      "loss": 0.6487,
      "step": 8964
    },
    {
      "epoch": 0.8015915593705293,
      "grad_norm": 0.14505991447231587,
      "learning_rate": 1.9947453675917316e-05,
      "loss": 0.6108,
      "step": 8965
    },
    {
      "epoch": 0.8016809728183119,
      "grad_norm": 0.19582013649128452,
      "learning_rate": 1.9930101276048485e-05,
      "loss": 0.6902,
      "step": 8966
    },
    {
      "epoch": 0.8017703862660944,
      "grad_norm": 0.1468101227316847,
      "learning_rate": 1.9912755591545317e-05,
      "loss": 0.6226,
      "step": 8967
    },
    {
      "epoch": 0.801859799713877,
      "grad_norm": 0.15158990472725117,
      "learning_rate": 1.9895416623862662e-05,
      "loss": 0.6342,
      "step": 8968
    },
    {
      "epoch": 0.8019492131616596,
      "grad_norm": 0.16898339342854202,
      "learning_rate": 1.9878084374454653e-05,
      "loss": 0.6664,
      "step": 8969
    },
    {
      "epoch": 0.802038626609442,
      "grad_norm": 0.1754213773976348,
      "learning_rate": 1.986075884477494e-05,
      "loss": 0.6356,
      "step": 8970
    },
    {
      "epoch": 0.8021280400572246,
      "grad_norm": 0.1793505425852128,
      "learning_rate": 1.984344003627663e-05,
      "loss": 0.6423,
      "step": 8971
    },
    {
      "epoch": 0.8022174535050072,
      "grad_norm": 0.17365142449005672,
      "learning_rate": 1.9826127950412167e-05,
      "loss": 0.6802,
      "step": 8972
    },
    {
      "epoch": 0.8023068669527897,
      "grad_norm": 0.1688702841661948,
      "learning_rate": 1.9808822588633535e-05,
      "loss": 0.6426,
      "step": 8973
    },
    {
      "epoch": 0.8023962804005722,
      "grad_norm": 0.1577415351984343,
      "learning_rate": 1.97915239523921e-05,
      "loss": 0.6678,
      "step": 8974
    },
    {
      "epoch": 0.8024856938483548,
      "grad_norm": 0.162298206000204,
      "learning_rate": 1.9774232043138685e-05,
      "loss": 0.6594,
      "step": 8975
    },
    {
      "epoch": 0.8025751072961373,
      "grad_norm": 0.17955038500469223,
      "learning_rate": 1.9756946862323535e-05,
      "loss": 0.6067,
      "step": 8976
    },
    {
      "epoch": 0.8026645207439199,
      "grad_norm": 0.16936399078936584,
      "learning_rate": 1.9739668411396383e-05,
      "loss": 0.6563,
      "step": 8977
    },
    {
      "epoch": 0.8027539341917024,
      "grad_norm": 0.17089221623099063,
      "learning_rate": 1.9722396691806267e-05,
      "loss": 0.6619,
      "step": 8978
    },
    {
      "epoch": 0.802843347639485,
      "grad_norm": 0.16779331053797464,
      "learning_rate": 1.97051317050018e-05,
      "loss": 0.6498,
      "step": 8979
    },
    {
      "epoch": 0.8029327610872675,
      "grad_norm": 0.16588583830437278,
      "learning_rate": 1.9687873452430995e-05,
      "loss": 0.6694,
      "step": 8980
    },
    {
      "epoch": 0.8030221745350501,
      "grad_norm": 0.15794999257210826,
      "learning_rate": 1.967062193554119e-05,
      "loss": 0.6486,
      "step": 8981
    },
    {
      "epoch": 0.8031115879828327,
      "grad_norm": 0.16301384257396787,
      "learning_rate": 1.965337715577934e-05,
      "loss": 0.6778,
      "step": 8982
    },
    {
      "epoch": 0.8032010014306151,
      "grad_norm": 0.17099692754125656,
      "learning_rate": 1.9636139114591747e-05,
      "loss": 0.6382,
      "step": 8983
    },
    {
      "epoch": 0.8032904148783977,
      "grad_norm": 0.17501695608440862,
      "learning_rate": 1.961890781342408e-05,
      "loss": 0.6521,
      "step": 8984
    },
    {
      "epoch": 0.8033798283261803,
      "grad_norm": 0.1479800288245182,
      "learning_rate": 1.9601683253721536e-05,
      "loss": 0.6636,
      "step": 8985
    },
    {
      "epoch": 0.8034692417739628,
      "grad_norm": 0.1730316102332704,
      "learning_rate": 1.9584465436928745e-05,
      "loss": 0.6603,
      "step": 8986
    },
    {
      "epoch": 0.8035586552217453,
      "grad_norm": 0.16056625290947668,
      "learning_rate": 1.9567254364489694e-05,
      "loss": 0.6388,
      "step": 8987
    },
    {
      "epoch": 0.8036480686695279,
      "grad_norm": 0.13702771587109477,
      "learning_rate": 1.955005003784789e-05,
      "loss": 0.6415,
      "step": 8988
    },
    {
      "epoch": 0.8037374821173104,
      "grad_norm": 0.17301551523394523,
      "learning_rate": 1.9532852458446228e-05,
      "loss": 0.6624,
      "step": 8989
    },
    {
      "epoch": 0.803826895565093,
      "grad_norm": 0.16545739475014654,
      "learning_rate": 1.9515661627727044e-05,
      "loss": 0.617,
      "step": 8990
    },
    {
      "epoch": 0.8039163090128756,
      "grad_norm": 0.17573799874783128,
      "learning_rate": 1.9498477547132154e-05,
      "loss": 0.6699,
      "step": 8991
    },
    {
      "epoch": 0.804005722460658,
      "grad_norm": 0.16723723167941248,
      "learning_rate": 1.9481300218102692e-05,
      "loss": 0.6886,
      "step": 8992
    },
    {
      "epoch": 0.8040951359084406,
      "grad_norm": 0.1581027410648163,
      "learning_rate": 1.9464129642079355e-05,
      "loss": 0.6487,
      "step": 8993
    },
    {
      "epoch": 0.8041845493562232,
      "grad_norm": 0.17130228750268173,
      "learning_rate": 1.9446965820502218e-05,
      "loss": 0.6763,
      "step": 8994
    },
    {
      "epoch": 0.8042739628040058,
      "grad_norm": 0.14879763085679865,
      "learning_rate": 1.9429808754810717e-05,
      "loss": 0.6266,
      "step": 8995
    },
    {
      "epoch": 0.8043633762517882,
      "grad_norm": 0.16068583156716224,
      "learning_rate": 1.9412658446443887e-05,
      "loss": 0.6428,
      "step": 8996
    },
    {
      "epoch": 0.8044527896995708,
      "grad_norm": 0.1392089749461186,
      "learning_rate": 1.9395514896840093e-05,
      "loss": 0.6322,
      "step": 8997
    },
    {
      "epoch": 0.8045422031473534,
      "grad_norm": 0.15183570091376472,
      "learning_rate": 1.93783781074371e-05,
      "loss": 0.625,
      "step": 8998
    },
    {
      "epoch": 0.8046316165951359,
      "grad_norm": 0.1651739507057697,
      "learning_rate": 1.9361248079672158e-05,
      "loss": 0.6795,
      "step": 8999
    },
    {
      "epoch": 0.8047210300429185,
      "grad_norm": 0.1495124108641577,
      "learning_rate": 1.934412481498198e-05,
      "loss": 0.6223,
      "step": 9000
    },
    {
      "epoch": 0.804810443490701,
      "grad_norm": 0.15659505378933522,
      "learning_rate": 1.932700831480262e-05,
      "loss": 0.6434,
      "step": 9001
    },
    {
      "epoch": 0.8048998569384835,
      "grad_norm": 0.14882664084869793,
      "learning_rate": 1.930989858056965e-05,
      "loss": 0.6604,
      "step": 9002
    },
    {
      "epoch": 0.8049892703862661,
      "grad_norm": 0.1507554444361054,
      "learning_rate": 1.929279561371803e-05,
      "loss": 0.6092,
      "step": 9003
    },
    {
      "epoch": 0.8050786838340487,
      "grad_norm": 0.18125786558236423,
      "learning_rate": 1.927569941568218e-05,
      "loss": 0.6713,
      "step": 9004
    },
    {
      "epoch": 0.8051680972818311,
      "grad_norm": 0.15276211116598626,
      "learning_rate": 1.9258609987895926e-05,
      "loss": 0.6515,
      "step": 9005
    },
    {
      "epoch": 0.8052575107296137,
      "grad_norm": 0.1597372982033548,
      "learning_rate": 1.9241527331792562e-05,
      "loss": 0.6251,
      "step": 9006
    },
    {
      "epoch": 0.8053469241773963,
      "grad_norm": 0.1634410785346991,
      "learning_rate": 1.922445144880475e-05,
      "loss": 0.6701,
      "step": 9007
    },
    {
      "epoch": 0.8054363376251789,
      "grad_norm": 0.14936817009901815,
      "learning_rate": 1.9207382340364634e-05,
      "loss": 0.6496,
      "step": 9008
    },
    {
      "epoch": 0.8055257510729614,
      "grad_norm": 0.1632703601943483,
      "learning_rate": 1.9190320007903796e-05,
      "loss": 0.6728,
      "step": 9009
    },
    {
      "epoch": 0.8056151645207439,
      "grad_norm": 0.14189300121089862,
      "learning_rate": 1.9173264452853222e-05,
      "loss": 0.6122,
      "step": 9010
    },
    {
      "epoch": 0.8057045779685265,
      "grad_norm": 0.14342347280523926,
      "learning_rate": 1.9156215676643375e-05,
      "loss": 0.6338,
      "step": 9011
    },
    {
      "epoch": 0.805793991416309,
      "grad_norm": 0.1867570887500712,
      "learning_rate": 1.913917368070406e-05,
      "loss": 0.6193,
      "step": 9012
    },
    {
      "epoch": 0.8058834048640916,
      "grad_norm": 0.14797928631778554,
      "learning_rate": 1.912213846646459e-05,
      "loss": 0.6208,
      "step": 9013
    },
    {
      "epoch": 0.8059728183118741,
      "grad_norm": 0.1541520742881915,
      "learning_rate": 1.9105110035353714e-05,
      "loss": 0.6386,
      "step": 9014
    },
    {
      "epoch": 0.8060622317596566,
      "grad_norm": 0.1545930905529475,
      "learning_rate": 1.9088088388799542e-05,
      "loss": 0.6501,
      "step": 9015
    },
    {
      "epoch": 0.8061516452074392,
      "grad_norm": 0.17328218735211826,
      "learning_rate": 1.9071073528229655e-05,
      "loss": 0.6587,
      "step": 9016
    },
    {
      "epoch": 0.8062410586552218,
      "grad_norm": 0.1674215898453676,
      "learning_rate": 1.9054065455071136e-05,
      "loss": 0.6522,
      "step": 9017
    },
    {
      "epoch": 0.8063304721030042,
      "grad_norm": 0.1612211588510034,
      "learning_rate": 1.9037064170750373e-05,
      "loss": 0.66,
      "step": 9018
    },
    {
      "epoch": 0.8064198855507868,
      "grad_norm": 0.1666921600966413,
      "learning_rate": 1.9020069676693252e-05,
      "loss": 0.6888,
      "step": 9019
    },
    {
      "epoch": 0.8065092989985694,
      "grad_norm": 0.15873132843582077,
      "learning_rate": 1.9003081974325122e-05,
      "loss": 0.6675,
      "step": 9020
    },
    {
      "epoch": 0.806598712446352,
      "grad_norm": 0.1567948210903377,
      "learning_rate": 1.898610106507066e-05,
      "loss": 0.6256,
      "step": 9021
    },
    {
      "epoch": 0.8066881258941345,
      "grad_norm": 0.14391328293073227,
      "learning_rate": 1.8969126950354055e-05,
      "loss": 0.6195,
      "step": 9022
    },
    {
      "epoch": 0.806777539341917,
      "grad_norm": 0.17770154510798736,
      "learning_rate": 1.8952159631598922e-05,
      "loss": 0.3813,
      "step": 9023
    },
    {
      "epoch": 0.8068669527896996,
      "grad_norm": 0.1496564807073731,
      "learning_rate": 1.8935199110228275e-05,
      "loss": 0.6422,
      "step": 9024
    },
    {
      "epoch": 0.8069563662374821,
      "grad_norm": 0.16696680347050855,
      "learning_rate": 1.8918245387664602e-05,
      "loss": 0.6607,
      "step": 9025
    },
    {
      "epoch": 0.8070457796852647,
      "grad_norm": 0.15589320784571067,
      "learning_rate": 1.8901298465329743e-05,
      "loss": 0.625,
      "step": 9026
    },
    {
      "epoch": 0.8071351931330472,
      "grad_norm": 0.1574289646796077,
      "learning_rate": 1.8884358344645025e-05,
      "loss": 0.6405,
      "step": 9027
    },
    {
      "epoch": 0.8072246065808297,
      "grad_norm": 0.16616492956976048,
      "learning_rate": 1.886742502703125e-05,
      "loss": 0.598,
      "step": 9028
    },
    {
      "epoch": 0.8073140200286123,
      "grad_norm": 0.15980828898688818,
      "learning_rate": 1.88504985139085e-05,
      "loss": 0.6508,
      "step": 9029
    },
    {
      "epoch": 0.8074034334763949,
      "grad_norm": 0.1693040718626595,
      "learning_rate": 1.883357880669646e-05,
      "loss": 0.6133,
      "step": 9030
    },
    {
      "epoch": 0.8074928469241774,
      "grad_norm": 0.16829713175352748,
      "learning_rate": 1.8816665906814178e-05,
      "loss": 0.6232,
      "step": 9031
    },
    {
      "epoch": 0.8075822603719599,
      "grad_norm": 0.14237619223368228,
      "learning_rate": 1.879975981568004e-05,
      "loss": 0.6446,
      "step": 9032
    },
    {
      "epoch": 0.8076716738197425,
      "grad_norm": 0.17126854515103063,
      "learning_rate": 1.8782860534711998e-05,
      "loss": 0.6448,
      "step": 9033
    },
    {
      "epoch": 0.807761087267525,
      "grad_norm": 0.16422814027692706,
      "learning_rate": 1.8765968065327367e-05,
      "loss": 0.6318,
      "step": 9034
    },
    {
      "epoch": 0.8078505007153076,
      "grad_norm": 0.14979925722093299,
      "learning_rate": 1.8749082408942876e-05,
      "loss": 0.6068,
      "step": 9035
    },
    {
      "epoch": 0.8079399141630901,
      "grad_norm": 0.14473493462395012,
      "learning_rate": 1.8732203566974705e-05,
      "loss": 0.6096,
      "step": 9036
    },
    {
      "epoch": 0.8080293276108726,
      "grad_norm": 0.17020829256010506,
      "learning_rate": 1.8715331540838487e-05,
      "loss": 0.62,
      "step": 9037
    },
    {
      "epoch": 0.8081187410586552,
      "grad_norm": 0.14971480699590392,
      "learning_rate": 1.8698466331949238e-05,
      "loss": 0.615,
      "step": 9038
    },
    {
      "epoch": 0.8082081545064378,
      "grad_norm": 0.15840616253515744,
      "learning_rate": 1.8681607941721425e-05,
      "loss": 0.6355,
      "step": 9039
    },
    {
      "epoch": 0.8082975679542204,
      "grad_norm": 0.16238927377514112,
      "learning_rate": 1.866475637156898e-05,
      "loss": 0.6656,
      "step": 9040
    },
    {
      "epoch": 0.8083869814020028,
      "grad_norm": 0.1698074698379889,
      "learning_rate": 1.8647911622905168e-05,
      "loss": 0.6108,
      "step": 9041
    },
    {
      "epoch": 0.8084763948497854,
      "grad_norm": 0.15512050740352626,
      "learning_rate": 1.8631073697142754e-05,
      "loss": 0.6583,
      "step": 9042
    },
    {
      "epoch": 0.808565808297568,
      "grad_norm": 0.15465857858584875,
      "learning_rate": 1.8614242595693908e-05,
      "loss": 0.6251,
      "step": 9043
    },
    {
      "epoch": 0.8086552217453505,
      "grad_norm": 0.15410942093225125,
      "learning_rate": 1.8597418319970262e-05,
      "loss": 0.6407,
      "step": 9044
    },
    {
      "epoch": 0.808744635193133,
      "grad_norm": 0.1337018037377276,
      "learning_rate": 1.8580600871382857e-05,
      "loss": 0.6284,
      "step": 9045
    },
    {
      "epoch": 0.8088340486409156,
      "grad_norm": 0.15621212364433745,
      "learning_rate": 1.8563790251342095e-05,
      "loss": 0.6202,
      "step": 9046
    },
    {
      "epoch": 0.8089234620886981,
      "grad_norm": 0.14623529657077625,
      "learning_rate": 1.85469864612579e-05,
      "loss": 0.6024,
      "step": 9047
    },
    {
      "epoch": 0.8090128755364807,
      "grad_norm": 0.12755141555606514,
      "learning_rate": 1.8530189502539607e-05,
      "loss": 0.585,
      "step": 9048
    },
    {
      "epoch": 0.8091022889842633,
      "grad_norm": 0.15478074532966432,
      "learning_rate": 1.8513399376595895e-05,
      "loss": 0.6497,
      "step": 9049
    },
    {
      "epoch": 0.8091917024320457,
      "grad_norm": 0.14936329824973107,
      "learning_rate": 1.849661608483495e-05,
      "loss": 0.6311,
      "step": 9050
    },
    {
      "epoch": 0.8092811158798283,
      "grad_norm": 0.15642142633619063,
      "learning_rate": 1.847983962866443e-05,
      "loss": 0.6792,
      "step": 9051
    },
    {
      "epoch": 0.8093705293276109,
      "grad_norm": 0.14960268205512345,
      "learning_rate": 1.846307000949129e-05,
      "loss": 0.5979,
      "step": 9052
    },
    {
      "epoch": 0.8094599427753935,
      "grad_norm": 0.16751569419046847,
      "learning_rate": 1.844630722872199e-05,
      "loss": 0.6374,
      "step": 9053
    },
    {
      "epoch": 0.8095493562231759,
      "grad_norm": 0.1478821613038394,
      "learning_rate": 1.8429551287762435e-05,
      "loss": 0.6362,
      "step": 9054
    },
    {
      "epoch": 0.8096387696709585,
      "grad_norm": 0.16327689041911989,
      "learning_rate": 1.8412802188017885e-05,
      "loss": 0.6054,
      "step": 9055
    },
    {
      "epoch": 0.8097281831187411,
      "grad_norm": 0.15860934615393738,
      "learning_rate": 1.839605993089307e-05,
      "loss": 0.6204,
      "step": 9056
    },
    {
      "epoch": 0.8098175965665236,
      "grad_norm": 0.1523552812281992,
      "learning_rate": 1.8379324517792163e-05,
      "loss": 0.6171,
      "step": 9057
    },
    {
      "epoch": 0.8099070100143062,
      "grad_norm": 0.15095996690580304,
      "learning_rate": 1.8362595950118733e-05,
      "loss": 0.644,
      "step": 9058
    },
    {
      "epoch": 0.8099964234620887,
      "grad_norm": 0.14798625869441484,
      "learning_rate": 1.8345874229275816e-05,
      "loss": 0.6186,
      "step": 9059
    },
    {
      "epoch": 0.8100858369098712,
      "grad_norm": 0.15168357689165254,
      "learning_rate": 1.8329159356665793e-05,
      "loss": 0.6423,
      "step": 9060
    },
    {
      "epoch": 0.8101752503576538,
      "grad_norm": 0.16775425314516404,
      "learning_rate": 1.8312451333690538e-05,
      "loss": 0.4089,
      "step": 9061
    },
    {
      "epoch": 0.8102646638054364,
      "grad_norm": 0.15429807263214376,
      "learning_rate": 1.8295750161751334e-05,
      "loss": 0.652,
      "step": 9062
    },
    {
      "epoch": 0.8103540772532188,
      "grad_norm": 0.15272873562996014,
      "learning_rate": 1.8279055842248915e-05,
      "loss": 0.6382,
      "step": 9063
    },
    {
      "epoch": 0.8104434907010014,
      "grad_norm": 0.17546052645995905,
      "learning_rate": 1.826236837658334e-05,
      "loss": 0.6459,
      "step": 9064
    },
    {
      "epoch": 0.810532904148784,
      "grad_norm": 0.14498349079848982,
      "learning_rate": 1.8245687766154262e-05,
      "loss": 0.6144,
      "step": 9065
    },
    {
      "epoch": 0.8106223175965666,
      "grad_norm": 0.15394750343908875,
      "learning_rate": 1.822901401236059e-05,
      "loss": 0.6469,
      "step": 9066
    },
    {
      "epoch": 0.810711731044349,
      "grad_norm": 0.16282595128316196,
      "learning_rate": 1.821234711660077e-05,
      "loss": 0.6746,
      "step": 9067
    },
    {
      "epoch": 0.8108011444921316,
      "grad_norm": 0.16744046120930234,
      "learning_rate": 1.819568708027264e-05,
      "loss": 0.6809,
      "step": 9068
    },
    {
      "epoch": 0.8108905579399142,
      "grad_norm": 0.13593250787492273,
      "learning_rate": 1.817903390477341e-05,
      "loss": 0.595,
      "step": 9069
    },
    {
      "epoch": 0.8109799713876967,
      "grad_norm": 0.15702771523461065,
      "learning_rate": 1.8162387591499796e-05,
      "loss": 0.6399,
      "step": 9070
    },
    {
      "epoch": 0.8110693848354793,
      "grad_norm": 0.15220459631706112,
      "learning_rate": 1.8145748141847908e-05,
      "loss": 0.614,
      "step": 9071
    },
    {
      "epoch": 0.8111587982832618,
      "grad_norm": 0.1684313038418329,
      "learning_rate": 1.8129115557213262e-05,
      "loss": 0.6566,
      "step": 9072
    },
    {
      "epoch": 0.8112482117310443,
      "grad_norm": 0.17407316734327266,
      "learning_rate": 1.811248983899082e-05,
      "loss": 0.6298,
      "step": 9073
    },
    {
      "epoch": 0.8113376251788269,
      "grad_norm": 0.17542171083392882,
      "learning_rate": 1.809587098857498e-05,
      "loss": 0.6509,
      "step": 9074
    },
    {
      "epoch": 0.8114270386266095,
      "grad_norm": 0.17303666970748424,
      "learning_rate": 1.8079259007359506e-05,
      "loss": 0.6678,
      "step": 9075
    },
    {
      "epoch": 0.8115164520743919,
      "grad_norm": 0.17593296391211058,
      "learning_rate": 1.8062653896737647e-05,
      "loss": 0.6308,
      "step": 9076
    },
    {
      "epoch": 0.8116058655221745,
      "grad_norm": 0.16668704321892233,
      "learning_rate": 1.804605565810207e-05,
      "loss": 0.6228,
      "step": 9077
    },
    {
      "epoch": 0.8116952789699571,
      "grad_norm": 0.17815277563225812,
      "learning_rate": 1.8029464292844778e-05,
      "loss": 0.3756,
      "step": 9078
    },
    {
      "epoch": 0.8117846924177397,
      "grad_norm": 0.17880714731990488,
      "learning_rate": 1.8012879802357374e-05,
      "loss": 0.699,
      "step": 9079
    },
    {
      "epoch": 0.8118741058655222,
      "grad_norm": 0.16336028586750878,
      "learning_rate": 1.79963021880307e-05,
      "loss": 0.6345,
      "step": 9080
    },
    {
      "epoch": 0.8119635193133047,
      "grad_norm": 0.16011011166891126,
      "learning_rate": 1.797973145125512e-05,
      "loss": 0.6319,
      "step": 9081
    },
    {
      "epoch": 0.8120529327610873,
      "grad_norm": 0.16772419839446767,
      "learning_rate": 1.7963167593420438e-05,
      "loss": 0.6876,
      "step": 9082
    },
    {
      "epoch": 0.8121423462088698,
      "grad_norm": 0.18203020976171733,
      "learning_rate": 1.7946610615915792e-05,
      "loss": 0.653,
      "step": 9083
    },
    {
      "epoch": 0.8122317596566524,
      "grad_norm": 0.15811924850387926,
      "learning_rate": 1.793006052012981e-05,
      "loss": 0.6622,
      "step": 9084
    },
    {
      "epoch": 0.8123211731044349,
      "grad_norm": 0.1732419290986232,
      "learning_rate": 1.7913517307450544e-05,
      "loss": 0.6588,
      "step": 9085
    },
    {
      "epoch": 0.8124105865522174,
      "grad_norm": 0.1989585834288242,
      "learning_rate": 1.7896980979265443e-05,
      "loss": 0.6671,
      "step": 9086
    },
    {
      "epoch": 0.8125,
      "grad_norm": 0.14439678972413508,
      "learning_rate": 1.7880451536961394e-05,
      "loss": 0.6296,
      "step": 9087
    },
    {
      "epoch": 0.8125894134477826,
      "grad_norm": 0.1554727928692863,
      "learning_rate": 1.7863928981924726e-05,
      "loss": 0.6554,
      "step": 9088
    },
    {
      "epoch": 0.8126788268955651,
      "grad_norm": 0.15494075449869474,
      "learning_rate": 1.7847413315541118e-05,
      "loss": 0.6485,
      "step": 9089
    },
    {
      "epoch": 0.8127682403433476,
      "grad_norm": 0.1633125153012249,
      "learning_rate": 1.7830904539195726e-05,
      "loss": 0.6166,
      "step": 9090
    },
    {
      "epoch": 0.8128576537911302,
      "grad_norm": 0.1777674808221347,
      "learning_rate": 1.7814402654273167e-05,
      "loss": 0.6784,
      "step": 9091
    },
    {
      "epoch": 0.8129470672389127,
      "grad_norm": 0.16339948293313042,
      "learning_rate": 1.7797907662157355e-05,
      "loss": 0.6539,
      "step": 9092
    },
    {
      "epoch": 0.8130364806866953,
      "grad_norm": 0.17889468568654016,
      "learning_rate": 1.7781419564231805e-05,
      "loss": 0.6322,
      "step": 9093
    },
    {
      "epoch": 0.8131258941344778,
      "grad_norm": 0.1780477183745608,
      "learning_rate": 1.776493836187927e-05,
      "loss": 0.6746,
      "step": 9094
    },
    {
      "epoch": 0.8132153075822603,
      "grad_norm": 0.16968584181955979,
      "learning_rate": 1.774846405648204e-05,
      "loss": 0.6557,
      "step": 9095
    },
    {
      "epoch": 0.8133047210300429,
      "grad_norm": 0.1535472641516057,
      "learning_rate": 1.7731996649421802e-05,
      "loss": 0.594,
      "step": 9096
    },
    {
      "epoch": 0.8133941344778255,
      "grad_norm": 0.1603813238793191,
      "learning_rate": 1.771553614207967e-05,
      "loss": 0.6407,
      "step": 9097
    },
    {
      "epoch": 0.8134835479256081,
      "grad_norm": 0.1759525206290626,
      "learning_rate": 1.769908253583612e-05,
      "loss": 0.6528,
      "step": 9098
    },
    {
      "epoch": 0.8135729613733905,
      "grad_norm": 0.15531449402644606,
      "learning_rate": 1.7682635832071125e-05,
      "loss": 0.6251,
      "step": 9099
    },
    {
      "epoch": 0.8136623748211731,
      "grad_norm": 0.15577239473321813,
      "learning_rate": 1.766619603216405e-05,
      "loss": 0.6465,
      "step": 9100
    },
    {
      "epoch": 0.8137517882689557,
      "grad_norm": 0.13830559548221222,
      "learning_rate": 1.7649763137493682e-05,
      "loss": 0.6314,
      "step": 9101
    },
    {
      "epoch": 0.8138412017167382,
      "grad_norm": 0.16501002058668895,
      "learning_rate": 1.7633337149438246e-05,
      "loss": 0.6217,
      "step": 9102
    },
    {
      "epoch": 0.8139306151645207,
      "grad_norm": 0.17269019404229197,
      "learning_rate": 1.7616918069375322e-05,
      "loss": 0.6597,
      "step": 9103
    },
    {
      "epoch": 0.8140200286123033,
      "grad_norm": 0.15447765812025058,
      "learning_rate": 1.7600505898681997e-05,
      "loss": 0.6427,
      "step": 9104
    },
    {
      "epoch": 0.8141094420600858,
      "grad_norm": 0.15166168808264005,
      "learning_rate": 1.7584100638734745e-05,
      "loss": 0.6144,
      "step": 9105
    },
    {
      "epoch": 0.8141988555078684,
      "grad_norm": 0.18508612662558513,
      "learning_rate": 1.7567702290909393e-05,
      "loss": 0.6787,
      "step": 9106
    },
    {
      "epoch": 0.814288268955651,
      "grad_norm": 0.16885383550625974,
      "learning_rate": 1.7551310856581316e-05,
      "loss": 0.6467,
      "step": 9107
    },
    {
      "epoch": 0.8143776824034334,
      "grad_norm": 0.18093994153779647,
      "learning_rate": 1.7534926337125257e-05,
      "loss": 0.6922,
      "step": 9108
    },
    {
      "epoch": 0.814467095851216,
      "grad_norm": 0.1525977251805468,
      "learning_rate": 1.751854873391531e-05,
      "loss": 0.6212,
      "step": 9109
    },
    {
      "epoch": 0.8145565092989986,
      "grad_norm": 0.14560868969748508,
      "learning_rate": 1.750217804832506e-05,
      "loss": 0.6486,
      "step": 9110
    },
    {
      "epoch": 0.8146459227467812,
      "grad_norm": 0.15422359969658264,
      "learning_rate": 1.7485814281727532e-05,
      "loss": 0.5894,
      "step": 9111
    },
    {
      "epoch": 0.8147353361945636,
      "grad_norm": 0.16970643830836904,
      "learning_rate": 1.7469457435495063e-05,
      "loss": 0.6702,
      "step": 9112
    },
    {
      "epoch": 0.8148247496423462,
      "grad_norm": 0.16346606158084107,
      "learning_rate": 1.7453107510999568e-05,
      "loss": 0.6826,
      "step": 9113
    },
    {
      "epoch": 0.8149141630901288,
      "grad_norm": 0.16139494204344443,
      "learning_rate": 1.7436764509612237e-05,
      "loss": 0.67,
      "step": 9114
    },
    {
      "epoch": 0.8150035765379113,
      "grad_norm": 0.18510747411380266,
      "learning_rate": 1.742042843270375e-05,
      "loss": 0.6802,
      "step": 9115
    },
    {
      "epoch": 0.8150929899856938,
      "grad_norm": 0.16127704074669702,
      "learning_rate": 1.7404099281644237e-05,
      "loss": 0.6574,
      "step": 9116
    },
    {
      "epoch": 0.8151824034334764,
      "grad_norm": 0.16216851741222293,
      "learning_rate": 1.7387777057803134e-05,
      "loss": 0.6458,
      "step": 9117
    },
    {
      "epoch": 0.8152718168812589,
      "grad_norm": 0.18109448760497543,
      "learning_rate": 1.737146176254939e-05,
      "loss": 0.6281,
      "step": 9118
    },
    {
      "epoch": 0.8153612303290415,
      "grad_norm": 0.16494543336665854,
      "learning_rate": 1.735515339725137e-05,
      "loss": 0.6507,
      "step": 9119
    },
    {
      "epoch": 0.8154506437768241,
      "grad_norm": 0.14618771081046744,
      "learning_rate": 1.7338851963276825e-05,
      "loss": 0.6534,
      "step": 9120
    },
    {
      "epoch": 0.8155400572246065,
      "grad_norm": 0.16220081299542377,
      "learning_rate": 1.7322557461992926e-05,
      "loss": 0.6429,
      "step": 9121
    },
    {
      "epoch": 0.8156294706723891,
      "grad_norm": 0.1894750821908423,
      "learning_rate": 1.7306269894766312e-05,
      "loss": 0.6552,
      "step": 9122
    },
    {
      "epoch": 0.8157188841201717,
      "grad_norm": 0.14662299760124783,
      "learning_rate": 1.728998926296296e-05,
      "loss": 0.6034,
      "step": 9123
    },
    {
      "epoch": 0.8158082975679543,
      "grad_norm": 0.16335947380462326,
      "learning_rate": 1.727371556794831e-05,
      "loss": 0.6283,
      "step": 9124
    },
    {
      "epoch": 0.8158977110157367,
      "grad_norm": 0.17427426888442799,
      "learning_rate": 1.725744881108725e-05,
      "loss": 0.6431,
      "step": 9125
    },
    {
      "epoch": 0.8159871244635193,
      "grad_norm": 0.17727755899911918,
      "learning_rate": 1.7241188993743984e-05,
      "loss": 0.6459,
      "step": 9126
    },
    {
      "epoch": 0.8160765379113019,
      "grad_norm": 0.16283111161091918,
      "learning_rate": 1.7224936117282276e-05,
      "loss": 0.6512,
      "step": 9127
    },
    {
      "epoch": 0.8161659513590844,
      "grad_norm": 0.14880717747859876,
      "learning_rate": 1.7208690183065236e-05,
      "loss": 0.624,
      "step": 9128
    },
    {
      "epoch": 0.816255364806867,
      "grad_norm": 0.16899496836937553,
      "learning_rate": 1.719245119245534e-05,
      "loss": 0.6542,
      "step": 9129
    },
    {
      "epoch": 0.8163447782546495,
      "grad_norm": 0.15122392924797579,
      "learning_rate": 1.7176219146814542e-05,
      "loss": 0.6415,
      "step": 9130
    },
    {
      "epoch": 0.816434191702432,
      "grad_norm": 0.18365768781304798,
      "learning_rate": 1.715999404750426e-05,
      "loss": 0.6575,
      "step": 9131
    },
    {
      "epoch": 0.8165236051502146,
      "grad_norm": 0.14249985158763329,
      "learning_rate": 1.7143775895885195e-05,
      "loss": 0.6231,
      "step": 9132
    },
    {
      "epoch": 0.8166130185979972,
      "grad_norm": 0.16628552837213634,
      "learning_rate": 1.712756469331759e-05,
      "loss": 0.6553,
      "step": 9133
    },
    {
      "epoch": 0.8167024320457796,
      "grad_norm": 0.16744971076178958,
      "learning_rate": 1.7111360441161038e-05,
      "loss": 0.6434,
      "step": 9134
    },
    {
      "epoch": 0.8167918454935622,
      "grad_norm": 0.1663012592599709,
      "learning_rate": 1.7095163140774596e-05,
      "loss": 0.6007,
      "step": 9135
    },
    {
      "epoch": 0.8168812589413448,
      "grad_norm": 0.17110865704253986,
      "learning_rate": 1.707897279351671e-05,
      "loss": 0.6281,
      "step": 9136
    },
    {
      "epoch": 0.8169706723891274,
      "grad_norm": 0.17422647928264726,
      "learning_rate": 1.7062789400745215e-05,
      "loss": 0.3408,
      "step": 9137
    },
    {
      "epoch": 0.8170600858369099,
      "grad_norm": 0.19313833839239397,
      "learning_rate": 1.704661296381741e-05,
      "loss": 0.6615,
      "step": 9138
    },
    {
      "epoch": 0.8171494992846924,
      "grad_norm": 0.14803171561446687,
      "learning_rate": 1.703044348409002e-05,
      "loss": 0.6379,
      "step": 9139
    },
    {
      "epoch": 0.817238912732475,
      "grad_norm": 0.1597445148950899,
      "learning_rate": 1.701428096291908e-05,
      "loss": 0.6796,
      "step": 9140
    },
    {
      "epoch": 0.8173283261802575,
      "grad_norm": 0.17173447911693968,
      "learning_rate": 1.6998125401660202e-05,
      "loss": 0.3629,
      "step": 9141
    },
    {
      "epoch": 0.8174177396280401,
      "grad_norm": 0.15633245772304255,
      "learning_rate": 1.698197680166832e-05,
      "loss": 0.6171,
      "step": 9142
    },
    {
      "epoch": 0.8175071530758226,
      "grad_norm": 0.15504804651078402,
      "learning_rate": 1.6965835164297773e-05,
      "loss": 0.6103,
      "step": 9143
    },
    {
      "epoch": 0.8175965665236051,
      "grad_norm": 0.16952477481495934,
      "learning_rate": 1.6949700490902344e-05,
      "loss": 0.657,
      "step": 9144
    },
    {
      "epoch": 0.8176859799713877,
      "grad_norm": 0.17314227442374558,
      "learning_rate": 1.693357278283526e-05,
      "loss": 0.6981,
      "step": 9145
    },
    {
      "epoch": 0.8177753934191703,
      "grad_norm": 0.17739450466006473,
      "learning_rate": 1.6917452041449077e-05,
      "loss": 0.6534,
      "step": 9146
    },
    {
      "epoch": 0.8178648068669528,
      "grad_norm": 0.1557469438921674,
      "learning_rate": 1.6901338268095866e-05,
      "loss": 0.6454,
      "step": 9147
    },
    {
      "epoch": 0.8179542203147353,
      "grad_norm": 0.15537912233185486,
      "learning_rate": 1.688523146412705e-05,
      "loss": 0.6465,
      "step": 9148
    },
    {
      "epoch": 0.8180436337625179,
      "grad_norm": 0.1473296766745138,
      "learning_rate": 1.68691316308935e-05,
      "loss": 0.6066,
      "step": 9149
    },
    {
      "epoch": 0.8181330472103004,
      "grad_norm": 0.15256403749479186,
      "learning_rate": 1.6853038769745467e-05,
      "loss": 0.6039,
      "step": 9150
    },
    {
      "epoch": 0.818222460658083,
      "grad_norm": 0.16048735742629697,
      "learning_rate": 1.6836952882032698e-05,
      "loss": 0.6355,
      "step": 9151
    },
    {
      "epoch": 0.8183118741058655,
      "grad_norm": 0.16705449454571925,
      "learning_rate": 1.682087396910422e-05,
      "loss": 0.6282,
      "step": 9152
    },
    {
      "epoch": 0.818401287553648,
      "grad_norm": 0.18374190453366088,
      "learning_rate": 1.68048020323086e-05,
      "loss": 0.6572,
      "step": 9153
    },
    {
      "epoch": 0.8184907010014306,
      "grad_norm": 0.19471945165770274,
      "learning_rate": 1.6788737072993744e-05,
      "loss": 0.6229,
      "step": 9154
    },
    {
      "epoch": 0.8185801144492132,
      "grad_norm": 0.14014015537641206,
      "learning_rate": 1.6772679092507025e-05,
      "loss": 0.6372,
      "step": 9155
    },
    {
      "epoch": 0.8186695278969958,
      "grad_norm": 0.16308918253668958,
      "learning_rate": 1.6756628092195214e-05,
      "loss": 0.6395,
      "step": 9156
    },
    {
      "epoch": 0.8187589413447782,
      "grad_norm": 0.156496765069545,
      "learning_rate": 1.6740584073404454e-05,
      "loss": 0.6451,
      "step": 9157
    },
    {
      "epoch": 0.8188483547925608,
      "grad_norm": 0.15516053208352323,
      "learning_rate": 1.6724547037480355e-05,
      "loss": 0.6477,
      "step": 9158
    },
    {
      "epoch": 0.8189377682403434,
      "grad_norm": 0.16570989698711497,
      "learning_rate": 1.6708516985767953e-05,
      "loss": 0.6246,
      "step": 9159
    },
    {
      "epoch": 0.8190271816881259,
      "grad_norm": 0.1739071180282261,
      "learning_rate": 1.6692493919611606e-05,
      "loss": 0.6249,
      "step": 9160
    },
    {
      "epoch": 0.8191165951359084,
      "grad_norm": 0.1645989021940463,
      "learning_rate": 1.6676477840355166e-05,
      "loss": 0.6599,
      "step": 9161
    },
    {
      "epoch": 0.819206008583691,
      "grad_norm": 0.16202451043268437,
      "learning_rate": 1.666046874934195e-05,
      "loss": 0.6253,
      "step": 9162
    },
    {
      "epoch": 0.8192954220314735,
      "grad_norm": 0.15279152513316321,
      "learning_rate": 1.6644466647914546e-05,
      "loss": 0.6607,
      "step": 9163
    },
    {
      "epoch": 0.8193848354792561,
      "grad_norm": 0.17689244482496638,
      "learning_rate": 1.662847153741506e-05,
      "loss": 0.6425,
      "step": 9164
    },
    {
      "epoch": 0.8194742489270386,
      "grad_norm": 0.13461158945892285,
      "learning_rate": 1.6612483419185e-05,
      "loss": 0.5875,
      "step": 9165
    },
    {
      "epoch": 0.8195636623748211,
      "grad_norm": 0.17939190425495666,
      "learning_rate": 1.659650229456522e-05,
      "loss": 0.6165,
      "step": 9166
    },
    {
      "epoch": 0.8196530758226037,
      "grad_norm": 0.15226954887423186,
      "learning_rate": 1.658052816489607e-05,
      "loss": 0.6061,
      "step": 9167
    },
    {
      "epoch": 0.8197424892703863,
      "grad_norm": 0.1708809469917734,
      "learning_rate": 1.656456103151728e-05,
      "loss": 0.6302,
      "step": 9168
    },
    {
      "epoch": 0.8198319027181689,
      "grad_norm": 0.14902319590862642,
      "learning_rate": 1.6548600895767997e-05,
      "loss": 0.6232,
      "step": 9169
    },
    {
      "epoch": 0.8199213161659513,
      "grad_norm": 0.15985114696462085,
      "learning_rate": 1.6532647758986786e-05,
      "loss": 0.6405,
      "step": 9170
    },
    {
      "epoch": 0.8200107296137339,
      "grad_norm": 0.1545772928979645,
      "learning_rate": 1.6516701622511588e-05,
      "loss": 0.6166,
      "step": 9171
    },
    {
      "epoch": 0.8201001430615165,
      "grad_norm": 0.16277878093772863,
      "learning_rate": 1.65007624876798e-05,
      "loss": 0.6649,
      "step": 9172
    },
    {
      "epoch": 0.820189556509299,
      "grad_norm": 0.15729550741556902,
      "learning_rate": 1.6484830355828242e-05,
      "loss": 0.61,
      "step": 9173
    },
    {
      "epoch": 0.8202789699570815,
      "grad_norm": 0.17911453362629026,
      "learning_rate": 1.6468905228293073e-05,
      "loss": 0.6581,
      "step": 9174
    },
    {
      "epoch": 0.8203683834048641,
      "grad_norm": 0.16276899298906028,
      "learning_rate": 1.6452987106409935e-05,
      "loss": 0.6587,
      "step": 9175
    },
    {
      "epoch": 0.8204577968526466,
      "grad_norm": 0.16264910492434892,
      "learning_rate": 1.6437075991513905e-05,
      "loss": 0.6929,
      "step": 9176
    },
    {
      "epoch": 0.8205472103004292,
      "grad_norm": 0.19154589433969826,
      "learning_rate": 1.6421171884939368e-05,
      "loss": 0.6546,
      "step": 9177
    },
    {
      "epoch": 0.8206366237482118,
      "grad_norm": 0.18803586368529226,
      "learning_rate": 1.640527478802021e-05,
      "loss": 0.6664,
      "step": 9178
    },
    {
      "epoch": 0.8207260371959942,
      "grad_norm": 0.1462041355233539,
      "learning_rate": 1.638938470208973e-05,
      "loss": 0.6575,
      "step": 9179
    },
    {
      "epoch": 0.8208154506437768,
      "grad_norm": 0.18076902500121173,
      "learning_rate": 1.6373501628480535e-05,
      "loss": 0.6608,
      "step": 9180
    },
    {
      "epoch": 0.8209048640915594,
      "grad_norm": 0.15794831348024455,
      "learning_rate": 1.6357625568524783e-05,
      "loss": 0.6061,
      "step": 9181
    },
    {
      "epoch": 0.820994277539342,
      "grad_norm": 0.14944036669728653,
      "learning_rate": 1.6341756523553954e-05,
      "loss": 0.6275,
      "step": 9182
    },
    {
      "epoch": 0.8210836909871244,
      "grad_norm": 0.16981240842638365,
      "learning_rate": 1.6325894494898975e-05,
      "loss": 0.6235,
      "step": 9183
    },
    {
      "epoch": 0.821173104434907,
      "grad_norm": 0.1662353033668006,
      "learning_rate": 1.631003948389016e-05,
      "loss": 0.6362,
      "step": 9184
    },
    {
      "epoch": 0.8212625178826896,
      "grad_norm": 0.1786618482076307,
      "learning_rate": 1.629419149185729e-05,
      "loss": 0.6926,
      "step": 9185
    },
    {
      "epoch": 0.8213519313304721,
      "grad_norm": 0.1569642233834125,
      "learning_rate": 1.627835052012947e-05,
      "loss": 0.6267,
      "step": 9186
    },
    {
      "epoch": 0.8214413447782547,
      "grad_norm": 0.1423188314477187,
      "learning_rate": 1.626251657003528e-05,
      "loss": 0.6096,
      "step": 9187
    },
    {
      "epoch": 0.8215307582260372,
      "grad_norm": 0.18742346197054163,
      "learning_rate": 1.6246689642902725e-05,
      "loss": 0.6637,
      "step": 9188
    },
    {
      "epoch": 0.8216201716738197,
      "grad_norm": 0.14649477259109195,
      "learning_rate": 1.6230869740059106e-05,
      "loss": 0.6288,
      "step": 9189
    },
    {
      "epoch": 0.8217095851216023,
      "grad_norm": 0.15211104956539745,
      "learning_rate": 1.6215056862831324e-05,
      "loss": 0.6261,
      "step": 9190
    },
    {
      "epoch": 0.8217989985693849,
      "grad_norm": 0.16653889367106509,
      "learning_rate": 1.6199251012545512e-05,
      "loss": 0.6556,
      "step": 9191
    },
    {
      "epoch": 0.8218884120171673,
      "grad_norm": 0.179233855053648,
      "learning_rate": 1.6183452190527316e-05,
      "loss": 0.6735,
      "step": 9192
    },
    {
      "epoch": 0.8219778254649499,
      "grad_norm": 0.17522024456625965,
      "learning_rate": 1.616766039810178e-05,
      "loss": 0.6356,
      "step": 9193
    },
    {
      "epoch": 0.8220672389127325,
      "grad_norm": 0.15115025895863013,
      "learning_rate": 1.6151875636593306e-05,
      "loss": 0.621,
      "step": 9194
    },
    {
      "epoch": 0.822156652360515,
      "grad_norm": 0.16699896010153178,
      "learning_rate": 1.613609790732572e-05,
      "loss": 0.6588,
      "step": 9195
    },
    {
      "epoch": 0.8222460658082976,
      "grad_norm": 0.14966377561681368,
      "learning_rate": 1.6120327211622375e-05,
      "loss": 0.5951,
      "step": 9196
    },
    {
      "epoch": 0.8223354792560801,
      "grad_norm": 0.1689191408396603,
      "learning_rate": 1.6104563550805875e-05,
      "loss": 0.658,
      "step": 9197
    },
    {
      "epoch": 0.8224248927038627,
      "grad_norm": 0.16738046779997387,
      "learning_rate": 1.6088806926198297e-05,
      "loss": 0.653,
      "step": 9198
    },
    {
      "epoch": 0.8225143061516452,
      "grad_norm": 0.1629354991837869,
      "learning_rate": 1.6073057339121166e-05,
      "loss": 0.5963,
      "step": 9199
    },
    {
      "epoch": 0.8226037195994278,
      "grad_norm": 0.16042431941001534,
      "learning_rate": 1.605731479089534e-05,
      "loss": 0.6131,
      "step": 9200
    },
    {
      "epoch": 0.8226931330472103,
      "grad_norm": 0.15897747962891717,
      "learning_rate": 1.6041579282841145e-05,
      "loss": 0.6219,
      "step": 9201
    },
    {
      "epoch": 0.8227825464949928,
      "grad_norm": 0.16256678007387962,
      "learning_rate": 1.6025850816278297e-05,
      "loss": 0.6324,
      "step": 9202
    },
    {
      "epoch": 0.8228719599427754,
      "grad_norm": 0.159778439662725,
      "learning_rate": 1.601012939252592e-05,
      "loss": 0.6378,
      "step": 9203
    },
    {
      "epoch": 0.822961373390558,
      "grad_norm": 0.1632789589166518,
      "learning_rate": 1.5994415012902587e-05,
      "loss": 0.6688,
      "step": 9204
    },
    {
      "epoch": 0.8230507868383404,
      "grad_norm": 0.16826936015967697,
      "learning_rate": 1.597870767872619e-05,
      "loss": 0.6328,
      "step": 9205
    },
    {
      "epoch": 0.823140200286123,
      "grad_norm": 0.16799390105256495,
      "learning_rate": 1.5963007391314113e-05,
      "loss": 0.6373,
      "step": 9206
    },
    {
      "epoch": 0.8232296137339056,
      "grad_norm": 0.16510052788847923,
      "learning_rate": 1.5947314151983105e-05,
      "loss": 0.6634,
      "step": 9207
    },
    {
      "epoch": 0.8233190271816881,
      "grad_norm": 0.16603472767910396,
      "learning_rate": 1.5931627962049378e-05,
      "loss": 0.6532,
      "step": 9208
    },
    {
      "epoch": 0.8234084406294707,
      "grad_norm": 0.15782560864942494,
      "learning_rate": 1.591594882282844e-05,
      "loss": 0.6182,
      "step": 9209
    },
    {
      "epoch": 0.8234978540772532,
      "grad_norm": 0.15657540733211509,
      "learning_rate": 1.5900276735635367e-05,
      "loss": 0.635,
      "step": 9210
    },
    {
      "epoch": 0.8235872675250357,
      "grad_norm": 0.15791012825391768,
      "learning_rate": 1.5884611701784504e-05,
      "loss": 0.6285,
      "step": 9211
    },
    {
      "epoch": 0.8236766809728183,
      "grad_norm": 0.17680140204095782,
      "learning_rate": 1.5868953722589663e-05,
      "loss": 0.638,
      "step": 9212
    },
    {
      "epoch": 0.8237660944206009,
      "grad_norm": 0.1688478415847324,
      "learning_rate": 1.585330279936409e-05,
      "loss": 0.6639,
      "step": 9213
    },
    {
      "epoch": 0.8238555078683834,
      "grad_norm": 0.16881118220948765,
      "learning_rate": 1.5837658933420375e-05,
      "loss": 0.6414,
      "step": 9214
    },
    {
      "epoch": 0.8239449213161659,
      "grad_norm": 0.14498139202500768,
      "learning_rate": 1.5822022126070556e-05,
      "loss": 0.6192,
      "step": 9215
    },
    {
      "epoch": 0.8240343347639485,
      "grad_norm": 0.17766779241193245,
      "learning_rate": 1.580639237862608e-05,
      "loss": 0.6662,
      "step": 9216
    },
    {
      "epoch": 0.8241237482117311,
      "grad_norm": 0.1537323913181005,
      "learning_rate": 1.57907696923978e-05,
      "loss": 0.637,
      "step": 9217
    },
    {
      "epoch": 0.8242131616595136,
      "grad_norm": 0.14649733410028226,
      "learning_rate": 1.5775154068695963e-05,
      "loss": 0.5945,
      "step": 9218
    },
    {
      "epoch": 0.8243025751072961,
      "grad_norm": 0.16667753725495735,
      "learning_rate": 1.5759545508830252e-05,
      "loss": 0.6122,
      "step": 9219
    },
    {
      "epoch": 0.8243919885550787,
      "grad_norm": 0.16861257620813608,
      "learning_rate": 1.5743944014109713e-05,
      "loss": 0.6359,
      "step": 9220
    },
    {
      "epoch": 0.8244814020028612,
      "grad_norm": 0.1483896503513663,
      "learning_rate": 1.5728349585842827e-05,
      "loss": 0.6204,
      "step": 9221
    },
    {
      "epoch": 0.8245708154506438,
      "grad_norm": 0.1522468791441389,
      "learning_rate": 1.571276222533751e-05,
      "loss": 0.6341,
      "step": 9222
    },
    {
      "epoch": 0.8246602288984263,
      "grad_norm": 0.14480132919460673,
      "learning_rate": 1.5697181933900985e-05,
      "loss": 0.6289,
      "step": 9223
    },
    {
      "epoch": 0.8247496423462088,
      "grad_norm": 0.16764110545773833,
      "learning_rate": 1.5681608712840046e-05,
      "loss": 0.6352,
      "step": 9224
    },
    {
      "epoch": 0.8248390557939914,
      "grad_norm": 0.15688543972824323,
      "learning_rate": 1.5666042563460737e-05,
      "loss": 0.6277,
      "step": 9225
    },
    {
      "epoch": 0.824928469241774,
      "grad_norm": 0.14231287331110737,
      "learning_rate": 1.565048348706858e-05,
      "loss": 0.5806,
      "step": 9226
    },
    {
      "epoch": 0.8250178826895566,
      "grad_norm": 0.14619656420291785,
      "learning_rate": 1.563493148496853e-05,
      "loss": 0.5804,
      "step": 9227
    },
    {
      "epoch": 0.825107296137339,
      "grad_norm": 0.15449159328240947,
      "learning_rate": 1.5619386558464865e-05,
      "loss": 0.5902,
      "step": 9228
    },
    {
      "epoch": 0.8251967095851216,
      "grad_norm": 0.1564918301196478,
      "learning_rate": 1.5603848708861347e-05,
      "loss": 0.6018,
      "step": 9229
    },
    {
      "epoch": 0.8252861230329042,
      "grad_norm": 0.16515735978559432,
      "learning_rate": 1.5588317937461105e-05,
      "loss": 0.6433,
      "step": 9230
    },
    {
      "epoch": 0.8253755364806867,
      "grad_norm": 0.1574453704570925,
      "learning_rate": 1.55727942455667e-05,
      "loss": 0.6175,
      "step": 9231
    },
    {
      "epoch": 0.8254649499284692,
      "grad_norm": 0.1497596836771522,
      "learning_rate": 1.5557277634480083e-05,
      "loss": 0.6133,
      "step": 9232
    },
    {
      "epoch": 0.8255543633762518,
      "grad_norm": 0.16075082298470125,
      "learning_rate": 1.554176810550263e-05,
      "loss": 0.6706,
      "step": 9233
    },
    {
      "epoch": 0.8256437768240343,
      "grad_norm": 0.14491757614842235,
      "learning_rate": 1.552626565993507e-05,
      "loss": 0.6643,
      "step": 9234
    },
    {
      "epoch": 0.8257331902718169,
      "grad_norm": 0.15844118677173777,
      "learning_rate": 1.551077029907758e-05,
      "loss": 0.5933,
      "step": 9235
    },
    {
      "epoch": 0.8258226037195995,
      "grad_norm": 0.17943221425606495,
      "learning_rate": 1.5495282024229775e-05,
      "loss": 0.668,
      "step": 9236
    },
    {
      "epoch": 0.8259120171673819,
      "grad_norm": 0.1748360141453468,
      "learning_rate": 1.547980083669056e-05,
      "loss": 0.64,
      "step": 9237
    },
    {
      "epoch": 0.8260014306151645,
      "grad_norm": 0.1668673201254287,
      "learning_rate": 1.5464326737758428e-05,
      "loss": 0.6375,
      "step": 9238
    },
    {
      "epoch": 0.8260908440629471,
      "grad_norm": 0.1491460988025092,
      "learning_rate": 1.544885972873109e-05,
      "loss": 0.6141,
      "step": 9239
    },
    {
      "epoch": 0.8261802575107297,
      "grad_norm": 0.13083635493500723,
      "learning_rate": 1.543339981090578e-05,
      "loss": 0.592,
      "step": 9240
    },
    {
      "epoch": 0.8262696709585121,
      "grad_norm": 0.17190129948588273,
      "learning_rate": 1.541794698557909e-05,
      "loss": 0.6379,
      "step": 9241
    },
    {
      "epoch": 0.8263590844062947,
      "grad_norm": 0.16042970518672137,
      "learning_rate": 1.5402501254047065e-05,
      "loss": 0.6638,
      "step": 9242
    },
    {
      "epoch": 0.8264484978540773,
      "grad_norm": 0.1639581832159465,
      "learning_rate": 1.5387062617605064e-05,
      "loss": 0.6441,
      "step": 9243
    },
    {
      "epoch": 0.8265379113018598,
      "grad_norm": 0.15834750496944003,
      "learning_rate": 1.5371631077547942e-05,
      "loss": 0.6342,
      "step": 9244
    },
    {
      "epoch": 0.8266273247496424,
      "grad_norm": 0.16449008516671024,
      "learning_rate": 1.5356206635169912e-05,
      "loss": 0.664,
      "step": 9245
    },
    {
      "epoch": 0.8267167381974249,
      "grad_norm": 0.1696615649460959,
      "learning_rate": 1.5340789291764612e-05,
      "loss": 0.6355,
      "step": 9246
    },
    {
      "epoch": 0.8268061516452074,
      "grad_norm": 0.1771910576443685,
      "learning_rate": 1.532537904862509e-05,
      "loss": 0.6622,
      "step": 9247
    },
    {
      "epoch": 0.82689556509299,
      "grad_norm": 0.1460866883716527,
      "learning_rate": 1.530997590704375e-05,
      "loss": 0.5782,
      "step": 9248
    },
    {
      "epoch": 0.8269849785407726,
      "grad_norm": 0.1762740102675004,
      "learning_rate": 1.529457986831244e-05,
      "loss": 0.6745,
      "step": 9249
    },
    {
      "epoch": 0.827074391988555,
      "grad_norm": 0.17613134804264066,
      "learning_rate": 1.5279190933722443e-05,
      "loss": 0.6578,
      "step": 9250
    },
    {
      "epoch": 0.8271638054363376,
      "grad_norm": 0.1889799604830389,
      "learning_rate": 1.5263809104564353e-05,
      "loss": 0.7,
      "step": 9251
    },
    {
      "epoch": 0.8272532188841202,
      "grad_norm": 0.1549474236691368,
      "learning_rate": 1.5248434382128263e-05,
      "loss": 0.6298,
      "step": 9252
    },
    {
      "epoch": 0.8273426323319027,
      "grad_norm": 0.16498876797150464,
      "learning_rate": 1.5233066767703663e-05,
      "loss": 0.6505,
      "step": 9253
    },
    {
      "epoch": 0.8274320457796852,
      "grad_norm": 0.16065778593575086,
      "learning_rate": 1.5217706262579356e-05,
      "loss": 0.623,
      "step": 9254
    },
    {
      "epoch": 0.8275214592274678,
      "grad_norm": 0.14162169812533348,
      "learning_rate": 1.5202352868043624e-05,
      "loss": 0.6573,
      "step": 9255
    },
    {
      "epoch": 0.8276108726752504,
      "grad_norm": 0.16133089923600122,
      "learning_rate": 1.5187006585384179e-05,
      "loss": 0.6465,
      "step": 9256
    },
    {
      "epoch": 0.8277002861230329,
      "grad_norm": 0.1459925399846604,
      "learning_rate": 1.5171667415888046e-05,
      "loss": 0.6275,
      "step": 9257
    },
    {
      "epoch": 0.8277896995708155,
      "grad_norm": 0.16959551795943117,
      "learning_rate": 1.515633536084171e-05,
      "loss": 0.6637,
      "step": 9258
    },
    {
      "epoch": 0.827879113018598,
      "grad_norm": 0.18165527640007031,
      "learning_rate": 1.5141010421531066e-05,
      "loss": 0.6109,
      "step": 9259
    },
    {
      "epoch": 0.8279685264663805,
      "grad_norm": 0.15916988852197358,
      "learning_rate": 1.5125692599241391e-05,
      "loss": 0.6427,
      "step": 9260
    },
    {
      "epoch": 0.8280579399141631,
      "grad_norm": 0.1613655502483781,
      "learning_rate": 1.5110381895257408e-05,
      "loss": 0.6499,
      "step": 9261
    },
    {
      "epoch": 0.8281473533619457,
      "grad_norm": 0.1511697975066312,
      "learning_rate": 1.5095078310863142e-05,
      "loss": 0.6404,
      "step": 9262
    },
    {
      "epoch": 0.8282367668097281,
      "grad_norm": 0.1463801941475687,
      "learning_rate": 1.5079781847342123e-05,
      "loss": 0.6464,
      "step": 9263
    },
    {
      "epoch": 0.8283261802575107,
      "grad_norm": 0.15961754283408142,
      "learning_rate": 1.5064492505977234e-05,
      "loss": 0.6724,
      "step": 9264
    },
    {
      "epoch": 0.8284155937052933,
      "grad_norm": 0.1672597676544202,
      "learning_rate": 1.5049210288050796e-05,
      "loss": 0.6407,
      "step": 9265
    },
    {
      "epoch": 0.8285050071530758,
      "grad_norm": 0.1374340368171704,
      "learning_rate": 1.5033935194844484e-05,
      "loss": 0.6234,
      "step": 9266
    },
    {
      "epoch": 0.8285944206008584,
      "grad_norm": 0.15402962550426819,
      "learning_rate": 1.501866722763945e-05,
      "loss": 0.6582,
      "step": 9267
    },
    {
      "epoch": 0.8286838340486409,
      "grad_norm": 0.15992282094549962,
      "learning_rate": 1.5003406387716134e-05,
      "loss": 0.6534,
      "step": 9268
    },
    {
      "epoch": 0.8287732474964234,
      "grad_norm": 0.15435047499460944,
      "learning_rate": 1.4988152676354472e-05,
      "loss": 0.6083,
      "step": 9269
    },
    {
      "epoch": 0.828862660944206,
      "grad_norm": 0.15089832775805379,
      "learning_rate": 1.4972906094833805e-05,
      "loss": 0.6478,
      "step": 9270
    },
    {
      "epoch": 0.8289520743919886,
      "grad_norm": 0.16938600807515156,
      "learning_rate": 1.4957666644432788e-05,
      "loss": 0.6694,
      "step": 9271
    },
    {
      "epoch": 0.829041487839771,
      "grad_norm": 0.15738025663060343,
      "learning_rate": 1.4942434326429544e-05,
      "loss": 0.6299,
      "step": 9272
    },
    {
      "epoch": 0.8291309012875536,
      "grad_norm": 0.1585062727433545,
      "learning_rate": 1.4927209142101662e-05,
      "loss": 0.6362,
      "step": 9273
    },
    {
      "epoch": 0.8292203147353362,
      "grad_norm": 0.16712263999887023,
      "learning_rate": 1.4911991092725985e-05,
      "loss": 0.6382,
      "step": 9274
    },
    {
      "epoch": 0.8293097281831188,
      "grad_norm": 0.16332257597491076,
      "learning_rate": 1.489678017957884e-05,
      "loss": 0.626,
      "step": 9275
    },
    {
      "epoch": 0.8293991416309013,
      "grad_norm": 0.17975327356346382,
      "learning_rate": 1.4881576403936004e-05,
      "loss": 0.6376,
      "step": 9276
    },
    {
      "epoch": 0.8294885550786838,
      "grad_norm": 0.16589562704564054,
      "learning_rate": 1.4866379767072525e-05,
      "loss": 0.6583,
      "step": 9277
    },
    {
      "epoch": 0.8295779685264664,
      "grad_norm": 0.15486729316504508,
      "learning_rate": 1.485119027026296e-05,
      "loss": 0.6337,
      "step": 9278
    },
    {
      "epoch": 0.8296673819742489,
      "grad_norm": 0.16242902801397294,
      "learning_rate": 1.4836007914781225e-05,
      "loss": 0.6702,
      "step": 9279
    },
    {
      "epoch": 0.8297567954220315,
      "grad_norm": 0.17241893747503756,
      "learning_rate": 1.4820832701900667e-05,
      "loss": 0.6506,
      "step": 9280
    },
    {
      "epoch": 0.829846208869814,
      "grad_norm": 0.16239443557068306,
      "learning_rate": 1.4805664632894024e-05,
      "loss": 0.6465,
      "step": 9281
    },
    {
      "epoch": 0.8299356223175965,
      "grad_norm": 0.1394167609663897,
      "learning_rate": 1.4790503709033365e-05,
      "loss": 0.6441,
      "step": 9282
    },
    {
      "epoch": 0.8300250357653791,
      "grad_norm": 0.16211999207924377,
      "learning_rate": 1.4775349931590266e-05,
      "loss": 0.6522,
      "step": 9283
    },
    {
      "epoch": 0.8301144492131617,
      "grad_norm": 0.157197289834446,
      "learning_rate": 1.4760203301835652e-05,
      "loss": 0.6417,
      "step": 9284
    },
    {
      "epoch": 0.8302038626609443,
      "grad_norm": 0.1509091023768107,
      "learning_rate": 1.4745063821039806e-05,
      "loss": 0.628,
      "step": 9285
    },
    {
      "epoch": 0.8302932761087267,
      "grad_norm": 0.18490257379373362,
      "learning_rate": 1.4729931490472515e-05,
      "loss": 0.6622,
      "step": 9286
    },
    {
      "epoch": 0.8303826895565093,
      "grad_norm": 0.17345221709760253,
      "learning_rate": 1.4714806311402918e-05,
      "loss": 0.68,
      "step": 9287
    },
    {
      "epoch": 0.8304721030042919,
      "grad_norm": 0.14391463147342404,
      "learning_rate": 1.4699688285099489e-05,
      "loss": 0.6304,
      "step": 9288
    },
    {
      "epoch": 0.8305615164520744,
      "grad_norm": 0.18853733429281513,
      "learning_rate": 1.4684577412830191e-05,
      "loss": 0.6652,
      "step": 9289
    },
    {
      "epoch": 0.8306509298998569,
      "grad_norm": 0.17728893378995486,
      "learning_rate": 1.4669473695862368e-05,
      "loss": 0.6532,
      "step": 9290
    },
    {
      "epoch": 0.8307403433476395,
      "grad_norm": 0.1425267529761874,
      "learning_rate": 1.4654377135462715e-05,
      "loss": 0.6251,
      "step": 9291
    },
    {
      "epoch": 0.830829756795422,
      "grad_norm": 0.14621519626971902,
      "learning_rate": 1.4639287732897377e-05,
      "loss": 0.6008,
      "step": 9292
    },
    {
      "epoch": 0.8309191702432046,
      "grad_norm": 0.15462763569488172,
      "learning_rate": 1.4624205489431886e-05,
      "loss": 0.6357,
      "step": 9293
    },
    {
      "epoch": 0.8310085836909872,
      "grad_norm": 0.16234431925015158,
      "learning_rate": 1.4609130406331172e-05,
      "loss": 0.6394,
      "step": 9294
    },
    {
      "epoch": 0.8310979971387696,
      "grad_norm": 0.14194382272378317,
      "learning_rate": 1.4594062484859595e-05,
      "loss": 0.6099,
      "step": 9295
    },
    {
      "epoch": 0.8311874105865522,
      "grad_norm": 0.16646203338515614,
      "learning_rate": 1.4579001726280828e-05,
      "loss": 0.6735,
      "step": 9296
    },
    {
      "epoch": 0.8312768240343348,
      "grad_norm": 0.157426060083834,
      "learning_rate": 1.4563948131858018e-05,
      "loss": 0.628,
      "step": 9297
    },
    {
      "epoch": 0.8313662374821174,
      "grad_norm": 0.16854728636210475,
      "learning_rate": 1.4548901702853701e-05,
      "loss": 0.6245,
      "step": 9298
    },
    {
      "epoch": 0.8314556509298998,
      "grad_norm": 0.16212642222962131,
      "learning_rate": 1.4533862440529799e-05,
      "loss": 0.6466,
      "step": 9299
    },
    {
      "epoch": 0.8315450643776824,
      "grad_norm": 0.16445186494363484,
      "learning_rate": 1.4518830346147638e-05,
      "loss": 0.6535,
      "step": 9300
    },
    {
      "epoch": 0.831634477825465,
      "grad_norm": 0.15694427926567442,
      "learning_rate": 1.4503805420967964e-05,
      "loss": 0.6151,
      "step": 9301
    },
    {
      "epoch": 0.8317238912732475,
      "grad_norm": 0.15889261966155144,
      "learning_rate": 1.4488787666250858e-05,
      "loss": 0.6366,
      "step": 9302
    },
    {
      "epoch": 0.83181330472103,
      "grad_norm": 0.15931310696649942,
      "learning_rate": 1.4473777083255857e-05,
      "loss": 0.6365,
      "step": 9303
    },
    {
      "epoch": 0.8319027181688126,
      "grad_norm": 0.15779221615464323,
      "learning_rate": 1.4458773673241899e-05,
      "loss": 0.656,
      "step": 9304
    },
    {
      "epoch": 0.8319921316165951,
      "grad_norm": 0.13533069275090504,
      "learning_rate": 1.4443777437467265e-05,
      "loss": 0.6095,
      "step": 9305
    },
    {
      "epoch": 0.8320815450643777,
      "grad_norm": 0.15429377925522278,
      "learning_rate": 1.4428788377189672e-05,
      "loss": 0.6421,
      "step": 9306
    },
    {
      "epoch": 0.8321709585121603,
      "grad_norm": 0.17703823420248957,
      "learning_rate": 1.4413806493666293e-05,
      "loss": 0.6242,
      "step": 9307
    },
    {
      "epoch": 0.8322603719599427,
      "grad_norm": 0.14635775055580935,
      "learning_rate": 1.4398831788153588e-05,
      "loss": 0.6021,
      "step": 9308
    },
    {
      "epoch": 0.8323497854077253,
      "grad_norm": 0.16221300772055214,
      "learning_rate": 1.4383864261907476e-05,
      "loss": 0.6502,
      "step": 9309
    },
    {
      "epoch": 0.8324391988555079,
      "grad_norm": 0.17362994183438338,
      "learning_rate": 1.4368903916183296e-05,
      "loss": 0.6603,
      "step": 9310
    },
    {
      "epoch": 0.8325286123032904,
      "grad_norm": 0.1467999269726033,
      "learning_rate": 1.4353950752235702e-05,
      "loss": 0.6114,
      "step": 9311
    },
    {
      "epoch": 0.8326180257510729,
      "grad_norm": 0.1731792326915753,
      "learning_rate": 1.433900477131882e-05,
      "loss": 0.6375,
      "step": 9312
    },
    {
      "epoch": 0.8327074391988555,
      "grad_norm": 0.15754917884397956,
      "learning_rate": 1.4324065974686162e-05,
      "loss": 0.6702,
      "step": 9313
    },
    {
      "epoch": 0.832796852646638,
      "grad_norm": 0.16352959042622017,
      "learning_rate": 1.4309134363590615e-05,
      "loss": 0.6112,
      "step": 9314
    },
    {
      "epoch": 0.8328862660944206,
      "grad_norm": 0.16982722154404103,
      "learning_rate": 1.4294209939284509e-05,
      "loss": 0.6658,
      "step": 9315
    },
    {
      "epoch": 0.8329756795422032,
      "grad_norm": 0.1495202480965002,
      "learning_rate": 1.4279292703019486e-05,
      "loss": 0.6396,
      "step": 9316
    },
    {
      "epoch": 0.8330650929899857,
      "grad_norm": 0.16956388483864973,
      "learning_rate": 1.426438265604666e-05,
      "loss": 0.6365,
      "step": 9317
    },
    {
      "epoch": 0.8331545064377682,
      "grad_norm": 0.17815962898966992,
      "learning_rate": 1.4249479799616538e-05,
      "loss": 0.6658,
      "step": 9318
    },
    {
      "epoch": 0.8332439198855508,
      "grad_norm": 0.16637629176836802,
      "learning_rate": 1.4234584134978956e-05,
      "loss": 0.6489,
      "step": 9319
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 0.1674620174389648,
      "learning_rate": 1.4219695663383214e-05,
      "loss": 0.3293,
      "step": 9320
    },
    {
      "epoch": 0.8334227467811158,
      "grad_norm": 0.1667331060955825,
      "learning_rate": 1.4204814386078036e-05,
      "loss": 0.7041,
      "step": 9321
    },
    {
      "epoch": 0.8335121602288984,
      "grad_norm": 0.15383035675047566,
      "learning_rate": 1.4189940304311444e-05,
      "loss": 0.594,
      "step": 9322
    },
    {
      "epoch": 0.833601573676681,
      "grad_norm": 0.1456906122562588,
      "learning_rate": 1.417507341933092e-05,
      "loss": 0.6102,
      "step": 9323
    },
    {
      "epoch": 0.8336909871244635,
      "grad_norm": 0.15470498798908808,
      "learning_rate": 1.4160213732383364e-05,
      "loss": 0.6194,
      "step": 9324
    },
    {
      "epoch": 0.8337804005722461,
      "grad_norm": 0.1488401795119551,
      "learning_rate": 1.4145361244714995e-05,
      "loss": 0.6003,
      "step": 9325
    },
    {
      "epoch": 0.8338698140200286,
      "grad_norm": 0.1398744330750677,
      "learning_rate": 1.4130515957571488e-05,
      "loss": 0.6217,
      "step": 9326
    },
    {
      "epoch": 0.8339592274678111,
      "grad_norm": 0.15752887764047535,
      "learning_rate": 1.4115677872197908e-05,
      "loss": 0.6416,
      "step": 9327
    },
    {
      "epoch": 0.8340486409155937,
      "grad_norm": 0.16153673065611274,
      "learning_rate": 1.41008469898387e-05,
      "loss": 0.6157,
      "step": 9328
    },
    {
      "epoch": 0.8341380543633763,
      "grad_norm": 0.16313187798394363,
      "learning_rate": 1.4086023311737716e-05,
      "loss": 0.631,
      "step": 9329
    },
    {
      "epoch": 0.8342274678111588,
      "grad_norm": 0.15338084192809787,
      "learning_rate": 1.4071206839138217e-05,
      "loss": 0.6429,
      "step": 9330
    },
    {
      "epoch": 0.8343168812589413,
      "grad_norm": 0.14095961535494647,
      "learning_rate": 1.4056397573282808e-05,
      "loss": 0.6248,
      "step": 9331
    },
    {
      "epoch": 0.8344062947067239,
      "grad_norm": 0.15856706894328768,
      "learning_rate": 1.4041595515413542e-05,
      "loss": 0.645,
      "step": 9332
    },
    {
      "epoch": 0.8344957081545065,
      "grad_norm": 0.14200657403091424,
      "learning_rate": 1.4026800666771867e-05,
      "loss": 0.6135,
      "step": 9333
    },
    {
      "epoch": 0.834585121602289,
      "grad_norm": 0.16761816207068758,
      "learning_rate": 1.4012013028598547e-05,
      "loss": 0.3706,
      "step": 9334
    },
    {
      "epoch": 0.8346745350500715,
      "grad_norm": 0.17101505443275308,
      "learning_rate": 1.3997232602133892e-05,
      "loss": 0.6794,
      "step": 9335
    },
    {
      "epoch": 0.8347639484978541,
      "grad_norm": 0.14771158644873525,
      "learning_rate": 1.3982459388617452e-05,
      "loss": 0.6319,
      "step": 9336
    },
    {
      "epoch": 0.8348533619456366,
      "grad_norm": 0.16383543150625138,
      "learning_rate": 1.3967693389288261e-05,
      "loss": 0.6949,
      "step": 9337
    },
    {
      "epoch": 0.8349427753934192,
      "grad_norm": 0.15025515544003015,
      "learning_rate": 1.3952934605384749e-05,
      "loss": 0.5888,
      "step": 9338
    },
    {
      "epoch": 0.8350321888412017,
      "grad_norm": 0.16383320886420463,
      "learning_rate": 1.393818303814467e-05,
      "loss": 0.6828,
      "step": 9339
    },
    {
      "epoch": 0.8351216022889842,
      "grad_norm": 0.16234739663782463,
      "learning_rate": 1.3923438688805235e-05,
      "loss": 0.6367,
      "step": 9340
    },
    {
      "epoch": 0.8352110157367668,
      "grad_norm": 0.1649349764674383,
      "learning_rate": 1.3908701558603054e-05,
      "loss": 0.621,
      "step": 9341
    },
    {
      "epoch": 0.8353004291845494,
      "grad_norm": 0.1976354991162109,
      "learning_rate": 1.3893971648774095e-05,
      "loss": 0.6492,
      "step": 9342
    },
    {
      "epoch": 0.835389842632332,
      "grad_norm": 0.15677751530137407,
      "learning_rate": 1.3879248960553737e-05,
      "loss": 0.6389,
      "step": 9343
    },
    {
      "epoch": 0.8354792560801144,
      "grad_norm": 0.17111738474249819,
      "learning_rate": 1.386453349517679e-05,
      "loss": 0.6567,
      "step": 9344
    },
    {
      "epoch": 0.835568669527897,
      "grad_norm": 0.17751213886071018,
      "learning_rate": 1.384982525387738e-05,
      "loss": 0.7105,
      "step": 9345
    },
    {
      "epoch": 0.8356580829756796,
      "grad_norm": 0.15646410230199265,
      "learning_rate": 1.3835124237889074e-05,
      "loss": 0.6298,
      "step": 9346
    },
    {
      "epoch": 0.8357474964234621,
      "grad_norm": 0.17825701935587918,
      "learning_rate": 1.3820430448444866e-05,
      "loss": 0.3726,
      "step": 9347
    },
    {
      "epoch": 0.8358369098712446,
      "grad_norm": 0.167135049185593,
      "learning_rate": 1.3805743886777022e-05,
      "loss": 0.6657,
      "step": 9348
    },
    {
      "epoch": 0.8359263233190272,
      "grad_norm": 0.16867062939180688,
      "learning_rate": 1.3791064554117394e-05,
      "loss": 0.6234,
      "step": 9349
    },
    {
      "epoch": 0.8360157367668097,
      "grad_norm": 0.16703759038368557,
      "learning_rate": 1.377639245169704e-05,
      "loss": 0.6542,
      "step": 9350
    },
    {
      "epoch": 0.8361051502145923,
      "grad_norm": 0.1577281835908658,
      "learning_rate": 1.376172758074653e-05,
      "loss": 0.6323,
      "step": 9351
    },
    {
      "epoch": 0.8361945636623748,
      "grad_norm": 0.1646717372345021,
      "learning_rate": 1.3747069942495794e-05,
      "loss": 0.66,
      "step": 9352
    },
    {
      "epoch": 0.8362839771101573,
      "grad_norm": 0.181143818349725,
      "learning_rate": 1.3732419538174112e-05,
      "loss": 0.6698,
      "step": 9353
    },
    {
      "epoch": 0.8363733905579399,
      "grad_norm": 0.16541757026149337,
      "learning_rate": 1.3717776369010216e-05,
      "loss": 0.5959,
      "step": 9354
    },
    {
      "epoch": 0.8364628040057225,
      "grad_norm": 0.1550811753035461,
      "learning_rate": 1.370314043623222e-05,
      "loss": 0.6302,
      "step": 9355
    },
    {
      "epoch": 0.836552217453505,
      "grad_norm": 0.14678462535756506,
      "learning_rate": 1.368851174106761e-05,
      "loss": 0.6367,
      "step": 9356
    },
    {
      "epoch": 0.8366416309012875,
      "grad_norm": 0.15922817042882437,
      "learning_rate": 1.3673890284743285e-05,
      "loss": 0.6079,
      "step": 9357
    },
    {
      "epoch": 0.8367310443490701,
      "grad_norm": 0.1715633671252808,
      "learning_rate": 1.3659276068485549e-05,
      "loss": 0.6767,
      "step": 9358
    },
    {
      "epoch": 0.8368204577968527,
      "grad_norm": 0.1690705467812442,
      "learning_rate": 1.3644669093520035e-05,
      "loss": 0.6297,
      "step": 9359
    },
    {
      "epoch": 0.8369098712446352,
      "grad_norm": 0.16342309511054923,
      "learning_rate": 1.363006936107183e-05,
      "loss": 0.6478,
      "step": 9360
    },
    {
      "epoch": 0.8369992846924177,
      "grad_norm": 0.15382625508079253,
      "learning_rate": 1.3615476872365419e-05,
      "loss": 0.6289,
      "step": 9361
    },
    {
      "epoch": 0.8370886981402003,
      "grad_norm": 0.17987759775653647,
      "learning_rate": 1.3600891628624601e-05,
      "loss": 0.3757,
      "step": 9362
    },
    {
      "epoch": 0.8371781115879828,
      "grad_norm": 0.16582637218784857,
      "learning_rate": 1.3586313631072668e-05,
      "loss": 0.6457,
      "step": 9363
    },
    {
      "epoch": 0.8372675250357654,
      "grad_norm": 0.14959404979023028,
      "learning_rate": 1.357174288093228e-05,
      "loss": 0.6462,
      "step": 9364
    },
    {
      "epoch": 0.837356938483548,
      "grad_norm": 0.16139183300418342,
      "learning_rate": 1.355717937942541e-05,
      "loss": 0.6365,
      "step": 9365
    },
    {
      "epoch": 0.8374463519313304,
      "grad_norm": 0.16044748698017172,
      "learning_rate": 1.3542623127773523e-05,
      "loss": 0.6672,
      "step": 9366
    },
    {
      "epoch": 0.837535765379113,
      "grad_norm": 0.17128179228145762,
      "learning_rate": 1.3528074127197432e-05,
      "loss": 0.6018,
      "step": 9367
    },
    {
      "epoch": 0.8376251788268956,
      "grad_norm": 0.16434954573388086,
      "learning_rate": 1.3513532378917281e-05,
      "loss": 0.6094,
      "step": 9368
    },
    {
      "epoch": 0.8377145922746781,
      "grad_norm": 0.17942072933752284,
      "learning_rate": 1.3498997884152776e-05,
      "loss": 0.6452,
      "step": 9369
    },
    {
      "epoch": 0.8378040057224606,
      "grad_norm": 0.16006484595403642,
      "learning_rate": 1.3484470644122826e-05,
      "loss": 0.6405,
      "step": 9370
    },
    {
      "epoch": 0.8378934191702432,
      "grad_norm": 0.1617254624531045,
      "learning_rate": 1.3469950660045838e-05,
      "loss": 0.6349,
      "step": 9371
    },
    {
      "epoch": 0.8379828326180258,
      "grad_norm": 0.15405598289431968,
      "learning_rate": 1.3455437933139614e-05,
      "loss": 0.6365,
      "step": 9372
    },
    {
      "epoch": 0.8380722460658083,
      "grad_norm": 0.17341190403323836,
      "learning_rate": 1.344093246462126e-05,
      "loss": 0.6328,
      "step": 9373
    },
    {
      "epoch": 0.8381616595135909,
      "grad_norm": 0.16836251593853815,
      "learning_rate": 1.3426434255707365e-05,
      "loss": 0.6715,
      "step": 9374
    },
    {
      "epoch": 0.8382510729613734,
      "grad_norm": 0.15771299948497422,
      "learning_rate": 1.3411943307613883e-05,
      "loss": 0.6347,
      "step": 9375
    },
    {
      "epoch": 0.8383404864091559,
      "grad_norm": 0.17028508653651817,
      "learning_rate": 1.339745962155613e-05,
      "loss": 0.6219,
      "step": 9376
    },
    {
      "epoch": 0.8384298998569385,
      "grad_norm": 0.14876612570638761,
      "learning_rate": 1.3382983198748855e-05,
      "loss": 0.6298,
      "step": 9377
    },
    {
      "epoch": 0.8385193133047211,
      "grad_norm": 0.14203479575575756,
      "learning_rate": 1.33685140404062e-05,
      "loss": 0.6352,
      "step": 9378
    },
    {
      "epoch": 0.8386087267525035,
      "grad_norm": 0.19258124163343135,
      "learning_rate": 1.3354052147741625e-05,
      "loss": 0.6587,
      "step": 9379
    },
    {
      "epoch": 0.8386981402002861,
      "grad_norm": 0.16057758335244382,
      "learning_rate": 1.333959752196805e-05,
      "loss": 0.5909,
      "step": 9380
    },
    {
      "epoch": 0.8387875536480687,
      "grad_norm": 0.16241651837041046,
      "learning_rate": 1.3325150164297796e-05,
      "loss": 0.6431,
      "step": 9381
    },
    {
      "epoch": 0.8388769670958512,
      "grad_norm": 0.15484709937815763,
      "learning_rate": 1.3310710075942479e-05,
      "loss": 0.6497,
      "step": 9382
    },
    {
      "epoch": 0.8389663805436338,
      "grad_norm": 0.17877424957854732,
      "learning_rate": 1.3296277258113254e-05,
      "loss": 0.6367,
      "step": 9383
    },
    {
      "epoch": 0.8390557939914163,
      "grad_norm": 0.17551748693021882,
      "learning_rate": 1.328185171202052e-05,
      "loss": 0.6803,
      "step": 9384
    },
    {
      "epoch": 0.8391452074391988,
      "grad_norm": 0.16708846588810172,
      "learning_rate": 1.3267433438874155e-05,
      "loss": 0.6333,
      "step": 9385
    },
    {
      "epoch": 0.8392346208869814,
      "grad_norm": 0.17864643780444014,
      "learning_rate": 1.3253022439883412e-05,
      "loss": 0.6717,
      "step": 9386
    },
    {
      "epoch": 0.839324034334764,
      "grad_norm": 0.17118294294032554,
      "learning_rate": 1.3238618716256923e-05,
      "loss": 0.6587,
      "step": 9387
    },
    {
      "epoch": 0.8394134477825465,
      "grad_norm": 0.1686874688342724,
      "learning_rate": 1.322422226920268e-05,
      "loss": 0.6619,
      "step": 9388
    },
    {
      "epoch": 0.839502861230329,
      "grad_norm": 0.1747209003300159,
      "learning_rate": 1.3209833099928114e-05,
      "loss": 0.6765,
      "step": 9389
    },
    {
      "epoch": 0.8395922746781116,
      "grad_norm": 0.16059381397427194,
      "learning_rate": 1.3195451209640041e-05,
      "loss": 0.6598,
      "step": 9390
    },
    {
      "epoch": 0.8396816881258942,
      "grad_norm": 0.15145766606204325,
      "learning_rate": 1.3181076599544629e-05,
      "loss": 0.6392,
      "step": 9391
    },
    {
      "epoch": 0.8397711015736766,
      "grad_norm": 0.162125341104238,
      "learning_rate": 1.3166709270847511e-05,
      "loss": 0.6471,
      "step": 9392
    },
    {
      "epoch": 0.8398605150214592,
      "grad_norm": 0.1586241888493795,
      "learning_rate": 1.3152349224753579e-05,
      "loss": 0.6695,
      "step": 9393
    },
    {
      "epoch": 0.8399499284692418,
      "grad_norm": 0.17447977126869715,
      "learning_rate": 1.3137996462467239e-05,
      "loss": 0.6283,
      "step": 9394
    },
    {
      "epoch": 0.8400393419170243,
      "grad_norm": 0.16744169003963674,
      "learning_rate": 1.312365098519226e-05,
      "loss": 0.6663,
      "step": 9395
    },
    {
      "epoch": 0.8401287553648069,
      "grad_norm": 0.16159133015127905,
      "learning_rate": 1.31093127941317e-05,
      "loss": 0.6623,
      "step": 9396
    },
    {
      "epoch": 0.8402181688125894,
      "grad_norm": 0.14992408324344633,
      "learning_rate": 1.3094981890488167e-05,
      "loss": 0.59,
      "step": 9397
    },
    {
      "epoch": 0.8403075822603719,
      "grad_norm": 0.17565997213042642,
      "learning_rate": 1.3080658275463565e-05,
      "loss": 0.6156,
      "step": 9398
    },
    {
      "epoch": 0.8403969957081545,
      "grad_norm": 0.16392479262631884,
      "learning_rate": 1.3066341950259165e-05,
      "loss": 0.6407,
      "step": 9399
    },
    {
      "epoch": 0.8404864091559371,
      "grad_norm": 0.16020701008596755,
      "learning_rate": 1.3052032916075674e-05,
      "loss": 0.6533,
      "step": 9400
    },
    {
      "epoch": 0.8405758226037195,
      "grad_norm": 0.18432493371995373,
      "learning_rate": 1.3037731174113188e-05,
      "loss": 0.6968,
      "step": 9401
    },
    {
      "epoch": 0.8406652360515021,
      "grad_norm": 0.15876902364344211,
      "learning_rate": 1.3023436725571158e-05,
      "loss": 0.6214,
      "step": 9402
    },
    {
      "epoch": 0.8407546494992847,
      "grad_norm": 0.15341825207650955,
      "learning_rate": 1.3009149571648438e-05,
      "loss": 0.6423,
      "step": 9403
    },
    {
      "epoch": 0.8408440629470673,
      "grad_norm": 0.15686904437995125,
      "learning_rate": 1.2994869713543289e-05,
      "loss": 0.664,
      "step": 9404
    },
    {
      "epoch": 0.8409334763948498,
      "grad_norm": 0.1649714600547217,
      "learning_rate": 1.2980597152453344e-05,
      "loss": 0.5971,
      "step": 9405
    },
    {
      "epoch": 0.8410228898426323,
      "grad_norm": 0.14302653782276503,
      "learning_rate": 1.2966331889575644e-05,
      "loss": 0.6325,
      "step": 9406
    },
    {
      "epoch": 0.8411123032904149,
      "grad_norm": 0.1388939329122434,
      "learning_rate": 1.2952073926106556e-05,
      "loss": 0.6182,
      "step": 9407
    },
    {
      "epoch": 0.8412017167381974,
      "grad_norm": 0.15585902551613337,
      "learning_rate": 1.29378232632419e-05,
      "loss": 0.6481,
      "step": 9408
    },
    {
      "epoch": 0.84129113018598,
      "grad_norm": 0.1652956301920795,
      "learning_rate": 1.2923579902176886e-05,
      "loss": 0.6526,
      "step": 9409
    },
    {
      "epoch": 0.8413805436337625,
      "grad_norm": 0.16386779018695302,
      "learning_rate": 1.2909343844106014e-05,
      "loss": 0.6436,
      "step": 9410
    },
    {
      "epoch": 0.841469957081545,
      "grad_norm": 0.1736854227197271,
      "learning_rate": 1.289511509022332e-05,
      "loss": 0.631,
      "step": 9411
    },
    {
      "epoch": 0.8415593705293276,
      "grad_norm": 0.15885741051270974,
      "learning_rate": 1.2880893641722147e-05,
      "loss": 0.6503,
      "step": 9412
    },
    {
      "epoch": 0.8416487839771102,
      "grad_norm": 0.15676998014639992,
      "learning_rate": 1.2866679499795198e-05,
      "loss": 0.643,
      "step": 9413
    },
    {
      "epoch": 0.8417381974248928,
      "grad_norm": 0.1402412864812182,
      "learning_rate": 1.2852472665634607e-05,
      "loss": 0.6541,
      "step": 9414
    },
    {
      "epoch": 0.8418276108726752,
      "grad_norm": 0.17258538914474847,
      "learning_rate": 1.28382731404319e-05,
      "loss": 0.6512,
      "step": 9415
    },
    {
      "epoch": 0.8419170243204578,
      "grad_norm": 0.15865986109437377,
      "learning_rate": 1.2824080925377945e-05,
      "loss": 0.6547,
      "step": 9416
    },
    {
      "epoch": 0.8420064377682404,
      "grad_norm": 0.15881777888284815,
      "learning_rate": 1.2809896021663037e-05,
      "loss": 0.6569,
      "step": 9417
    },
    {
      "epoch": 0.8420958512160229,
      "grad_norm": 0.15809956499166317,
      "learning_rate": 1.2795718430476854e-05,
      "loss": 0.6472,
      "step": 9418
    },
    {
      "epoch": 0.8421852646638054,
      "grad_norm": 0.15954257021339466,
      "learning_rate": 1.278154815300845e-05,
      "loss": 0.6299,
      "step": 9419
    },
    {
      "epoch": 0.842274678111588,
      "grad_norm": 0.1686664973429743,
      "learning_rate": 1.2767385190446257e-05,
      "loss": 0.6614,
      "step": 9420
    },
    {
      "epoch": 0.8423640915593705,
      "grad_norm": 0.1640053435333076,
      "learning_rate": 1.2753229543978151e-05,
      "loss": 0.6537,
      "step": 9421
    },
    {
      "epoch": 0.8424535050071531,
      "grad_norm": 0.1838503750496039,
      "learning_rate": 1.2739081214791293e-05,
      "loss": 0.6499,
      "step": 9422
    },
    {
      "epoch": 0.8425429184549357,
      "grad_norm": 0.14985069750235883,
      "learning_rate": 1.2724940204072311e-05,
      "loss": 0.6495,
      "step": 9423
    },
    {
      "epoch": 0.8426323319027181,
      "grad_norm": 0.13886903498628694,
      "learning_rate": 1.271080651300719e-05,
      "loss": 0.6003,
      "step": 9424
    },
    {
      "epoch": 0.8427217453505007,
      "grad_norm": 0.17734068711138326,
      "learning_rate": 1.2696680142781313e-05,
      "loss": 0.6722,
      "step": 9425
    },
    {
      "epoch": 0.8428111587982833,
      "grad_norm": 0.15036275024694537,
      "learning_rate": 1.2682561094579448e-05,
      "loss": 0.6695,
      "step": 9426
    },
    {
      "epoch": 0.8429005722460658,
      "grad_norm": 0.16318318183519648,
      "learning_rate": 1.2668449369585723e-05,
      "loss": 0.6625,
      "step": 9427
    },
    {
      "epoch": 0.8429899856938483,
      "grad_norm": 0.17568537241074,
      "learning_rate": 1.2654344968983668e-05,
      "loss": 0.6366,
      "step": 9428
    },
    {
      "epoch": 0.8430793991416309,
      "grad_norm": 0.14246036917696714,
      "learning_rate": 1.2640247893956236e-05,
      "loss": 0.6253,
      "step": 9429
    },
    {
      "epoch": 0.8431688125894135,
      "grad_norm": 0.14886061371629183,
      "learning_rate": 1.2626158145685696e-05,
      "loss": 0.596,
      "step": 9430
    },
    {
      "epoch": 0.843258226037196,
      "grad_norm": 0.1501890676357727,
      "learning_rate": 1.2612075725353722e-05,
      "loss": 0.6394,
      "step": 9431
    },
    {
      "epoch": 0.8433476394849786,
      "grad_norm": 0.17796640349173753,
      "learning_rate": 1.259800063414146e-05,
      "loss": 0.6526,
      "step": 9432
    },
    {
      "epoch": 0.843437052932761,
      "grad_norm": 0.15746714289204636,
      "learning_rate": 1.258393287322932e-05,
      "loss": 0.6518,
      "step": 9433
    },
    {
      "epoch": 0.8435264663805436,
      "grad_norm": 0.16594288295131204,
      "learning_rate": 1.2569872443797148e-05,
      "loss": 0.6107,
      "step": 9434
    },
    {
      "epoch": 0.8436158798283262,
      "grad_norm": 0.13896647537239518,
      "learning_rate": 1.2555819347024211e-05,
      "loss": 0.6098,
      "step": 9435
    },
    {
      "epoch": 0.8437052932761088,
      "grad_norm": 0.15326138218945676,
      "learning_rate": 1.2541773584089079e-05,
      "loss": 0.6613,
      "step": 9436
    },
    {
      "epoch": 0.8437947067238912,
      "grad_norm": 0.14380677000648245,
      "learning_rate": 1.2527735156169773e-05,
      "loss": 0.659,
      "step": 9437
    },
    {
      "epoch": 0.8438841201716738,
      "grad_norm": 0.16485467220898872,
      "learning_rate": 1.2513704064443677e-05,
      "loss": 0.6401,
      "step": 9438
    },
    {
      "epoch": 0.8439735336194564,
      "grad_norm": 0.1561538528793126,
      "learning_rate": 1.249968031008757e-05,
      "loss": 0.6235,
      "step": 9439
    },
    {
      "epoch": 0.844062947067239,
      "grad_norm": 0.18331392394519966,
      "learning_rate": 1.2485663894277611e-05,
      "loss": 0.6756,
      "step": 9440
    },
    {
      "epoch": 0.8441523605150214,
      "grad_norm": 0.16205302463335342,
      "learning_rate": 1.2471654818189316e-05,
      "loss": 0.6217,
      "step": 9441
    },
    {
      "epoch": 0.844241773962804,
      "grad_norm": 0.15930001964915977,
      "learning_rate": 1.2457653082997634e-05,
      "loss": 0.6596,
      "step": 9442
    },
    {
      "epoch": 0.8443311874105865,
      "grad_norm": 0.15345440023244303,
      "learning_rate": 1.2443658689876847e-05,
      "loss": 0.6584,
      "step": 9443
    },
    {
      "epoch": 0.8444206008583691,
      "grad_norm": 0.18069374160510437,
      "learning_rate": 1.2429671640000695e-05,
      "loss": 0.6546,
      "step": 9444
    },
    {
      "epoch": 0.8445100143061517,
      "grad_norm": 0.17317147638873237,
      "learning_rate": 1.2415691934542183e-05,
      "loss": 0.6767,
      "step": 9445
    },
    {
      "epoch": 0.8445994277539342,
      "grad_norm": 0.15690505295732224,
      "learning_rate": 1.2401719574673854e-05,
      "loss": 0.6357,
      "step": 9446
    },
    {
      "epoch": 0.8446888412017167,
      "grad_norm": 0.1547566223910505,
      "learning_rate": 1.2387754561567488e-05,
      "loss": 0.6142,
      "step": 9447
    },
    {
      "epoch": 0.8447782546494993,
      "grad_norm": 0.1793089231914206,
      "learning_rate": 1.237379689639434e-05,
      "loss": 0.6099,
      "step": 9448
    },
    {
      "epoch": 0.8448676680972819,
      "grad_norm": 0.15492713428089314,
      "learning_rate": 1.2359846580325041e-05,
      "loss": 0.6185,
      "step": 9449
    },
    {
      "epoch": 0.8449570815450643,
      "grad_norm": 0.1697272545681045,
      "learning_rate": 1.2345903614529552e-05,
      "loss": 0.6677,
      "step": 9450
    },
    {
      "epoch": 0.8450464949928469,
      "grad_norm": 0.13486000994712555,
      "learning_rate": 1.233196800017724e-05,
      "loss": 0.6168,
      "step": 9451
    },
    {
      "epoch": 0.8451359084406295,
      "grad_norm": 0.15468192609340012,
      "learning_rate": 1.2318039738436936e-05,
      "loss": 0.6697,
      "step": 9452
    },
    {
      "epoch": 0.845225321888412,
      "grad_norm": 0.15260968443976017,
      "learning_rate": 1.230411883047673e-05,
      "loss": 0.6273,
      "step": 9453
    },
    {
      "epoch": 0.8453147353361946,
      "grad_norm": 0.18224708880559434,
      "learning_rate": 1.2290205277464161e-05,
      "loss": 0.6374,
      "step": 9454
    },
    {
      "epoch": 0.8454041487839771,
      "grad_norm": 0.15960628332526017,
      "learning_rate": 1.2276299080566178e-05,
      "loss": 0.6339,
      "step": 9455
    },
    {
      "epoch": 0.8454935622317596,
      "grad_norm": 0.1537796224602484,
      "learning_rate": 1.2262400240949023e-05,
      "loss": 0.6595,
      "step": 9456
    },
    {
      "epoch": 0.8455829756795422,
      "grad_norm": 0.14210563911344465,
      "learning_rate": 1.22485087597784e-05,
      "loss": 0.6132,
      "step": 9457
    },
    {
      "epoch": 0.8456723891273248,
      "grad_norm": 0.17040506909001057,
      "learning_rate": 1.2234624638219372e-05,
      "loss": 0.3915,
      "step": 9458
    },
    {
      "epoch": 0.8457618025751072,
      "grad_norm": 0.17384071713963842,
      "learning_rate": 1.2220747877436378e-05,
      "loss": 0.6888,
      "step": 9459
    },
    {
      "epoch": 0.8458512160228898,
      "grad_norm": 0.16934753823552587,
      "learning_rate": 1.2206878478593276e-05,
      "loss": 0.6485,
      "step": 9460
    },
    {
      "epoch": 0.8459406294706724,
      "grad_norm": 0.16104010411582223,
      "learning_rate": 1.2193016442853221e-05,
      "loss": 0.6194,
      "step": 9461
    },
    {
      "epoch": 0.846030042918455,
      "grad_norm": 0.1659524176440258,
      "learning_rate": 1.2179161771378845e-05,
      "loss": 0.6309,
      "step": 9462
    },
    {
      "epoch": 0.8461194563662375,
      "grad_norm": 0.16862795877297412,
      "learning_rate": 1.2165314465332122e-05,
      "loss": 0.6382,
      "step": 9463
    },
    {
      "epoch": 0.84620886981402,
      "grad_norm": 0.17826129281242947,
      "learning_rate": 1.2151474525874374e-05,
      "loss": 0.651,
      "step": 9464
    },
    {
      "epoch": 0.8462982832618026,
      "grad_norm": 0.1734937389519231,
      "learning_rate": 1.2137641954166346e-05,
      "loss": 0.6901,
      "step": 9465
    },
    {
      "epoch": 0.8463876967095851,
      "grad_norm": 0.1567085476740857,
      "learning_rate": 1.212381675136821e-05,
      "loss": 0.6414,
      "step": 9466
    },
    {
      "epoch": 0.8464771101573677,
      "grad_norm": 0.16734428242704102,
      "learning_rate": 1.2109998918639431e-05,
      "loss": 0.6601,
      "step": 9467
    },
    {
      "epoch": 0.8465665236051502,
      "grad_norm": 0.16615028354249348,
      "learning_rate": 1.209618845713889e-05,
      "loss": 0.606,
      "step": 9468
    },
    {
      "epoch": 0.8466559370529327,
      "grad_norm": 0.17535569059268666,
      "learning_rate": 1.2082385368024884e-05,
      "loss": 0.657,
      "step": 9469
    },
    {
      "epoch": 0.8467453505007153,
      "grad_norm": 0.15733500911832815,
      "learning_rate": 1.2068589652455008e-05,
      "loss": 0.6246,
      "step": 9470
    },
    {
      "epoch": 0.8468347639484979,
      "grad_norm": 0.17103041072109143,
      "learning_rate": 1.205480131158634e-05,
      "loss": 0.613,
      "step": 9471
    },
    {
      "epoch": 0.8469241773962805,
      "grad_norm": 0.14616063672931634,
      "learning_rate": 1.2041020346575272e-05,
      "loss": 0.6098,
      "step": 9472
    },
    {
      "epoch": 0.8470135908440629,
      "grad_norm": 0.15465154760884514,
      "learning_rate": 1.2027246758577593e-05,
      "loss": 0.6508,
      "step": 9473
    },
    {
      "epoch": 0.8471030042918455,
      "grad_norm": 0.17304574550172058,
      "learning_rate": 1.2013480548748512e-05,
      "loss": 0.7127,
      "step": 9474
    },
    {
      "epoch": 0.8471924177396281,
      "grad_norm": 0.16546225586381622,
      "learning_rate": 1.199972171824253e-05,
      "loss": 0.6345,
      "step": 9475
    },
    {
      "epoch": 0.8472818311874106,
      "grad_norm": 0.1533755432611764,
      "learning_rate": 1.198597026821361e-05,
      "loss": 0.6215,
      "step": 9476
    },
    {
      "epoch": 0.8473712446351931,
      "grad_norm": 0.17308552690628123,
      "learning_rate": 1.1972226199815074e-05,
      "loss": 0.6473,
      "step": 9477
    },
    {
      "epoch": 0.8474606580829757,
      "grad_norm": 0.16367555263400865,
      "learning_rate": 1.1958489514199634e-05,
      "loss": 0.6638,
      "step": 9478
    },
    {
      "epoch": 0.8475500715307582,
      "grad_norm": 0.17530689020873036,
      "learning_rate": 1.1944760212519313e-05,
      "loss": 0.6353,
      "step": 9479
    },
    {
      "epoch": 0.8476394849785408,
      "grad_norm": 0.15946194697606372,
      "learning_rate": 1.1931038295925645e-05,
      "loss": 0.6361,
      "step": 9480
    },
    {
      "epoch": 0.8477288984263234,
      "grad_norm": 0.1537906772426355,
      "learning_rate": 1.1917323765569411e-05,
      "loss": 0.5821,
      "step": 9481
    },
    {
      "epoch": 0.8478183118741058,
      "grad_norm": 0.16564147465029327,
      "learning_rate": 1.1903616622600866e-05,
      "loss": 0.6643,
      "step": 9482
    },
    {
      "epoch": 0.8479077253218884,
      "grad_norm": 0.17759473688646293,
      "learning_rate": 1.1889916868169614e-05,
      "loss": 0.393,
      "step": 9483
    },
    {
      "epoch": 0.847997138769671,
      "grad_norm": 0.15979027527777426,
      "learning_rate": 1.1876224503424615e-05,
      "loss": 0.6454,
      "step": 9484
    },
    {
      "epoch": 0.8480865522174535,
      "grad_norm": 0.16193779319038717,
      "learning_rate": 1.1862539529514228e-05,
      "loss": 0.6359,
      "step": 9485
    },
    {
      "epoch": 0.848175965665236,
      "grad_norm": 0.15840229256617505,
      "learning_rate": 1.184886194758621e-05,
      "loss": 0.628,
      "step": 9486
    },
    {
      "epoch": 0.8482653791130186,
      "grad_norm": 0.14356875949421843,
      "learning_rate": 1.183519175878769e-05,
      "loss": 0.6392,
      "step": 9487
    },
    {
      "epoch": 0.8483547925608012,
      "grad_norm": 0.18131479457434477,
      "learning_rate": 1.182152896426515e-05,
      "loss": 0.6717,
      "step": 9488
    },
    {
      "epoch": 0.8484442060085837,
      "grad_norm": 0.18047708355047345,
      "learning_rate": 1.1807873565164506e-05,
      "loss": 0.667,
      "step": 9489
    },
    {
      "epoch": 0.8485336194563662,
      "grad_norm": 0.16494302675527356,
      "learning_rate": 1.1794225562630978e-05,
      "loss": 0.6818,
      "step": 9490
    },
    {
      "epoch": 0.8486230329041488,
      "grad_norm": 0.15344712999452623,
      "learning_rate": 1.1780584957809227e-05,
      "loss": 0.6131,
      "step": 9491
    },
    {
      "epoch": 0.8487124463519313,
      "grad_norm": 0.15292231371749976,
      "learning_rate": 1.1766951751843292e-05,
      "loss": 0.6337,
      "step": 9492
    },
    {
      "epoch": 0.8488018597997139,
      "grad_norm": 0.16554045958015698,
      "learning_rate": 1.1753325945876515e-05,
      "loss": 0.5853,
      "step": 9493
    },
    {
      "epoch": 0.8488912732474965,
      "grad_norm": 0.14213658879124377,
      "learning_rate": 1.173970754105176e-05,
      "loss": 0.603,
      "step": 9494
    },
    {
      "epoch": 0.8489806866952789,
      "grad_norm": 0.15357275942556947,
      "learning_rate": 1.1726096538511122e-05,
      "loss": 0.6307,
      "step": 9495
    },
    {
      "epoch": 0.8490701001430615,
      "grad_norm": 0.1619150619118243,
      "learning_rate": 1.1712492939396157e-05,
      "loss": 0.6419,
      "step": 9496
    },
    {
      "epoch": 0.8491595135908441,
      "grad_norm": 0.1734773473172626,
      "learning_rate": 1.1698896744847809e-05,
      "loss": 0.6397,
      "step": 9497
    },
    {
      "epoch": 0.8492489270386266,
      "grad_norm": 0.154222470553884,
      "learning_rate": 1.168530795600632e-05,
      "loss": 0.6148,
      "step": 9498
    },
    {
      "epoch": 0.8493383404864091,
      "grad_norm": 0.14390898003106534,
      "learning_rate": 1.1671726574011399e-05,
      "loss": 0.6252,
      "step": 9499
    },
    {
      "epoch": 0.8494277539341917,
      "grad_norm": 0.16276788502177023,
      "learning_rate": 1.1658152600002104e-05,
      "loss": 0.639,
      "step": 9500
    },
    {
      "epoch": 0.8495171673819742,
      "grad_norm": 0.1599850867431523,
      "learning_rate": 1.1644586035116856e-05,
      "loss": 0.6149,
      "step": 9501
    },
    {
      "epoch": 0.8496065808297568,
      "grad_norm": 0.1657388011128715,
      "learning_rate": 1.1631026880493468e-05,
      "loss": 0.6167,
      "step": 9502
    },
    {
      "epoch": 0.8496959942775394,
      "grad_norm": 0.1574780367529582,
      "learning_rate": 1.1617475137269152e-05,
      "loss": 0.648,
      "step": 9503
    },
    {
      "epoch": 0.8497854077253219,
      "grad_norm": 0.14580808722455546,
      "learning_rate": 1.1603930806580444e-05,
      "loss": 0.6433,
      "step": 9504
    },
    {
      "epoch": 0.8498748211731044,
      "grad_norm": 0.13598252880197279,
      "learning_rate": 1.15903938895633e-05,
      "loss": 0.6261,
      "step": 9505
    },
    {
      "epoch": 0.849964234620887,
      "grad_norm": 0.14399058557594938,
      "learning_rate": 1.157686438735307e-05,
      "loss": 0.652,
      "step": 9506
    },
    {
      "epoch": 0.8500536480686696,
      "grad_norm": 0.14456079693661686,
      "learning_rate": 1.156334230108439e-05,
      "loss": 0.6462,
      "step": 9507
    },
    {
      "epoch": 0.850143061516452,
      "grad_norm": 0.15807544545520552,
      "learning_rate": 1.1549827631891418e-05,
      "loss": 0.6507,
      "step": 9508
    },
    {
      "epoch": 0.8502324749642346,
      "grad_norm": 0.1771393507710016,
      "learning_rate": 1.1536320380907596e-05,
      "loss": 0.6772,
      "step": 9509
    },
    {
      "epoch": 0.8503218884120172,
      "grad_norm": 0.17101741481921165,
      "learning_rate": 1.1522820549265723e-05,
      "loss": 0.6212,
      "step": 9510
    },
    {
      "epoch": 0.8504113018597997,
      "grad_norm": 0.14050965955994407,
      "learning_rate": 1.1509328138098041e-05,
      "loss": 0.6265,
      "step": 9511
    },
    {
      "epoch": 0.8505007153075823,
      "grad_norm": 0.14702600944153008,
      "learning_rate": 1.1495843148536157e-05,
      "loss": 0.6173,
      "step": 9512
    },
    {
      "epoch": 0.8505901287553648,
      "grad_norm": 0.18831273881838284,
      "learning_rate": 1.1482365581711008e-05,
      "loss": 0.6942,
      "step": 9513
    },
    {
      "epoch": 0.8506795422031473,
      "grad_norm": 0.17191955313156845,
      "learning_rate": 1.1468895438752947e-05,
      "loss": 0.6885,
      "step": 9514
    },
    {
      "epoch": 0.8507689556509299,
      "grad_norm": 0.14416465355825184,
      "learning_rate": 1.1455432720791714e-05,
      "loss": 0.6358,
      "step": 9515
    },
    {
      "epoch": 0.8508583690987125,
      "grad_norm": 0.14492946797830414,
      "learning_rate": 1.1441977428956396e-05,
      "loss": 0.6555,
      "step": 9516
    },
    {
      "epoch": 0.850947782546495,
      "grad_norm": 0.1327132314010567,
      "learning_rate": 1.1428529564375502e-05,
      "loss": 0.5835,
      "step": 9517
    },
    {
      "epoch": 0.8510371959942775,
      "grad_norm": 0.19010800268747383,
      "learning_rate": 1.1415089128176847e-05,
      "loss": 0.6249,
      "step": 9518
    },
    {
      "epoch": 0.8511266094420601,
      "grad_norm": 0.16880897868306868,
      "learning_rate": 1.1401656121487692e-05,
      "loss": 0.6622,
      "step": 9519
    },
    {
      "epoch": 0.8512160228898427,
      "grad_norm": 0.1625968025407872,
      "learning_rate": 1.1388230545434653e-05,
      "loss": 0.6159,
      "step": 9520
    },
    {
      "epoch": 0.8513054363376252,
      "grad_norm": 0.17373559852349596,
      "learning_rate": 1.1374812401143653e-05,
      "loss": 0.6657,
      "step": 9521
    },
    {
      "epoch": 0.8513948497854077,
      "grad_norm": 0.16490331489074278,
      "learning_rate": 1.1361401689740137e-05,
      "loss": 0.6106,
      "step": 9522
    },
    {
      "epoch": 0.8514842632331903,
      "grad_norm": 0.17536483953997958,
      "learning_rate": 1.1347998412348825e-05,
      "loss": 0.6638,
      "step": 9523
    },
    {
      "epoch": 0.8515736766809728,
      "grad_norm": 0.16576155693847577,
      "learning_rate": 1.13346025700938e-05,
      "loss": 0.6717,
      "step": 9524
    },
    {
      "epoch": 0.8516630901287554,
      "grad_norm": 0.16761739513037746,
      "learning_rate": 1.1321214164098582e-05,
      "loss": 0.6199,
      "step": 9525
    },
    {
      "epoch": 0.8517525035765379,
      "grad_norm": 0.1730443277753247,
      "learning_rate": 1.1307833195486062e-05,
      "loss": 0.6491,
      "step": 9526
    },
    {
      "epoch": 0.8518419170243204,
      "grad_norm": 0.1602941051397057,
      "learning_rate": 1.1294459665378432e-05,
      "loss": 0.651,
      "step": 9527
    },
    {
      "epoch": 0.851931330472103,
      "grad_norm": 0.14702589538428296,
      "learning_rate": 1.1281093574897338e-05,
      "loss": 0.6433,
      "step": 9528
    },
    {
      "epoch": 0.8520207439198856,
      "grad_norm": 0.17192935958686603,
      "learning_rate": 1.1267734925163787e-05,
      "loss": 0.6634,
      "step": 9529
    },
    {
      "epoch": 0.852110157367668,
      "grad_norm": 0.15982839552518135,
      "learning_rate": 1.1254383717298134e-05,
      "loss": 0.6696,
      "step": 9530
    },
    {
      "epoch": 0.8521995708154506,
      "grad_norm": 0.14835230029244326,
      "learning_rate": 1.1241039952420173e-05,
      "loss": 0.6117,
      "step": 9531
    },
    {
      "epoch": 0.8522889842632332,
      "grad_norm": 0.14581724929408324,
      "learning_rate": 1.1227703631648978e-05,
      "loss": 0.6307,
      "step": 9532
    },
    {
      "epoch": 0.8523783977110158,
      "grad_norm": 0.13716848305278367,
      "learning_rate": 1.1214374756103064e-05,
      "loss": 0.5994,
      "step": 9533
    },
    {
      "epoch": 0.8524678111587983,
      "grad_norm": 0.17989355283065608,
      "learning_rate": 1.1201053326900313e-05,
      "loss": 0.6118,
      "step": 9534
    },
    {
      "epoch": 0.8525572246065808,
      "grad_norm": 0.16025387982579062,
      "learning_rate": 1.1187739345157977e-05,
      "loss": 0.6036,
      "step": 9535
    },
    {
      "epoch": 0.8526466380543634,
      "grad_norm": 0.14023655592524703,
      "learning_rate": 1.1174432811992685e-05,
      "loss": 0.617,
      "step": 9536
    },
    {
      "epoch": 0.8527360515021459,
      "grad_norm": 0.14470440438118418,
      "learning_rate": 1.1161133728520467e-05,
      "loss": 0.6283,
      "step": 9537
    },
    {
      "epoch": 0.8528254649499285,
      "grad_norm": 0.15297863771523043,
      "learning_rate": 1.1147842095856642e-05,
      "loss": 0.6203,
      "step": 9538
    },
    {
      "epoch": 0.852914878397711,
      "grad_norm": 0.1417201206394635,
      "learning_rate": 1.1134557915115994e-05,
      "loss": 0.662,
      "step": 9539
    },
    {
      "epoch": 0.8530042918454935,
      "grad_norm": 0.1445341201888869,
      "learning_rate": 1.112128118741268e-05,
      "loss": 0.5853,
      "step": 9540
    },
    {
      "epoch": 0.8530937052932761,
      "grad_norm": 0.1576210386008755,
      "learning_rate": 1.1108011913860128e-05,
      "loss": 0.6383,
      "step": 9541
    },
    {
      "epoch": 0.8531831187410587,
      "grad_norm": 0.18145136188434877,
      "learning_rate": 1.1094750095571282e-05,
      "loss": 0.669,
      "step": 9542
    },
    {
      "epoch": 0.8532725321888412,
      "grad_norm": 0.17135311831513048,
      "learning_rate": 1.1081495733658409e-05,
      "loss": 0.6654,
      "step": 9543
    },
    {
      "epoch": 0.8533619456366237,
      "grad_norm": 0.16321539991376638,
      "learning_rate": 1.1068248829233063e-05,
      "loss": 0.6392,
      "step": 9544
    },
    {
      "epoch": 0.8534513590844063,
      "grad_norm": 0.1581212593897179,
      "learning_rate": 1.10550093834063e-05,
      "loss": 0.6497,
      "step": 9545
    },
    {
      "epoch": 0.8535407725321889,
      "grad_norm": 0.17589917247863476,
      "learning_rate": 1.1041777397288488e-05,
      "loss": 0.6552,
      "step": 9546
    },
    {
      "epoch": 0.8536301859799714,
      "grad_norm": 0.1473321539491113,
      "learning_rate": 1.1028552871989362e-05,
      "loss": 0.6189,
      "step": 9547
    },
    {
      "epoch": 0.8537195994277539,
      "grad_norm": 0.169973526236367,
      "learning_rate": 1.1015335808618055e-05,
      "loss": 0.6457,
      "step": 9548
    },
    {
      "epoch": 0.8538090128755365,
      "grad_norm": 0.1776930824099972,
      "learning_rate": 1.100212620828307e-05,
      "loss": 0.654,
      "step": 9549
    },
    {
      "epoch": 0.853898426323319,
      "grad_norm": 0.20065244419372916,
      "learning_rate": 1.0988924072092266e-05,
      "loss": 0.6647,
      "step": 9550
    },
    {
      "epoch": 0.8539878397711016,
      "grad_norm": 0.16080444904792382,
      "learning_rate": 1.0975729401152934e-05,
      "loss": 0.625,
      "step": 9551
    },
    {
      "epoch": 0.8540772532188842,
      "grad_norm": 0.17243991294679686,
      "learning_rate": 1.0962542196571634e-05,
      "loss": 0.6647,
      "step": 9552
    },
    {
      "epoch": 0.8541666666666666,
      "grad_norm": 0.16286459869127276,
      "learning_rate": 1.0949362459454393e-05,
      "loss": 0.6437,
      "step": 9553
    },
    {
      "epoch": 0.8542560801144492,
      "grad_norm": 0.13272994338769692,
      "learning_rate": 1.0936190190906603e-05,
      "loss": 0.6172,
      "step": 9554
    },
    {
      "epoch": 0.8543454935622318,
      "grad_norm": 0.15240750748812484,
      "learning_rate": 1.0923025392032937e-05,
      "loss": 0.6404,
      "step": 9555
    },
    {
      "epoch": 0.8544349070100143,
      "grad_norm": 0.15669552046940105,
      "learning_rate": 1.0909868063937567e-05,
      "loss": 0.6497,
      "step": 9556
    },
    {
      "epoch": 0.8545243204577968,
      "grad_norm": 0.15994901165077094,
      "learning_rate": 1.0896718207723988e-05,
      "loss": 0.674,
      "step": 9557
    },
    {
      "epoch": 0.8546137339055794,
      "grad_norm": 0.1564802705901878,
      "learning_rate": 1.0883575824495029e-05,
      "loss": 0.6508,
      "step": 9558
    },
    {
      "epoch": 0.854703147353362,
      "grad_norm": 0.16917601118173314,
      "learning_rate": 1.0870440915352942e-05,
      "loss": 0.6541,
      "step": 9559
    },
    {
      "epoch": 0.8547925608011445,
      "grad_norm": 0.1630683069253721,
      "learning_rate": 1.0857313481399355e-05,
      "loss": 0.6212,
      "step": 9560
    },
    {
      "epoch": 0.8548819742489271,
      "grad_norm": 0.1682824451485015,
      "learning_rate": 1.0844193523735202e-05,
      "loss": 0.6898,
      "step": 9561
    },
    {
      "epoch": 0.8549713876967096,
      "grad_norm": 0.1440100182408994,
      "learning_rate": 1.0831081043460868e-05,
      "loss": 0.6416,
      "step": 9562
    },
    {
      "epoch": 0.8550608011444921,
      "grad_norm": 0.1710989914392556,
      "learning_rate": 1.081797604167608e-05,
      "loss": 0.6583,
      "step": 9563
    },
    {
      "epoch": 0.8551502145922747,
      "grad_norm": 0.16840087219147798,
      "learning_rate": 1.0804878519479943e-05,
      "loss": 0.6714,
      "step": 9564
    },
    {
      "epoch": 0.8552396280400573,
      "grad_norm": 0.16979116257457252,
      "learning_rate": 1.079178847797091e-05,
      "loss": 0.5985,
      "step": 9565
    },
    {
      "epoch": 0.8553290414878397,
      "grad_norm": 0.18331595022932465,
      "learning_rate": 1.0778705918246867e-05,
      "loss": 0.6378,
      "step": 9566
    },
    {
      "epoch": 0.8554184549356223,
      "grad_norm": 0.17496826577116723,
      "learning_rate": 1.0765630841404994e-05,
      "loss": 0.668,
      "step": 9567
    },
    {
      "epoch": 0.8555078683834049,
      "grad_norm": 0.152549422420171,
      "learning_rate": 1.0752563248541891e-05,
      "loss": 0.6796,
      "step": 9568
    },
    {
      "epoch": 0.8555972818311874,
      "grad_norm": 0.1730070475372429,
      "learning_rate": 1.0739503140753516e-05,
      "loss": 0.664,
      "step": 9569
    },
    {
      "epoch": 0.85568669527897,
      "grad_norm": 0.1546133183211104,
      "learning_rate": 1.0726450519135222e-05,
      "loss": 0.6231,
      "step": 9570
    },
    {
      "epoch": 0.8557761087267525,
      "grad_norm": 0.15641706218835685,
      "learning_rate": 1.0713405384781727e-05,
      "loss": 0.6505,
      "step": 9571
    },
    {
      "epoch": 0.855865522174535,
      "grad_norm": 0.17327873992893109,
      "learning_rate": 1.0700367738787064e-05,
      "loss": 0.642,
      "step": 9572
    },
    {
      "epoch": 0.8559549356223176,
      "grad_norm": 0.16746774741694023,
      "learning_rate": 1.0687337582244727e-05,
      "loss": 0.6703,
      "step": 9573
    },
    {
      "epoch": 0.8560443490701002,
      "grad_norm": 0.144659635474647,
      "learning_rate": 1.067431491624753e-05,
      "loss": 0.5922,
      "step": 9574
    },
    {
      "epoch": 0.8561337625178826,
      "grad_norm": 0.1699630533705952,
      "learning_rate": 1.0661299741887654e-05,
      "loss": 0.6213,
      "step": 9575
    },
    {
      "epoch": 0.8562231759656652,
      "grad_norm": 0.15707631284224446,
      "learning_rate": 1.0648292060256649e-05,
      "loss": 0.6145,
      "step": 9576
    },
    {
      "epoch": 0.8563125894134478,
      "grad_norm": 0.16364580521340727,
      "learning_rate": 1.0635291872445518e-05,
      "loss": 0.6286,
      "step": 9577
    },
    {
      "epoch": 0.8564020028612304,
      "grad_norm": 0.16331222731968373,
      "learning_rate": 1.0622299179544516e-05,
      "loss": 0.6473,
      "step": 9578
    },
    {
      "epoch": 0.8564914163090128,
      "grad_norm": 0.18106762582951505,
      "learning_rate": 1.0609313982643331e-05,
      "loss": 0.703,
      "step": 9579
    },
    {
      "epoch": 0.8565808297567954,
      "grad_norm": 0.17822029624462923,
      "learning_rate": 1.0596336282831054e-05,
      "loss": 0.3376,
      "step": 9580
    },
    {
      "epoch": 0.856670243204578,
      "grad_norm": 0.14780004401095254,
      "learning_rate": 1.0583366081196066e-05,
      "loss": 0.5816,
      "step": 9581
    },
    {
      "epoch": 0.8567596566523605,
      "grad_norm": 0.1663184320998042,
      "learning_rate": 1.0570403378826166e-05,
      "loss": 0.6522,
      "step": 9582
    },
    {
      "epoch": 0.8568490701001431,
      "grad_norm": 0.15219158687806916,
      "learning_rate": 1.0557448176808537e-05,
      "loss": 0.636,
      "step": 9583
    },
    {
      "epoch": 0.8569384835479256,
      "grad_norm": 0.16905654915234258,
      "learning_rate": 1.0544500476229713e-05,
      "loss": 0.6655,
      "step": 9584
    },
    {
      "epoch": 0.8570278969957081,
      "grad_norm": 0.16736291907451972,
      "learning_rate": 1.0531560278175611e-05,
      "loss": 0.6188,
      "step": 9585
    },
    {
      "epoch": 0.8571173104434907,
      "grad_norm": 0.16396898342264435,
      "learning_rate": 1.0518627583731477e-05,
      "loss": 0.6259,
      "step": 9586
    },
    {
      "epoch": 0.8572067238912733,
      "grad_norm": 0.14640424135956814,
      "learning_rate": 1.0505702393981987e-05,
      "loss": 0.6037,
      "step": 9587
    },
    {
      "epoch": 0.8572961373390557,
      "grad_norm": 0.1578505326722252,
      "learning_rate": 1.0492784710011184e-05,
      "loss": 0.6277,
      "step": 9588
    },
    {
      "epoch": 0.8573855507868383,
      "grad_norm": 0.17853557466350004,
      "learning_rate": 1.04798745329024e-05,
      "loss": 0.6606,
      "step": 9589
    },
    {
      "epoch": 0.8574749642346209,
      "grad_norm": 0.1650899972571641,
      "learning_rate": 1.0466971863738406e-05,
      "loss": 0.6794,
      "step": 9590
    },
    {
      "epoch": 0.8575643776824035,
      "grad_norm": 0.16597824162854974,
      "learning_rate": 1.045407670360139e-05,
      "loss": 0.6392,
      "step": 9591
    },
    {
      "epoch": 0.857653791130186,
      "grad_norm": 0.1509328386434119,
      "learning_rate": 1.0441189053572809e-05,
      "loss": 0.6292,
      "step": 9592
    },
    {
      "epoch": 0.8577432045779685,
      "grad_norm": 0.174749643172546,
      "learning_rate": 1.0428308914733531e-05,
      "loss": 0.6562,
      "step": 9593
    },
    {
      "epoch": 0.8578326180257511,
      "grad_norm": 0.17354806090486324,
      "learning_rate": 1.0415436288163826e-05,
      "loss": 0.6833,
      "step": 9594
    },
    {
      "epoch": 0.8579220314735336,
      "grad_norm": 0.16666698844907302,
      "learning_rate": 1.0402571174943276e-05,
      "loss": 0.604,
      "step": 9595
    },
    {
      "epoch": 0.8580114449213162,
      "grad_norm": 0.15275586937821709,
      "learning_rate": 1.0389713576150883e-05,
      "loss": 0.6089,
      "step": 9596
    },
    {
      "epoch": 0.8581008583690987,
      "grad_norm": 0.1786144087147834,
      "learning_rate": 1.0376863492864975e-05,
      "loss": 0.692,
      "step": 9597
    },
    {
      "epoch": 0.8581902718168812,
      "grad_norm": 0.15759326449580496,
      "learning_rate": 1.0364020926163298e-05,
      "loss": 0.6631,
      "step": 9598
    },
    {
      "epoch": 0.8582796852646638,
      "grad_norm": 0.16236303923247006,
      "learning_rate": 1.0351185877122938e-05,
      "loss": 0.6309,
      "step": 9599
    },
    {
      "epoch": 0.8583690987124464,
      "grad_norm": 0.15872389635031067,
      "learning_rate": 1.0338358346820353e-05,
      "loss": 0.6079,
      "step": 9600
    },
    {
      "epoch": 0.858458512160229,
      "grad_norm": 0.17107398449590638,
      "learning_rate": 1.0325538336331364e-05,
      "loss": 0.6238,
      "step": 9601
    },
    {
      "epoch": 0.8585479256080114,
      "grad_norm": 0.16134209123831042,
      "learning_rate": 1.0312725846731175e-05,
      "loss": 0.6189,
      "step": 9602
    },
    {
      "epoch": 0.858637339055794,
      "grad_norm": 0.17231068926356685,
      "learning_rate": 1.0299920879094372e-05,
      "loss": 0.6291,
      "step": 9603
    },
    {
      "epoch": 0.8587267525035766,
      "grad_norm": 0.15214428432867697,
      "learning_rate": 1.0287123434494827e-05,
      "loss": 0.6176,
      "step": 9604
    },
    {
      "epoch": 0.8588161659513591,
      "grad_norm": 0.1553770496435128,
      "learning_rate": 1.027433351400594e-05,
      "loss": 0.615,
      "step": 9605
    },
    {
      "epoch": 0.8589055793991416,
      "grad_norm": 0.17315323905946264,
      "learning_rate": 1.0261551118700318e-05,
      "loss": 0.6337,
      "step": 9606
    },
    {
      "epoch": 0.8589949928469242,
      "grad_norm": 0.16400873025003482,
      "learning_rate": 1.0248776249650027e-05,
      "loss": 0.6198,
      "step": 9607
    },
    {
      "epoch": 0.8590844062947067,
      "grad_norm": 0.1758348357106021,
      "learning_rate": 1.0236008907926508e-05,
      "loss": 0.644,
      "step": 9608
    },
    {
      "epoch": 0.8591738197424893,
      "grad_norm": 0.1402855003377153,
      "learning_rate": 1.0223249094600485e-05,
      "loss": 0.6349,
      "step": 9609
    },
    {
      "epoch": 0.8592632331902719,
      "grad_norm": 0.1429475828539783,
      "learning_rate": 1.0210496810742143e-05,
      "loss": 0.6353,
      "step": 9610
    },
    {
      "epoch": 0.8593526466380543,
      "grad_norm": 0.15723690034649374,
      "learning_rate": 1.0197752057420995e-05,
      "loss": 0.6528,
      "step": 9611
    },
    {
      "epoch": 0.8594420600858369,
      "grad_norm": 0.17679138089200186,
      "learning_rate": 1.018501483570592e-05,
      "loss": 0.3584,
      "step": 9612
    },
    {
      "epoch": 0.8595314735336195,
      "grad_norm": 0.15466453039325612,
      "learning_rate": 1.0172285146665195e-05,
      "loss": 0.645,
      "step": 9613
    },
    {
      "epoch": 0.859620886981402,
      "grad_norm": 0.16110617167278896,
      "learning_rate": 1.0159562991366444e-05,
      "loss": 0.6526,
      "step": 9614
    },
    {
      "epoch": 0.8597103004291845,
      "grad_norm": 0.14461455610608884,
      "learning_rate": 1.0146848370876627e-05,
      "loss": 0.6273,
      "step": 9615
    },
    {
      "epoch": 0.8597997138769671,
      "grad_norm": 0.1757096140204721,
      "learning_rate": 1.013414128626211e-05,
      "loss": 0.675,
      "step": 9616
    },
    {
      "epoch": 0.8598891273247496,
      "grad_norm": 0.15596011690432326,
      "learning_rate": 1.0121441738588644e-05,
      "loss": 0.6289,
      "step": 9617
    },
    {
      "epoch": 0.8599785407725322,
      "grad_norm": 0.17451817392192417,
      "learning_rate": 1.0108749728921319e-05,
      "loss": 0.3657,
      "step": 9618
    },
    {
      "epoch": 0.8600679542203148,
      "grad_norm": 0.14089201209264948,
      "learning_rate": 1.0096065258324606e-05,
      "loss": 0.628,
      "step": 9619
    },
    {
      "epoch": 0.8601573676680973,
      "grad_norm": 0.16708347980288282,
      "learning_rate": 1.0083388327862298e-05,
      "loss": 0.6597,
      "step": 9620
    },
    {
      "epoch": 0.8602467811158798,
      "grad_norm": 0.15595072339849342,
      "learning_rate": 1.0070718938597623e-05,
      "loss": 0.6029,
      "step": 9621
    },
    {
      "epoch": 0.8603361945636624,
      "grad_norm": 0.1450600644160252,
      "learning_rate": 1.0058057091593154e-05,
      "loss": 0.5752,
      "step": 9622
    },
    {
      "epoch": 0.860425608011445,
      "grad_norm": 0.17357833745157988,
      "learning_rate": 1.0045402787910818e-05,
      "loss": 0.6855,
      "step": 9623
    },
    {
      "epoch": 0.8605150214592274,
      "grad_norm": 0.18373071806843724,
      "learning_rate": 1.0032756028611878e-05,
      "loss": 0.6658,
      "step": 9624
    },
    {
      "epoch": 0.86060443490701,
      "grad_norm": 0.15623245274612166,
      "learning_rate": 1.0020116814757085e-05,
      "loss": 0.5918,
      "step": 9625
    },
    {
      "epoch": 0.8606938483547926,
      "grad_norm": 0.17029146154405794,
      "learning_rate": 1.0007485147406404e-05,
      "loss": 0.6356,
      "step": 9626
    },
    {
      "epoch": 0.8607832618025751,
      "grad_norm": 0.15049287276112017,
      "learning_rate": 9.99486102761925e-06,
      "loss": 0.5996,
      "step": 9627
    },
    {
      "epoch": 0.8608726752503576,
      "grad_norm": 0.15484308466623897,
      "learning_rate": 9.982244456454427e-06,
      "loss": 0.6735,
      "step": 9628
    },
    {
      "epoch": 0.8609620886981402,
      "grad_norm": 0.17000590622833528,
      "learning_rate": 9.969635434970037e-06,
      "loss": 0.674,
      "step": 9629
    },
    {
      "epoch": 0.8610515021459227,
      "grad_norm": 0.16350292045768886,
      "learning_rate": 9.957033964223582e-06,
      "loss": 0.678,
      "step": 9630
    },
    {
      "epoch": 0.8611409155937053,
      "grad_norm": 0.1577443351313989,
      "learning_rate": 9.944440045271953e-06,
      "loss": 0.6421,
      "step": 9631
    },
    {
      "epoch": 0.8612303290414879,
      "grad_norm": 0.16298548310249655,
      "learning_rate": 9.931853679171377e-06,
      "loss": 0.6318,
      "step": 9632
    },
    {
      "epoch": 0.8613197424892703,
      "grad_norm": 0.16571501713554432,
      "learning_rate": 9.919274866977457e-06,
      "loss": 0.6321,
      "step": 9633
    },
    {
      "epoch": 0.8614091559370529,
      "grad_norm": 0.18759497800063943,
      "learning_rate": 9.90670360974517e-06,
      "loss": 0.6827,
      "step": 9634
    },
    {
      "epoch": 0.8614985693848355,
      "grad_norm": 0.17484809126727396,
      "learning_rate": 9.894139908528843e-06,
      "loss": 0.6573,
      "step": 9635
    },
    {
      "epoch": 0.8615879828326181,
      "grad_norm": 0.1495747069984564,
      "learning_rate": 9.881583764382175e-06,
      "loss": 0.6263,
      "step": 9636
    },
    {
      "epoch": 0.8616773962804005,
      "grad_norm": 0.1593771159018308,
      "learning_rate": 9.869035178358266e-06,
      "loss": 0.6141,
      "step": 9637
    },
    {
      "epoch": 0.8617668097281831,
      "grad_norm": 0.15805850615787187,
      "learning_rate": 9.856494151509488e-06,
      "loss": 0.5847,
      "step": 9638
    },
    {
      "epoch": 0.8618562231759657,
      "grad_norm": 0.16657663097623235,
      "learning_rate": 9.84396068488771e-06,
      "loss": 0.6276,
      "step": 9639
    },
    {
      "epoch": 0.8619456366237482,
      "grad_norm": 0.17178400894023835,
      "learning_rate": 9.831434779544057e-06,
      "loss": 0.6307,
      "step": 9640
    },
    {
      "epoch": 0.8620350500715308,
      "grad_norm": 0.15502832998612526,
      "learning_rate": 9.818916436529069e-06,
      "loss": 0.6158,
      "step": 9641
    },
    {
      "epoch": 0.8621244635193133,
      "grad_norm": 0.17095545073476368,
      "learning_rate": 9.80640565689267e-06,
      "loss": 0.6544,
      "step": 9642
    },
    {
      "epoch": 0.8622138769670958,
      "grad_norm": 0.1786059476691707,
      "learning_rate": 9.793902441684077e-06,
      "loss": 0.6715,
      "step": 9643
    },
    {
      "epoch": 0.8623032904148784,
      "grad_norm": 0.17248791487797538,
      "learning_rate": 9.781406791951952e-06,
      "loss": 0.6243,
      "step": 9644
    },
    {
      "epoch": 0.862392703862661,
      "grad_norm": 0.16575984754459888,
      "learning_rate": 9.76891870874428e-06,
      "loss": 0.6202,
      "step": 9645
    },
    {
      "epoch": 0.8624821173104434,
      "grad_norm": 0.157874010584707,
      "learning_rate": 9.756438193108419e-06,
      "loss": 0.6267,
      "step": 9646
    },
    {
      "epoch": 0.862571530758226,
      "grad_norm": 0.1758029346282653,
      "learning_rate": 9.743965246091102e-06,
      "loss": 0.6959,
      "step": 9647
    },
    {
      "epoch": 0.8626609442060086,
      "grad_norm": 0.15546480271967386,
      "learning_rate": 9.731499868738447e-06,
      "loss": 0.5847,
      "step": 9648
    },
    {
      "epoch": 0.8627503576537912,
      "grad_norm": 0.14601625418936298,
      "learning_rate": 9.719042062095851e-06,
      "loss": 0.6188,
      "step": 9649
    },
    {
      "epoch": 0.8628397711015737,
      "grad_norm": 0.16929187048800734,
      "learning_rate": 9.706591827208166e-06,
      "loss": 0.6409,
      "step": 9650
    },
    {
      "epoch": 0.8629291845493562,
      "grad_norm": 0.16534229817440418,
      "learning_rate": 9.694149165119603e-06,
      "loss": 0.5976,
      "step": 9651
    },
    {
      "epoch": 0.8630185979971388,
      "grad_norm": 0.14436288526352165,
      "learning_rate": 9.68171407687365e-06,
      "loss": 0.5949,
      "step": 9652
    },
    {
      "epoch": 0.8631080114449213,
      "grad_norm": 0.16737690286684562,
      "learning_rate": 9.66928656351329e-06,
      "loss": 0.6042,
      "step": 9653
    },
    {
      "epoch": 0.8631974248927039,
      "grad_norm": 0.19102270823148326,
      "learning_rate": 9.656866626080763e-06,
      "loss": 0.6304,
      "step": 9654
    },
    {
      "epoch": 0.8632868383404864,
      "grad_norm": 0.15025555897199852,
      "learning_rate": 9.644454265617731e-06,
      "loss": 0.6385,
      "step": 9655
    },
    {
      "epoch": 0.8633762517882689,
      "grad_norm": 0.1643219631649302,
      "learning_rate": 9.632049483165184e-06,
      "loss": 0.6418,
      "step": 9656
    },
    {
      "epoch": 0.8634656652360515,
      "grad_norm": 0.1564674786511382,
      "learning_rate": 9.619652279763536e-06,
      "loss": 0.6319,
      "step": 9657
    },
    {
      "epoch": 0.8635550786838341,
      "grad_norm": 0.1750515944498966,
      "learning_rate": 9.607262656452475e-06,
      "loss": 0.6476,
      "step": 9658
    },
    {
      "epoch": 0.8636444921316166,
      "grad_norm": 0.18477194787339699,
      "learning_rate": 9.59488061427114e-06,
      "loss": 0.6765,
      "step": 9659
    },
    {
      "epoch": 0.8637339055793991,
      "grad_norm": 0.15687311045001376,
      "learning_rate": 9.582506154257976e-06,
      "loss": 0.6266,
      "step": 9660
    },
    {
      "epoch": 0.8638233190271817,
      "grad_norm": 0.1526936984240728,
      "learning_rate": 9.57013927745083e-06,
      "loss": 0.6665,
      "step": 9661
    },
    {
      "epoch": 0.8639127324749643,
      "grad_norm": 0.16369517375831785,
      "learning_rate": 9.557779984886905e-06,
      "loss": 0.6782,
      "step": 9662
    },
    {
      "epoch": 0.8640021459227468,
      "grad_norm": 0.15615748251117076,
      "learning_rate": 9.545428277602731e-06,
      "loss": 0.6739,
      "step": 9663
    },
    {
      "epoch": 0.8640915593705293,
      "grad_norm": 0.1789901570804379,
      "learning_rate": 9.533084156634242e-06,
      "loss": 0.6563,
      "step": 9664
    },
    {
      "epoch": 0.8641809728183119,
      "grad_norm": 0.17071593031556237,
      "learning_rate": 9.520747623016747e-06,
      "loss": 0.637,
      "step": 9665
    },
    {
      "epoch": 0.8642703862660944,
      "grad_norm": 0.15716498616371136,
      "learning_rate": 9.508418677784847e-06,
      "loss": 0.6286,
      "step": 9666
    },
    {
      "epoch": 0.864359799713877,
      "grad_norm": 0.165185468689565,
      "learning_rate": 9.496097321972597e-06,
      "loss": 0.6325,
      "step": 9667
    },
    {
      "epoch": 0.8644492131616596,
      "grad_norm": 0.16077992600214716,
      "learning_rate": 9.48378355661339e-06,
      "loss": 0.6432,
      "step": 9668
    },
    {
      "epoch": 0.864538626609442,
      "grad_norm": 0.1663791706041725,
      "learning_rate": 9.471477382739912e-06,
      "loss": 0.6328,
      "step": 9669
    },
    {
      "epoch": 0.8646280400572246,
      "grad_norm": 0.16239772554868326,
      "learning_rate": 9.459178801384304e-06,
      "loss": 0.6495,
      "step": 9670
    },
    {
      "epoch": 0.8647174535050072,
      "grad_norm": 0.16607072576006718,
      "learning_rate": 9.446887813578031e-06,
      "loss": 0.6224,
      "step": 9671
    },
    {
      "epoch": 0.8648068669527897,
      "grad_norm": 0.1690752046526053,
      "learning_rate": 9.434604420351911e-06,
      "loss": 0.642,
      "step": 9672
    },
    {
      "epoch": 0.8648962804005722,
      "grad_norm": 0.17066947887215447,
      "learning_rate": 9.422328622736142e-06,
      "loss": 0.65,
      "step": 9673
    },
    {
      "epoch": 0.8649856938483548,
      "grad_norm": 0.1448455320282672,
      "learning_rate": 9.41006042176027e-06,
      "loss": 0.5941,
      "step": 9674
    },
    {
      "epoch": 0.8650751072961373,
      "grad_norm": 0.17661981147698008,
      "learning_rate": 9.397799818453235e-06,
      "loss": 0.6653,
      "step": 9675
    },
    {
      "epoch": 0.8651645207439199,
      "grad_norm": 0.15854996749818112,
      "learning_rate": 9.385546813843326e-06,
      "loss": 0.6478,
      "step": 9676
    },
    {
      "epoch": 0.8652539341917024,
      "grad_norm": 0.1860110904011231,
      "learning_rate": 9.373301408958157e-06,
      "loss": 0.6407,
      "step": 9677
    },
    {
      "epoch": 0.865343347639485,
      "grad_norm": 0.16187926595779736,
      "learning_rate": 9.361063604824738e-06,
      "loss": 0.6359,
      "step": 9678
    },
    {
      "epoch": 0.8654327610872675,
      "grad_norm": 0.158116453963245,
      "learning_rate": 9.34883340246946e-06,
      "loss": 0.6583,
      "step": 9679
    },
    {
      "epoch": 0.8655221745350501,
      "grad_norm": 0.15301605250213307,
      "learning_rate": 9.336610802918044e-06,
      "loss": 0.6448,
      "step": 9680
    },
    {
      "epoch": 0.8656115879828327,
      "grad_norm": 0.1791762290096296,
      "learning_rate": 9.324395807195585e-06,
      "loss": 0.6655,
      "step": 9681
    },
    {
      "epoch": 0.8657010014306151,
      "grad_norm": 0.16734691625605208,
      "learning_rate": 9.312188416326562e-06,
      "loss": 0.647,
      "step": 9682
    },
    {
      "epoch": 0.8657904148783977,
      "grad_norm": 0.13413517552828244,
      "learning_rate": 9.299988631334755e-06,
      "loss": 0.5863,
      "step": 9683
    },
    {
      "epoch": 0.8658798283261803,
      "grad_norm": 0.16077087021335418,
      "learning_rate": 9.287796453243358e-06,
      "loss": 0.657,
      "step": 9684
    },
    {
      "epoch": 0.8659692417739628,
      "grad_norm": 0.14182124286186173,
      "learning_rate": 9.275611883074941e-06,
      "loss": 0.6545,
      "step": 9685
    },
    {
      "epoch": 0.8660586552217453,
      "grad_norm": 0.1531276990873468,
      "learning_rate": 9.263434921851377e-06,
      "loss": 0.6722,
      "step": 9686
    },
    {
      "epoch": 0.8661480686695279,
      "grad_norm": 0.15400857824461664,
      "learning_rate": 9.251265570593914e-06,
      "loss": 0.5953,
      "step": 9687
    },
    {
      "epoch": 0.8662374821173104,
      "grad_norm": 0.1802646544882971,
      "learning_rate": 9.23910383032326e-06,
      "loss": 0.6937,
      "step": 9688
    },
    {
      "epoch": 0.866326895565093,
      "grad_norm": 0.18978607539569498,
      "learning_rate": 9.226949702059329e-06,
      "loss": 0.6675,
      "step": 9689
    },
    {
      "epoch": 0.8664163090128756,
      "grad_norm": 0.16691446862911005,
      "learning_rate": 9.214803186821497e-06,
      "loss": 0.6459,
      "step": 9690
    },
    {
      "epoch": 0.866505722460658,
      "grad_norm": 0.1779001984749356,
      "learning_rate": 9.202664285628504e-06,
      "loss": 0.6186,
      "step": 9691
    },
    {
      "epoch": 0.8665951359084406,
      "grad_norm": 0.167228148445782,
      "learning_rate": 9.190532999498392e-06,
      "loss": 0.6203,
      "step": 9692
    },
    {
      "epoch": 0.8666845493562232,
      "grad_norm": 0.1897094250556055,
      "learning_rate": 9.178409329448601e-06,
      "loss": 0.6862,
      "step": 9693
    },
    {
      "epoch": 0.8667739628040058,
      "grad_norm": 0.16967964699479632,
      "learning_rate": 9.16629327649593e-06,
      "loss": 0.6456,
      "step": 9694
    },
    {
      "epoch": 0.8668633762517882,
      "grad_norm": 0.17872803821508196,
      "learning_rate": 9.154184841656544e-06,
      "loss": 0.6538,
      "step": 9695
    },
    {
      "epoch": 0.8669527896995708,
      "grad_norm": 0.16751022595111673,
      "learning_rate": 9.142084025945984e-06,
      "loss": 0.5985,
      "step": 9696
    },
    {
      "epoch": 0.8670422031473534,
      "grad_norm": 0.15986236517614302,
      "learning_rate": 9.129990830379087e-06,
      "loss": 0.6737,
      "step": 9697
    },
    {
      "epoch": 0.8671316165951359,
      "grad_norm": 0.15317411183442828,
      "learning_rate": 9.117905255970116e-06,
      "loss": 0.631,
      "step": 9698
    },
    {
      "epoch": 0.8672210300429185,
      "grad_norm": 0.16165859733050714,
      "learning_rate": 9.105827303732695e-06,
      "loss": 0.6461,
      "step": 9699
    },
    {
      "epoch": 0.867310443490701,
      "grad_norm": 0.1599776950491416,
      "learning_rate": 9.093756974679746e-06,
      "loss": 0.6562,
      "step": 9700
    },
    {
      "epoch": 0.8673998569384835,
      "grad_norm": 0.1653550261169338,
      "learning_rate": 9.081694269823582e-06,
      "loss": 0.6271,
      "step": 9701
    },
    {
      "epoch": 0.8674892703862661,
      "grad_norm": 0.172018087141059,
      "learning_rate": 9.069639190175972e-06,
      "loss": 0.6589,
      "step": 9702
    },
    {
      "epoch": 0.8675786838340487,
      "grad_norm": 0.17968476236068107,
      "learning_rate": 9.057591736747883e-06,
      "loss": 0.6989,
      "step": 9703
    },
    {
      "epoch": 0.8676680972818311,
      "grad_norm": 0.17142612351400205,
      "learning_rate": 9.045551910549744e-06,
      "loss": 0.6435,
      "step": 9704
    },
    {
      "epoch": 0.8677575107296137,
      "grad_norm": 0.16482004734615086,
      "learning_rate": 9.033519712591332e-06,
      "loss": 0.6547,
      "step": 9705
    },
    {
      "epoch": 0.8678469241773963,
      "grad_norm": 0.16726648499341684,
      "learning_rate": 9.021495143881753e-06,
      "loss": 0.6692,
      "step": 9706
    },
    {
      "epoch": 0.8679363376251789,
      "grad_norm": 0.16180789650955452,
      "learning_rate": 9.0094782054295e-06,
      "loss": 0.6392,
      "step": 9707
    },
    {
      "epoch": 0.8680257510729614,
      "grad_norm": 0.1409165290139672,
      "learning_rate": 8.997468898242422e-06,
      "loss": 0.6441,
      "step": 9708
    },
    {
      "epoch": 0.8681151645207439,
      "grad_norm": 0.16596733755460225,
      "learning_rate": 8.985467223327726e-06,
      "loss": 0.6456,
      "step": 9709
    },
    {
      "epoch": 0.8682045779685265,
      "grad_norm": 0.1777119875036587,
      "learning_rate": 8.973473181691993e-06,
      "loss": 0.6594,
      "step": 9710
    },
    {
      "epoch": 0.868293991416309,
      "grad_norm": 0.14123579547328155,
      "learning_rate": 8.96148677434111e-06,
      "loss": 0.6206,
      "step": 9711
    },
    {
      "epoch": 0.8683834048640916,
      "grad_norm": 0.18513063176934483,
      "learning_rate": 8.949508002280382e-06,
      "loss": 0.6563,
      "step": 9712
    },
    {
      "epoch": 0.8684728183118741,
      "grad_norm": 0.17035465843795375,
      "learning_rate": 8.937536866514462e-06,
      "loss": 0.6228,
      "step": 9713
    },
    {
      "epoch": 0.8685622317596566,
      "grad_norm": 0.1511267097321403,
      "learning_rate": 8.925573368047358e-06,
      "loss": 0.6244,
      "step": 9714
    },
    {
      "epoch": 0.8686516452074392,
      "grad_norm": 0.16949720136912427,
      "learning_rate": 8.91361750788241e-06,
      "loss": 0.6525,
      "step": 9715
    },
    {
      "epoch": 0.8687410586552218,
      "grad_norm": 0.14817188214064556,
      "learning_rate": 8.901669287022384e-06,
      "loss": 0.6149,
      "step": 9716
    },
    {
      "epoch": 0.8688304721030042,
      "grad_norm": 0.1621227233796124,
      "learning_rate": 8.889728706469314e-06,
      "loss": 0.6451,
      "step": 9717
    },
    {
      "epoch": 0.8689198855507868,
      "grad_norm": 0.15176154042277554,
      "learning_rate": 8.877795767224672e-06,
      "loss": 0.654,
      "step": 9718
    },
    {
      "epoch": 0.8690092989985694,
      "grad_norm": 0.16568089710402079,
      "learning_rate": 8.86587047028926e-06,
      "loss": 0.6289,
      "step": 9719
    },
    {
      "epoch": 0.869098712446352,
      "grad_norm": 0.15021923417827013,
      "learning_rate": 8.853952816663213e-06,
      "loss": 0.645,
      "step": 9720
    },
    {
      "epoch": 0.8691881258941345,
      "grad_norm": 0.16340695171435576,
      "learning_rate": 8.842042807346051e-06,
      "loss": 0.6388,
      "step": 9721
    },
    {
      "epoch": 0.869277539341917,
      "grad_norm": 0.18177227886158587,
      "learning_rate": 8.830140443336699e-06,
      "loss": 0.6542,
      "step": 9722
    },
    {
      "epoch": 0.8693669527896996,
      "grad_norm": 0.1493650933910897,
      "learning_rate": 8.818245725633356e-06,
      "loss": 0.6489,
      "step": 9723
    },
    {
      "epoch": 0.8694563662374821,
      "grad_norm": 0.16515920067886258,
      "learning_rate": 8.806358655233615e-06,
      "loss": 0.6855,
      "step": 9724
    },
    {
      "epoch": 0.8695457796852647,
      "grad_norm": 0.17940109365356835,
      "learning_rate": 8.794479233134456e-06,
      "loss": 0.3688,
      "step": 9725
    },
    {
      "epoch": 0.8696351931330472,
      "grad_norm": 0.16294478204459,
      "learning_rate": 8.78260746033217e-06,
      "loss": 0.6653,
      "step": 9726
    },
    {
      "epoch": 0.8697246065808297,
      "grad_norm": 0.17000002733580052,
      "learning_rate": 8.770743337822418e-06,
      "loss": 0.622,
      "step": 9727
    },
    {
      "epoch": 0.8698140200286123,
      "grad_norm": 0.15732340749154297,
      "learning_rate": 8.758886866600257e-06,
      "loss": 0.6238,
      "step": 9728
    },
    {
      "epoch": 0.8699034334763949,
      "grad_norm": 0.17389985760228574,
      "learning_rate": 8.74703804766005e-06,
      "loss": 0.649,
      "step": 9729
    },
    {
      "epoch": 0.8699928469241774,
      "grad_norm": 0.16510426162951145,
      "learning_rate": 8.735196881995589e-06,
      "loss": 0.6434,
      "step": 9730
    },
    {
      "epoch": 0.8700822603719599,
      "grad_norm": 0.13084169312824395,
      "learning_rate": 8.723363370599924e-06,
      "loss": 0.6104,
      "step": 9731
    },
    {
      "epoch": 0.8701716738197425,
      "grad_norm": 0.155711614446123,
      "learning_rate": 8.71153751446553e-06,
      "loss": 0.6148,
      "step": 9732
    },
    {
      "epoch": 0.870261087267525,
      "grad_norm": 0.16596616869665926,
      "learning_rate": 8.699719314584265e-06,
      "loss": 0.6495,
      "step": 9733
    },
    {
      "epoch": 0.8703505007153076,
      "grad_norm": 0.15835415072402018,
      "learning_rate": 8.687908771947251e-06,
      "loss": 0.6287,
      "step": 9734
    },
    {
      "epoch": 0.8704399141630901,
      "grad_norm": 0.16764412059812941,
      "learning_rate": 8.676105887545039e-06,
      "loss": 0.6535,
      "step": 9735
    },
    {
      "epoch": 0.8705293276108726,
      "grad_norm": 0.1662376647778928,
      "learning_rate": 8.66431066236757e-06,
      "loss": 0.6385,
      "step": 9736
    },
    {
      "epoch": 0.8706187410586552,
      "grad_norm": 0.18020082927795877,
      "learning_rate": 8.652523097404042e-06,
      "loss": 0.6349,
      "step": 9737
    },
    {
      "epoch": 0.8707081545064378,
      "grad_norm": 0.15381469547495727,
      "learning_rate": 8.640743193643075e-06,
      "loss": 0.6554,
      "step": 9738
    },
    {
      "epoch": 0.8707975679542204,
      "grad_norm": 0.14502601815818547,
      "learning_rate": 8.628970952072667e-06,
      "loss": 0.6164,
      "step": 9739
    },
    {
      "epoch": 0.8708869814020028,
      "grad_norm": 0.1585474931546672,
      "learning_rate": 8.617206373680098e-06,
      "loss": 0.617,
      "step": 9740
    },
    {
      "epoch": 0.8709763948497854,
      "grad_norm": 0.1613278268947802,
      "learning_rate": 8.605449459452075e-06,
      "loss": 0.6166,
      "step": 9741
    },
    {
      "epoch": 0.871065808297568,
      "grad_norm": 0.16154463658597687,
      "learning_rate": 8.593700210374622e-06,
      "loss": 0.6288,
      "step": 9742
    },
    {
      "epoch": 0.8711552217453505,
      "grad_norm": 0.1623206821227971,
      "learning_rate": 8.58195862743314e-06,
      "loss": 0.6159,
      "step": 9743
    },
    {
      "epoch": 0.871244635193133,
      "grad_norm": 0.12973863128631227,
      "learning_rate": 8.570224711612385e-06,
      "loss": 0.5867,
      "step": 9744
    },
    {
      "epoch": 0.8713340486409156,
      "grad_norm": 0.16118784228015448,
      "learning_rate": 8.55849846389648e-06,
      "loss": 0.6202,
      "step": 9745
    },
    {
      "epoch": 0.8714234620886981,
      "grad_norm": 0.169033415054654,
      "learning_rate": 8.546779885268863e-06,
      "loss": 0.6317,
      "step": 9746
    },
    {
      "epoch": 0.8715128755364807,
      "grad_norm": 0.16333904477002786,
      "learning_rate": 8.535068976712368e-06,
      "loss": 0.665,
      "step": 9747
    },
    {
      "epoch": 0.8716022889842633,
      "grad_norm": 0.15770738547979735,
      "learning_rate": 8.523365739209188e-06,
      "loss": 0.6645,
      "step": 9748
    },
    {
      "epoch": 0.8716917024320457,
      "grad_norm": 0.1553395089736218,
      "learning_rate": 8.511670173740816e-06,
      "loss": 0.6391,
      "step": 9749
    },
    {
      "epoch": 0.8717811158798283,
      "grad_norm": 0.16218187402077117,
      "learning_rate": 8.499982281288221e-06,
      "loss": 0.6292,
      "step": 9750
    },
    {
      "epoch": 0.8718705293276109,
      "grad_norm": 0.1631759005016172,
      "learning_rate": 8.488302062831576e-06,
      "loss": 0.6417,
      "step": 9751
    },
    {
      "epoch": 0.8719599427753935,
      "grad_norm": 0.1670439316561267,
      "learning_rate": 8.476629519350532e-06,
      "loss": 0.6355,
      "step": 9752
    },
    {
      "epoch": 0.8720493562231759,
      "grad_norm": 0.16599879770818013,
      "learning_rate": 8.464964651824048e-06,
      "loss": 0.6985,
      "step": 9753
    },
    {
      "epoch": 0.8721387696709585,
      "grad_norm": 0.16616656289374704,
      "learning_rate": 8.453307461230409e-06,
      "loss": 0.6335,
      "step": 9754
    },
    {
      "epoch": 0.8722281831187411,
      "grad_norm": 0.14704590047171662,
      "learning_rate": 8.441657948547322e-06,
      "loss": 0.6233,
      "step": 9755
    },
    {
      "epoch": 0.8723175965665236,
      "grad_norm": 0.14757848509402632,
      "learning_rate": 8.430016114751805e-06,
      "loss": 0.6314,
      "step": 9756
    },
    {
      "epoch": 0.8724070100143062,
      "grad_norm": 0.15329520943675828,
      "learning_rate": 8.418381960820243e-06,
      "loss": 0.6122,
      "step": 9757
    },
    {
      "epoch": 0.8724964234620887,
      "grad_norm": 0.14076252828127578,
      "learning_rate": 8.40675548772839e-06,
      "loss": 0.611,
      "step": 9758
    },
    {
      "epoch": 0.8725858369098712,
      "grad_norm": 0.1774180196587893,
      "learning_rate": 8.395136696451355e-06,
      "loss": 0.6367,
      "step": 9759
    },
    {
      "epoch": 0.8726752503576538,
      "grad_norm": 0.16184246385601397,
      "learning_rate": 8.383525587963558e-06,
      "loss": 0.6414,
      "step": 9760
    },
    {
      "epoch": 0.8727646638054364,
      "grad_norm": 0.18520911095101625,
      "learning_rate": 8.371922163238821e-06,
      "loss": 0.6488,
      "step": 9761
    },
    {
      "epoch": 0.8728540772532188,
      "grad_norm": 0.16425068375912272,
      "learning_rate": 8.36032642325033e-06,
      "loss": 0.617,
      "step": 9762
    },
    {
      "epoch": 0.8729434907010014,
      "grad_norm": 0.15839260767268942,
      "learning_rate": 8.348738368970566e-06,
      "loss": 0.6382,
      "step": 9763
    },
    {
      "epoch": 0.873032904148784,
      "grad_norm": 0.1519179544218837,
      "learning_rate": 8.337158001371449e-06,
      "loss": 0.6464,
      "step": 9764
    },
    {
      "epoch": 0.8731223175965666,
      "grad_norm": 0.1772912472590058,
      "learning_rate": 8.325585321424178e-06,
      "loss": 0.639,
      "step": 9765
    },
    {
      "epoch": 0.873211731044349,
      "grad_norm": 0.1681759926751955,
      "learning_rate": 8.314020330099348e-06,
      "loss": 0.6278,
      "step": 9766
    },
    {
      "epoch": 0.8733011444921316,
      "grad_norm": 0.14846260109728815,
      "learning_rate": 8.302463028366924e-06,
      "loss": 0.6498,
      "step": 9767
    },
    {
      "epoch": 0.8733905579399142,
      "grad_norm": 0.15007503651132945,
      "learning_rate": 8.290913417196177e-06,
      "loss": 0.6444,
      "step": 9768
    },
    {
      "epoch": 0.8734799713876967,
      "grad_norm": 0.1720404493543649,
      "learning_rate": 8.279371497555755e-06,
      "loss": 0.6314,
      "step": 9769
    },
    {
      "epoch": 0.8735693848354793,
      "grad_norm": 0.1787905240001979,
      "learning_rate": 8.26783727041367e-06,
      "loss": 0.6477,
      "step": 9770
    },
    {
      "epoch": 0.8736587982832618,
      "grad_norm": 0.15568906682636968,
      "learning_rate": 8.256310736737294e-06,
      "loss": 0.5924,
      "step": 9771
    },
    {
      "epoch": 0.8737482117310443,
      "grad_norm": 0.15599504106405743,
      "learning_rate": 8.244791897493342e-06,
      "loss": 0.6264,
      "step": 9772
    },
    {
      "epoch": 0.8738376251788269,
      "grad_norm": 0.17805435406210748,
      "learning_rate": 8.233280753647887e-06,
      "loss": 0.6576,
      "step": 9773
    },
    {
      "epoch": 0.8739270386266095,
      "grad_norm": 0.15656479168255083,
      "learning_rate": 8.221777306166346e-06,
      "loss": 0.6139,
      "step": 9774
    },
    {
      "epoch": 0.8740164520743919,
      "grad_norm": 0.16057944466756455,
      "learning_rate": 8.210281556013489e-06,
      "loss": 0.6219,
      "step": 9775
    },
    {
      "epoch": 0.8741058655221745,
      "grad_norm": 0.16016640294759968,
      "learning_rate": 8.19879350415349e-06,
      "loss": 0.6321,
      "step": 9776
    },
    {
      "epoch": 0.8741952789699571,
      "grad_norm": 0.14724201569488102,
      "learning_rate": 8.18731315154978e-06,
      "loss": 0.6259,
      "step": 9777
    },
    {
      "epoch": 0.8742846924177397,
      "grad_norm": 0.16109855375992232,
      "learning_rate": 8.175840499165244e-06,
      "loss": 0.6308,
      "step": 9778
    },
    {
      "epoch": 0.8743741058655222,
      "grad_norm": 0.15282558806531085,
      "learning_rate": 8.16437554796209e-06,
      "loss": 0.6295,
      "step": 9779
    },
    {
      "epoch": 0.8744635193133047,
      "grad_norm": 0.1669602142020203,
      "learning_rate": 8.152918298901836e-06,
      "loss": 0.3561,
      "step": 9780
    },
    {
      "epoch": 0.8745529327610873,
      "grad_norm": 0.14666665795961592,
      "learning_rate": 8.141468752945392e-06,
      "loss": 0.6269,
      "step": 9781
    },
    {
      "epoch": 0.8746423462088698,
      "grad_norm": 0.14878183623069788,
      "learning_rate": 8.130026911053045e-06,
      "loss": 0.619,
      "step": 9782
    },
    {
      "epoch": 0.8747317596566524,
      "grad_norm": 0.15372769627411084,
      "learning_rate": 8.118592774184385e-06,
      "loss": 0.6181,
      "step": 9783
    },
    {
      "epoch": 0.8748211731044349,
      "grad_norm": 0.17320801859435314,
      "learning_rate": 8.107166343298377e-06,
      "loss": 0.671,
      "step": 9784
    },
    {
      "epoch": 0.8749105865522174,
      "grad_norm": 0.15593946799160807,
      "learning_rate": 8.095747619353345e-06,
      "loss": 0.6482,
      "step": 9785
    },
    {
      "epoch": 0.875,
      "grad_norm": 0.14734238795343374,
      "learning_rate": 8.084336603306974e-06,
      "loss": 0.6428,
      "step": 9786
    },
    {
      "epoch": 0.8750894134477826,
      "grad_norm": 0.16254967621960484,
      "learning_rate": 8.072933296116303e-06,
      "loss": 0.6529,
      "step": 9787
    },
    {
      "epoch": 0.8751788268955651,
      "grad_norm": 0.1761240561039717,
      "learning_rate": 8.061537698737675e-06,
      "loss": 0.6667,
      "step": 9788
    },
    {
      "epoch": 0.8752682403433476,
      "grad_norm": 0.17364469891043674,
      "learning_rate": 8.05014981212685e-06,
      "loss": 0.6507,
      "step": 9789
    },
    {
      "epoch": 0.8753576537911302,
      "grad_norm": 0.18027180204839038,
      "learning_rate": 8.038769637238907e-06,
      "loss": 0.6308,
      "step": 9790
    },
    {
      "epoch": 0.8754470672389127,
      "grad_norm": 0.16210144183043104,
      "learning_rate": 8.027397175028305e-06,
      "loss": 0.6662,
      "step": 9791
    },
    {
      "epoch": 0.8755364806866953,
      "grad_norm": 0.15699929090112585,
      "learning_rate": 8.016032426448817e-06,
      "loss": 0.6638,
      "step": 9792
    },
    {
      "epoch": 0.8756258941344778,
      "grad_norm": 0.13809538093825297,
      "learning_rate": 8.00467539245362e-06,
      "loss": 0.5985,
      "step": 9793
    },
    {
      "epoch": 0.8757153075822603,
      "grad_norm": 0.18081361196856685,
      "learning_rate": 7.993326073995189e-06,
      "loss": 0.6493,
      "step": 9794
    },
    {
      "epoch": 0.8758047210300429,
      "grad_norm": 0.1545993022097266,
      "learning_rate": 7.981984472025372e-06,
      "loss": 0.6437,
      "step": 9795
    },
    {
      "epoch": 0.8758941344778255,
      "grad_norm": 0.1732366406380805,
      "learning_rate": 7.97065058749541e-06,
      "loss": 0.6847,
      "step": 9796
    },
    {
      "epoch": 0.8759835479256081,
      "grad_norm": 0.15325460976983885,
      "learning_rate": 7.959324421355797e-06,
      "loss": 0.6157,
      "step": 9797
    },
    {
      "epoch": 0.8760729613733905,
      "grad_norm": 0.18371166312512535,
      "learning_rate": 7.948005974556539e-06,
      "loss": 0.6616,
      "step": 9798
    },
    {
      "epoch": 0.8761623748211731,
      "grad_norm": 0.18065894075128971,
      "learning_rate": 7.936695248046822e-06,
      "loss": 0.675,
      "step": 9799
    },
    {
      "epoch": 0.8762517882689557,
      "grad_norm": 0.15522401452069703,
      "learning_rate": 7.925392242775288e-06,
      "loss": 0.6307,
      "step": 9800
    },
    {
      "epoch": 0.8763412017167382,
      "grad_norm": 0.1601438466612859,
      "learning_rate": 7.91409695968991e-06,
      "loss": 0.6359,
      "step": 9801
    },
    {
      "epoch": 0.8764306151645207,
      "grad_norm": 0.17458498659031538,
      "learning_rate": 7.90280939973802e-06,
      "loss": 0.6287,
      "step": 9802
    },
    {
      "epoch": 0.8765200286123033,
      "grad_norm": 0.14987105221625036,
      "learning_rate": 7.891529563866274e-06,
      "loss": 0.5868,
      "step": 9803
    },
    {
      "epoch": 0.8766094420600858,
      "grad_norm": 0.17503521610307102,
      "learning_rate": 7.8802574530207e-06,
      "loss": 0.6506,
      "step": 9804
    },
    {
      "epoch": 0.8766988555078684,
      "grad_norm": 0.14406403076462584,
      "learning_rate": 7.86899306814668e-06,
      "loss": 0.6268,
      "step": 9805
    },
    {
      "epoch": 0.876788268955651,
      "grad_norm": 0.13467090467123508,
      "learning_rate": 7.857736410188953e-06,
      "loss": 0.6249,
      "step": 9806
    },
    {
      "epoch": 0.8768776824034334,
      "grad_norm": 0.16005589467269715,
      "learning_rate": 7.846487480091603e-06,
      "loss": 0.628,
      "step": 9807
    },
    {
      "epoch": 0.876967095851216,
      "grad_norm": 0.14681025505796502,
      "learning_rate": 7.835246278798037e-06,
      "loss": 0.6344,
      "step": 9808
    },
    {
      "epoch": 0.8770565092989986,
      "grad_norm": 0.17740043711163556,
      "learning_rate": 7.824012807251058e-06,
      "loss": 0.6509,
      "step": 9809
    },
    {
      "epoch": 0.8771459227467812,
      "grad_norm": 0.17355358780476918,
      "learning_rate": 7.812787066392825e-06,
      "loss": 0.6304,
      "step": 9810
    },
    {
      "epoch": 0.8772353361945636,
      "grad_norm": 0.17629151760981263,
      "learning_rate": 7.80156905716477e-06,
      "loss": 0.6725,
      "step": 9811
    },
    {
      "epoch": 0.8773247496423462,
      "grad_norm": 0.1540754278911486,
      "learning_rate": 7.790358780507789e-06,
      "loss": 0.643,
      "step": 9812
    },
    {
      "epoch": 0.8774141630901288,
      "grad_norm": 0.18053278400931694,
      "learning_rate": 7.779156237362084e-06,
      "loss": 0.6555,
      "step": 9813
    },
    {
      "epoch": 0.8775035765379113,
      "grad_norm": 0.15933678166360185,
      "learning_rate": 7.767961428667136e-06,
      "loss": 0.6382,
      "step": 9814
    },
    {
      "epoch": 0.8775929899856938,
      "grad_norm": 0.15603122270465777,
      "learning_rate": 7.756774355361884e-06,
      "loss": 0.61,
      "step": 9815
    },
    {
      "epoch": 0.8776824034334764,
      "grad_norm": 0.16573770234194496,
      "learning_rate": 7.745595018384578e-06,
      "loss": 0.6215,
      "step": 9816
    },
    {
      "epoch": 0.8777718168812589,
      "grad_norm": 0.1431236993209054,
      "learning_rate": 7.734423418672786e-06,
      "loss": 0.6107,
      "step": 9817
    },
    {
      "epoch": 0.8778612303290415,
      "grad_norm": 0.16078825047877052,
      "learning_rate": 7.723259557163487e-06,
      "loss": 0.6417,
      "step": 9818
    },
    {
      "epoch": 0.8779506437768241,
      "grad_norm": 0.1887874237619414,
      "learning_rate": 7.71210343479295e-06,
      "loss": 0.666,
      "step": 9819
    },
    {
      "epoch": 0.8780400572246065,
      "grad_norm": 0.18619900115203095,
      "learning_rate": 7.70095505249685e-06,
      "loss": 0.676,
      "step": 9820
    },
    {
      "epoch": 0.8781294706723891,
      "grad_norm": 0.14221987098942618,
      "learning_rate": 7.689814411210195e-06,
      "loss": 0.5989,
      "step": 9821
    },
    {
      "epoch": 0.8782188841201717,
      "grad_norm": 0.17415840080333722,
      "learning_rate": 7.678681511867304e-06,
      "loss": 0.6313,
      "step": 9822
    },
    {
      "epoch": 0.8783082975679543,
      "grad_norm": 0.16965666004299562,
      "learning_rate": 7.667556355401906e-06,
      "loss": 0.681,
      "step": 9823
    },
    {
      "epoch": 0.8783977110157367,
      "grad_norm": 0.17794344870624046,
      "learning_rate": 7.656438942747058e-06,
      "loss": 0.6969,
      "step": 9824
    },
    {
      "epoch": 0.8784871244635193,
      "grad_norm": 0.18328208785082636,
      "learning_rate": 7.645329274835122e-06,
      "loss": 0.6684,
      "step": 9825
    },
    {
      "epoch": 0.8785765379113019,
      "grad_norm": 0.16664076177877504,
      "learning_rate": 7.634227352597901e-06,
      "loss": 0.6425,
      "step": 9826
    },
    {
      "epoch": 0.8786659513590844,
      "grad_norm": 0.15388503979945348,
      "learning_rate": 7.623133176966491e-06,
      "loss": 0.6272,
      "step": 9827
    },
    {
      "epoch": 0.878755364806867,
      "grad_norm": 0.160306049236706,
      "learning_rate": 7.612046748871327e-06,
      "loss": 0.6502,
      "step": 9828
    },
    {
      "epoch": 0.8788447782546495,
      "grad_norm": 0.18003906529804045,
      "learning_rate": 7.600968069242232e-06,
      "loss": 0.6745,
      "step": 9829
    },
    {
      "epoch": 0.878934191702432,
      "grad_norm": 0.17022140691642826,
      "learning_rate": 7.589897139008362e-06,
      "loss": 0.6537,
      "step": 9830
    },
    {
      "epoch": 0.8790236051502146,
      "grad_norm": 0.1626883852695079,
      "learning_rate": 7.578833959098209e-06,
      "loss": 0.6282,
      "step": 9831
    },
    {
      "epoch": 0.8791130185979972,
      "grad_norm": 0.16244105848849266,
      "learning_rate": 7.567778530439606e-06,
      "loss": 0.6405,
      "step": 9832
    },
    {
      "epoch": 0.8792024320457796,
      "grad_norm": 0.17420826116574484,
      "learning_rate": 7.5567308539598256e-06,
      "loss": 0.645,
      "step": 9833
    },
    {
      "epoch": 0.8792918454935622,
      "grad_norm": 0.1727485947765454,
      "learning_rate": 7.545690930585381e-06,
      "loss": 0.3602,
      "step": 9834
    },
    {
      "epoch": 0.8793812589413448,
      "grad_norm": 0.15711810659631797,
      "learning_rate": 7.534658761242164e-06,
      "loss": 0.6859,
      "step": 9835
    },
    {
      "epoch": 0.8794706723891274,
      "grad_norm": 0.15326119337100114,
      "learning_rate": 7.52363434685548e-06,
      "loss": 0.6532,
      "step": 9836
    },
    {
      "epoch": 0.8795600858369099,
      "grad_norm": 0.14665057804359802,
      "learning_rate": 7.512617688349866e-06,
      "loss": 0.6152,
      "step": 9837
    },
    {
      "epoch": 0.8796494992846924,
      "grad_norm": 0.15461217875930705,
      "learning_rate": 7.501608786649328e-06,
      "loss": 0.6322,
      "step": 9838
    },
    {
      "epoch": 0.879738912732475,
      "grad_norm": 0.17671925813172903,
      "learning_rate": 7.490607642677139e-06,
      "loss": 0.6524,
      "step": 9839
    },
    {
      "epoch": 0.8798283261802575,
      "grad_norm": 0.16085886686997225,
      "learning_rate": 7.479614257355971e-06,
      "loss": 0.65,
      "step": 9840
    },
    {
      "epoch": 0.8799177396280401,
      "grad_norm": 0.17120485984559525,
      "learning_rate": 7.468628631607822e-06,
      "loss": 0.6568,
      "step": 9841
    },
    {
      "epoch": 0.8800071530758226,
      "grad_norm": 0.17015074823356602,
      "learning_rate": 7.45765076635404e-06,
      "loss": 0.6539,
      "step": 9842
    },
    {
      "epoch": 0.8800965665236051,
      "grad_norm": 0.15203405283262197,
      "learning_rate": 7.446680662515315e-06,
      "loss": 0.6353,
      "step": 9843
    },
    {
      "epoch": 0.8801859799713877,
      "grad_norm": 0.16715978533987536,
      "learning_rate": 7.435718321011731e-06,
      "loss": 0.5697,
      "step": 9844
    },
    {
      "epoch": 0.8802753934191703,
      "grad_norm": 0.15331346453139597,
      "learning_rate": 7.424763742762642e-06,
      "loss": 0.6679,
      "step": 9845
    },
    {
      "epoch": 0.8803648068669528,
      "grad_norm": 0.17910264563460424,
      "learning_rate": 7.41381692868679e-06,
      "loss": 0.6778,
      "step": 9846
    },
    {
      "epoch": 0.8804542203147353,
      "grad_norm": 0.1539157442170933,
      "learning_rate": 7.402877879702341e-06,
      "loss": 0.6335,
      "step": 9847
    },
    {
      "epoch": 0.8805436337625179,
      "grad_norm": 0.16719195970937636,
      "learning_rate": 7.391946596726673e-06,
      "loss": 0.643,
      "step": 9848
    },
    {
      "epoch": 0.8806330472103004,
      "grad_norm": 0.17011422180601538,
      "learning_rate": 7.381023080676608e-06,
      "loss": 0.6003,
      "step": 9849
    },
    {
      "epoch": 0.880722460658083,
      "grad_norm": 0.1513399841307319,
      "learning_rate": 7.3701073324682905e-06,
      "loss": 0.637,
      "step": 9850
    },
    {
      "epoch": 0.8808118741058655,
      "grad_norm": 0.151564265109668,
      "learning_rate": 7.3591993530171984e-06,
      "loss": 0.6074,
      "step": 9851
    },
    {
      "epoch": 0.880901287553648,
      "grad_norm": 0.17402326633703183,
      "learning_rate": 7.348299143238157e-06,
      "loss": 0.6226,
      "step": 9852
    },
    {
      "epoch": 0.8809907010014306,
      "grad_norm": 0.16971678711020305,
      "learning_rate": 7.33740670404538e-06,
      "loss": 0.6208,
      "step": 9853
    },
    {
      "epoch": 0.8810801144492132,
      "grad_norm": 0.15195683819430938,
      "learning_rate": 7.326522036352401e-06,
      "loss": 0.6322,
      "step": 9854
    },
    {
      "epoch": 0.8811695278969958,
      "grad_norm": 0.1643476313307496,
      "learning_rate": 7.315645141072103e-06,
      "loss": 0.6252,
      "step": 9855
    },
    {
      "epoch": 0.8812589413447782,
      "grad_norm": 0.15298046444440838,
      "learning_rate": 7.30477601911671e-06,
      "loss": 0.6483,
      "step": 9856
    },
    {
      "epoch": 0.8813483547925608,
      "grad_norm": 0.1403618770248574,
      "learning_rate": 7.293914671397795e-06,
      "loss": 0.6135,
      "step": 9857
    },
    {
      "epoch": 0.8814377682403434,
      "grad_norm": 0.1522571013618603,
      "learning_rate": 7.283061098826294e-06,
      "loss": 0.632,
      "step": 9858
    },
    {
      "epoch": 0.8815271816881259,
      "grad_norm": 0.17876980955586202,
      "learning_rate": 7.272215302312502e-06,
      "loss": 0.6378,
      "step": 9859
    },
    {
      "epoch": 0.8816165951359084,
      "grad_norm": 0.1592130414068431,
      "learning_rate": 7.261377282766002e-06,
      "loss": 0.6732,
      "step": 9860
    },
    {
      "epoch": 0.881706008583691,
      "grad_norm": 0.1576608185026109,
      "learning_rate": 7.250547041095812e-06,
      "loss": 0.6166,
      "step": 9861
    },
    {
      "epoch": 0.8817954220314735,
      "grad_norm": 0.16259268346974026,
      "learning_rate": 7.239724578210216e-06,
      "loss": 0.6268,
      "step": 9862
    },
    {
      "epoch": 0.8818848354792561,
      "grad_norm": 0.16777396412580672,
      "learning_rate": 7.2289098950168995e-06,
      "loss": 0.6059,
      "step": 9863
    },
    {
      "epoch": 0.8819742489270386,
      "grad_norm": 0.1438694390681535,
      "learning_rate": 7.2181029924228814e-06,
      "loss": 0.6274,
      "step": 9864
    },
    {
      "epoch": 0.8820636623748211,
      "grad_norm": 0.16652161431565307,
      "learning_rate": 7.207303871334492e-06,
      "loss": 0.6629,
      "step": 9865
    },
    {
      "epoch": 0.8821530758226037,
      "grad_norm": 0.16557975911174933,
      "learning_rate": 7.1965125326574735e-06,
      "loss": 0.6457,
      "step": 9866
    },
    {
      "epoch": 0.8822424892703863,
      "grad_norm": 0.16209336008656638,
      "learning_rate": 7.185728977296857e-06,
      "loss": 0.6454,
      "step": 9867
    },
    {
      "epoch": 0.8823319027181689,
      "grad_norm": 0.14219260094233888,
      "learning_rate": 7.174953206157064e-06,
      "loss": 0.624,
      "step": 9868
    },
    {
      "epoch": 0.8824213161659513,
      "grad_norm": 0.1650318306815556,
      "learning_rate": 7.16418522014185e-06,
      "loss": 0.6293,
      "step": 9869
    },
    {
      "epoch": 0.8825107296137339,
      "grad_norm": 0.1666012671888912,
      "learning_rate": 7.153425020154314e-06,
      "loss": 0.6498,
      "step": 9870
    },
    {
      "epoch": 0.8826001430615165,
      "grad_norm": 0.1712021987478201,
      "learning_rate": 7.142672607096878e-06,
      "loss": 0.6048,
      "step": 9871
    },
    {
      "epoch": 0.882689556509299,
      "grad_norm": 0.15324282046301693,
      "learning_rate": 7.1319279818713445e-06,
      "loss": 0.6067,
      "step": 9872
    },
    {
      "epoch": 0.8827789699570815,
      "grad_norm": 0.16703334177195137,
      "learning_rate": 7.121191145378858e-06,
      "loss": 0.6737,
      "step": 9873
    },
    {
      "epoch": 0.8828683834048641,
      "grad_norm": 0.16396334993965533,
      "learning_rate": 7.110462098519899e-06,
      "loss": 0.6242,
      "step": 9874
    },
    {
      "epoch": 0.8829577968526466,
      "grad_norm": 0.16598455025853082,
      "learning_rate": 7.099740842194313e-06,
      "loss": 0.6205,
      "step": 9875
    },
    {
      "epoch": 0.8830472103004292,
      "grad_norm": 0.17072742219873516,
      "learning_rate": 7.08902737730125e-06,
      "loss": 0.672,
      "step": 9876
    },
    {
      "epoch": 0.8831366237482118,
      "grad_norm": 0.16892961249332744,
      "learning_rate": 7.078321704739266e-06,
      "loss": 0.3446,
      "step": 9877
    },
    {
      "epoch": 0.8832260371959942,
      "grad_norm": 0.1762472627112892,
      "learning_rate": 7.067623825406222e-06,
      "loss": 0.6624,
      "step": 9878
    },
    {
      "epoch": 0.8833154506437768,
      "grad_norm": 0.1496909527671363,
      "learning_rate": 7.056933740199323e-06,
      "loss": 0.645,
      "step": 9879
    },
    {
      "epoch": 0.8834048640915594,
      "grad_norm": 0.16757960512140269,
      "learning_rate": 7.0462514500151285e-06,
      "loss": 0.6525,
      "step": 9880
    },
    {
      "epoch": 0.883494277539342,
      "grad_norm": 0.16533376450520104,
      "learning_rate": 7.035576955749601e-06,
      "loss": 0.6324,
      "step": 9881
    },
    {
      "epoch": 0.8835836909871244,
      "grad_norm": 0.16928858713294054,
      "learning_rate": 7.0249102582979455e-06,
      "loss": 0.6464,
      "step": 9882
    },
    {
      "epoch": 0.883673104434907,
      "grad_norm": 0.15991016444944856,
      "learning_rate": 7.01425135855478e-06,
      "loss": 0.6615,
      "step": 9883
    },
    {
      "epoch": 0.8837625178826896,
      "grad_norm": 0.15766600248325846,
      "learning_rate": 7.003600257414067e-06,
      "loss": 0.621,
      "step": 9884
    },
    {
      "epoch": 0.8838519313304721,
      "grad_norm": 0.16807782435992885,
      "learning_rate": 6.99295695576907e-06,
      "loss": 0.6411,
      "step": 9885
    },
    {
      "epoch": 0.8839413447782547,
      "grad_norm": 0.1719444313629979,
      "learning_rate": 6.9823214545124525e-06,
      "loss": 0.6726,
      "step": 9886
    },
    {
      "epoch": 0.8840307582260372,
      "grad_norm": 0.1531568933006179,
      "learning_rate": 6.971693754536201e-06,
      "loss": 0.6577,
      "step": 9887
    },
    {
      "epoch": 0.8841201716738197,
      "grad_norm": 0.16796694165537096,
      "learning_rate": 6.961073856731648e-06,
      "loss": 0.6344,
      "step": 9888
    },
    {
      "epoch": 0.8842095851216023,
      "grad_norm": 0.17416579050464717,
      "learning_rate": 6.950461761989458e-06,
      "loss": 0.6412,
      "step": 9889
    },
    {
      "epoch": 0.8842989985693849,
      "grad_norm": 0.16773254247848723,
      "learning_rate": 6.9398574711996844e-06,
      "loss": 0.6519,
      "step": 9890
    },
    {
      "epoch": 0.8843884120171673,
      "grad_norm": 0.15972714112433514,
      "learning_rate": 6.929260985251662e-06,
      "loss": 0.6074,
      "step": 9891
    },
    {
      "epoch": 0.8844778254649499,
      "grad_norm": 0.17109829076847466,
      "learning_rate": 6.918672305034124e-06,
      "loss": 0.6681,
      "step": 9892
    },
    {
      "epoch": 0.8845672389127325,
      "grad_norm": 0.17601714135266014,
      "learning_rate": 6.908091431435138e-06,
      "loss": 0.6669,
      "step": 9893
    },
    {
      "epoch": 0.884656652360515,
      "grad_norm": 0.17293950667510388,
      "learning_rate": 6.897518365342059e-06,
      "loss": 0.6708,
      "step": 9894
    },
    {
      "epoch": 0.8847460658082976,
      "grad_norm": 0.14683933948841915,
      "learning_rate": 6.8869531076417136e-06,
      "loss": 0.6223,
      "step": 9895
    },
    {
      "epoch": 0.8848354792560801,
      "grad_norm": 0.16889807735095252,
      "learning_rate": 6.876395659220148e-06,
      "loss": 0.6229,
      "step": 9896
    },
    {
      "epoch": 0.8849248927038627,
      "grad_norm": 0.1679968890516081,
      "learning_rate": 6.865846020962807e-06,
      "loss": 0.6274,
      "step": 9897
    },
    {
      "epoch": 0.8850143061516452,
      "grad_norm": 0.16216228913553032,
      "learning_rate": 6.855304193754497e-06,
      "loss": 0.6369,
      "step": 9898
    },
    {
      "epoch": 0.8851037195994278,
      "grad_norm": 0.15914542234387793,
      "learning_rate": 6.844770178479321e-06,
      "loss": 0.6238,
      "step": 9899
    },
    {
      "epoch": 0.8851931330472103,
      "grad_norm": 0.17981087230551573,
      "learning_rate": 6.834243976020771e-06,
      "loss": 0.6551,
      "step": 9900
    },
    {
      "epoch": 0.8852825464949928,
      "grad_norm": 0.16155207590802997,
      "learning_rate": 6.823725587261654e-06,
      "loss": 0.6502,
      "step": 9901
    },
    {
      "epoch": 0.8853719599427754,
      "grad_norm": 0.15896047800421909,
      "learning_rate": 6.813215013084151e-06,
      "loss": 0.6293,
      "step": 9902
    },
    {
      "epoch": 0.885461373390558,
      "grad_norm": 0.1574631378536625,
      "learning_rate": 6.8027122543697586e-06,
      "loss": 0.6147,
      "step": 9903
    },
    {
      "epoch": 0.8855507868383404,
      "grad_norm": 0.1623226550906766,
      "learning_rate": 6.7922173119993606e-06,
      "loss": 0.6361,
      "step": 9904
    },
    {
      "epoch": 0.885640200286123,
      "grad_norm": 0.15684961788234253,
      "learning_rate": 6.781730186853108e-06,
      "loss": 0.6099,
      "step": 9905
    },
    {
      "epoch": 0.8857296137339056,
      "grad_norm": 0.14831192765229634,
      "learning_rate": 6.771250879810565e-06,
      "loss": 0.6246,
      "step": 9906
    },
    {
      "epoch": 0.8858190271816881,
      "grad_norm": 0.15779364280060104,
      "learning_rate": 6.760779391750627e-06,
      "loss": 0.6419,
      "step": 9907
    },
    {
      "epoch": 0.8859084406294707,
      "grad_norm": 0.15431575410066692,
      "learning_rate": 6.750315723551492e-06,
      "loss": 0.6455,
      "step": 9908
    },
    {
      "epoch": 0.8859978540772532,
      "grad_norm": 0.13350155023619314,
      "learning_rate": 6.739859876090793e-06,
      "loss": 0.601,
      "step": 9909
    },
    {
      "epoch": 0.8860872675250357,
      "grad_norm": 0.16314763306589342,
      "learning_rate": 6.729411850245404e-06,
      "loss": 0.6116,
      "step": 9910
    },
    {
      "epoch": 0.8861766809728183,
      "grad_norm": 0.1486039034358353,
      "learning_rate": 6.718971646891603e-06,
      "loss": 0.604,
      "step": 9911
    },
    {
      "epoch": 0.8862660944206009,
      "grad_norm": 0.1543922461612202,
      "learning_rate": 6.708539266905001e-06,
      "loss": 0.6036,
      "step": 9912
    },
    {
      "epoch": 0.8863555078683834,
      "grad_norm": 0.1498989268153531,
      "learning_rate": 6.6981147111605305e-06,
      "loss": 0.6288,
      "step": 9913
    },
    {
      "epoch": 0.8864449213161659,
      "grad_norm": 0.15506724968003593,
      "learning_rate": 6.687697980532504e-06,
      "loss": 0.6251,
      "step": 9914
    },
    {
      "epoch": 0.8865343347639485,
      "grad_norm": 0.13763029788753134,
      "learning_rate": 6.677289075894544e-06,
      "loss": 0.6216,
      "step": 9915
    },
    {
      "epoch": 0.8866237482117311,
      "grad_norm": 0.16958403217132986,
      "learning_rate": 6.666887998119653e-06,
      "loss": 0.6588,
      "step": 9916
    },
    {
      "epoch": 0.8867131616595136,
      "grad_norm": 0.15301551516870984,
      "learning_rate": 6.656494748080144e-06,
      "loss": 0.6292,
      "step": 9917
    },
    {
      "epoch": 0.8868025751072961,
      "grad_norm": 0.1867820698425684,
      "learning_rate": 6.646109326647709e-06,
      "loss": 0.6916,
      "step": 9918
    },
    {
      "epoch": 0.8868919885550787,
      "grad_norm": 0.1484946699122285,
      "learning_rate": 6.635731734693329e-06,
      "loss": 0.6392,
      "step": 9919
    },
    {
      "epoch": 0.8869814020028612,
      "grad_norm": 0.1429401586565167,
      "learning_rate": 6.625361973087363e-06,
      "loss": 0.6125,
      "step": 9920
    },
    {
      "epoch": 0.8870708154506438,
      "grad_norm": 0.15569244469769944,
      "learning_rate": 6.6150000426995486e-06,
      "loss": 0.6236,
      "step": 9921
    },
    {
      "epoch": 0.8871602288984263,
      "grad_norm": 0.17159610440880133,
      "learning_rate": 6.604645944398858e-06,
      "loss": 0.6795,
      "step": 9922
    },
    {
      "epoch": 0.8872496423462088,
      "grad_norm": 0.13869475692407657,
      "learning_rate": 6.594299679053739e-06,
      "loss": 0.6253,
      "step": 9923
    },
    {
      "epoch": 0.8873390557939914,
      "grad_norm": 0.16188660583566358,
      "learning_rate": 6.583961247531911e-06,
      "loss": 0.6309,
      "step": 9924
    },
    {
      "epoch": 0.887428469241774,
      "grad_norm": 0.15196363189785633,
      "learning_rate": 6.573630650700424e-06,
      "loss": 0.6182,
      "step": 9925
    },
    {
      "epoch": 0.8875178826895566,
      "grad_norm": 0.15558125922704452,
      "learning_rate": 6.563307889425707e-06,
      "loss": 0.6122,
      "step": 9926
    },
    {
      "epoch": 0.887607296137339,
      "grad_norm": 0.1887281364045957,
      "learning_rate": 6.5529929645735235e-06,
      "loss": 0.6257,
      "step": 9927
    },
    {
      "epoch": 0.8876967095851216,
      "grad_norm": 0.16847541841464825,
      "learning_rate": 6.542685877008959e-06,
      "loss": 0.6443,
      "step": 9928
    },
    {
      "epoch": 0.8877861230329042,
      "grad_norm": 0.17233887129563571,
      "learning_rate": 6.532386627596454e-06,
      "loss": 0.6687,
      "step": 9929
    },
    {
      "epoch": 0.8878755364806867,
      "grad_norm": 0.15145371333470367,
      "learning_rate": 6.522095217199797e-06,
      "loss": 0.6453,
      "step": 9930
    },
    {
      "epoch": 0.8879649499284692,
      "grad_norm": 0.16089017877656533,
      "learning_rate": 6.511811646682131e-06,
      "loss": 0.6591,
      "step": 9931
    },
    {
      "epoch": 0.8880543633762518,
      "grad_norm": 0.16879239045701108,
      "learning_rate": 6.501535916905932e-06,
      "loss": 0.6235,
      "step": 9932
    },
    {
      "epoch": 0.8881437768240343,
      "grad_norm": 0.15579916141189537,
      "learning_rate": 6.491268028732977e-06,
      "loss": 0.6444,
      "step": 9933
    },
    {
      "epoch": 0.8882331902718169,
      "grad_norm": 0.15485684449354967,
      "learning_rate": 6.4810079830244455e-06,
      "loss": 0.6208,
      "step": 9934
    },
    {
      "epoch": 0.8883226037195995,
      "grad_norm": 0.1701407632181861,
      "learning_rate": 6.470755780640847e-06,
      "loss": 0.6682,
      "step": 9935
    },
    {
      "epoch": 0.8884120171673819,
      "grad_norm": 0.1627179273144868,
      "learning_rate": 6.460511422441984e-06,
      "loss": 0.6303,
      "step": 9936
    },
    {
      "epoch": 0.8885014306151645,
      "grad_norm": 0.1655934645868417,
      "learning_rate": 6.450274909287068e-06,
      "loss": 0.6572,
      "step": 9937
    },
    {
      "epoch": 0.8885908440629471,
      "grad_norm": 0.1613707914226172,
      "learning_rate": 6.440046242034625e-06,
      "loss": 0.6418,
      "step": 9938
    },
    {
      "epoch": 0.8886802575107297,
      "grad_norm": 0.17205834286842886,
      "learning_rate": 6.429825421542512e-06,
      "loss": 0.6283,
      "step": 9939
    },
    {
      "epoch": 0.8887696709585121,
      "grad_norm": 0.17183269119689454,
      "learning_rate": 6.4196124486679225e-06,
      "loss": 0.6435,
      "step": 9940
    },
    {
      "epoch": 0.8888590844062947,
      "grad_norm": 0.1668498362510855,
      "learning_rate": 6.409407324267447e-06,
      "loss": 0.6372,
      "step": 9941
    },
    {
      "epoch": 0.8889484978540773,
      "grad_norm": 0.15079516539930884,
      "learning_rate": 6.399210049196924e-06,
      "loss": 0.6129,
      "step": 9942
    },
    {
      "epoch": 0.8890379113018598,
      "grad_norm": 0.14052101332354838,
      "learning_rate": 6.3890206243116255e-06,
      "loss": 0.6218,
      "step": 9943
    },
    {
      "epoch": 0.8891273247496424,
      "grad_norm": 0.16086667153619036,
      "learning_rate": 6.378839050466101e-06,
      "loss": 0.663,
      "step": 9944
    },
    {
      "epoch": 0.8892167381974249,
      "grad_norm": 0.14831754010444043,
      "learning_rate": 6.36866532851429e-06,
      "loss": 0.6261,
      "step": 9945
    },
    {
      "epoch": 0.8893061516452074,
      "grad_norm": 0.17518398162719004,
      "learning_rate": 6.3584994593094305e-06,
      "loss": 0.695,
      "step": 9946
    },
    {
      "epoch": 0.88939556509299,
      "grad_norm": 0.1647401708613811,
      "learning_rate": 6.348341443704153e-06,
      "loss": 0.6606,
      "step": 9947
    },
    {
      "epoch": 0.8894849785407726,
      "grad_norm": 0.1550435979742353,
      "learning_rate": 6.338191282550354e-06,
      "loss": 0.6591,
      "step": 9948
    },
    {
      "epoch": 0.889574391988555,
      "grad_norm": 0.178399357683823,
      "learning_rate": 6.328048976699352e-06,
      "loss": 0.6825,
      "step": 9949
    },
    {
      "epoch": 0.8896638054363376,
      "grad_norm": 0.1500528200723321,
      "learning_rate": 6.317914527001745e-06,
      "loss": 0.6143,
      "step": 9950
    },
    {
      "epoch": 0.8897532188841202,
      "grad_norm": 0.15769763890702504,
      "learning_rate": 6.307787934307507e-06,
      "loss": 0.675,
      "step": 9951
    },
    {
      "epoch": 0.8898426323319027,
      "grad_norm": 0.17584564636244548,
      "learning_rate": 6.297669199465961e-06,
      "loss": 0.6397,
      "step": 9952
    },
    {
      "epoch": 0.8899320457796852,
      "grad_norm": 0.15251635659647897,
      "learning_rate": 6.287558323325715e-06,
      "loss": 0.6075,
      "step": 9953
    },
    {
      "epoch": 0.8900214592274678,
      "grad_norm": 0.16138238911180885,
      "learning_rate": 6.277455306734781e-06,
      "loss": 0.6382,
      "step": 9954
    },
    {
      "epoch": 0.8901108726752504,
      "grad_norm": 0.1707185040872951,
      "learning_rate": 6.267360150540491e-06,
      "loss": 0.6324,
      "step": 9955
    },
    {
      "epoch": 0.8902002861230329,
      "grad_norm": 0.16359540419843413,
      "learning_rate": 6.2572728555894796e-06,
      "loss": 0.6233,
      "step": 9956
    },
    {
      "epoch": 0.8902896995708155,
      "grad_norm": 0.16179330601253974,
      "learning_rate": 6.247193422727804e-06,
      "loss": 0.6409,
      "step": 9957
    },
    {
      "epoch": 0.890379113018598,
      "grad_norm": 0.1522899598747796,
      "learning_rate": 6.237121852800798e-06,
      "loss": 0.6423,
      "step": 9958
    },
    {
      "epoch": 0.8904685264663805,
      "grad_norm": 0.14902728671496976,
      "learning_rate": 6.227058146653131e-06,
      "loss": 0.6096,
      "step": 9959
    },
    {
      "epoch": 0.8905579399141631,
      "grad_norm": 0.16486310123976403,
      "learning_rate": 6.217002305128849e-06,
      "loss": 0.6318,
      "step": 9960
    },
    {
      "epoch": 0.8906473533619457,
      "grad_norm": 0.16426335405800122,
      "learning_rate": 6.206954329071335e-06,
      "loss": 0.6538,
      "step": 9961
    },
    {
      "epoch": 0.8907367668097281,
      "grad_norm": 0.16181948601355825,
      "learning_rate": 6.19691421932328e-06,
      "loss": 0.6003,
      "step": 9962
    },
    {
      "epoch": 0.8908261802575107,
      "grad_norm": 0.15960769881621673,
      "learning_rate": 6.186881976726733e-06,
      "loss": 0.6741,
      "step": 9963
    },
    {
      "epoch": 0.8909155937052933,
      "grad_norm": 0.1751811560202669,
      "learning_rate": 6.17685760212311e-06,
      "loss": 0.6719,
      "step": 9964
    },
    {
      "epoch": 0.8910050071530758,
      "grad_norm": 0.152763240434659,
      "learning_rate": 6.166841096353126e-06,
      "loss": 0.619,
      "step": 9965
    },
    {
      "epoch": 0.8910944206008584,
      "grad_norm": 0.14952421472419491,
      "learning_rate": 6.1568324602568675e-06,
      "loss": 0.6207,
      "step": 9966
    },
    {
      "epoch": 0.8911838340486409,
      "grad_norm": 0.15319271995759087,
      "learning_rate": 6.146831694673727e-06,
      "loss": 0.6174,
      "step": 9967
    },
    {
      "epoch": 0.8912732474964234,
      "grad_norm": 0.16263108862382847,
      "learning_rate": 6.136838800442457e-06,
      "loss": 0.6398,
      "step": 9968
    },
    {
      "epoch": 0.891362660944206,
      "grad_norm": 0.16069715124643075,
      "learning_rate": 6.126853778401187e-06,
      "loss": 0.6182,
      "step": 9969
    },
    {
      "epoch": 0.8914520743919886,
      "grad_norm": 0.15439335887250819,
      "learning_rate": 6.11687662938728e-06,
      "loss": 0.607,
      "step": 9970
    },
    {
      "epoch": 0.891541487839771,
      "grad_norm": 0.16066424451284786,
      "learning_rate": 6.1069073542375675e-06,
      "loss": 0.6504,
      "step": 9971
    },
    {
      "epoch": 0.8916309012875536,
      "grad_norm": 0.14409706298216848,
      "learning_rate": 6.0969459537881575e-06,
      "loss": 0.6,
      "step": 9972
    },
    {
      "epoch": 0.8917203147353362,
      "grad_norm": 0.1591341693804305,
      "learning_rate": 6.086992428874472e-06,
      "loss": 0.6319,
      "step": 9973
    },
    {
      "epoch": 0.8918097281831188,
      "grad_norm": 0.1812858016786826,
      "learning_rate": 6.077046780331308e-06,
      "loss": 0.669,
      "step": 9974
    },
    {
      "epoch": 0.8918991416309013,
      "grad_norm": 0.1571480571381325,
      "learning_rate": 6.06710900899281e-06,
      "loss": 0.6292,
      "step": 9975
    },
    {
      "epoch": 0.8919885550786838,
      "grad_norm": 0.15384609721554648,
      "learning_rate": 6.057179115692435e-06,
      "loss": 0.6516,
      "step": 9976
    },
    {
      "epoch": 0.8920779685264664,
      "grad_norm": 0.16297845529937618,
      "learning_rate": 6.047257101262982e-06,
      "loss": 0.6472,
      "step": 9977
    },
    {
      "epoch": 0.8921673819742489,
      "grad_norm": 0.16952939328698274,
      "learning_rate": 6.037342966536619e-06,
      "loss": 0.6421,
      "step": 9978
    },
    {
      "epoch": 0.8922567954220315,
      "grad_norm": 0.1554245964328913,
      "learning_rate": 6.027436712344814e-06,
      "loss": 0.6577,
      "step": 9979
    },
    {
      "epoch": 0.892346208869814,
      "grad_norm": 0.15066315938149222,
      "learning_rate": 6.017538339518403e-06,
      "loss": 0.6615,
      "step": 9980
    },
    {
      "epoch": 0.8924356223175965,
      "grad_norm": 0.14791673028034089,
      "learning_rate": 6.007647848887565e-06,
      "loss": 0.6272,
      "step": 9981
    },
    {
      "epoch": 0.8925250357653791,
      "grad_norm": 0.15550833929649033,
      "learning_rate": 5.997765241281783e-06,
      "loss": 0.5705,
      "step": 9982
    },
    {
      "epoch": 0.8926144492131617,
      "grad_norm": 0.1634439985368776,
      "learning_rate": 5.987890517529893e-06,
      "loss": 0.6849,
      "step": 9983
    },
    {
      "epoch": 0.8927038626609443,
      "grad_norm": 0.1546765520338763,
      "learning_rate": 5.978023678460099e-06,
      "loss": 0.6168,
      "step": 9984
    },
    {
      "epoch": 0.8927932761087267,
      "grad_norm": 0.15577026154622076,
      "learning_rate": 5.968164724899894e-06,
      "loss": 0.6112,
      "step": 9985
    },
    {
      "epoch": 0.8928826895565093,
      "grad_norm": 0.1589769152955588,
      "learning_rate": 5.958313657676173e-06,
      "loss": 0.651,
      "step": 9986
    },
    {
      "epoch": 0.8929721030042919,
      "grad_norm": 0.16533111892258398,
      "learning_rate": 5.948470477615098e-06,
      "loss": 0.6589,
      "step": 9987
    },
    {
      "epoch": 0.8930615164520744,
      "grad_norm": 0.176236835685073,
      "learning_rate": 5.938635185542218e-06,
      "loss": 0.6336,
      "step": 9988
    },
    {
      "epoch": 0.8931509298998569,
      "grad_norm": 0.17224230266013318,
      "learning_rate": 5.928807782282431e-06,
      "loss": 0.6366,
      "step": 9989
    },
    {
      "epoch": 0.8932403433476395,
      "grad_norm": 0.17262603681063624,
      "learning_rate": 5.918988268659898e-06,
      "loss": 0.6593,
      "step": 9990
    },
    {
      "epoch": 0.893329756795422,
      "grad_norm": 0.14514250356418582,
      "learning_rate": 5.909176645498193e-06,
      "loss": 0.6196,
      "step": 9991
    },
    {
      "epoch": 0.8934191702432046,
      "grad_norm": 0.17583334145045618,
      "learning_rate": 5.899372913620238e-06,
      "loss": 0.6971,
      "step": 9992
    },
    {
      "epoch": 0.8935085836909872,
      "grad_norm": 0.15785738984913947,
      "learning_rate": 5.889577073848207e-06,
      "loss": 0.661,
      "step": 9993
    },
    {
      "epoch": 0.8935979971387696,
      "grad_norm": 0.17597912194234797,
      "learning_rate": 5.879789127003699e-06,
      "loss": 0.6521,
      "step": 9994
    },
    {
      "epoch": 0.8936874105865522,
      "grad_norm": 0.14858437464074972,
      "learning_rate": 5.870009073907623e-06,
      "loss": 0.6251,
      "step": 9995
    },
    {
      "epoch": 0.8937768240343348,
      "grad_norm": 0.17282760355648707,
      "learning_rate": 5.86023691538019e-06,
      "loss": 0.6474,
      "step": 9996
    },
    {
      "epoch": 0.8938662374821174,
      "grad_norm": 0.170471302199649,
      "learning_rate": 5.850472652240991e-06,
      "loss": 0.6531,
      "step": 9997
    },
    {
      "epoch": 0.8939556509298998,
      "grad_norm": 0.15120132486120527,
      "learning_rate": 5.840716285308956e-06,
      "loss": 0.6341,
      "step": 9998
    },
    {
      "epoch": 0.8940450643776824,
      "grad_norm": 0.16553984977101624,
      "learning_rate": 5.8309678154023216e-06,
      "loss": 0.6276,
      "step": 9999
    },
    {
      "epoch": 0.894134477825465,
      "grad_norm": 0.17281726217687746,
      "learning_rate": 5.821227243338712e-06,
      "loss": 0.6565,
      "step": 10000
    },
    {
      "epoch": 0.8942238912732475,
      "grad_norm": 0.18103800048792326,
      "learning_rate": 5.811494569935016e-06,
      "loss": 0.6537,
      "step": 10001
    },
    {
      "epoch": 0.89431330472103,
      "grad_norm": 0.16223157482919162,
      "learning_rate": 5.801769796007517e-06,
      "loss": 0.6439,
      "step": 10002
    },
    {
      "epoch": 0.8944027181688126,
      "grad_norm": 0.16370555130452133,
      "learning_rate": 5.792052922371826e-06,
      "loss": 0.6577,
      "step": 10003
    },
    {
      "epoch": 0.8944921316165951,
      "grad_norm": 0.16808128562275,
      "learning_rate": 5.782343949842894e-06,
      "loss": 0.6603,
      "step": 10004
    },
    {
      "epoch": 0.8945815450643777,
      "grad_norm": 0.15774012632936898,
      "learning_rate": 5.7726428792349574e-06,
      "loss": 0.6572,
      "step": 10005
    },
    {
      "epoch": 0.8946709585121603,
      "grad_norm": 0.16760150551024378,
      "learning_rate": 5.762949711361698e-06,
      "loss": 0.6564,
      "step": 10006
    },
    {
      "epoch": 0.8947603719599427,
      "grad_norm": 0.16752179554126198,
      "learning_rate": 5.753264447036022e-06,
      "loss": 0.6349,
      "step": 10007
    },
    {
      "epoch": 0.8948497854077253,
      "grad_norm": 0.16447222750909246,
      "learning_rate": 5.743587087070235e-06,
      "loss": 0.6409,
      "step": 10008
    },
    {
      "epoch": 0.8949391988555079,
      "grad_norm": 0.16694832520677874,
      "learning_rate": 5.733917632275976e-06,
      "loss": 0.6633,
      "step": 10009
    },
    {
      "epoch": 0.8950286123032904,
      "grad_norm": 0.14123002258496853,
      "learning_rate": 5.7242560834641855e-06,
      "loss": 0.6355,
      "step": 10010
    },
    {
      "epoch": 0.8951180257510729,
      "grad_norm": 0.13112204152192863,
      "learning_rate": 5.714602441445194e-06,
      "loss": 0.5835,
      "step": 10011
    },
    {
      "epoch": 0.8952074391988555,
      "grad_norm": 0.15214289554253274,
      "learning_rate": 5.704956707028619e-06,
      "loss": 0.6315,
      "step": 10012
    },
    {
      "epoch": 0.895296852646638,
      "grad_norm": 0.15356548394995553,
      "learning_rate": 5.695318881023437e-06,
      "loss": 0.6182,
      "step": 10013
    },
    {
      "epoch": 0.8953862660944206,
      "grad_norm": 0.14207855417603182,
      "learning_rate": 5.685688964237979e-06,
      "loss": 0.6437,
      "step": 10014
    },
    {
      "epoch": 0.8954756795422032,
      "grad_norm": 0.14795365839644115,
      "learning_rate": 5.676066957479898e-06,
      "loss": 0.6123,
      "step": 10015
    },
    {
      "epoch": 0.8955650929899857,
      "grad_norm": 0.179803453327943,
      "learning_rate": 5.66645286155616e-06,
      "loss": 0.645,
      "step": 10016
    },
    {
      "epoch": 0.8956545064377682,
      "grad_norm": 0.14918733331646397,
      "learning_rate": 5.656846677273086e-06,
      "loss": 0.6249,
      "step": 10017
    },
    {
      "epoch": 0.8957439198855508,
      "grad_norm": 0.16694875492001077,
      "learning_rate": 5.647248405436356e-06,
      "loss": 0.379,
      "step": 10018
    },
    {
      "epoch": 0.8958333333333334,
      "grad_norm": 0.1492611080059724,
      "learning_rate": 5.637658046850924e-06,
      "loss": 0.6228,
      "step": 10019
    },
    {
      "epoch": 0.8959227467811158,
      "grad_norm": 0.15578425275893545,
      "learning_rate": 5.628075602321181e-06,
      "loss": 0.6153,
      "step": 10020
    },
    {
      "epoch": 0.8960121602288984,
      "grad_norm": 0.16106372605188451,
      "learning_rate": 5.618501072650761e-06,
      "loss": 0.6179,
      "step": 10021
    },
    {
      "epoch": 0.896101573676681,
      "grad_norm": 0.13896296354906035,
      "learning_rate": 5.608934458642656e-06,
      "loss": 0.6046,
      "step": 10022
    },
    {
      "epoch": 0.8961909871244635,
      "grad_norm": 0.16383518658533064,
      "learning_rate": 5.599375761099246e-06,
      "loss": 0.6606,
      "step": 10023
    },
    {
      "epoch": 0.8962804005722461,
      "grad_norm": 0.13924567462034906,
      "learning_rate": 5.589824980822167e-06,
      "loss": 0.6448,
      "step": 10024
    },
    {
      "epoch": 0.8963698140200286,
      "grad_norm": 0.1704391023984006,
      "learning_rate": 5.580282118612446e-06,
      "loss": 0.6764,
      "step": 10025
    },
    {
      "epoch": 0.8964592274678111,
      "grad_norm": 0.16151713284737565,
      "learning_rate": 5.570747175270441e-06,
      "loss": 0.6757,
      "step": 10026
    },
    {
      "epoch": 0.8965486409155937,
      "grad_norm": 0.16493801155416943,
      "learning_rate": 5.561220151595825e-06,
      "loss": 0.6128,
      "step": 10027
    },
    {
      "epoch": 0.8966380543633763,
      "grad_norm": 0.16594232918517188,
      "learning_rate": 5.551701048387614e-06,
      "loss": 0.6168,
      "step": 10028
    },
    {
      "epoch": 0.8967274678111588,
      "grad_norm": 0.16221821622876936,
      "learning_rate": 5.542189866444203e-06,
      "loss": 0.6096,
      "step": 10029
    },
    {
      "epoch": 0.8968168812589413,
      "grad_norm": 0.18160809940040087,
      "learning_rate": 5.53268660656322e-06,
      "loss": 0.3586,
      "step": 10030
    },
    {
      "epoch": 0.8969062947067239,
      "grad_norm": 0.1608944986416817,
      "learning_rate": 5.523191269541728e-06,
      "loss": 0.6374,
      "step": 10031
    },
    {
      "epoch": 0.8969957081545065,
      "grad_norm": 0.15960089046843903,
      "learning_rate": 5.5137038561761115e-06,
      "loss": 0.6077,
      "step": 10032
    },
    {
      "epoch": 0.897085121602289,
      "grad_norm": 0.15980679730324632,
      "learning_rate": 5.5042243672620006e-06,
      "loss": 0.6572,
      "step": 10033
    },
    {
      "epoch": 0.8971745350500715,
      "grad_norm": 0.16638307299554714,
      "learning_rate": 5.494752803594505e-06,
      "loss": 0.6487,
      "step": 10034
    },
    {
      "epoch": 0.8972639484978541,
      "grad_norm": 0.16006999228364674,
      "learning_rate": 5.485289165967933e-06,
      "loss": 0.6257,
      "step": 10035
    },
    {
      "epoch": 0.8973533619456366,
      "grad_norm": 0.13670239214769708,
      "learning_rate": 5.475833455176027e-06,
      "loss": 0.6152,
      "step": 10036
    },
    {
      "epoch": 0.8974427753934192,
      "grad_norm": 0.18540855567873152,
      "learning_rate": 5.466385672011809e-06,
      "loss": 0.6519,
      "step": 10037
    },
    {
      "epoch": 0.8975321888412017,
      "grad_norm": 0.17504696415407944,
      "learning_rate": 5.4569458172676665e-06,
      "loss": 0.7051,
      "step": 10038
    },
    {
      "epoch": 0.8976216022889842,
      "grad_norm": 0.18541522530981014,
      "learning_rate": 5.4475138917352894e-06,
      "loss": 0.6733,
      "step": 10039
    },
    {
      "epoch": 0.8977110157367668,
      "grad_norm": 0.15645358881279606,
      "learning_rate": 5.4380898962057336e-06,
      "loss": 0.6161,
      "step": 10040
    },
    {
      "epoch": 0.8978004291845494,
      "grad_norm": 0.16365735346744392,
      "learning_rate": 5.428673831469366e-06,
      "loss": 0.6136,
      "step": 10041
    },
    {
      "epoch": 0.897889842632332,
      "grad_norm": 0.16816705230057966,
      "learning_rate": 5.419265698315923e-06,
      "loss": 0.6367,
      "step": 10042
    },
    {
      "epoch": 0.8979792560801144,
      "grad_norm": 0.1618743447819668,
      "learning_rate": 5.40986549753445e-06,
      "loss": 0.6629,
      "step": 10043
    },
    {
      "epoch": 0.898068669527897,
      "grad_norm": 0.15750282698452533,
      "learning_rate": 5.400473229913305e-06,
      "loss": 0.6282,
      "step": 10044
    },
    {
      "epoch": 0.8981580829756796,
      "grad_norm": 0.15863497590831896,
      "learning_rate": 5.3910888962402265e-06,
      "loss": 0.6898,
      "step": 10045
    },
    {
      "epoch": 0.8982474964234621,
      "grad_norm": 0.16942986262129178,
      "learning_rate": 5.381712497302261e-06,
      "loss": 0.6573,
      "step": 10046
    },
    {
      "epoch": 0.8983369098712446,
      "grad_norm": 0.14469335058929206,
      "learning_rate": 5.372344033885801e-06,
      "loss": 0.6544,
      "step": 10047
    },
    {
      "epoch": 0.8984263233190272,
      "grad_norm": 0.17073094191789948,
      "learning_rate": 5.362983506776564e-06,
      "loss": 0.5884,
      "step": 10048
    },
    {
      "epoch": 0.8985157367668097,
      "grad_norm": 0.18353104162449943,
      "learning_rate": 5.353630916759622e-06,
      "loss": 0.6716,
      "step": 10049
    },
    {
      "epoch": 0.8986051502145923,
      "grad_norm": 0.15854588019547178,
      "learning_rate": 5.344286264619347e-06,
      "loss": 0.615,
      "step": 10050
    },
    {
      "epoch": 0.8986945636623748,
      "grad_norm": 0.16472014276841485,
      "learning_rate": 5.334949551139457e-06,
      "loss": 0.6238,
      "step": 10051
    },
    {
      "epoch": 0.8987839771101573,
      "grad_norm": 0.1679892920647032,
      "learning_rate": 5.325620777103035e-06,
      "loss": 0.3688,
      "step": 10052
    },
    {
      "epoch": 0.8988733905579399,
      "grad_norm": 0.17436031015092807,
      "learning_rate": 5.316299943292435e-06,
      "loss": 0.6261,
      "step": 10053
    },
    {
      "epoch": 0.8989628040057225,
      "grad_norm": 0.14563451553651782,
      "learning_rate": 5.306987050489442e-06,
      "loss": 0.6175,
      "step": 10054
    },
    {
      "epoch": 0.899052217453505,
      "grad_norm": 0.15592613161358737,
      "learning_rate": 5.297682099475066e-06,
      "loss": 0.6622,
      "step": 10055
    },
    {
      "epoch": 0.8991416309012875,
      "grad_norm": 0.167787441970031,
      "learning_rate": 5.2883850910297235e-06,
      "loss": 0.6314,
      "step": 10056
    },
    {
      "epoch": 0.8992310443490701,
      "grad_norm": 0.16031475519196886,
      "learning_rate": 5.27909602593315e-06,
      "loss": 0.6435,
      "step": 10057
    },
    {
      "epoch": 0.8993204577968527,
      "grad_norm": 0.15425820923593478,
      "learning_rate": 5.2698149049643874e-06,
      "loss": 0.6244,
      "step": 10058
    },
    {
      "epoch": 0.8994098712446352,
      "grad_norm": 0.14724846661670793,
      "learning_rate": 5.260541728901847e-06,
      "loss": 0.6156,
      "step": 10059
    },
    {
      "epoch": 0.8994992846924177,
      "grad_norm": 0.15157812032790696,
      "learning_rate": 5.25127649852325e-06,
      "loss": 0.6142,
      "step": 10060
    },
    {
      "epoch": 0.8995886981402003,
      "grad_norm": 0.15557844389245423,
      "learning_rate": 5.2420192146056645e-06,
      "loss": 0.6408,
      "step": 10061
    },
    {
      "epoch": 0.8996781115879828,
      "grad_norm": 0.16455231093262362,
      "learning_rate": 5.232769877925503e-06,
      "loss": 0.6876,
      "step": 10062
    },
    {
      "epoch": 0.8997675250357654,
      "grad_norm": 0.1745487115567382,
      "learning_rate": 5.2235284892584776e-06,
      "loss": 0.63,
      "step": 10063
    },
    {
      "epoch": 0.899856938483548,
      "grad_norm": 0.16941322382722265,
      "learning_rate": 5.214295049379658e-06,
      "loss": 0.634,
      "step": 10064
    },
    {
      "epoch": 0.8999463519313304,
      "grad_norm": 0.1568291315587166,
      "learning_rate": 5.205069559063425e-06,
      "loss": 0.6169,
      "step": 10065
    },
    {
      "epoch": 0.900035765379113,
      "grad_norm": 0.15470216725620073,
      "learning_rate": 5.195852019083558e-06,
      "loss": 0.6273,
      "step": 10066
    },
    {
      "epoch": 0.9001251788268956,
      "grad_norm": 0.18639954427982372,
      "learning_rate": 5.18664243021304e-06,
      "loss": 0.6466,
      "step": 10067
    },
    {
      "epoch": 0.9002145922746781,
      "grad_norm": 0.1622351180924821,
      "learning_rate": 5.177440793224342e-06,
      "loss": 0.6878,
      "step": 10068
    },
    {
      "epoch": 0.9003040057224606,
      "grad_norm": 0.17633834029746215,
      "learning_rate": 5.168247108889179e-06,
      "loss": 0.6193,
      "step": 10069
    },
    {
      "epoch": 0.9003934191702432,
      "grad_norm": 0.15871549885125716,
      "learning_rate": 5.159061377978591e-06,
      "loss": 0.634,
      "step": 10070
    },
    {
      "epoch": 0.9004828326180258,
      "grad_norm": 0.15826409562355398,
      "learning_rate": 5.149883601262984e-06,
      "loss": 0.6355,
      "step": 10071
    },
    {
      "epoch": 0.9005722460658083,
      "grad_norm": 0.15922159117675957,
      "learning_rate": 5.1407137795121075e-06,
      "loss": 0.641,
      "step": 10072
    },
    {
      "epoch": 0.9006616595135909,
      "grad_norm": 0.16764217621039423,
      "learning_rate": 5.131551913494981e-06,
      "loss": 0.634,
      "step": 10073
    },
    {
      "epoch": 0.9007510729613734,
      "grad_norm": 0.15566477959273836,
      "learning_rate": 5.122398003980033e-06,
      "loss": 0.6207,
      "step": 10074
    },
    {
      "epoch": 0.9008404864091559,
      "grad_norm": 0.16228697419002167,
      "learning_rate": 5.1132520517349735e-06,
      "loss": 0.628,
      "step": 10075
    },
    {
      "epoch": 0.9009298998569385,
      "grad_norm": 0.17225325041365858,
      "learning_rate": 5.104114057526876e-06,
      "loss": 0.6718,
      "step": 10076
    },
    {
      "epoch": 0.9010193133047211,
      "grad_norm": 0.17344088779125275,
      "learning_rate": 5.09498402212214e-06,
      "loss": 0.6127,
      "step": 10077
    },
    {
      "epoch": 0.9011087267525035,
      "grad_norm": 0.17024865827683058,
      "learning_rate": 5.085861946286463e-06,
      "loss": 0.663,
      "step": 10078
    },
    {
      "epoch": 0.9011981402002861,
      "grad_norm": 0.16174051681219975,
      "learning_rate": 5.076747830784923e-06,
      "loss": 0.6074,
      "step": 10079
    },
    {
      "epoch": 0.9012875536480687,
      "grad_norm": 0.1715183410534415,
      "learning_rate": 5.067641676381918e-06,
      "loss": 0.6648,
      "step": 10080
    },
    {
      "epoch": 0.9013769670958512,
      "grad_norm": 0.16871830411930924,
      "learning_rate": 5.058543483841116e-06,
      "loss": 0.6332,
      "step": 10081
    },
    {
      "epoch": 0.9014663805436338,
      "grad_norm": 0.14363383647181413,
      "learning_rate": 5.04945325392564e-06,
      "loss": 0.6244,
      "step": 10082
    },
    {
      "epoch": 0.9015557939914163,
      "grad_norm": 0.16117132981313514,
      "learning_rate": 5.040370987397858e-06,
      "loss": 0.6253,
      "step": 10083
    },
    {
      "epoch": 0.9016452074391988,
      "grad_norm": 0.16906930394246578,
      "learning_rate": 5.03129668501946e-06,
      "loss": 0.6523,
      "step": 10084
    },
    {
      "epoch": 0.9017346208869814,
      "grad_norm": 0.17103122507046076,
      "learning_rate": 5.022230347551515e-06,
      "loss": 0.6092,
      "step": 10085
    },
    {
      "epoch": 0.901824034334764,
      "grad_norm": 0.15503975377871848,
      "learning_rate": 5.013171975754427e-06,
      "loss": 0.6181,
      "step": 10086
    },
    {
      "epoch": 0.9019134477825465,
      "grad_norm": 0.1488939407770506,
      "learning_rate": 5.004121570387876e-06,
      "loss": 0.619,
      "step": 10087
    },
    {
      "epoch": 0.902002861230329,
      "grad_norm": 0.15159788202107108,
      "learning_rate": 4.995079132210922e-06,
      "loss": 0.6212,
      "step": 10088
    },
    {
      "epoch": 0.9020922746781116,
      "grad_norm": 0.16021951950602079,
      "learning_rate": 4.986044661981948e-06,
      "loss": 0.6389,
      "step": 10089
    },
    {
      "epoch": 0.9021816881258942,
      "grad_norm": 0.16604644495463866,
      "learning_rate": 4.977018160458646e-06,
      "loss": 0.647,
      "step": 10090
    },
    {
      "epoch": 0.9022711015736766,
      "grad_norm": 0.1658716500446684,
      "learning_rate": 4.967999628398101e-06,
      "loss": 0.6448,
      "step": 10091
    },
    {
      "epoch": 0.9023605150214592,
      "grad_norm": 0.1826500686627645,
      "learning_rate": 4.958989066556641e-06,
      "loss": 0.665,
      "step": 10092
    },
    {
      "epoch": 0.9024499284692418,
      "grad_norm": 0.17231983939590126,
      "learning_rate": 4.949986475689983e-06,
      "loss": 0.6538,
      "step": 10093
    },
    {
      "epoch": 0.9025393419170243,
      "grad_norm": 0.18390676426602853,
      "learning_rate": 4.9409918565531675e-06,
      "loss": 0.6602,
      "step": 10094
    },
    {
      "epoch": 0.9026287553648069,
      "grad_norm": 0.15985074580090913,
      "learning_rate": 4.93200520990057e-06,
      "loss": 0.6439,
      "step": 10095
    },
    {
      "epoch": 0.9027181688125894,
      "grad_norm": 0.1809678772486441,
      "learning_rate": 4.923026536485875e-06,
      "loss": 0.3968,
      "step": 10096
    },
    {
      "epoch": 0.9028075822603719,
      "grad_norm": 0.17140856366010349,
      "learning_rate": 4.914055837062137e-06,
      "loss": 0.6269,
      "step": 10097
    },
    {
      "epoch": 0.9028969957081545,
      "grad_norm": 0.13727459343208612,
      "learning_rate": 4.905093112381687e-06,
      "loss": 0.6315,
      "step": 10098
    },
    {
      "epoch": 0.9029864091559371,
      "grad_norm": 0.16066240922972144,
      "learning_rate": 4.896138363196235e-06,
      "loss": 0.6668,
      "step": 10099
    },
    {
      "epoch": 0.9030758226037195,
      "grad_norm": 0.16235880833687924,
      "learning_rate": 4.8871915902568125e-06,
      "loss": 0.6695,
      "step": 10100
    },
    {
      "epoch": 0.9031652360515021,
      "grad_norm": 0.16788364820698476,
      "learning_rate": 4.878252794313754e-06,
      "loss": 0.6345,
      "step": 10101
    },
    {
      "epoch": 0.9032546494992847,
      "grad_norm": 0.1567134176774388,
      "learning_rate": 4.869321976116737e-06,
      "loss": 0.6412,
      "step": 10102
    },
    {
      "epoch": 0.9033440629470673,
      "grad_norm": 0.17407165223050855,
      "learning_rate": 4.860399136414828e-06,
      "loss": 0.6399,
      "step": 10103
    },
    {
      "epoch": 0.9034334763948498,
      "grad_norm": 0.16102783162523354,
      "learning_rate": 4.8514842759563306e-06,
      "loss": 0.6087,
      "step": 10104
    },
    {
      "epoch": 0.9035228898426323,
      "grad_norm": 0.17134299892102678,
      "learning_rate": 4.842577395488934e-06,
      "loss": 0.6238,
      "step": 10105
    },
    {
      "epoch": 0.9036123032904149,
      "grad_norm": 0.1597478449452734,
      "learning_rate": 4.833678495759664e-06,
      "loss": 0.6109,
      "step": 10106
    },
    {
      "epoch": 0.9037017167381974,
      "grad_norm": 0.16592649215137203,
      "learning_rate": 4.8247875775148335e-06,
      "loss": 0.6344,
      "step": 10107
    },
    {
      "epoch": 0.90379113018598,
      "grad_norm": 0.1578205585360478,
      "learning_rate": 4.815904641500124e-06,
      "loss": 0.5891,
      "step": 10108
    },
    {
      "epoch": 0.9038805436337625,
      "grad_norm": 0.16487802613400673,
      "learning_rate": 4.80702968846054e-06,
      "loss": 0.6195,
      "step": 10109
    },
    {
      "epoch": 0.903969957081545,
      "grad_norm": 0.1653175115751687,
      "learning_rate": 4.79816271914042e-06,
      "loss": 0.6252,
      "step": 10110
    },
    {
      "epoch": 0.9040593705293276,
      "grad_norm": 0.1726056110295673,
      "learning_rate": 4.789303734283423e-06,
      "loss": 0.617,
      "step": 10111
    },
    {
      "epoch": 0.9041487839771102,
      "grad_norm": 0.1614866391063943,
      "learning_rate": 4.780452734632524e-06,
      "loss": 0.6561,
      "step": 10112
    },
    {
      "epoch": 0.9042381974248928,
      "grad_norm": 0.18641445474055487,
      "learning_rate": 4.771609720930059e-06,
      "loss": 0.6046,
      "step": 10113
    },
    {
      "epoch": 0.9043276108726752,
      "grad_norm": 0.18726427049581842,
      "learning_rate": 4.762774693917693e-06,
      "loss": 0.6772,
      "step": 10114
    },
    {
      "epoch": 0.9044170243204578,
      "grad_norm": 0.1466529539009502,
      "learning_rate": 4.753947654336388e-06,
      "loss": 0.6174,
      "step": 10115
    },
    {
      "epoch": 0.9045064377682404,
      "grad_norm": 0.16200872375562764,
      "learning_rate": 4.7451286029264405e-06,
      "loss": 0.6097,
      "step": 10116
    },
    {
      "epoch": 0.9045958512160229,
      "grad_norm": 0.16702126129029937,
      "learning_rate": 4.73631754042756e-06,
      "loss": 0.6583,
      "step": 10117
    },
    {
      "epoch": 0.9046852646638054,
      "grad_norm": 0.13111682898399374,
      "learning_rate": 4.727514467578653e-06,
      "loss": 0.6119,
      "step": 10118
    },
    {
      "epoch": 0.904774678111588,
      "grad_norm": 0.17428025421835072,
      "learning_rate": 4.718719385118053e-06,
      "loss": 0.6774,
      "step": 10119
    },
    {
      "epoch": 0.9048640915593705,
      "grad_norm": 0.18576568884780406,
      "learning_rate": 4.7099322937833925e-06,
      "loss": 0.5764,
      "step": 10120
    },
    {
      "epoch": 0.9049535050071531,
      "grad_norm": 0.15264989833511386,
      "learning_rate": 4.701153194311625e-06,
      "loss": 0.638,
      "step": 10121
    },
    {
      "epoch": 0.9050429184549357,
      "grad_norm": 0.16208474530530945,
      "learning_rate": 4.69238208743904e-06,
      "loss": 0.6132,
      "step": 10122
    },
    {
      "epoch": 0.9051323319027181,
      "grad_norm": 0.16750482359729998,
      "learning_rate": 4.6836189739012715e-06,
      "loss": 0.6913,
      "step": 10123
    },
    {
      "epoch": 0.9052217453505007,
      "grad_norm": 0.15538219825836205,
      "learning_rate": 4.6748638544332644e-06,
      "loss": 0.6572,
      "step": 10124
    },
    {
      "epoch": 0.9053111587982833,
      "grad_norm": 0.1732768020035466,
      "learning_rate": 4.66611672976931e-06,
      "loss": 0.6105,
      "step": 10125
    },
    {
      "epoch": 0.9054005722460658,
      "grad_norm": 0.16168673859490823,
      "learning_rate": 4.6573776006430205e-06,
      "loss": 0.6644,
      "step": 10126
    },
    {
      "epoch": 0.9054899856938483,
      "grad_norm": 0.15510031777814104,
      "learning_rate": 4.6486464677873094e-06,
      "loss": 0.6278,
      "step": 10127
    },
    {
      "epoch": 0.9055793991416309,
      "grad_norm": 0.15039334855897762,
      "learning_rate": 4.639923331934471e-06,
      "loss": 0.609,
      "step": 10128
    },
    {
      "epoch": 0.9056688125894135,
      "grad_norm": 0.16810748800743222,
      "learning_rate": 4.631208193816083e-06,
      "loss": 0.6016,
      "step": 10129
    },
    {
      "epoch": 0.905758226037196,
      "grad_norm": 0.16539857736957983,
      "learning_rate": 4.622501054163098e-06,
      "loss": 0.6233,
      "step": 10130
    },
    {
      "epoch": 0.9058476394849786,
      "grad_norm": 0.16755424373501024,
      "learning_rate": 4.613801913705773e-06,
      "loss": 0.6284,
      "step": 10131
    },
    {
      "epoch": 0.905937052932761,
      "grad_norm": 0.17208601622240705,
      "learning_rate": 4.605110773173682e-06,
      "loss": 0.6233,
      "step": 10132
    },
    {
      "epoch": 0.9060264663805436,
      "grad_norm": 0.16195618072630688,
      "learning_rate": 4.59642763329573e-06,
      "loss": 0.612,
      "step": 10133
    },
    {
      "epoch": 0.9061158798283262,
      "grad_norm": 0.14922118170819618,
      "learning_rate": 4.5877524948001905e-06,
      "loss": 0.6119,
      "step": 10134
    },
    {
      "epoch": 0.9062052932761088,
      "grad_norm": 0.16369167758555284,
      "learning_rate": 4.5790853584146035e-06,
      "loss": 0.6339,
      "step": 10135
    },
    {
      "epoch": 0.9062947067238912,
      "grad_norm": 0.16234427712282598,
      "learning_rate": 4.570426224865876e-06,
      "loss": 0.6406,
      "step": 10136
    },
    {
      "epoch": 0.9063841201716738,
      "grad_norm": 0.16391709508177044,
      "learning_rate": 4.561775094880283e-06,
      "loss": 0.6362,
      "step": 10137
    },
    {
      "epoch": 0.9064735336194564,
      "grad_norm": 0.17367900443967063,
      "learning_rate": 4.5531319691833326e-06,
      "loss": 0.6252,
      "step": 10138
    },
    {
      "epoch": 0.906562947067239,
      "grad_norm": 0.16917479531150156,
      "learning_rate": 4.544496848499946e-06,
      "loss": 0.6475,
      "step": 10139
    },
    {
      "epoch": 0.9066523605150214,
      "grad_norm": 0.15830423353636167,
      "learning_rate": 4.535869733554332e-06,
      "loss": 0.603,
      "step": 10140
    },
    {
      "epoch": 0.906741773962804,
      "grad_norm": 0.1413619406186926,
      "learning_rate": 4.527250625070012e-06,
      "loss": 0.6247,
      "step": 10141
    },
    {
      "epoch": 0.9068311874105865,
      "grad_norm": 0.15647461146671593,
      "learning_rate": 4.518639523769897e-06,
      "loss": 0.6185,
      "step": 10142
    },
    {
      "epoch": 0.9069206008583691,
      "grad_norm": 0.18547578992624034,
      "learning_rate": 4.510036430376152e-06,
      "loss": 0.6784,
      "step": 10143
    },
    {
      "epoch": 0.9070100143061517,
      "grad_norm": 0.1616135998308928,
      "learning_rate": 4.501441345610347e-06,
      "loss": 0.6398,
      "step": 10144
    },
    {
      "epoch": 0.9070994277539342,
      "grad_norm": 0.16766061048661193,
      "learning_rate": 4.492854270193325e-06,
      "loss": 0.6548,
      "step": 10145
    },
    {
      "epoch": 0.9071888412017167,
      "grad_norm": 0.16717121890755424,
      "learning_rate": 4.4842752048452676e-06,
      "loss": 0.3637,
      "step": 10146
    },
    {
      "epoch": 0.9072782546494993,
      "grad_norm": 0.16404453133048225,
      "learning_rate": 4.475704150285687e-06,
      "loss": 0.6643,
      "step": 10147
    },
    {
      "epoch": 0.9073676680972819,
      "grad_norm": 0.1474059043123029,
      "learning_rate": 4.4671411072334526e-06,
      "loss": 0.5818,
      "step": 10148
    },
    {
      "epoch": 0.9074570815450643,
      "grad_norm": 0.17482987582332818,
      "learning_rate": 4.458586076406701e-06,
      "loss": 0.6757,
      "step": 10149
    },
    {
      "epoch": 0.9075464949928469,
      "grad_norm": 0.16188914690493608,
      "learning_rate": 4.450039058522948e-06,
      "loss": 0.6198,
      "step": 10150
    },
    {
      "epoch": 0.9076359084406295,
      "grad_norm": 0.14769419292689495,
      "learning_rate": 4.441500054299042e-06,
      "loss": 0.6169,
      "step": 10151
    },
    {
      "epoch": 0.907725321888412,
      "grad_norm": 0.16669364481038784,
      "learning_rate": 4.432969064451109e-06,
      "loss": 0.6119,
      "step": 10152
    },
    {
      "epoch": 0.9078147353361946,
      "grad_norm": 0.1667402636007693,
      "learning_rate": 4.424446089694645e-06,
      "loss": 0.675,
      "step": 10153
    },
    {
      "epoch": 0.9079041487839771,
      "grad_norm": 0.1511827347263847,
      "learning_rate": 4.415931130744477e-06,
      "loss": 0.6327,
      "step": 10154
    },
    {
      "epoch": 0.9079935622317596,
      "grad_norm": 0.17538641569406235,
      "learning_rate": 4.407424188314713e-06,
      "loss": 0.583,
      "step": 10155
    },
    {
      "epoch": 0.9080829756795422,
      "grad_norm": 0.15204845528815783,
      "learning_rate": 4.398925263118836e-06,
      "loss": 0.5924,
      "step": 10156
    },
    {
      "epoch": 0.9081723891273248,
      "grad_norm": 0.14915137566599665,
      "learning_rate": 4.390434355869643e-06,
      "loss": 0.6319,
      "step": 10157
    },
    {
      "epoch": 0.9082618025751072,
      "grad_norm": 0.1590646853088609,
      "learning_rate": 4.381951467279244e-06,
      "loss": 0.6214,
      "step": 10158
    },
    {
      "epoch": 0.9083512160228898,
      "grad_norm": 0.17655059277072777,
      "learning_rate": 4.373476598059112e-06,
      "loss": 0.6215,
      "step": 10159
    },
    {
      "epoch": 0.9084406294706724,
      "grad_norm": 0.17491090836284084,
      "learning_rate": 4.365009748920012e-06,
      "loss": 0.6369,
      "step": 10160
    },
    {
      "epoch": 0.908530042918455,
      "grad_norm": 0.16529018541104637,
      "learning_rate": 4.356550920572044e-06,
      "loss": 0.6721,
      "step": 10161
    },
    {
      "epoch": 0.9086194563662375,
      "grad_norm": 0.19164812550343,
      "learning_rate": 4.348100113724629e-06,
      "loss": 0.7008,
      "step": 10162
    },
    {
      "epoch": 0.90870886981402,
      "grad_norm": 0.14474816661978465,
      "learning_rate": 4.339657329086566e-06,
      "loss": 0.6161,
      "step": 10163
    },
    {
      "epoch": 0.9087982832618026,
      "grad_norm": 0.15906077129113413,
      "learning_rate": 4.331222567365878e-06,
      "loss": 0.6094,
      "step": 10164
    },
    {
      "epoch": 0.9088876967095851,
      "grad_norm": 0.16112726708006875,
      "learning_rate": 4.322795829270043e-06,
      "loss": 0.6323,
      "step": 10165
    },
    {
      "epoch": 0.9089771101573677,
      "grad_norm": 0.15343511569479842,
      "learning_rate": 4.314377115505763e-06,
      "loss": 0.6395,
      "step": 10166
    },
    {
      "epoch": 0.9090665236051502,
      "grad_norm": 0.15390862513098977,
      "learning_rate": 4.305966426779118e-06,
      "loss": 0.6301,
      "step": 10167
    },
    {
      "epoch": 0.9091559370529327,
      "grad_norm": 0.16413403223447542,
      "learning_rate": 4.297563763795509e-06,
      "loss": 0.6517,
      "step": 10168
    },
    {
      "epoch": 0.9092453505007153,
      "grad_norm": 0.15969971585365886,
      "learning_rate": 4.289169127259629e-06,
      "loss": 0.6208,
      "step": 10169
    },
    {
      "epoch": 0.9093347639484979,
      "grad_norm": 0.15431238306680606,
      "learning_rate": 4.280782517875548e-06,
      "loss": 0.6264,
      "step": 10170
    },
    {
      "epoch": 0.9094241773962805,
      "grad_norm": 0.17581564756871576,
      "learning_rate": 4.272403936346647e-06,
      "loss": 0.6414,
      "step": 10171
    },
    {
      "epoch": 0.9095135908440629,
      "grad_norm": 0.15378705875997398,
      "learning_rate": 4.26403338337561e-06,
      "loss": 0.6314,
      "step": 10172
    },
    {
      "epoch": 0.9096030042918455,
      "grad_norm": 0.18057966984620175,
      "learning_rate": 4.255670859664474e-06,
      "loss": 0.397,
      "step": 10173
    },
    {
      "epoch": 0.9096924177396281,
      "grad_norm": 0.16801093466051745,
      "learning_rate": 4.2473163659146e-06,
      "loss": 0.6356,
      "step": 10174
    },
    {
      "epoch": 0.9097818311874106,
      "grad_norm": 0.16727694367744844,
      "learning_rate": 4.238969902826662e-06,
      "loss": 0.6558,
      "step": 10175
    },
    {
      "epoch": 0.9098712446351931,
      "grad_norm": 0.16061910225427253,
      "learning_rate": 4.230631471100655e-06,
      "loss": 0.6074,
      "step": 10176
    },
    {
      "epoch": 0.9099606580829757,
      "grad_norm": 0.17570154364464122,
      "learning_rate": 4.222301071435952e-06,
      "loss": 0.642,
      "step": 10177
    },
    {
      "epoch": 0.9100500715307582,
      "grad_norm": 0.16187527932344095,
      "learning_rate": 4.213978704531152e-06,
      "loss": 0.6845,
      "step": 10178
    },
    {
      "epoch": 0.9101394849785408,
      "grad_norm": 0.1579345385180036,
      "learning_rate": 4.205664371084306e-06,
      "loss": 0.624,
      "step": 10179
    },
    {
      "epoch": 0.9102288984263234,
      "grad_norm": 0.14490750112687378,
      "learning_rate": 4.19735807179269e-06,
      "loss": 0.6421,
      "step": 10180
    },
    {
      "epoch": 0.9103183118741058,
      "grad_norm": 0.16493962113922758,
      "learning_rate": 4.189059807352958e-06,
      "loss": 0.633,
      "step": 10181
    },
    {
      "epoch": 0.9104077253218884,
      "grad_norm": 0.1587152274488546,
      "learning_rate": 4.180769578461063e-06,
      "loss": 0.6591,
      "step": 10182
    },
    {
      "epoch": 0.910497138769671,
      "grad_norm": 0.16300245421960083,
      "learning_rate": 4.172487385812307e-06,
      "loss": 0.638,
      "step": 10183
    },
    {
      "epoch": 0.9105865522174535,
      "grad_norm": 0.17216067905943805,
      "learning_rate": 4.164213230101299e-06,
      "loss": 0.6473,
      "step": 10184
    },
    {
      "epoch": 0.910675965665236,
      "grad_norm": 0.16216666903818786,
      "learning_rate": 4.155947112021985e-06,
      "loss": 0.598,
      "step": 10185
    },
    {
      "epoch": 0.9107653791130186,
      "grad_norm": 0.1676887925518235,
      "learning_rate": 4.147689032267643e-06,
      "loss": 0.6769,
      "step": 10186
    },
    {
      "epoch": 0.9108547925608012,
      "grad_norm": 0.14812216347586854,
      "learning_rate": 4.139438991530853e-06,
      "loss": 0.6204,
      "step": 10187
    },
    {
      "epoch": 0.9109442060085837,
      "grad_norm": 0.18434760367313122,
      "learning_rate": 4.131196990503561e-06,
      "loss": 0.6627,
      "step": 10188
    },
    {
      "epoch": 0.9110336194563662,
      "grad_norm": 0.1605175664630486,
      "learning_rate": 4.1229630298769914e-06,
      "loss": 0.6406,
      "step": 10189
    },
    {
      "epoch": 0.9111230329041488,
      "grad_norm": 0.1673142802097222,
      "learning_rate": 4.114737110341715e-06,
      "loss": 0.6348,
      "step": 10190
    },
    {
      "epoch": 0.9112124463519313,
      "grad_norm": 0.18567824163414082,
      "learning_rate": 4.106519232587647e-06,
      "loss": 0.6776,
      "step": 10191
    },
    {
      "epoch": 0.9113018597997139,
      "grad_norm": 0.15943761465057094,
      "learning_rate": 4.098309397303978e-06,
      "loss": 0.5987,
      "step": 10192
    },
    {
      "epoch": 0.9113912732474965,
      "grad_norm": 0.1467432076392236,
      "learning_rate": 4.090107605179294e-06,
      "loss": 0.6637,
      "step": 10193
    },
    {
      "epoch": 0.9114806866952789,
      "grad_norm": 0.15214748086807697,
      "learning_rate": 4.081913856901476e-06,
      "loss": 0.6503,
      "step": 10194
    },
    {
      "epoch": 0.9115701001430615,
      "grad_norm": 0.15771817995022455,
      "learning_rate": 4.073728153157674e-06,
      "loss": 0.6458,
      "step": 10195
    },
    {
      "epoch": 0.9116595135908441,
      "grad_norm": 0.16841774877929663,
      "learning_rate": 4.065550494634451e-06,
      "loss": 0.609,
      "step": 10196
    },
    {
      "epoch": 0.9117489270386266,
      "grad_norm": 0.1752517799259626,
      "learning_rate": 4.057380882017658e-06,
      "loss": 0.6278,
      "step": 10197
    },
    {
      "epoch": 0.9118383404864091,
      "grad_norm": 0.1671029423232339,
      "learning_rate": 4.049219315992458e-06,
      "loss": 0.644,
      "step": 10198
    },
    {
      "epoch": 0.9119277539341917,
      "grad_norm": 0.17172814999914215,
      "learning_rate": 4.041065797243349e-06,
      "loss": 0.6622,
      "step": 10199
    },
    {
      "epoch": 0.9120171673819742,
      "grad_norm": 0.14220429912436366,
      "learning_rate": 4.032920326454159e-06,
      "loss": 0.5996,
      "step": 10200
    },
    {
      "epoch": 0.9121065808297568,
      "grad_norm": 0.15964234559662563,
      "learning_rate": 4.0247829043080445e-06,
      "loss": 0.6311,
      "step": 10201
    },
    {
      "epoch": 0.9121959942775394,
      "grad_norm": 0.14751872725739493,
      "learning_rate": 4.016653531487491e-06,
      "loss": 0.6085,
      "step": 10202
    },
    {
      "epoch": 0.9122854077253219,
      "grad_norm": 0.16830169357876137,
      "learning_rate": 4.008532208674276e-06,
      "loss": 0.6189,
      "step": 10203
    },
    {
      "epoch": 0.9123748211731044,
      "grad_norm": 0.16595521833005156,
      "learning_rate": 4.000418936549533e-06,
      "loss": 0.6425,
      "step": 10204
    },
    {
      "epoch": 0.912464234620887,
      "grad_norm": 0.17375833916611297,
      "learning_rate": 3.992313715793727e-06,
      "loss": 0.3649,
      "step": 10205
    },
    {
      "epoch": 0.9125536480686696,
      "grad_norm": 0.1530033359712871,
      "learning_rate": 3.984216547086606e-06,
      "loss": 0.6176,
      "step": 10206
    },
    {
      "epoch": 0.912643061516452,
      "grad_norm": 0.1575220862725917,
      "learning_rate": 3.97612743110729e-06,
      "loss": 0.609,
      "step": 10207
    },
    {
      "epoch": 0.9127324749642346,
      "grad_norm": 0.139505825812542,
      "learning_rate": 3.968046368534217e-06,
      "loss": 0.6132,
      "step": 10208
    },
    {
      "epoch": 0.9128218884120172,
      "grad_norm": 0.1836874719026376,
      "learning_rate": 3.9599733600450995e-06,
      "loss": 0.668,
      "step": 10209
    },
    {
      "epoch": 0.9129113018597997,
      "grad_norm": 0.1742858738224042,
      "learning_rate": 3.95190840631704e-06,
      "loss": 0.6275,
      "step": 10210
    },
    {
      "epoch": 0.9130007153075823,
      "grad_norm": 0.16460167734039083,
      "learning_rate": 3.94385150802643e-06,
      "loss": 0.6645,
      "step": 10211
    },
    {
      "epoch": 0.9130901287553648,
      "grad_norm": 0.16699001148775483,
      "learning_rate": 3.9358026658489535e-06,
      "loss": 0.622,
      "step": 10212
    },
    {
      "epoch": 0.9131795422031473,
      "grad_norm": 0.17010161053858036,
      "learning_rate": 3.927761880459735e-06,
      "loss": 0.6397,
      "step": 10213
    },
    {
      "epoch": 0.9132689556509299,
      "grad_norm": 0.16892502328804065,
      "learning_rate": 3.91972915253308e-06,
      "loss": 0.6222,
      "step": 10214
    },
    {
      "epoch": 0.9133583690987125,
      "grad_norm": 0.16705385508599585,
      "learning_rate": 3.9117044827427066e-06,
      "loss": 0.6633,
      "step": 10215
    },
    {
      "epoch": 0.913447782546495,
      "grad_norm": 0.1567609830670822,
      "learning_rate": 3.90368787176163e-06,
      "loss": 0.6534,
      "step": 10216
    },
    {
      "epoch": 0.9135371959942775,
      "grad_norm": 0.16276815921416507,
      "learning_rate": 3.895679320262202e-06,
      "loss": 0.6577,
      "step": 10217
    },
    {
      "epoch": 0.9136266094420601,
      "grad_norm": 0.1561895912246669,
      "learning_rate": 3.8876788289160855e-06,
      "loss": 0.6333,
      "step": 10218
    },
    {
      "epoch": 0.9137160228898427,
      "grad_norm": 0.16995165531508374,
      "learning_rate": 3.879686398394267e-06,
      "loss": 0.7023,
      "step": 10219
    },
    {
      "epoch": 0.9138054363376252,
      "grad_norm": 0.18178662276558288,
      "learning_rate": 3.871702029367064e-06,
      "loss": 0.5984,
      "step": 10220
    },
    {
      "epoch": 0.9138948497854077,
      "grad_norm": 0.16266399534484377,
      "learning_rate": 3.863725722504119e-06,
      "loss": 0.6853,
      "step": 10221
    },
    {
      "epoch": 0.9139842632331903,
      "grad_norm": 0.1492175588558236,
      "learning_rate": 3.8557574784744085e-06,
      "loss": 0.6277,
      "step": 10222
    },
    {
      "epoch": 0.9140736766809728,
      "grad_norm": 0.15524143177476393,
      "learning_rate": 3.847797297946198e-06,
      "loss": 0.5687,
      "step": 10223
    },
    {
      "epoch": 0.9141630901287554,
      "grad_norm": 0.16984441181147242,
      "learning_rate": 3.839845181587098e-06,
      "loss": 0.3781,
      "step": 10224
    },
    {
      "epoch": 0.9142525035765379,
      "grad_norm": 0.16267983652787493,
      "learning_rate": 3.831901130064064e-06,
      "loss": 0.6467,
      "step": 10225
    },
    {
      "epoch": 0.9143419170243204,
      "grad_norm": 0.13615824115422695,
      "learning_rate": 3.823965144043318e-06,
      "loss": 0.628,
      "step": 10226
    },
    {
      "epoch": 0.914431330472103,
      "grad_norm": 0.17885940886058535,
      "learning_rate": 3.816037224190483e-06,
      "loss": 0.6583,
      "step": 10227
    },
    {
      "epoch": 0.9145207439198856,
      "grad_norm": 0.1513584981080243,
      "learning_rate": 3.8081173711704497e-06,
      "loss": 0.6209,
      "step": 10228
    },
    {
      "epoch": 0.914610157367668,
      "grad_norm": 0.15746317203057553,
      "learning_rate": 3.8002055856474206e-06,
      "loss": 0.6138,
      "step": 10229
    },
    {
      "epoch": 0.9146995708154506,
      "grad_norm": 0.17307961910481603,
      "learning_rate": 3.7923018682849864e-06,
      "loss": 0.6242,
      "step": 10230
    },
    {
      "epoch": 0.9147889842632332,
      "grad_norm": 0.17374447812323415,
      "learning_rate": 3.784406219746006e-06,
      "loss": 0.6318,
      "step": 10231
    },
    {
      "epoch": 0.9148783977110158,
      "grad_norm": 0.150171584809301,
      "learning_rate": 3.7765186406926722e-06,
      "loss": 0.63,
      "step": 10232
    },
    {
      "epoch": 0.9149678111587983,
      "grad_norm": 0.16928789473972938,
      "learning_rate": 3.768639131786511e-06,
      "loss": 0.6249,
      "step": 10233
    },
    {
      "epoch": 0.9150572246065808,
      "grad_norm": 0.1746644221971442,
      "learning_rate": 3.760767693688361e-06,
      "loss": 0.6406,
      "step": 10234
    },
    {
      "epoch": 0.9151466380543634,
      "grad_norm": 0.1713365699833489,
      "learning_rate": 3.752904327058404e-06,
      "loss": 0.696,
      "step": 10235
    },
    {
      "epoch": 0.9152360515021459,
      "grad_norm": 0.15360979312242534,
      "learning_rate": 3.745049032556125e-06,
      "loss": 0.6527,
      "step": 10236
    },
    {
      "epoch": 0.9153254649499285,
      "grad_norm": 0.15617227563550673,
      "learning_rate": 3.7372018108403405e-06,
      "loss": 0.6619,
      "step": 10237
    },
    {
      "epoch": 0.915414878397711,
      "grad_norm": 0.15346657876819705,
      "learning_rate": 3.729362662569169e-06,
      "loss": 0.6296,
      "step": 10238
    },
    {
      "epoch": 0.9155042918454935,
      "grad_norm": 0.15756272361765913,
      "learning_rate": 3.7215315884000957e-06,
      "loss": 0.6649,
      "step": 10239
    },
    {
      "epoch": 0.9155937052932761,
      "grad_norm": 0.18104013629964585,
      "learning_rate": 3.7137085889898947e-06,
      "loss": 0.4025,
      "step": 10240
    },
    {
      "epoch": 0.9156831187410587,
      "grad_norm": 0.1640664878935651,
      "learning_rate": 3.705893664994664e-06,
      "loss": 0.6463,
      "step": 10241
    },
    {
      "epoch": 0.9157725321888412,
      "grad_norm": 0.16029928009183167,
      "learning_rate": 3.6980868170698456e-06,
      "loss": 0.6419,
      "step": 10242
    },
    {
      "epoch": 0.9158619456366237,
      "grad_norm": 0.17631757375421495,
      "learning_rate": 3.6902880458701826e-06,
      "loss": 0.6371,
      "step": 10243
    },
    {
      "epoch": 0.9159513590844063,
      "grad_norm": 0.15852313483443844,
      "learning_rate": 3.6824973520497408e-06,
      "loss": 0.6303,
      "step": 10244
    },
    {
      "epoch": 0.9160407725321889,
      "grad_norm": 0.1561904511875651,
      "learning_rate": 3.6747147362619304e-06,
      "loss": 0.6286,
      "step": 10245
    },
    {
      "epoch": 0.9161301859799714,
      "grad_norm": 0.1607931603282712,
      "learning_rate": 3.666940199159463e-06,
      "loss": 0.6291,
      "step": 10246
    },
    {
      "epoch": 0.9162195994277539,
      "grad_norm": 0.14899330275732509,
      "learning_rate": 3.6591737413943616e-06,
      "loss": 0.6117,
      "step": 10247
    },
    {
      "epoch": 0.9163090128755365,
      "grad_norm": 0.15082233247082646,
      "learning_rate": 3.6514153636180383e-06,
      "loss": 0.6422,
      "step": 10248
    },
    {
      "epoch": 0.916398426323319,
      "grad_norm": 0.1424713235787301,
      "learning_rate": 3.643665066481128e-06,
      "loss": 0.622,
      "step": 10249
    },
    {
      "epoch": 0.9164878397711016,
      "grad_norm": 0.16938156572698915,
      "learning_rate": 3.635922850633666e-06,
      "loss": 0.6208,
      "step": 10250
    },
    {
      "epoch": 0.9165772532188842,
      "grad_norm": 0.15759110413498847,
      "learning_rate": 3.6281887167249895e-06,
      "loss": 0.634,
      "step": 10251
    },
    {
      "epoch": 0.9166666666666666,
      "grad_norm": 0.15739766824750046,
      "learning_rate": 3.6204626654037233e-06,
      "loss": 0.6513,
      "step": 10252
    },
    {
      "epoch": 0.9167560801144492,
      "grad_norm": 0.1812180973809993,
      "learning_rate": 3.612744697317849e-06,
      "loss": 0.6597,
      "step": 10253
    },
    {
      "epoch": 0.9168454935622318,
      "grad_norm": 0.16535893654753261,
      "learning_rate": 3.6050348131146825e-06,
      "loss": 0.6335,
      "step": 10254
    },
    {
      "epoch": 0.9169349070100143,
      "grad_norm": 0.1628651763748734,
      "learning_rate": 3.597333013440829e-06,
      "loss": 0.6502,
      "step": 10255
    },
    {
      "epoch": 0.9170243204577968,
      "grad_norm": 0.15085780633607843,
      "learning_rate": 3.5896392989422377e-06,
      "loss": 0.6165,
      "step": 10256
    },
    {
      "epoch": 0.9171137339055794,
      "grad_norm": 0.15659504673339805,
      "learning_rate": 3.5819536702641485e-06,
      "loss": 0.6424,
      "step": 10257
    },
    {
      "epoch": 0.917203147353362,
      "grad_norm": 0.15542521096053702,
      "learning_rate": 3.5742761280511685e-06,
      "loss": 0.6445,
      "step": 10258
    },
    {
      "epoch": 0.9172925608011445,
      "grad_norm": 0.16628607576522894,
      "learning_rate": 3.566606672947204e-06,
      "loss": 0.6302,
      "step": 10259
    },
    {
      "epoch": 0.9173819742489271,
      "grad_norm": 0.1625508448999176,
      "learning_rate": 3.5589453055954737e-06,
      "loss": 0.6321,
      "step": 10260
    },
    {
      "epoch": 0.9174713876967096,
      "grad_norm": 0.1475134033975942,
      "learning_rate": 3.5512920266385085e-06,
      "loss": 0.6056,
      "step": 10261
    },
    {
      "epoch": 0.9175608011444921,
      "grad_norm": 0.15797166236847637,
      "learning_rate": 3.5436468367182284e-06,
      "loss": 0.6458,
      "step": 10262
    },
    {
      "epoch": 0.9176502145922747,
      "grad_norm": 0.16436300090796013,
      "learning_rate": 3.536009736475787e-06,
      "loss": 0.631,
      "step": 10263
    },
    {
      "epoch": 0.9177396280400573,
      "grad_norm": 0.17283349201614562,
      "learning_rate": 3.5283807265517053e-06,
      "loss": 0.667,
      "step": 10264
    },
    {
      "epoch": 0.9178290414878397,
      "grad_norm": 0.16140296788315484,
      "learning_rate": 3.5207598075858383e-06,
      "loss": 0.6134,
      "step": 10265
    },
    {
      "epoch": 0.9179184549356223,
      "grad_norm": 0.163648166115752,
      "learning_rate": 3.5131469802173076e-06,
      "loss": 0.6316,
      "step": 10266
    },
    {
      "epoch": 0.9180078683834049,
      "grad_norm": 0.18810791040318425,
      "learning_rate": 3.5055422450846253e-06,
      "loss": 0.7028,
      "step": 10267
    },
    {
      "epoch": 0.9180972818311874,
      "grad_norm": 0.15859945814590218,
      "learning_rate": 3.4979456028255806e-06,
      "loss": 0.6145,
      "step": 10268
    },
    {
      "epoch": 0.91818669527897,
      "grad_norm": 0.17689338709698124,
      "learning_rate": 3.4903570540772866e-06,
      "loss": 0.6651,
      "step": 10269
    },
    {
      "epoch": 0.9182761087267525,
      "grad_norm": 0.163419977571877,
      "learning_rate": 3.482776599476201e-06,
      "loss": 0.6611,
      "step": 10270
    },
    {
      "epoch": 0.918365522174535,
      "grad_norm": 0.17007933873594777,
      "learning_rate": 3.4752042396580807e-06,
      "loss": 0.6234,
      "step": 10271
    },
    {
      "epoch": 0.9184549356223176,
      "grad_norm": 0.18367650549886677,
      "learning_rate": 3.467639975257997e-06,
      "loss": 0.6545,
      "step": 10272
    },
    {
      "epoch": 0.9185443490701002,
      "grad_norm": 0.153846765976816,
      "learning_rate": 3.4600838069103635e-06,
      "loss": 0.6346,
      "step": 10273
    },
    {
      "epoch": 0.9186337625178826,
      "grad_norm": 0.17069787422351984,
      "learning_rate": 3.4525357352489295e-06,
      "loss": 0.6532,
      "step": 10274
    },
    {
      "epoch": 0.9187231759656652,
      "grad_norm": 0.15508265436666765,
      "learning_rate": 3.4449957609066996e-06,
      "loss": 0.6472,
      "step": 10275
    },
    {
      "epoch": 0.9188125894134478,
      "grad_norm": 0.17105282558061738,
      "learning_rate": 3.43746388451609e-06,
      "loss": 0.648,
      "step": 10276
    },
    {
      "epoch": 0.9189020028612304,
      "grad_norm": 0.17735745699788166,
      "learning_rate": 3.429940106708751e-06,
      "loss": 0.6468,
      "step": 10277
    },
    {
      "epoch": 0.9189914163090128,
      "grad_norm": 0.17038924890690008,
      "learning_rate": 3.422424428115711e-06,
      "loss": 0.647,
      "step": 10278
    },
    {
      "epoch": 0.9190808297567954,
      "grad_norm": 0.16451724492324366,
      "learning_rate": 3.4149168493673113e-06,
      "loss": 0.6574,
      "step": 10279
    },
    {
      "epoch": 0.919170243204578,
      "grad_norm": 0.16842462303497005,
      "learning_rate": 3.40741737109318e-06,
      "loss": 0.5874,
      "step": 10280
    },
    {
      "epoch": 0.9192596566523605,
      "grad_norm": 0.1798604838705743,
      "learning_rate": 3.3999259939222927e-06,
      "loss": 0.6289,
      "step": 10281
    },
    {
      "epoch": 0.9193490701001431,
      "grad_norm": 0.17773835508366903,
      "learning_rate": 3.3924427184829575e-06,
      "loss": 0.3719,
      "step": 10282
    },
    {
      "epoch": 0.9194384835479256,
      "grad_norm": 0.14092474348695821,
      "learning_rate": 3.3849675454027727e-06,
      "loss": 0.5755,
      "step": 10283
    },
    {
      "epoch": 0.9195278969957081,
      "grad_norm": 0.15870726592658338,
      "learning_rate": 3.3775004753086812e-06,
      "loss": 0.6236,
      "step": 10284
    },
    {
      "epoch": 0.9196173104434907,
      "grad_norm": 0.16131118773442185,
      "learning_rate": 3.3700415088269377e-06,
      "loss": 0.5564,
      "step": 10285
    },
    {
      "epoch": 0.9197067238912733,
      "grad_norm": 0.17911363204294192,
      "learning_rate": 3.362590646583108e-06,
      "loss": 0.6218,
      "step": 10286
    },
    {
      "epoch": 0.9197961373390557,
      "grad_norm": 0.15789816189933353,
      "learning_rate": 3.3551478892020926e-06,
      "loss": 0.6234,
      "step": 10287
    },
    {
      "epoch": 0.9198855507868383,
      "grad_norm": 0.17910254255357894,
      "learning_rate": 3.3477132373081254e-06,
      "loss": 0.6642,
      "step": 10288
    },
    {
      "epoch": 0.9199749642346209,
      "grad_norm": 0.17458758441764904,
      "learning_rate": 3.3402866915246854e-06,
      "loss": 0.6705,
      "step": 10289
    },
    {
      "epoch": 0.9200643776824035,
      "grad_norm": 0.1680458934394428,
      "learning_rate": 3.3328682524746967e-06,
      "loss": 0.6388,
      "step": 10290
    },
    {
      "epoch": 0.920153791130186,
      "grad_norm": 0.15943548802432472,
      "learning_rate": 3.325457920780295e-06,
      "loss": 0.6319,
      "step": 10291
    },
    {
      "epoch": 0.9202432045779685,
      "grad_norm": 0.1581033496590004,
      "learning_rate": 3.318055697062983e-06,
      "loss": 0.6328,
      "step": 10292
    },
    {
      "epoch": 0.9203326180257511,
      "grad_norm": 0.15843861409235044,
      "learning_rate": 3.310661581943586e-06,
      "loss": 0.6215,
      "step": 10293
    },
    {
      "epoch": 0.9204220314735336,
      "grad_norm": 0.15973695049331932,
      "learning_rate": 3.3032755760422196e-06,
      "loss": 0.6214,
      "step": 10294
    },
    {
      "epoch": 0.9205114449213162,
      "grad_norm": 0.18581063971424916,
      "learning_rate": 3.2958976799783326e-06,
      "loss": 0.717,
      "step": 10295
    },
    {
      "epoch": 0.9206008583690987,
      "grad_norm": 0.14825653060743488,
      "learning_rate": 3.288527894370752e-06,
      "loss": 0.5912,
      "step": 10296
    },
    {
      "epoch": 0.9206902718168812,
      "grad_norm": 0.1699229994638704,
      "learning_rate": 3.281166219837517e-06,
      "loss": 0.6454,
      "step": 10297
    },
    {
      "epoch": 0.9207796852646638,
      "grad_norm": 0.1673732861028986,
      "learning_rate": 3.273812656996067e-06,
      "loss": 0.6417,
      "step": 10298
    },
    {
      "epoch": 0.9208690987124464,
      "grad_norm": 0.1628393606941906,
      "learning_rate": 3.2664672064631528e-06,
      "loss": 0.6446,
      "step": 10299
    },
    {
      "epoch": 0.920958512160229,
      "grad_norm": 0.15641120173552084,
      "learning_rate": 3.2591298688547932e-06,
      "loss": 0.602,
      "step": 10300
    },
    {
      "epoch": 0.9210479256080114,
      "grad_norm": 0.1689477033059866,
      "learning_rate": 3.2518006447863847e-06,
      "loss": 0.6237,
      "step": 10301
    },
    {
      "epoch": 0.921137339055794,
      "grad_norm": 0.1617274245842624,
      "learning_rate": 3.244479534872602e-06,
      "loss": 0.6768,
      "step": 10302
    },
    {
      "epoch": 0.9212267525035766,
      "grad_norm": 0.176404586465455,
      "learning_rate": 3.2371665397274763e-06,
      "loss": 0.6546,
      "step": 10303
    },
    {
      "epoch": 0.9213161659513591,
      "grad_norm": 0.1577176469987362,
      "learning_rate": 3.2298616599643285e-06,
      "loss": 0.672,
      "step": 10304
    },
    {
      "epoch": 0.9214055793991416,
      "grad_norm": 0.15787353110220645,
      "learning_rate": 3.2225648961958344e-06,
      "loss": 0.6344,
      "step": 10305
    },
    {
      "epoch": 0.9214949928469242,
      "grad_norm": 0.17106032049864964,
      "learning_rate": 3.215276249033927e-06,
      "loss": 0.6417,
      "step": 10306
    },
    {
      "epoch": 0.9215844062947067,
      "grad_norm": 0.13618492285081846,
      "learning_rate": 3.207995719089918e-06,
      "loss": 0.5944,
      "step": 10307
    },
    {
      "epoch": 0.9216738197424893,
      "grad_norm": 0.13793272454170435,
      "learning_rate": 3.200723306974418e-06,
      "loss": 0.6049,
      "step": 10308
    },
    {
      "epoch": 0.9217632331902719,
      "grad_norm": 0.1795185616572345,
      "learning_rate": 3.1934590132973283e-06,
      "loss": 0.6725,
      "step": 10309
    },
    {
      "epoch": 0.9218526466380543,
      "grad_norm": 0.15903484277874944,
      "learning_rate": 3.186202838667951e-06,
      "loss": 0.6587,
      "step": 10310
    },
    {
      "epoch": 0.9219420600858369,
      "grad_norm": 0.1596382904440433,
      "learning_rate": 3.1789547836947986e-06,
      "loss": 0.643,
      "step": 10311
    },
    {
      "epoch": 0.9220314735336195,
      "grad_norm": 0.14280831148219914,
      "learning_rate": 3.171714848985785e-06,
      "loss": 0.6403,
      "step": 10312
    },
    {
      "epoch": 0.922120886981402,
      "grad_norm": 0.16701766191033487,
      "learning_rate": 3.164483035148114e-06,
      "loss": 0.6385,
      "step": 10313
    },
    {
      "epoch": 0.9222103004291845,
      "grad_norm": 0.17307266774655122,
      "learning_rate": 3.157259342788299e-06,
      "loss": 0.6648,
      "step": 10314
    },
    {
      "epoch": 0.9222997138769671,
      "grad_norm": 0.16113193523173228,
      "learning_rate": 3.150043772512179e-06,
      "loss": 0.6573,
      "step": 10315
    },
    {
      "epoch": 0.9223891273247496,
      "grad_norm": 0.14432580685487342,
      "learning_rate": 3.1428363249249247e-06,
      "loss": 0.6174,
      "step": 10316
    },
    {
      "epoch": 0.9224785407725322,
      "grad_norm": 0.16311693976427294,
      "learning_rate": 3.1356370006310197e-06,
      "loss": 0.632,
      "step": 10317
    },
    {
      "epoch": 0.9225679542203148,
      "grad_norm": 0.13592765285397304,
      "learning_rate": 3.1284458002342475e-06,
      "loss": 0.6101,
      "step": 10318
    },
    {
      "epoch": 0.9226573676680973,
      "grad_norm": 0.15886175163913147,
      "learning_rate": 3.121262724337748e-06,
      "loss": 0.6657,
      "step": 10319
    },
    {
      "epoch": 0.9227467811158798,
      "grad_norm": 0.1535595944075622,
      "learning_rate": 3.1140877735439387e-06,
      "loss": 0.5922,
      "step": 10320
    },
    {
      "epoch": 0.9228361945636624,
      "grad_norm": 0.1830482050612488,
      "learning_rate": 3.1069209484545725e-06,
      "loss": 0.7111,
      "step": 10321
    },
    {
      "epoch": 0.922925608011445,
      "grad_norm": 0.16331732363374923,
      "learning_rate": 3.0997622496707456e-06,
      "loss": 0.643,
      "step": 10322
    },
    {
      "epoch": 0.9230150214592274,
      "grad_norm": 0.1686644316274225,
      "learning_rate": 3.0926116777928116e-06,
      "loss": 0.6286,
      "step": 10323
    },
    {
      "epoch": 0.92310443490701,
      "grad_norm": 0.17198672659606584,
      "learning_rate": 3.0854692334205125e-06,
      "loss": 0.6576,
      "step": 10324
    },
    {
      "epoch": 0.9231938483547926,
      "grad_norm": 0.15703875140920723,
      "learning_rate": 3.0783349171528697e-06,
      "loss": 0.6259,
      "step": 10325
    },
    {
      "epoch": 0.9232832618025751,
      "grad_norm": 0.16713580635852193,
      "learning_rate": 3.0712087295882154e-06,
      "loss": 0.6437,
      "step": 10326
    },
    {
      "epoch": 0.9233726752503576,
      "grad_norm": 0.1669560131185244,
      "learning_rate": 3.064090671324238e-06,
      "loss": 0.6693,
      "step": 10327
    },
    {
      "epoch": 0.9234620886981402,
      "grad_norm": 0.1865731523594864,
      "learning_rate": 3.0569807429579044e-06,
      "loss": 0.6875,
      "step": 10328
    },
    {
      "epoch": 0.9235515021459227,
      "grad_norm": 0.15350056020012384,
      "learning_rate": 3.0498789450855046e-06,
      "loss": 0.5973,
      "step": 10329
    },
    {
      "epoch": 0.9236409155937053,
      "grad_norm": 0.16861432402675913,
      "learning_rate": 3.0427852783026843e-06,
      "loss": 0.3879,
      "step": 10330
    },
    {
      "epoch": 0.9237303290414879,
      "grad_norm": 0.17323119487156097,
      "learning_rate": 3.0356997432043565e-06,
      "loss": 0.6797,
      "step": 10331
    },
    {
      "epoch": 0.9238197424892703,
      "grad_norm": 0.2010355490643592,
      "learning_rate": 3.0286223403848014e-06,
      "loss": 0.6999,
      "step": 10332
    },
    {
      "epoch": 0.9239091559370529,
      "grad_norm": 0.1574183176475278,
      "learning_rate": 3.021553070437577e-06,
      "loss": 0.6157,
      "step": 10333
    },
    {
      "epoch": 0.9239985693848355,
      "grad_norm": 0.1439471913103536,
      "learning_rate": 3.0144919339555654e-06,
      "loss": 0.6002,
      "step": 10334
    },
    {
      "epoch": 0.9240879828326181,
      "grad_norm": 0.17435206956083568,
      "learning_rate": 3.0074389315309928e-06,
      "loss": 0.6192,
      "step": 10335
    },
    {
      "epoch": 0.9241773962804005,
      "grad_norm": 0.1584013768777858,
      "learning_rate": 3.000394063755396e-06,
      "loss": 0.6445,
      "step": 10336
    },
    {
      "epoch": 0.9242668097281831,
      "grad_norm": 0.17694338217898498,
      "learning_rate": 2.9933573312195708e-06,
      "loss": 0.6394,
      "step": 10337
    },
    {
      "epoch": 0.9243562231759657,
      "grad_norm": 0.1583140774946886,
      "learning_rate": 2.9863287345137216e-06,
      "loss": 0.6352,
      "step": 10338
    },
    {
      "epoch": 0.9244456366237482,
      "grad_norm": 0.1592596155047042,
      "learning_rate": 2.979308274227344e-06,
      "loss": 0.6425,
      "step": 10339
    },
    {
      "epoch": 0.9245350500715308,
      "grad_norm": 0.1612308579086934,
      "learning_rate": 2.9722959509491888e-06,
      "loss": 0.6393,
      "step": 10340
    },
    {
      "epoch": 0.9246244635193133,
      "grad_norm": 0.16822262288317294,
      "learning_rate": 2.965291765267386e-06,
      "loss": 0.6368,
      "step": 10341
    },
    {
      "epoch": 0.9247138769670958,
      "grad_norm": 0.15075427466271532,
      "learning_rate": 2.958295717769399e-06,
      "loss": 0.6414,
      "step": 10342
    },
    {
      "epoch": 0.9248032904148784,
      "grad_norm": 0.1432544693841015,
      "learning_rate": 2.9513078090419365e-06,
      "loss": 0.6358,
      "step": 10343
    },
    {
      "epoch": 0.924892703862661,
      "grad_norm": 0.1839643778284853,
      "learning_rate": 2.944328039671085e-06,
      "loss": 0.6847,
      "step": 10344
    },
    {
      "epoch": 0.9249821173104434,
      "grad_norm": 0.16456086600677491,
      "learning_rate": 2.93735641024222e-06,
      "loss": 0.5785,
      "step": 10345
    },
    {
      "epoch": 0.925071530758226,
      "grad_norm": 0.1813082451965254,
      "learning_rate": 2.930392921340053e-06,
      "loss": 0.6931,
      "step": 10346
    },
    {
      "epoch": 0.9251609442060086,
      "grad_norm": 0.15430782327865253,
      "learning_rate": 2.9234375735486153e-06,
      "loss": 0.6474,
      "step": 10347
    },
    {
      "epoch": 0.9252503576537912,
      "grad_norm": 0.1688483078611504,
      "learning_rate": 2.916490367451219e-06,
      "loss": 0.6745,
      "step": 10348
    },
    {
      "epoch": 0.9253397711015737,
      "grad_norm": 0.15012475863999916,
      "learning_rate": 2.909551303630531e-06,
      "loss": 0.64,
      "step": 10349
    },
    {
      "epoch": 0.9254291845493562,
      "grad_norm": 0.1551368626085025,
      "learning_rate": 2.9026203826685195e-06,
      "loss": 0.6468,
      "step": 10350
    },
    {
      "epoch": 0.9255185979971388,
      "grad_norm": 0.17223395921484635,
      "learning_rate": 2.8956976051464636e-06,
      "loss": 0.6276,
      "step": 10351
    },
    {
      "epoch": 0.9256080114449213,
      "grad_norm": 0.16319793035451122,
      "learning_rate": 2.8887829716449876e-06,
      "loss": 0.6447,
      "step": 10352
    },
    {
      "epoch": 0.9256974248927039,
      "grad_norm": 0.1531180064289464,
      "learning_rate": 2.8818764827440057e-06,
      "loss": 0.6701,
      "step": 10353
    },
    {
      "epoch": 0.9257868383404864,
      "grad_norm": 0.17857326987430916,
      "learning_rate": 2.8749781390227437e-06,
      "loss": 0.6137,
      "step": 10354
    },
    {
      "epoch": 0.9258762517882689,
      "grad_norm": 0.1615047216784315,
      "learning_rate": 2.8680879410597716e-06,
      "loss": 0.662,
      "step": 10355
    },
    {
      "epoch": 0.9259656652360515,
      "grad_norm": 0.15842285352435795,
      "learning_rate": 2.861205889432972e-06,
      "loss": 0.6457,
      "step": 10356
    },
    {
      "epoch": 0.9260550786838341,
      "grad_norm": 0.1644088545995944,
      "learning_rate": 2.854331984719505e-06,
      "loss": 0.6724,
      "step": 10357
    },
    {
      "epoch": 0.9261444921316166,
      "grad_norm": 0.17475473284227663,
      "learning_rate": 2.8474662274958987e-06,
      "loss": 0.6505,
      "step": 10358
    },
    {
      "epoch": 0.9262339055793991,
      "grad_norm": 0.16239451561189702,
      "learning_rate": 2.8406086183379586e-06,
      "loss": 0.6473,
      "step": 10359
    },
    {
      "epoch": 0.9263233190271817,
      "grad_norm": 0.15552425753391344,
      "learning_rate": 2.8337591578208366e-06,
      "loss": 0.65,
      "step": 10360
    },
    {
      "epoch": 0.9264127324749643,
      "grad_norm": 0.1553918436488152,
      "learning_rate": 2.826917846518995e-06,
      "loss": 0.644,
      "step": 10361
    },
    {
      "epoch": 0.9265021459227468,
      "grad_norm": 0.14417326509009767,
      "learning_rate": 2.820084685006208e-06,
      "loss": 0.5943,
      "step": 10362
    },
    {
      "epoch": 0.9265915593705293,
      "grad_norm": 0.17910766451256793,
      "learning_rate": 2.8132596738555397e-06,
      "loss": 0.6237,
      "step": 10363
    },
    {
      "epoch": 0.9266809728183119,
      "grad_norm": 0.1742825357350234,
      "learning_rate": 2.8064428136394096e-06,
      "loss": 0.6794,
      "step": 10364
    },
    {
      "epoch": 0.9267703862660944,
      "grad_norm": 0.1719390187376579,
      "learning_rate": 2.799634104929538e-06,
      "loss": 0.359,
      "step": 10365
    },
    {
      "epoch": 0.926859799713877,
      "grad_norm": 0.16690601555702858,
      "learning_rate": 2.7928335482969802e-06,
      "loss": 0.6492,
      "step": 10366
    },
    {
      "epoch": 0.9269492131616596,
      "grad_norm": 0.1551476496200462,
      "learning_rate": 2.7860411443120684e-06,
      "loss": 0.6185,
      "step": 10367
    },
    {
      "epoch": 0.927038626609442,
      "grad_norm": 0.15515664343462648,
      "learning_rate": 2.7792568935444796e-06,
      "loss": 0.6444,
      "step": 10368
    },
    {
      "epoch": 0.9271280400572246,
      "grad_norm": 0.16714520451263098,
      "learning_rate": 2.772480796563204e-06,
      "loss": 0.6,
      "step": 10369
    },
    {
      "epoch": 0.9272174535050072,
      "grad_norm": 0.1687359407346585,
      "learning_rate": 2.765712853936553e-06,
      "loss": 0.6563,
      "step": 10370
    },
    {
      "epoch": 0.9273068669527897,
      "grad_norm": 0.15983242317662755,
      "learning_rate": 2.7589530662321285e-06,
      "loss": 0.5975,
      "step": 10371
    },
    {
      "epoch": 0.9273962804005722,
      "grad_norm": 0.1554257245668893,
      "learning_rate": 2.7522014340168547e-06,
      "loss": 0.6135,
      "step": 10372
    },
    {
      "epoch": 0.9274856938483548,
      "grad_norm": 0.15174646164498837,
      "learning_rate": 2.745457957857023e-06,
      "loss": 0.6301,
      "step": 10373
    },
    {
      "epoch": 0.9275751072961373,
      "grad_norm": 0.17769503774003442,
      "learning_rate": 2.7387226383181696e-06,
      "loss": 0.394,
      "step": 10374
    },
    {
      "epoch": 0.9276645207439199,
      "grad_norm": 0.16401692112071475,
      "learning_rate": 2.7319954759651877e-06,
      "loss": 0.6346,
      "step": 10375
    },
    {
      "epoch": 0.9277539341917024,
      "grad_norm": 0.16358696905119427,
      "learning_rate": 2.7252764713622814e-06,
      "loss": 0.637,
      "step": 10376
    },
    {
      "epoch": 0.927843347639485,
      "grad_norm": 0.1671799532454895,
      "learning_rate": 2.718565625072955e-06,
      "loss": 0.6255,
      "step": 10377
    },
    {
      "epoch": 0.9279327610872675,
      "grad_norm": 0.1699782062696237,
      "learning_rate": 2.711862937660037e-06,
      "loss": 0.6363,
      "step": 10378
    },
    {
      "epoch": 0.9280221745350501,
      "grad_norm": 0.1739974995016189,
      "learning_rate": 2.7051684096856876e-06,
      "loss": 0.6588,
      "step": 10379
    },
    {
      "epoch": 0.9281115879828327,
      "grad_norm": 0.1566515085313409,
      "learning_rate": 2.6984820417113587e-06,
      "loss": 0.6592,
      "step": 10380
    },
    {
      "epoch": 0.9282010014306151,
      "grad_norm": 0.14354517319687551,
      "learning_rate": 2.6918038342978345e-06,
      "loss": 0.6135,
      "step": 10381
    },
    {
      "epoch": 0.9282904148783977,
      "grad_norm": 0.16097517522552127,
      "learning_rate": 2.685133788005201e-06,
      "loss": 0.6605,
      "step": 10382
    },
    {
      "epoch": 0.9283798283261803,
      "grad_norm": 0.1608522408490065,
      "learning_rate": 2.678471903392865e-06,
      "loss": 0.6471,
      "step": 10383
    },
    {
      "epoch": 0.9284692417739628,
      "grad_norm": 0.16059258289201597,
      "learning_rate": 2.6718181810195696e-06,
      "loss": 0.6506,
      "step": 10384
    },
    {
      "epoch": 0.9285586552217453,
      "grad_norm": 0.1648833576445659,
      "learning_rate": 2.6651726214433235e-06,
      "loss": 0.6354,
      "step": 10385
    },
    {
      "epoch": 0.9286480686695279,
      "grad_norm": 0.17002873980949856,
      "learning_rate": 2.6585352252215036e-06,
      "loss": 0.6046,
      "step": 10386
    },
    {
      "epoch": 0.9287374821173104,
      "grad_norm": 0.1600237044868962,
      "learning_rate": 2.651905992910786e-06,
      "loss": 0.6641,
      "step": 10387
    },
    {
      "epoch": 0.928826895565093,
      "grad_norm": 0.17568591927471647,
      "learning_rate": 2.6452849250671373e-06,
      "loss": 0.6436,
      "step": 10388
    },
    {
      "epoch": 0.9289163090128756,
      "grad_norm": 0.16707860859245457,
      "learning_rate": 2.6386720222458693e-06,
      "loss": 0.644,
      "step": 10389
    },
    {
      "epoch": 0.929005722460658,
      "grad_norm": 0.1583445272523447,
      "learning_rate": 2.6320672850016047e-06,
      "loss": 0.5855,
      "step": 10390
    },
    {
      "epoch": 0.9290951359084406,
      "grad_norm": 0.17924673125531942,
      "learning_rate": 2.625470713888256e-06,
      "loss": 0.6556,
      "step": 10391
    },
    {
      "epoch": 0.9291845493562232,
      "grad_norm": 0.16190049981507715,
      "learning_rate": 2.618882309459081e-06,
      "loss": 0.6736,
      "step": 10392
    },
    {
      "epoch": 0.9292739628040058,
      "grad_norm": 0.17401098567493847,
      "learning_rate": 2.612302072266637e-06,
      "loss": 0.6372,
      "step": 10393
    },
    {
      "epoch": 0.9293633762517882,
      "grad_norm": 0.1631729837356932,
      "learning_rate": 2.605730002862805e-06,
      "loss": 0.6282,
      "step": 10394
    },
    {
      "epoch": 0.9294527896995708,
      "grad_norm": 0.1593503728266124,
      "learning_rate": 2.5991661017987777e-06,
      "loss": 0.6084,
      "step": 10395
    },
    {
      "epoch": 0.9295422031473534,
      "grad_norm": 0.15089810962648617,
      "learning_rate": 2.5926103696250703e-06,
      "loss": 0.6249,
      "step": 10396
    },
    {
      "epoch": 0.9296316165951359,
      "grad_norm": 0.1526047753939926,
      "learning_rate": 2.586062806891476e-06,
      "loss": 0.616,
      "step": 10397
    },
    {
      "epoch": 0.9297210300429185,
      "grad_norm": 0.1769935119606462,
      "learning_rate": 2.5795234141471445e-06,
      "loss": 0.6549,
      "step": 10398
    },
    {
      "epoch": 0.929810443490701,
      "grad_norm": 0.1690315801617097,
      "learning_rate": 2.5729921919405377e-06,
      "loss": 0.6233,
      "step": 10399
    },
    {
      "epoch": 0.9298998569384835,
      "grad_norm": 0.18092655098755522,
      "learning_rate": 2.5664691408194165e-06,
      "loss": 0.6576,
      "step": 10400
    },
    {
      "epoch": 0.9299892703862661,
      "grad_norm": 0.158406871681308,
      "learning_rate": 2.559954261330866e-06,
      "loss": 0.6179,
      "step": 10401
    },
    {
      "epoch": 0.9300786838340487,
      "grad_norm": 0.15443489583934958,
      "learning_rate": 2.55344755402126e-06,
      "loss": 0.626,
      "step": 10402
    },
    {
      "epoch": 0.9301680972818311,
      "grad_norm": 0.17820031116959056,
      "learning_rate": 2.546949019436329e-06,
      "loss": 0.6569,
      "step": 10403
    },
    {
      "epoch": 0.9302575107296137,
      "grad_norm": 0.1596817016268803,
      "learning_rate": 2.540458658121092e-06,
      "loss": 0.6425,
      "step": 10404
    },
    {
      "epoch": 0.9303469241773963,
      "grad_norm": 0.15263749006304572,
      "learning_rate": 2.533976470619881e-06,
      "loss": 0.6419,
      "step": 10405
    },
    {
      "epoch": 0.9304363376251789,
      "grad_norm": 0.1648596231742979,
      "learning_rate": 2.5275024574763496e-06,
      "loss": 0.6512,
      "step": 10406
    },
    {
      "epoch": 0.9305257510729614,
      "grad_norm": 0.17122028174658854,
      "learning_rate": 2.5210366192334745e-06,
      "loss": 0.6539,
      "step": 10407
    },
    {
      "epoch": 0.9306151645207439,
      "grad_norm": 0.1861877243993135,
      "learning_rate": 2.514578956433533e-06,
      "loss": 0.6595,
      "step": 10408
    },
    {
      "epoch": 0.9307045779685265,
      "grad_norm": 0.1778330665757691,
      "learning_rate": 2.5081294696181255e-06,
      "loss": 0.638,
      "step": 10409
    },
    {
      "epoch": 0.930793991416309,
      "grad_norm": 0.1462334027775663,
      "learning_rate": 2.501688159328164e-06,
      "loss": 0.6085,
      "step": 10410
    },
    {
      "epoch": 0.9308834048640916,
      "grad_norm": 0.16039211138067286,
      "learning_rate": 2.49525502610386e-06,
      "loss": 0.6115,
      "step": 10411
    },
    {
      "epoch": 0.9309728183118741,
      "grad_norm": 0.15950811533058035,
      "learning_rate": 2.48883007048476e-06,
      "loss": 0.6104,
      "step": 10412
    },
    {
      "epoch": 0.9310622317596566,
      "grad_norm": 0.165726539623763,
      "learning_rate": 2.4824132930097222e-06,
      "loss": 0.641,
      "step": 10413
    },
    {
      "epoch": 0.9311516452074392,
      "grad_norm": 0.15955481909611485,
      "learning_rate": 2.4760046942169048e-06,
      "loss": 0.6294,
      "step": 10414
    },
    {
      "epoch": 0.9312410586552218,
      "grad_norm": 0.15677150239284895,
      "learning_rate": 2.4696042746438108e-06,
      "loss": 0.6723,
      "step": 10415
    },
    {
      "epoch": 0.9313304721030042,
      "grad_norm": 0.16684248049047037,
      "learning_rate": 2.4632120348272003e-06,
      "loss": 0.647,
      "step": 10416
    },
    {
      "epoch": 0.9314198855507868,
      "grad_norm": 0.1579965248448775,
      "learning_rate": 2.456827975303211e-06,
      "loss": 0.654,
      "step": 10417
    },
    {
      "epoch": 0.9315092989985694,
      "grad_norm": 0.15711242332588515,
      "learning_rate": 2.4504520966072476e-06,
      "loss": 0.6417,
      "step": 10418
    },
    {
      "epoch": 0.931598712446352,
      "grad_norm": 0.1592338052168995,
      "learning_rate": 2.4440843992740714e-06,
      "loss": 0.646,
      "step": 10419
    },
    {
      "epoch": 0.9316881258941345,
      "grad_norm": 0.15952193450950097,
      "learning_rate": 2.4377248838376996e-06,
      "loss": 0.5969,
      "step": 10420
    },
    {
      "epoch": 0.931777539341917,
      "grad_norm": 0.1442708710420152,
      "learning_rate": 2.4313735508315396e-06,
      "loss": 0.6418,
      "step": 10421
    },
    {
      "epoch": 0.9318669527896996,
      "grad_norm": 0.1644503176330398,
      "learning_rate": 2.425030400788231e-06,
      "loss": 0.6374,
      "step": 10422
    },
    {
      "epoch": 0.9319563662374821,
      "grad_norm": 0.15493365217637145,
      "learning_rate": 2.4186954342397815e-06,
      "loss": 0.6148,
      "step": 10423
    },
    {
      "epoch": 0.9320457796852647,
      "grad_norm": 0.16107071317002292,
      "learning_rate": 2.4123686517175113e-06,
      "loss": 0.6144,
      "step": 10424
    },
    {
      "epoch": 0.9321351931330472,
      "grad_norm": 0.168901952320925,
      "learning_rate": 2.406050053752018e-06,
      "loss": 0.6364,
      "step": 10425
    },
    {
      "epoch": 0.9322246065808297,
      "grad_norm": 0.14548922558962005,
      "learning_rate": 2.3997396408732443e-06,
      "loss": 0.6094,
      "step": 10426
    },
    {
      "epoch": 0.9323140200286123,
      "grad_norm": 0.15883347653604027,
      "learning_rate": 2.3934374136104222e-06,
      "loss": 0.6504,
      "step": 10427
    },
    {
      "epoch": 0.9324034334763949,
      "grad_norm": 0.1570408002442134,
      "learning_rate": 2.38714337249214e-06,
      "loss": 0.5975,
      "step": 10428
    },
    {
      "epoch": 0.9324928469241774,
      "grad_norm": 0.17570732445970028,
      "learning_rate": 2.3808575180462533e-06,
      "loss": 0.6777,
      "step": 10429
    },
    {
      "epoch": 0.9325822603719599,
      "grad_norm": 0.1863466321821012,
      "learning_rate": 2.374579850799963e-06,
      "loss": 0.3469,
      "step": 10430
    },
    {
      "epoch": 0.9326716738197425,
      "grad_norm": 0.15208702263379056,
      "learning_rate": 2.3683103712797473e-06,
      "loss": 0.6477,
      "step": 10431
    },
    {
      "epoch": 0.932761087267525,
      "grad_norm": 0.16873086634931506,
      "learning_rate": 2.3620490800114304e-06,
      "loss": 0.6391,
      "step": 10432
    },
    {
      "epoch": 0.9328505007153076,
      "grad_norm": 0.15730769327003521,
      "learning_rate": 2.3557959775201478e-06,
      "loss": 0.6448,
      "step": 10433
    },
    {
      "epoch": 0.9329399141630901,
      "grad_norm": 0.1688314257147117,
      "learning_rate": 2.349551064330313e-06,
      "loss": 0.6195,
      "step": 10434
    },
    {
      "epoch": 0.9330293276108726,
      "grad_norm": 0.14919701175617417,
      "learning_rate": 2.3433143409657188e-06,
      "loss": 0.6155,
      "step": 10435
    },
    {
      "epoch": 0.9331187410586552,
      "grad_norm": 0.17528829429931242,
      "learning_rate": 2.337085807949413e-06,
      "loss": 0.3753,
      "step": 10436
    },
    {
      "epoch": 0.9332081545064378,
      "grad_norm": 0.16250437948538313,
      "learning_rate": 2.3308654658037555e-06,
      "loss": 0.6285,
      "step": 10437
    },
    {
      "epoch": 0.9332975679542204,
      "grad_norm": 0.15313549310039318,
      "learning_rate": 2.3246533150504735e-06,
      "loss": 0.5858,
      "step": 10438
    },
    {
      "epoch": 0.9333869814020028,
      "grad_norm": 0.15703874670844054,
      "learning_rate": 2.3184493562105504e-06,
      "loss": 0.6244,
      "step": 10439
    },
    {
      "epoch": 0.9334763948497854,
      "grad_norm": 0.16582161128899656,
      "learning_rate": 2.312253589804314e-06,
      "loss": 0.6152,
      "step": 10440
    },
    {
      "epoch": 0.933565808297568,
      "grad_norm": 0.17204117211867906,
      "learning_rate": 2.3060660163513825e-06,
      "loss": 0.6624,
      "step": 10441
    },
    {
      "epoch": 0.9336552217453505,
      "grad_norm": 0.175814066035038,
      "learning_rate": 2.2998866363707184e-06,
      "loss": 0.6217,
      "step": 10442
    },
    {
      "epoch": 0.933744635193133,
      "grad_norm": 0.14901460893140994,
      "learning_rate": 2.2937154503805623e-06,
      "loss": 0.6645,
      "step": 10443
    },
    {
      "epoch": 0.9338340486409156,
      "grad_norm": 0.15383976761169324,
      "learning_rate": 2.287552458898501e-06,
      "loss": 0.6431,
      "step": 10444
    },
    {
      "epoch": 0.9339234620886981,
      "grad_norm": 0.1554946878836232,
      "learning_rate": 2.2813976624414093e-06,
      "loss": 0.597,
      "step": 10445
    },
    {
      "epoch": 0.9340128755364807,
      "grad_norm": 0.19702615408661361,
      "learning_rate": 2.275251061525474e-06,
      "loss": 0.679,
      "step": 10446
    },
    {
      "epoch": 0.9341022889842633,
      "grad_norm": 0.16975862203117772,
      "learning_rate": 2.269112656666217e-06,
      "loss": 0.6554,
      "step": 10447
    },
    {
      "epoch": 0.9341917024320457,
      "grad_norm": 0.1762562770023479,
      "learning_rate": 2.2629824483784366e-06,
      "loss": 0.6634,
      "step": 10448
    },
    {
      "epoch": 0.9342811158798283,
      "grad_norm": 0.19348410554036943,
      "learning_rate": 2.2568604371763e-06,
      "loss": 0.6627,
      "step": 10449
    },
    {
      "epoch": 0.9343705293276109,
      "grad_norm": 0.14562032803292121,
      "learning_rate": 2.25074662357323e-06,
      "loss": 0.6029,
      "step": 10450
    },
    {
      "epoch": 0.9344599427753935,
      "grad_norm": 0.15670717579610055,
      "learning_rate": 2.2446410080819824e-06,
      "loss": 0.632,
      "step": 10451
    },
    {
      "epoch": 0.9345493562231759,
      "grad_norm": 0.1478227961818043,
      "learning_rate": 2.238543591214637e-06,
      "loss": 0.6371,
      "step": 10452
    },
    {
      "epoch": 0.9346387696709585,
      "grad_norm": 0.1650565800632647,
      "learning_rate": 2.232454373482584e-06,
      "loss": 0.6439,
      "step": 10453
    },
    {
      "epoch": 0.9347281831187411,
      "grad_norm": 0.1396797474221605,
      "learning_rate": 2.226373355396505e-06,
      "loss": 0.5824,
      "step": 10454
    },
    {
      "epoch": 0.9348175965665236,
      "grad_norm": 0.1684448427810147,
      "learning_rate": 2.220300537466413e-06,
      "loss": 0.3568,
      "step": 10455
    },
    {
      "epoch": 0.9349070100143062,
      "grad_norm": 0.139542420492813,
      "learning_rate": 2.2142359202016237e-06,
      "loss": 0.6012,
      "step": 10456
    },
    {
      "epoch": 0.9349964234620887,
      "grad_norm": 0.15492012763940144,
      "learning_rate": 2.208179504110763e-06,
      "loss": 0.608,
      "step": 10457
    },
    {
      "epoch": 0.9350858369098712,
      "grad_norm": 0.15986166141665856,
      "learning_rate": 2.202131289701803e-06,
      "loss": 0.6746,
      "step": 10458
    },
    {
      "epoch": 0.9351752503576538,
      "grad_norm": 0.15054499373430444,
      "learning_rate": 2.1960912774819707e-06,
      "loss": 0.6099,
      "step": 10459
    },
    {
      "epoch": 0.9352646638054364,
      "grad_norm": 0.1490472439886685,
      "learning_rate": 2.1900594679578503e-06,
      "loss": 0.6465,
      "step": 10460
    },
    {
      "epoch": 0.9353540772532188,
      "grad_norm": 0.15329850811117518,
      "learning_rate": 2.1840358616353252e-06,
      "loss": 0.6084,
      "step": 10461
    },
    {
      "epoch": 0.9354434907010014,
      "grad_norm": 0.15655890811164652,
      "learning_rate": 2.1780204590195583e-06,
      "loss": 0.6428,
      "step": 10462
    },
    {
      "epoch": 0.935532904148784,
      "grad_norm": 0.1705261896877547,
      "learning_rate": 2.172013260615091e-06,
      "loss": 0.6527,
      "step": 10463
    },
    {
      "epoch": 0.9356223175965666,
      "grad_norm": 0.15071904691144464,
      "learning_rate": 2.166014266925731e-06,
      "loss": 0.6083,
      "step": 10464
    },
    {
      "epoch": 0.935711731044349,
      "grad_norm": 0.1531702519501774,
      "learning_rate": 2.160023478454587e-06,
      "loss": 0.6364,
      "step": 10465
    },
    {
      "epoch": 0.9358011444921316,
      "grad_norm": 0.16932653951999424,
      "learning_rate": 2.1540408957041235e-06,
      "loss": 0.6523,
      "step": 10466
    },
    {
      "epoch": 0.9358905579399142,
      "grad_norm": 0.17918752169018828,
      "learning_rate": 2.148066519176084e-06,
      "loss": 0.6499,
      "step": 10467
    },
    {
      "epoch": 0.9359799713876967,
      "grad_norm": 0.16725849539143975,
      "learning_rate": 2.142100349371512e-06,
      "loss": 0.6475,
      "step": 10468
    },
    {
      "epoch": 0.9360693848354793,
      "grad_norm": 0.17374302239485892,
      "learning_rate": 2.1361423867908293e-06,
      "loss": 0.3572,
      "step": 10469
    },
    {
      "epoch": 0.9361587982832618,
      "grad_norm": 0.17608257199916577,
      "learning_rate": 2.1301926319336696e-06,
      "loss": 0.6606,
      "step": 10470
    },
    {
      "epoch": 0.9362482117310443,
      "grad_norm": 0.14914846557299227,
      "learning_rate": 2.124251085299067e-06,
      "loss": 0.6388,
      "step": 10471
    },
    {
      "epoch": 0.9363376251788269,
      "grad_norm": 0.1752727422152581,
      "learning_rate": 2.1183177473853346e-06,
      "loss": 0.6569,
      "step": 10472
    },
    {
      "epoch": 0.9364270386266095,
      "grad_norm": 0.16479667222448663,
      "learning_rate": 2.112392618690062e-06,
      "loss": 0.6219,
      "step": 10473
    },
    {
      "epoch": 0.9365164520743919,
      "grad_norm": 0.17283658559507467,
      "learning_rate": 2.1064756997102084e-06,
      "loss": 0.6432,
      "step": 10474
    },
    {
      "epoch": 0.9366058655221745,
      "grad_norm": 0.1543773035471389,
      "learning_rate": 2.10056699094201e-06,
      "loss": 0.6435,
      "step": 10475
    },
    {
      "epoch": 0.9366952789699571,
      "grad_norm": 0.17663022537126846,
      "learning_rate": 2.0946664928810367e-06,
      "loss": 0.6601,
      "step": 10476
    },
    {
      "epoch": 0.9367846924177397,
      "grad_norm": 0.1485369396064188,
      "learning_rate": 2.0887742060221262e-06,
      "loss": 0.6336,
      "step": 10477
    },
    {
      "epoch": 0.9368741058655222,
      "grad_norm": 0.17608281928428232,
      "learning_rate": 2.082890130859505e-06,
      "loss": 0.6427,
      "step": 10478
    },
    {
      "epoch": 0.9369635193133047,
      "grad_norm": 0.14671323028835584,
      "learning_rate": 2.077014267886612e-06,
      "loss": 0.6235,
      "step": 10479
    },
    {
      "epoch": 0.9370529327610873,
      "grad_norm": 0.14723659488239355,
      "learning_rate": 2.0711466175962756e-06,
      "loss": 0.6269,
      "step": 10480
    },
    {
      "epoch": 0.9371423462088698,
      "grad_norm": 0.1617512076561625,
      "learning_rate": 2.065287180480613e-06,
      "loss": 0.6677,
      "step": 10481
    },
    {
      "epoch": 0.9372317596566524,
      "grad_norm": 0.15848791695258543,
      "learning_rate": 2.0594359570310196e-06,
      "loss": 0.6668,
      "step": 10482
    },
    {
      "epoch": 0.9373211731044349,
      "grad_norm": 0.1468482200427917,
      "learning_rate": 2.0535929477382587e-06,
      "loss": 0.6291,
      "step": 10483
    },
    {
      "epoch": 0.9374105865522174,
      "grad_norm": 0.18436618774510308,
      "learning_rate": 2.0477581530923717e-06,
      "loss": 0.6304,
      "step": 10484
    },
    {
      "epoch": 0.9375,
      "grad_norm": 0.15417014519640374,
      "learning_rate": 2.0419315735827116e-06,
      "loss": 0.6254,
      "step": 10485
    },
    {
      "epoch": 0.9375894134477826,
      "grad_norm": 0.16488878623833542,
      "learning_rate": 2.036113209697943e-06,
      "loss": 0.6503,
      "step": 10486
    },
    {
      "epoch": 0.9376788268955651,
      "grad_norm": 0.1474466485347854,
      "learning_rate": 2.0303030619260644e-06,
      "loss": 0.6408,
      "step": 10487
    },
    {
      "epoch": 0.9377682403433476,
      "grad_norm": 0.16410809685442793,
      "learning_rate": 2.0245011307543416e-06,
      "loss": 0.6196,
      "step": 10488
    },
    {
      "epoch": 0.9378576537911302,
      "grad_norm": 0.18227753888601958,
      "learning_rate": 2.018707416669374e-06,
      "loss": 0.6276,
      "step": 10489
    },
    {
      "epoch": 0.9379470672389127,
      "grad_norm": 0.1646774571143607,
      "learning_rate": 2.012921920157096e-06,
      "loss": 0.6479,
      "step": 10490
    },
    {
      "epoch": 0.9380364806866953,
      "grad_norm": 0.1687150894505683,
      "learning_rate": 2.0071446417027073e-06,
      "loss": 0.6511,
      "step": 10491
    },
    {
      "epoch": 0.9381258941344778,
      "grad_norm": 0.17809502684091016,
      "learning_rate": 2.0013755817907652e-06,
      "loss": 0.6824,
      "step": 10492
    },
    {
      "epoch": 0.9382153075822603,
      "grad_norm": 0.1601430086371637,
      "learning_rate": 1.995614740905094e-06,
      "loss": 0.6175,
      "step": 10493
    },
    {
      "epoch": 0.9383047210300429,
      "grad_norm": 0.1499059730511499,
      "learning_rate": 1.9898621195288515e-06,
      "loss": 0.619,
      "step": 10494
    },
    {
      "epoch": 0.9383941344778255,
      "grad_norm": 0.1718713577452152,
      "learning_rate": 1.984117718144518e-06,
      "loss": 0.6225,
      "step": 10495
    },
    {
      "epoch": 0.9384835479256081,
      "grad_norm": 0.1719379896382246,
      "learning_rate": 1.9783815372338423e-06,
      "loss": 0.6531,
      "step": 10496
    },
    {
      "epoch": 0.9385729613733905,
      "grad_norm": 0.16528433803746412,
      "learning_rate": 1.972653577277939e-06,
      "loss": 0.6095,
      "step": 10497
    },
    {
      "epoch": 0.9386623748211731,
      "grad_norm": 0.1590193880419114,
      "learning_rate": 1.96693383875719e-06,
      "loss": 0.6245,
      "step": 10498
    },
    {
      "epoch": 0.9387517882689557,
      "grad_norm": 0.16475480421398445,
      "learning_rate": 1.9612223221513125e-06,
      "loss": 0.6512,
      "step": 10499
    },
    {
      "epoch": 0.9388412017167382,
      "grad_norm": 0.15811691366114178,
      "learning_rate": 1.955519027939301e-06,
      "loss": 0.6417,
      "step": 10500
    },
    {
      "epoch": 0.9389306151645207,
      "grad_norm": 0.1674392429153441,
      "learning_rate": 1.949823956599528e-06,
      "loss": 0.6508,
      "step": 10501
    },
    {
      "epoch": 0.9390200286123033,
      "grad_norm": 0.15889670860540106,
      "learning_rate": 1.9441371086095784e-06,
      "loss": 0.6681,
      "step": 10502
    },
    {
      "epoch": 0.9391094420600858,
      "grad_norm": 0.17411965846961996,
      "learning_rate": 1.938458484446437e-06,
      "loss": 0.6738,
      "step": 10503
    },
    {
      "epoch": 0.9391988555078684,
      "grad_norm": 0.1741554925372566,
      "learning_rate": 1.9327880845863568e-06,
      "loss": 0.6448,
      "step": 10504
    },
    {
      "epoch": 0.939288268955651,
      "grad_norm": 0.15958812454714194,
      "learning_rate": 1.927125909504901e-06,
      "loss": 0.6539,
      "step": 10505
    },
    {
      "epoch": 0.9393776824034334,
      "grad_norm": 0.15188064949864527,
      "learning_rate": 1.921471959676957e-06,
      "loss": 0.5974,
      "step": 10506
    },
    {
      "epoch": 0.939467095851216,
      "grad_norm": 0.13624863873007484,
      "learning_rate": 1.915826235576712e-06,
      "loss": 0.594,
      "step": 10507
    },
    {
      "epoch": 0.9395565092989986,
      "grad_norm": 0.16457158001528963,
      "learning_rate": 1.910188737677665e-06,
      "loss": 0.6237,
      "step": 10508
    },
    {
      "epoch": 0.9396459227467812,
      "grad_norm": 0.15258031315343473,
      "learning_rate": 1.9045594664526155e-06,
      "loss": 0.6354,
      "step": 10509
    },
    {
      "epoch": 0.9397353361945636,
      "grad_norm": 0.17620945629785476,
      "learning_rate": 1.8989384223736971e-06,
      "loss": 0.6385,
      "step": 10510
    },
    {
      "epoch": 0.9398247496423462,
      "grad_norm": 0.1743761106246481,
      "learning_rate": 1.8933256059123438e-06,
      "loss": 0.6529,
      "step": 10511
    },
    {
      "epoch": 0.9399141630901288,
      "grad_norm": 0.1617809888381262,
      "learning_rate": 1.88772101753929e-06,
      "loss": 0.6473,
      "step": 10512
    },
    {
      "epoch": 0.9400035765379113,
      "grad_norm": 0.13466015382648755,
      "learning_rate": 1.8821246577245822e-06,
      "loss": 0.6081,
      "step": 10513
    },
    {
      "epoch": 0.9400929899856938,
      "grad_norm": 0.1733037844943803,
      "learning_rate": 1.876536526937589e-06,
      "loss": 0.6769,
      "step": 10514
    },
    {
      "epoch": 0.9401824034334764,
      "grad_norm": 0.15266227888626824,
      "learning_rate": 1.8709566256469691e-06,
      "loss": 0.5894,
      "step": 10515
    },
    {
      "epoch": 0.9402718168812589,
      "grad_norm": 0.15524878263401945,
      "learning_rate": 1.8653849543207036e-06,
      "loss": 0.6254,
      "step": 10516
    },
    {
      "epoch": 0.9403612303290415,
      "grad_norm": 0.17248743784978338,
      "learning_rate": 1.8598215134260743e-06,
      "loss": 0.6093,
      "step": 10517
    },
    {
      "epoch": 0.9404506437768241,
      "grad_norm": 0.188691964230619,
      "learning_rate": 1.8542663034297191e-06,
      "loss": 0.6191,
      "step": 10518
    },
    {
      "epoch": 0.9405400572246065,
      "grad_norm": 0.16936608291898414,
      "learning_rate": 1.8487193247974989e-06,
      "loss": 0.6555,
      "step": 10519
    },
    {
      "epoch": 0.9406294706723891,
      "grad_norm": 0.14867617416693155,
      "learning_rate": 1.843180577994652e-06,
      "loss": 0.643,
      "step": 10520
    },
    {
      "epoch": 0.9407188841201717,
      "grad_norm": 0.17200750958787547,
      "learning_rate": 1.8376500634857296e-06,
      "loss": 0.6219,
      "step": 10521
    },
    {
      "epoch": 0.9408082975679543,
      "grad_norm": 0.16455158921784144,
      "learning_rate": 1.8321277817345274e-06,
      "loss": 0.6374,
      "step": 10522
    },
    {
      "epoch": 0.9408977110157367,
      "grad_norm": 0.1672859641774098,
      "learning_rate": 1.8266137332042077e-06,
      "loss": 0.6522,
      "step": 10523
    },
    {
      "epoch": 0.9409871244635193,
      "grad_norm": 0.16279529557292594,
      "learning_rate": 1.8211079183572344e-06,
      "loss": 0.6398,
      "step": 10524
    },
    {
      "epoch": 0.9410765379113019,
      "grad_norm": 0.17380350016582366,
      "learning_rate": 1.8156103376553714e-06,
      "loss": 0.6469,
      "step": 10525
    },
    {
      "epoch": 0.9411659513590844,
      "grad_norm": 0.15105776691145104,
      "learning_rate": 1.810120991559694e-06,
      "loss": 0.6283,
      "step": 10526
    },
    {
      "epoch": 0.941255364806867,
      "grad_norm": 0.1524512792143149,
      "learning_rate": 1.8046398805305898e-06,
      "loss": 0.6281,
      "step": 10527
    },
    {
      "epoch": 0.9413447782546495,
      "grad_norm": 0.148399898820365,
      "learning_rate": 1.7991670050277354e-06,
      "loss": 0.6559,
      "step": 10528
    },
    {
      "epoch": 0.941434191702432,
      "grad_norm": 0.1553588614890033,
      "learning_rate": 1.7937023655101636e-06,
      "loss": 0.6516,
      "step": 10529
    },
    {
      "epoch": 0.9415236051502146,
      "grad_norm": 0.14849890197245103,
      "learning_rate": 1.7882459624361637e-06,
      "loss": 0.6312,
      "step": 10530
    },
    {
      "epoch": 0.9416130185979972,
      "grad_norm": 0.1661757464596121,
      "learning_rate": 1.782797796263358e-06,
      "loss": 0.6638,
      "step": 10531
    },
    {
      "epoch": 0.9417024320457796,
      "grad_norm": 0.1483561824534472,
      "learning_rate": 1.7773578674486923e-06,
      "loss": 0.6091,
      "step": 10532
    },
    {
      "epoch": 0.9417918454935622,
      "grad_norm": 0.169080292418956,
      "learning_rate": 1.7719261764484019e-06,
      "loss": 0.6325,
      "step": 10533
    },
    {
      "epoch": 0.9418812589413448,
      "grad_norm": 0.16276388715439,
      "learning_rate": 1.7665027237180332e-06,
      "loss": 0.6557,
      "step": 10534
    },
    {
      "epoch": 0.9419706723891274,
      "grad_norm": 0.15734901689877145,
      "learning_rate": 1.7610875097124446e-06,
      "loss": 0.6038,
      "step": 10535
    },
    {
      "epoch": 0.9420600858369099,
      "grad_norm": 0.17625509069904013,
      "learning_rate": 1.7556805348858064e-06,
      "loss": 0.702,
      "step": 10536
    },
    {
      "epoch": 0.9421494992846924,
      "grad_norm": 0.16098143224216263,
      "learning_rate": 1.7502817996915778e-06,
      "loss": 0.6954,
      "step": 10537
    },
    {
      "epoch": 0.942238912732475,
      "grad_norm": 0.17437462118894806,
      "learning_rate": 1.7448913045825742e-06,
      "loss": 0.6483,
      "step": 10538
    },
    {
      "epoch": 0.9423283261802575,
      "grad_norm": 0.14777066013819032,
      "learning_rate": 1.739509050010868e-06,
      "loss": 0.5953,
      "step": 10539
    },
    {
      "epoch": 0.9424177396280401,
      "grad_norm": 0.16408559176797904,
      "learning_rate": 1.7341350364278642e-06,
      "loss": 0.6057,
      "step": 10540
    },
    {
      "epoch": 0.9425071530758226,
      "grad_norm": 0.15965877483635843,
      "learning_rate": 1.7287692642842911e-06,
      "loss": 0.6465,
      "step": 10541
    },
    {
      "epoch": 0.9425965665236051,
      "grad_norm": 0.14488061251275394,
      "learning_rate": 1.723411734030156e-06,
      "loss": 0.629,
      "step": 10542
    },
    {
      "epoch": 0.9426859799713877,
      "grad_norm": 0.16127549687636683,
      "learning_rate": 1.7180624461147876e-06,
      "loss": 0.6666,
      "step": 10543
    },
    {
      "epoch": 0.9427753934191703,
      "grad_norm": 0.15746497603904838,
      "learning_rate": 1.7127214009868385e-06,
      "loss": 0.6322,
      "step": 10544
    },
    {
      "epoch": 0.9428648068669528,
      "grad_norm": 0.16568773246074991,
      "learning_rate": 1.7073885990942174e-06,
      "loss": 0.6193,
      "step": 10545
    },
    {
      "epoch": 0.9429542203147353,
      "grad_norm": 0.14452534394186484,
      "learning_rate": 1.7020640408842325e-06,
      "loss": 0.6364,
      "step": 10546
    },
    {
      "epoch": 0.9430436337625179,
      "grad_norm": 0.16104038067984902,
      "learning_rate": 1.696747726803416e-06,
      "loss": 0.6877,
      "step": 10547
    },
    {
      "epoch": 0.9431330472103004,
      "grad_norm": 0.17100629364253683,
      "learning_rate": 1.6914396572976444e-06,
      "loss": 0.6609,
      "step": 10548
    },
    {
      "epoch": 0.943222460658083,
      "grad_norm": 0.15322943550698878,
      "learning_rate": 1.6861398328121059e-06,
      "loss": 0.6333,
      "step": 10549
    },
    {
      "epoch": 0.9433118741058655,
      "grad_norm": 0.153255047674963,
      "learning_rate": 1.6808482537912896e-06,
      "loss": 0.6251,
      "step": 10550
    },
    {
      "epoch": 0.943401287553648,
      "grad_norm": 0.1524500825218116,
      "learning_rate": 1.6755649206789737e-06,
      "loss": 0.6094,
      "step": 10551
    },
    {
      "epoch": 0.9434907010014306,
      "grad_norm": 0.1602657970862045,
      "learning_rate": 1.6702898339182925e-06,
      "loss": 0.6477,
      "step": 10552
    },
    {
      "epoch": 0.9435801144492132,
      "grad_norm": 0.14951101402137293,
      "learning_rate": 1.6650229939516593e-06,
      "loss": 0.6712,
      "step": 10553
    },
    {
      "epoch": 0.9436695278969958,
      "grad_norm": 0.1372210982536296,
      "learning_rate": 1.6597644012207759e-06,
      "loss": 0.5878,
      "step": 10554
    },
    {
      "epoch": 0.9437589413447782,
      "grad_norm": 0.13840258902310593,
      "learning_rate": 1.6545140561667005e-06,
      "loss": 0.6427,
      "step": 10555
    },
    {
      "epoch": 0.9438483547925608,
      "grad_norm": 0.16268100636483085,
      "learning_rate": 1.6492719592297478e-06,
      "loss": 0.6289,
      "step": 10556
    },
    {
      "epoch": 0.9439377682403434,
      "grad_norm": 0.14119462044047842,
      "learning_rate": 1.6440381108495772e-06,
      "loss": 0.6539,
      "step": 10557
    },
    {
      "epoch": 0.9440271816881259,
      "grad_norm": 0.15411215563735348,
      "learning_rate": 1.6388125114651486e-06,
      "loss": 0.6462,
      "step": 10558
    },
    {
      "epoch": 0.9441165951359084,
      "grad_norm": 0.15147411760518845,
      "learning_rate": 1.6335951615147337e-06,
      "loss": 0.6232,
      "step": 10559
    },
    {
      "epoch": 0.944206008583691,
      "grad_norm": 0.1660650510639807,
      "learning_rate": 1.6283860614358936e-06,
      "loss": 0.6167,
      "step": 10560
    },
    {
      "epoch": 0.9442954220314735,
      "grad_norm": 0.15471684123668267,
      "learning_rate": 1.623185211665501e-06,
      "loss": 0.6673,
      "step": 10561
    },
    {
      "epoch": 0.9443848354792561,
      "grad_norm": 0.17905899250199425,
      "learning_rate": 1.6179926126397626e-06,
      "loss": 0.3633,
      "step": 10562
    },
    {
      "epoch": 0.9444742489270386,
      "grad_norm": 0.18146844494223793,
      "learning_rate": 1.6128082647941744e-06,
      "loss": 0.6323,
      "step": 10563
    },
    {
      "epoch": 0.9445636623748211,
      "grad_norm": 0.16344901481765647,
      "learning_rate": 1.6076321685635332e-06,
      "loss": 0.6306,
      "step": 10564
    },
    {
      "epoch": 0.9446530758226037,
      "grad_norm": 0.1732308813052465,
      "learning_rate": 1.602464324381936e-06,
      "loss": 0.6075,
      "step": 10565
    },
    {
      "epoch": 0.9447424892703863,
      "grad_norm": 0.15903879807286253,
      "learning_rate": 1.5973047326828472e-06,
      "loss": 0.6079,
      "step": 10566
    },
    {
      "epoch": 0.9448319027181689,
      "grad_norm": 0.1521432345007671,
      "learning_rate": 1.5921533938989542e-06,
      "loss": 0.66,
      "step": 10567
    },
    {
      "epoch": 0.9449213161659513,
      "grad_norm": 0.1637606183520319,
      "learning_rate": 1.5870103084623111e-06,
      "loss": 0.6172,
      "step": 10568
    },
    {
      "epoch": 0.9450107296137339,
      "grad_norm": 0.1656498293997229,
      "learning_rate": 1.5818754768042733e-06,
      "loss": 0.6472,
      "step": 10569
    },
    {
      "epoch": 0.9451001430615165,
      "grad_norm": 0.168788702015893,
      "learning_rate": 1.5767488993554736e-06,
      "loss": 0.653,
      "step": 10570
    },
    {
      "epoch": 0.945189556509299,
      "grad_norm": 0.16471111543941924,
      "learning_rate": 1.5716305765458683e-06,
      "loss": 0.6469,
      "step": 10571
    },
    {
      "epoch": 0.9452789699570815,
      "grad_norm": 0.15040656529291357,
      "learning_rate": 1.5665205088047474e-06,
      "loss": 0.6261,
      "step": 10572
    },
    {
      "epoch": 0.9453683834048641,
      "grad_norm": 0.1583356183285816,
      "learning_rate": 1.561418696560668e-06,
      "loss": 0.6405,
      "step": 10573
    },
    {
      "epoch": 0.9454577968526466,
      "grad_norm": 0.16147608067981062,
      "learning_rate": 1.5563251402415102e-06,
      "loss": 0.6413,
      "step": 10574
    },
    {
      "epoch": 0.9455472103004292,
      "grad_norm": 0.17082282033212803,
      "learning_rate": 1.5512398402744876e-06,
      "loss": 0.6498,
      "step": 10575
    },
    {
      "epoch": 0.9456366237482118,
      "grad_norm": 0.15118579272670096,
      "learning_rate": 1.5461627970860814e-06,
      "loss": 0.5979,
      "step": 10576
    },
    {
      "epoch": 0.9457260371959942,
      "grad_norm": 0.1568441162130608,
      "learning_rate": 1.5410940111020956e-06,
      "loss": 0.6168,
      "step": 10577
    },
    {
      "epoch": 0.9458154506437768,
      "grad_norm": 0.1542479509528289,
      "learning_rate": 1.5360334827476564e-06,
      "loss": 0.632,
      "step": 10578
    },
    {
      "epoch": 0.9459048640915594,
      "grad_norm": 0.15434356714610345,
      "learning_rate": 1.5309812124471579e-06,
      "loss": 0.6366,
      "step": 10579
    },
    {
      "epoch": 0.945994277539342,
      "grad_norm": 0.17358610201680152,
      "learning_rate": 1.52593720062435e-06,
      "loss": 0.6153,
      "step": 10580
    },
    {
      "epoch": 0.9460836909871244,
      "grad_norm": 0.17465437730013172,
      "learning_rate": 1.520901447702272e-06,
      "loss": 0.6557,
      "step": 10581
    },
    {
      "epoch": 0.946173104434907,
      "grad_norm": 0.1903431020771187,
      "learning_rate": 1.5158739541032418e-06,
      "loss": 0.6892,
      "step": 10582
    },
    {
      "epoch": 0.9462625178826896,
      "grad_norm": 0.16649733233224046,
      "learning_rate": 1.5108547202489443e-06,
      "loss": 0.667,
      "step": 10583
    },
    {
      "epoch": 0.9463519313304721,
      "grad_norm": 0.16983415412931258,
      "learning_rate": 1.5058437465602982e-06,
      "loss": 0.6614,
      "step": 10584
    },
    {
      "epoch": 0.9464413447782547,
      "grad_norm": 0.1423946138860823,
      "learning_rate": 1.5008410334576006e-06,
      "loss": 0.6258,
      "step": 10585
    },
    {
      "epoch": 0.9465307582260372,
      "grad_norm": 0.17296605728549472,
      "learning_rate": 1.495846581360394e-06,
      "loss": 0.6365,
      "step": 10586
    },
    {
      "epoch": 0.9466201716738197,
      "grad_norm": 0.1736741086209332,
      "learning_rate": 1.4908603906875761e-06,
      "loss": 0.6565,
      "step": 10587
    },
    {
      "epoch": 0.9467095851216023,
      "grad_norm": 0.17973258939048353,
      "learning_rate": 1.4858824618573352e-06,
      "loss": 0.677,
      "step": 10588
    },
    {
      "epoch": 0.9467989985693849,
      "grad_norm": 0.1729310665503611,
      "learning_rate": 1.4809127952871592e-06,
      "loss": 0.3571,
      "step": 10589
    },
    {
      "epoch": 0.9468884120171673,
      "grad_norm": 0.15121408044340487,
      "learning_rate": 1.4759513913938372e-06,
      "loss": 0.6212,
      "step": 10590
    },
    {
      "epoch": 0.9469778254649499,
      "grad_norm": 0.1503521175566067,
      "learning_rate": 1.4709982505934806e-06,
      "loss": 0.6512,
      "step": 10591
    },
    {
      "epoch": 0.9470672389127325,
      "grad_norm": 0.16891298638834942,
      "learning_rate": 1.4660533733015236e-06,
      "loss": 0.6466,
      "step": 10592
    },
    {
      "epoch": 0.947156652360515,
      "grad_norm": 0.14222766143884225,
      "learning_rate": 1.461116759932657e-06,
      "loss": 0.5963,
      "step": 10593
    },
    {
      "epoch": 0.9472460658082976,
      "grad_norm": 0.15609367764370183,
      "learning_rate": 1.4561884109009384e-06,
      "loss": 0.626,
      "step": 10594
    },
    {
      "epoch": 0.9473354792560801,
      "grad_norm": 0.1693347692049755,
      "learning_rate": 1.4512683266196703e-06,
      "loss": 0.6474,
      "step": 10595
    },
    {
      "epoch": 0.9474248927038627,
      "grad_norm": 0.16265187302399986,
      "learning_rate": 1.4463565075015228e-06,
      "loss": 0.3042,
      "step": 10596
    },
    {
      "epoch": 0.9475143061516452,
      "grad_norm": 0.16331837191291498,
      "learning_rate": 1.441452953958422e-06,
      "loss": 0.6442,
      "step": 10597
    },
    {
      "epoch": 0.9476037195994278,
      "grad_norm": 0.17373904828846923,
      "learning_rate": 1.43655766640165e-06,
      "loss": 0.6182,
      "step": 10598
    },
    {
      "epoch": 0.9476931330472103,
      "grad_norm": 0.17324762611386063,
      "learning_rate": 1.4316706452417338e-06,
      "loss": 0.6636,
      "step": 10599
    },
    {
      "epoch": 0.9477825464949928,
      "grad_norm": 0.1701743623263289,
      "learning_rate": 1.4267918908885681e-06,
      "loss": 0.6496,
      "step": 10600
    },
    {
      "epoch": 0.9478719599427754,
      "grad_norm": 0.15624292025689326,
      "learning_rate": 1.421921403751314e-06,
      "loss": 0.6379,
      "step": 10601
    },
    {
      "epoch": 0.947961373390558,
      "grad_norm": 0.16703826483978856,
      "learning_rate": 1.4170591842384672e-06,
      "loss": 0.6636,
      "step": 10602
    },
    {
      "epoch": 0.9480507868383404,
      "grad_norm": 0.1773556215739213,
      "learning_rate": 1.4122052327578128e-06,
      "loss": 0.6507,
      "step": 10603
    },
    {
      "epoch": 0.948140200286123,
      "grad_norm": 0.17464731289403201,
      "learning_rate": 1.4073595497164361e-06,
      "loss": 0.6449,
      "step": 10604
    },
    {
      "epoch": 0.9482296137339056,
      "grad_norm": 0.15533045299527964,
      "learning_rate": 1.402522135520734e-06,
      "loss": 0.6103,
      "step": 10605
    },
    {
      "epoch": 0.9483190271816881,
      "grad_norm": 0.15641243884601155,
      "learning_rate": 1.397692990576449e-06,
      "loss": 0.6209,
      "step": 10606
    },
    {
      "epoch": 0.9484084406294707,
      "grad_norm": 0.1335860405412612,
      "learning_rate": 1.392872115288546e-06,
      "loss": 0.6263,
      "step": 10607
    },
    {
      "epoch": 0.9484978540772532,
      "grad_norm": 0.15938571927553774,
      "learning_rate": 1.3880595100613792e-06,
      "loss": 0.6199,
      "step": 10608
    },
    {
      "epoch": 0.9485872675250357,
      "grad_norm": 0.17380306427345857,
      "learning_rate": 1.3832551752985811e-06,
      "loss": 0.6495,
      "step": 10609
    },
    {
      "epoch": 0.9486766809728183,
      "grad_norm": 0.1381691358143216,
      "learning_rate": 1.378459111403052e-06,
      "loss": 0.6188,
      "step": 10610
    },
    {
      "epoch": 0.9487660944206009,
      "grad_norm": 0.16868453809958978,
      "learning_rate": 1.37367131877707e-06,
      "loss": 0.6663,
      "step": 10611
    },
    {
      "epoch": 0.9488555078683834,
      "grad_norm": 0.1465128032472412,
      "learning_rate": 1.3688917978221583e-06,
      "loss": 0.6283,
      "step": 10612
    },
    {
      "epoch": 0.9489449213161659,
      "grad_norm": 0.1575728965415481,
      "learning_rate": 1.364120548939174e-06,
      "loss": 0.6348,
      "step": 10613
    },
    {
      "epoch": 0.9490343347639485,
      "grad_norm": 0.1622931894960164,
      "learning_rate": 1.3593575725282749e-06,
      "loss": 0.619,
      "step": 10614
    },
    {
      "epoch": 0.9491237482117311,
      "grad_norm": 0.16259434261870356,
      "learning_rate": 1.3546028689889302e-06,
      "loss": 0.6328,
      "step": 10615
    },
    {
      "epoch": 0.9492131616595136,
      "grad_norm": 0.15645029249712142,
      "learning_rate": 1.3498564387199098e-06,
      "loss": 0.6186,
      "step": 10616
    },
    {
      "epoch": 0.9493025751072961,
      "grad_norm": 0.1739301371929817,
      "learning_rate": 1.3451182821192954e-06,
      "loss": 0.6352,
      "step": 10617
    },
    {
      "epoch": 0.9493919885550787,
      "grad_norm": 0.17785037197294778,
      "learning_rate": 1.3403883995844579e-06,
      "loss": 0.6089,
      "step": 10618
    },
    {
      "epoch": 0.9494814020028612,
      "grad_norm": 0.14982998070115436,
      "learning_rate": 1.3356667915121025e-06,
      "loss": 0.6099,
      "step": 10619
    },
    {
      "epoch": 0.9495708154506438,
      "grad_norm": 0.1607966475257369,
      "learning_rate": 1.330953458298212e-06,
      "loss": 0.6229,
      "step": 10620
    },
    {
      "epoch": 0.9496602288984263,
      "grad_norm": 0.16765300239582862,
      "learning_rate": 1.3262484003380927e-06,
      "loss": 0.6252,
      "step": 10621
    },
    {
      "epoch": 0.9497496423462088,
      "grad_norm": 0.15785146387214619,
      "learning_rate": 1.321551618026351e-06,
      "loss": 0.6554,
      "step": 10622
    },
    {
      "epoch": 0.9498390557939914,
      "grad_norm": 0.16776296817690547,
      "learning_rate": 1.3168631117569052e-06,
      "loss": 0.6345,
      "step": 10623
    },
    {
      "epoch": 0.949928469241774,
      "grad_norm": 0.16019361355300382,
      "learning_rate": 1.3121828819229743e-06,
      "loss": 0.6486,
      "step": 10624
    },
    {
      "epoch": 0.9500178826895566,
      "grad_norm": 0.16478322819309918,
      "learning_rate": 1.3075109289170773e-06,
      "loss": 0.6274,
      "step": 10625
    },
    {
      "epoch": 0.950107296137339,
      "grad_norm": 0.16725721876003863,
      "learning_rate": 1.3028472531310454e-06,
      "loss": 0.6254,
      "step": 10626
    },
    {
      "epoch": 0.9501967095851216,
      "grad_norm": 0.1733540056927763,
      "learning_rate": 1.2981918549560213e-06,
      "loss": 0.6119,
      "step": 10627
    },
    {
      "epoch": 0.9502861230329042,
      "grad_norm": 0.17538315063537435,
      "learning_rate": 1.293544734782437e-06,
      "loss": 0.6703,
      "step": 10628
    },
    {
      "epoch": 0.9503755364806867,
      "grad_norm": 0.1657948509977632,
      "learning_rate": 1.2889058930000586e-06,
      "loss": 0.6223,
      "step": 10629
    },
    {
      "epoch": 0.9504649499284692,
      "grad_norm": 0.16280946470899238,
      "learning_rate": 1.2842753299979305e-06,
      "loss": 0.6562,
      "step": 10630
    },
    {
      "epoch": 0.9505543633762518,
      "grad_norm": 0.17277216236676063,
      "learning_rate": 1.2796530461644086e-06,
      "loss": 0.3653,
      "step": 10631
    },
    {
      "epoch": 0.9506437768240343,
      "grad_norm": 0.1472318150969589,
      "learning_rate": 1.2750390418871604e-06,
      "loss": 0.6181,
      "step": 10632
    },
    {
      "epoch": 0.9507331902718169,
      "grad_norm": 0.16372481867277286,
      "learning_rate": 1.2704333175531546e-06,
      "loss": 0.6037,
      "step": 10633
    },
    {
      "epoch": 0.9508226037195995,
      "grad_norm": 0.18046655490592553,
      "learning_rate": 1.265835873548682e-06,
      "loss": 0.6687,
      "step": 10634
    },
    {
      "epoch": 0.9509120171673819,
      "grad_norm": 0.154903172814833,
      "learning_rate": 1.2612467102593006e-06,
      "loss": 0.66,
      "step": 10635
    },
    {
      "epoch": 0.9510014306151645,
      "grad_norm": 0.17716343087105438,
      "learning_rate": 1.256665828069925e-06,
      "loss": 0.6433,
      "step": 10636
    },
    {
      "epoch": 0.9510908440629471,
      "grad_norm": 0.16281415204617952,
      "learning_rate": 1.2520932273647258e-06,
      "loss": 0.6256,
      "step": 10637
    },
    {
      "epoch": 0.9511802575107297,
      "grad_norm": 0.15606938582275545,
      "learning_rate": 1.2475289085272178e-06,
      "loss": 0.6289,
      "step": 10638
    },
    {
      "epoch": 0.9512696709585121,
      "grad_norm": 0.1464901390189543,
      "learning_rate": 1.2429728719401845e-06,
      "loss": 0.615,
      "step": 10639
    },
    {
      "epoch": 0.9513590844062947,
      "grad_norm": 0.1463430364518559,
      "learning_rate": 1.2384251179857643e-06,
      "loss": 0.6608,
      "step": 10640
    },
    {
      "epoch": 0.9514484978540773,
      "grad_norm": 0.16308417755633478,
      "learning_rate": 1.233885647045341e-06,
      "loss": 0.6312,
      "step": 10641
    },
    {
      "epoch": 0.9515379113018598,
      "grad_norm": 0.15861190468520456,
      "learning_rate": 1.2293544594996543e-06,
      "loss": 0.6303,
      "step": 10642
    },
    {
      "epoch": 0.9516273247496424,
      "grad_norm": 0.1633205438287924,
      "learning_rate": 1.2248315557287337e-06,
      "loss": 0.6237,
      "step": 10643
    },
    {
      "epoch": 0.9517167381974249,
      "grad_norm": 0.15197923841703462,
      "learning_rate": 1.2203169361118871e-06,
      "loss": 0.6489,
      "step": 10644
    },
    {
      "epoch": 0.9518061516452074,
      "grad_norm": 0.15799110751341044,
      "learning_rate": 1.215810601027767e-06,
      "loss": 0.6354,
      "step": 10645
    },
    {
      "epoch": 0.95189556509299,
      "grad_norm": 0.15872246704070703,
      "learning_rate": 1.2113125508543267e-06,
      "loss": 0.635,
      "step": 10646
    },
    {
      "epoch": 0.9519849785407726,
      "grad_norm": 0.1651268228085225,
      "learning_rate": 1.2068227859687753e-06,
      "loss": 0.6485,
      "step": 10647
    },
    {
      "epoch": 0.952074391988555,
      "grad_norm": 0.15639376526793689,
      "learning_rate": 1.2023413067476896e-06,
      "loss": 0.6287,
      "step": 10648
    },
    {
      "epoch": 0.9521638054363376,
      "grad_norm": 0.17106509280210808,
      "learning_rate": 1.1978681135669245e-06,
      "loss": 0.6571,
      "step": 10649
    },
    {
      "epoch": 0.9522532188841202,
      "grad_norm": 0.15959255590574822,
      "learning_rate": 1.1934032068016354e-06,
      "loss": 0.6313,
      "step": 10650
    },
    {
      "epoch": 0.9523426323319027,
      "grad_norm": 0.1625955478079696,
      "learning_rate": 1.1889465868263005e-06,
      "loss": 0.6465,
      "step": 10651
    },
    {
      "epoch": 0.9524320457796852,
      "grad_norm": 0.15373787681258713,
      "learning_rate": 1.1844982540146654e-06,
      "loss": 0.6359,
      "step": 10652
    },
    {
      "epoch": 0.9525214592274678,
      "grad_norm": 0.16069915697598816,
      "learning_rate": 1.1800582087398316e-06,
      "loss": 0.6245,
      "step": 10653
    },
    {
      "epoch": 0.9526108726752504,
      "grad_norm": 0.1556027191407639,
      "learning_rate": 1.1756264513741676e-06,
      "loss": 0.6292,
      "step": 10654
    },
    {
      "epoch": 0.9527002861230329,
      "grad_norm": 0.14638088140527636,
      "learning_rate": 1.1712029822893654e-06,
      "loss": 0.6087,
      "step": 10655
    },
    {
      "epoch": 0.9527896995708155,
      "grad_norm": 0.16635545196030596,
      "learning_rate": 1.1667878018564171e-06,
      "loss": 0.6163,
      "step": 10656
    },
    {
      "epoch": 0.952879113018598,
      "grad_norm": 0.15088785791372866,
      "learning_rate": 1.1623809104456262e-06,
      "loss": 0.6135,
      "step": 10657
    },
    {
      "epoch": 0.9529685264663805,
      "grad_norm": 0.17119412722249172,
      "learning_rate": 1.157982308426564e-06,
      "loss": 0.6726,
      "step": 10658
    },
    {
      "epoch": 0.9530579399141631,
      "grad_norm": 0.1739728901484759,
      "learning_rate": 1.1535919961681575e-06,
      "loss": 0.6215,
      "step": 10659
    },
    {
      "epoch": 0.9531473533619457,
      "grad_norm": 0.14670041377882106,
      "learning_rate": 1.1492099740386231e-06,
      "loss": 0.6422,
      "step": 10660
    },
    {
      "epoch": 0.9532367668097281,
      "grad_norm": 0.1592225980930319,
      "learning_rate": 1.144836242405467e-06,
      "loss": 0.6504,
      "step": 10661
    },
    {
      "epoch": 0.9533261802575107,
      "grad_norm": 0.16014634186785437,
      "learning_rate": 1.140470801635496e-06,
      "loss": 0.6508,
      "step": 10662
    },
    {
      "epoch": 0.9534155937052933,
      "grad_norm": 0.17090447768975728,
      "learning_rate": 1.13611365209485e-06,
      "loss": 0.6458,
      "step": 10663
    },
    {
      "epoch": 0.9535050071530758,
      "grad_norm": 0.17281347447491688,
      "learning_rate": 1.1317647941489595e-06,
      "loss": 0.6773,
      "step": 10664
    },
    {
      "epoch": 0.9535944206008584,
      "grad_norm": 0.17031185295669088,
      "learning_rate": 1.1274242281625547e-06,
      "loss": 0.655,
      "step": 10665
    },
    {
      "epoch": 0.9536838340486409,
      "grad_norm": 0.17404123194894106,
      "learning_rate": 1.1230919544996776e-06,
      "loss": 0.6511,
      "step": 10666
    },
    {
      "epoch": 0.9537732474964234,
      "grad_norm": 0.16466579364096398,
      "learning_rate": 1.1187679735236489e-06,
      "loss": 0.647,
      "step": 10667
    },
    {
      "epoch": 0.953862660944206,
      "grad_norm": 0.16212608482095317,
      "learning_rate": 1.114452285597145e-06,
      "loss": 0.6305,
      "step": 10668
    },
    {
      "epoch": 0.9539520743919886,
      "grad_norm": 0.1493394930918908,
      "learning_rate": 1.110144891082099e-06,
      "loss": 0.6149,
      "step": 10669
    },
    {
      "epoch": 0.954041487839771,
      "grad_norm": 0.17736853590474141,
      "learning_rate": 1.1058457903397656e-06,
      "loss": 0.3731,
      "step": 10670
    },
    {
      "epoch": 0.9541309012875536,
      "grad_norm": 0.16175184845031962,
      "learning_rate": 1.1015549837307237e-06,
      "loss": 0.6268,
      "step": 10671
    },
    {
      "epoch": 0.9542203147353362,
      "grad_norm": 0.1600177042341534,
      "learning_rate": 1.0972724716148187e-06,
      "loss": 0.6572,
      "step": 10672
    },
    {
      "epoch": 0.9543097281831188,
      "grad_norm": 0.16406997528235395,
      "learning_rate": 1.0929982543512296e-06,
      "loss": 0.6534,
      "step": 10673
    },
    {
      "epoch": 0.9543991416309013,
      "grad_norm": 0.17893391997362565,
      "learning_rate": 1.0887323322984366e-06,
      "loss": 0.6789,
      "step": 10674
    },
    {
      "epoch": 0.9544885550786838,
      "grad_norm": 0.16747441632885293,
      "learning_rate": 1.084474705814198e-06,
      "loss": 0.6292,
      "step": 10675
    },
    {
      "epoch": 0.9545779685264664,
      "grad_norm": 0.17129650986169184,
      "learning_rate": 1.0802253752556058e-06,
      "loss": 0.645,
      "step": 10676
    },
    {
      "epoch": 0.9546673819742489,
      "grad_norm": 0.1566189822664551,
      "learning_rate": 1.0759843409790527e-06,
      "loss": 0.6114,
      "step": 10677
    },
    {
      "epoch": 0.9547567954220315,
      "grad_norm": 0.16597607019436486,
      "learning_rate": 1.0717516033402097e-06,
      "loss": 0.6296,
      "step": 10678
    },
    {
      "epoch": 0.954846208869814,
      "grad_norm": 0.1524259527978557,
      "learning_rate": 1.0675271626940931e-06,
      "loss": 0.6333,
      "step": 10679
    },
    {
      "epoch": 0.9549356223175965,
      "grad_norm": 0.15489207989352416,
      "learning_rate": 1.063311019395008e-06,
      "loss": 0.6099,
      "step": 10680
    },
    {
      "epoch": 0.9550250357653791,
      "grad_norm": 0.16162377492335656,
      "learning_rate": 1.0591031737965273e-06,
      "loss": 0.6336,
      "step": 10681
    },
    {
      "epoch": 0.9551144492131617,
      "grad_norm": 0.15687867535595562,
      "learning_rate": 1.0549036262515689e-06,
      "loss": 0.6475,
      "step": 10682
    },
    {
      "epoch": 0.9552038626609443,
      "grad_norm": 0.14368346321896389,
      "learning_rate": 1.0507123771123505e-06,
      "loss": 0.6294,
      "step": 10683
    },
    {
      "epoch": 0.9552932761087267,
      "grad_norm": 0.16888987458804172,
      "learning_rate": 1.0465294267303915e-06,
      "loss": 0.6667,
      "step": 10684
    },
    {
      "epoch": 0.9553826895565093,
      "grad_norm": 0.1557756363287438,
      "learning_rate": 1.0423547754564888e-06,
      "loss": 0.6385,
      "step": 10685
    },
    {
      "epoch": 0.9554721030042919,
      "grad_norm": 0.17750709960073008,
      "learning_rate": 1.0381884236407958e-06,
      "loss": 0.6453,
      "step": 10686
    },
    {
      "epoch": 0.9555615164520744,
      "grad_norm": 0.14730563204715905,
      "learning_rate": 1.0340303716327215e-06,
      "loss": 0.6363,
      "step": 10687
    },
    {
      "epoch": 0.9556509298998569,
      "grad_norm": 0.16843252780542228,
      "learning_rate": 1.0298806197809984e-06,
      "loss": 0.6261,
      "step": 10688
    },
    {
      "epoch": 0.9557403433476395,
      "grad_norm": 0.14967254118814335,
      "learning_rate": 1.0257391684336703e-06,
      "loss": 0.6051,
      "step": 10689
    },
    {
      "epoch": 0.955829756795422,
      "grad_norm": 0.17660888059184277,
      "learning_rate": 1.0216060179380481e-06,
      "loss": 0.6596,
      "step": 10690
    },
    {
      "epoch": 0.9559191702432046,
      "grad_norm": 0.13218300722497942,
      "learning_rate": 1.0174811686408104e-06,
      "loss": 0.6411,
      "step": 10691
    },
    {
      "epoch": 0.9560085836909872,
      "grad_norm": 0.18577078970086938,
      "learning_rate": 1.01336462088788e-06,
      "loss": 0.6874,
      "step": 10692
    },
    {
      "epoch": 0.9560979971387696,
      "grad_norm": 0.16451392618412966,
      "learning_rate": 1.0092563750245032e-06,
      "loss": 0.6523,
      "step": 10693
    },
    {
      "epoch": 0.9561874105865522,
      "grad_norm": 0.16352893697201645,
      "learning_rate": 1.00515643139526e-06,
      "loss": 0.6698,
      "step": 10694
    },
    {
      "epoch": 0.9562768240343348,
      "grad_norm": 0.17033261818192172,
      "learning_rate": 1.0010647903439862e-06,
      "loss": 0.6732,
      "step": 10695
    },
    {
      "epoch": 0.9563662374821174,
      "grad_norm": 0.16333560492383264,
      "learning_rate": 9.96981452213852e-07,
      "loss": 0.6227,
      "step": 10696
    },
    {
      "epoch": 0.9564556509298998,
      "grad_norm": 0.17464384524837165,
      "learning_rate": 9.929064173473057e-07,
      "loss": 0.6421,
      "step": 10697
    },
    {
      "epoch": 0.9565450643776824,
      "grad_norm": 0.17248359228655163,
      "learning_rate": 9.888396860861404e-07,
      "loss": 0.6449,
      "step": 10698
    },
    {
      "epoch": 0.956634477825465,
      "grad_norm": 0.15246620966067,
      "learning_rate": 9.847812587714057e-07,
      "loss": 0.5723,
      "step": 10699
    },
    {
      "epoch": 0.9567238912732475,
      "grad_norm": 0.1546188872844794,
      "learning_rate": 9.807311357434956e-07,
      "loss": 0.6175,
      "step": 10700
    },
    {
      "epoch": 0.95681330472103,
      "grad_norm": 0.15839614097134222,
      "learning_rate": 9.766893173420721e-07,
      "loss": 0.6441,
      "step": 10701
    },
    {
      "epoch": 0.9569027181688126,
      "grad_norm": 0.1370423930954888,
      "learning_rate": 9.726558039061308e-07,
      "loss": 0.634,
      "step": 10702
    },
    {
      "epoch": 0.9569921316165951,
      "grad_norm": 0.1735884466517745,
      "learning_rate": 9.68630595773956e-07,
      "loss": 0.6649,
      "step": 10703
    },
    {
      "epoch": 0.9570815450643777,
      "grad_norm": 0.18367833910972747,
      "learning_rate": 9.64613693283123e-07,
      "loss": 0.6813,
      "step": 10704
    },
    {
      "epoch": 0.9571709585121603,
      "grad_norm": 0.1658897770103454,
      "learning_rate": 9.606050967705393e-07,
      "loss": 0.6661,
      "step": 10705
    },
    {
      "epoch": 0.9572603719599427,
      "grad_norm": 0.15942581664304759,
      "learning_rate": 9.566048065724032e-07,
      "loss": 0.6386,
      "step": 10706
    },
    {
      "epoch": 0.9573497854077253,
      "grad_norm": 0.1618020879926058,
      "learning_rate": 9.526128230242016e-07,
      "loss": 0.6341,
      "step": 10707
    },
    {
      "epoch": 0.9574391988555079,
      "grad_norm": 0.14628446657775207,
      "learning_rate": 9.486291464607444e-07,
      "loss": 0.6307,
      "step": 10708
    },
    {
      "epoch": 0.9575286123032904,
      "grad_norm": 0.14030509891897855,
      "learning_rate": 9.446537772161423e-07,
      "loss": 0.6184,
      "step": 10709
    },
    {
      "epoch": 0.9576180257510729,
      "grad_norm": 0.15219420817690227,
      "learning_rate": 9.406867156237842e-07,
      "loss": 0.6665,
      "step": 10710
    },
    {
      "epoch": 0.9577074391988555,
      "grad_norm": 0.16781767164406244,
      "learning_rate": 9.367279620164149e-07,
      "loss": 0.666,
      "step": 10711
    },
    {
      "epoch": 0.957796852646638,
      "grad_norm": 0.16499270903987673,
      "learning_rate": 9.327775167260244e-07,
      "loss": 0.6278,
      "step": 10712
    },
    {
      "epoch": 0.9578862660944206,
      "grad_norm": 0.1651274768607235,
      "learning_rate": 9.288353800839366e-07,
      "loss": 0.646,
      "step": 10713
    },
    {
      "epoch": 0.9579756795422032,
      "grad_norm": 0.16725314664950094,
      "learning_rate": 9.249015524207872e-07,
      "loss": 0.6285,
      "step": 10714
    },
    {
      "epoch": 0.9580650929899857,
      "grad_norm": 0.16509981181846642,
      "learning_rate": 9.209760340664897e-07,
      "loss": 0.6314,
      "step": 10715
    },
    {
      "epoch": 0.9581545064377682,
      "grad_norm": 0.163196410523196,
      "learning_rate": 9.170588253502698e-07,
      "loss": 0.6813,
      "step": 10716
    },
    {
      "epoch": 0.9582439198855508,
      "grad_norm": 0.15079634414837012,
      "learning_rate": 9.13149926600676e-07,
      "loss": 0.6223,
      "step": 10717
    },
    {
      "epoch": 0.9583333333333334,
      "grad_norm": 0.147957062979796,
      "learning_rate": 9.092493381455236e-07,
      "loss": 0.6288,
      "step": 10718
    },
    {
      "epoch": 0.9584227467811158,
      "grad_norm": 0.1576638378305804,
      "learning_rate": 9.05357060311951e-07,
      "loss": 0.6376,
      "step": 10719
    },
    {
      "epoch": 0.9585121602288984,
      "grad_norm": 0.16488117292200422,
      "learning_rate": 9.014730934264192e-07,
      "loss": 0.6507,
      "step": 10720
    },
    {
      "epoch": 0.958601573676681,
      "grad_norm": 0.16975907637900117,
      "learning_rate": 8.975974378146457e-07,
      "loss": 0.671,
      "step": 10721
    },
    {
      "epoch": 0.9586909871244635,
      "grad_norm": 0.1703098758903128,
      "learning_rate": 8.937300938017035e-07,
      "loss": 0.6268,
      "step": 10722
    },
    {
      "epoch": 0.9587804005722461,
      "grad_norm": 0.15671429592380542,
      "learning_rate": 8.898710617119222e-07,
      "loss": 0.6145,
      "step": 10723
    },
    {
      "epoch": 0.9588698140200286,
      "grad_norm": 0.16562367203438186,
      "learning_rate": 8.860203418689539e-07,
      "loss": 0.6569,
      "step": 10724
    },
    {
      "epoch": 0.9589592274678111,
      "grad_norm": 0.1562571367724046,
      "learning_rate": 8.821779345957626e-07,
      "loss": 0.6438,
      "step": 10725
    },
    {
      "epoch": 0.9590486409155937,
      "grad_norm": 0.16289835665977148,
      "learning_rate": 8.783438402146127e-07,
      "loss": 0.5909,
      "step": 10726
    },
    {
      "epoch": 0.9591380543633763,
      "grad_norm": 0.16496666066048674,
      "learning_rate": 8.74518059047047e-07,
      "loss": 0.6675,
      "step": 10727
    },
    {
      "epoch": 0.9592274678111588,
      "grad_norm": 0.16632778236498083,
      "learning_rate": 8.707005914139422e-07,
      "loss": 0.6442,
      "step": 10728
    },
    {
      "epoch": 0.9593168812589413,
      "grad_norm": 0.16803848459236773,
      "learning_rate": 8.668914376354642e-07,
      "loss": 0.6458,
      "step": 10729
    },
    {
      "epoch": 0.9594062947067239,
      "grad_norm": 0.1699676836309042,
      "learning_rate": 8.630905980310689e-07,
      "loss": 0.6417,
      "step": 10730
    },
    {
      "epoch": 0.9594957081545065,
      "grad_norm": 0.1866113975356765,
      "learning_rate": 8.592980729195455e-07,
      "loss": 0.7011,
      "step": 10731
    },
    {
      "epoch": 0.959585121602289,
      "grad_norm": 0.14900542858809737,
      "learning_rate": 8.555138626189618e-07,
      "loss": 0.6441,
      "step": 10732
    },
    {
      "epoch": 0.9596745350500715,
      "grad_norm": 0.15476390676431634,
      "learning_rate": 8.517379674466863e-07,
      "loss": 0.5947,
      "step": 10733
    },
    {
      "epoch": 0.9597639484978541,
      "grad_norm": 0.16766669840280662,
      "learning_rate": 8.479703877194212e-07,
      "loss": 0.6249,
      "step": 10734
    },
    {
      "epoch": 0.9598533619456366,
      "grad_norm": 0.1425857927491733,
      "learning_rate": 8.442111237531247e-07,
      "loss": 0.6378,
      "step": 10735
    },
    {
      "epoch": 0.9599427753934192,
      "grad_norm": 0.1644936402173733,
      "learning_rate": 8.404601758630892e-07,
      "loss": 0.6542,
      "step": 10736
    },
    {
      "epoch": 0.9600321888412017,
      "grad_norm": 0.16146820991145874,
      "learning_rate": 8.367175443639075e-07,
      "loss": 0.6894,
      "step": 10737
    },
    {
      "epoch": 0.9601216022889842,
      "grad_norm": 0.1564124401620301,
      "learning_rate": 8.329832295694618e-07,
      "loss": 0.6059,
      "step": 10738
    },
    {
      "epoch": 0.9602110157367668,
      "grad_norm": 0.16463260005469085,
      "learning_rate": 8.29257231792957e-07,
      "loss": 0.6372,
      "step": 10739
    },
    {
      "epoch": 0.9603004291845494,
      "grad_norm": 0.16312565918616248,
      "learning_rate": 8.255395513468767e-07,
      "loss": 0.6457,
      "step": 10740
    },
    {
      "epoch": 0.960389842632332,
      "grad_norm": 0.16187865093388268,
      "learning_rate": 8.218301885430268e-07,
      "loss": 0.6379,
      "step": 10741
    },
    {
      "epoch": 0.9604792560801144,
      "grad_norm": 0.15833058630595429,
      "learning_rate": 8.181291436924921e-07,
      "loss": 0.6293,
      "step": 10742
    },
    {
      "epoch": 0.960568669527897,
      "grad_norm": 0.1560541714815923,
      "learning_rate": 8.144364171056906e-07,
      "loss": 0.6305,
      "step": 10743
    },
    {
      "epoch": 0.9606580829756796,
      "grad_norm": 0.1783576974029709,
      "learning_rate": 8.107520090923193e-07,
      "loss": 0.6529,
      "step": 10744
    },
    {
      "epoch": 0.9607474964234621,
      "grad_norm": 0.14374721894415807,
      "learning_rate": 8.070759199613864e-07,
      "loss": 0.6284,
      "step": 10745
    },
    {
      "epoch": 0.9608369098712446,
      "grad_norm": 0.17075546461078467,
      "learning_rate": 8.03408150021201e-07,
      "loss": 0.6222,
      "step": 10746
    },
    {
      "epoch": 0.9609263233190272,
      "grad_norm": 0.1680039356939372,
      "learning_rate": 7.997486995793834e-07,
      "loss": 0.3746,
      "step": 10747
    },
    {
      "epoch": 0.9610157367668097,
      "grad_norm": 0.16263905318292007,
      "learning_rate": 7.96097568942833e-07,
      "loss": 0.6408,
      "step": 10748
    },
    {
      "epoch": 0.9611051502145923,
      "grad_norm": 0.1684655502380539,
      "learning_rate": 7.924547584177711e-07,
      "loss": 0.6472,
      "step": 10749
    },
    {
      "epoch": 0.9611945636623748,
      "grad_norm": 0.1592349573418705,
      "learning_rate": 7.88820268309709e-07,
      "loss": 0.6022,
      "step": 10750
    },
    {
      "epoch": 0.9612839771101573,
      "grad_norm": 0.16356581914236173,
      "learning_rate": 7.851940989234919e-07,
      "loss": 0.6211,
      "step": 10751
    },
    {
      "epoch": 0.9613733905579399,
      "grad_norm": 0.16290776909539353,
      "learning_rate": 7.815762505632096e-07,
      "loss": 0.633,
      "step": 10752
    },
    {
      "epoch": 0.9614628040057225,
      "grad_norm": 0.17658617392127077,
      "learning_rate": 7.779667235322974e-07,
      "loss": 0.6295,
      "step": 10753
    },
    {
      "epoch": 0.961552217453505,
      "grad_norm": 0.1601961691182987,
      "learning_rate": 7.743655181335019e-07,
      "loss": 0.6664,
      "step": 10754
    },
    {
      "epoch": 0.9616416309012875,
      "grad_norm": 0.1389077141903615,
      "learning_rate": 7.707726346688259e-07,
      "loss": 0.6173,
      "step": 10755
    },
    {
      "epoch": 0.9617310443490701,
      "grad_norm": 0.1562324918479287,
      "learning_rate": 7.671880734396175e-07,
      "loss": 0.6729,
      "step": 10756
    },
    {
      "epoch": 0.9618204577968527,
      "grad_norm": 0.16960972931607474,
      "learning_rate": 7.636118347465027e-07,
      "loss": 0.6595,
      "step": 10757
    },
    {
      "epoch": 0.9619098712446352,
      "grad_norm": 0.17356449167975338,
      "learning_rate": 7.600439188894082e-07,
      "loss": 0.6586,
      "step": 10758
    },
    {
      "epoch": 0.9619992846924177,
      "grad_norm": 0.16525773298370458,
      "learning_rate": 7.564843261675835e-07,
      "loss": 0.6367,
      "step": 10759
    },
    {
      "epoch": 0.9620886981402003,
      "grad_norm": 0.17405384033042648,
      "learning_rate": 7.529330568795568e-07,
      "loss": 0.6723,
      "step": 10760
    },
    {
      "epoch": 0.9621781115879828,
      "grad_norm": 0.1656855660278942,
      "learning_rate": 7.493901113231782e-07,
      "loss": 0.6898,
      "step": 10761
    },
    {
      "epoch": 0.9622675250357654,
      "grad_norm": 0.14915610158019502,
      "learning_rate": 7.458554897955883e-07,
      "loss": 0.606,
      "step": 10762
    },
    {
      "epoch": 0.962356938483548,
      "grad_norm": 0.1598195741184941,
      "learning_rate": 7.423291925932275e-07,
      "loss": 0.6329,
      "step": 10763
    },
    {
      "epoch": 0.9624463519313304,
      "grad_norm": 0.17758219786366664,
      "learning_rate": 7.388112200118479e-07,
      "loss": 0.66,
      "step": 10764
    },
    {
      "epoch": 0.962535765379113,
      "grad_norm": 0.15344315859900154,
      "learning_rate": 7.353015723464918e-07,
      "loss": 0.6364,
      "step": 10765
    },
    {
      "epoch": 0.9626251788268956,
      "grad_norm": 0.1561066483785476,
      "learning_rate": 7.318002498915122e-07,
      "loss": 0.6139,
      "step": 10766
    },
    {
      "epoch": 0.9627145922746781,
      "grad_norm": 0.15316177880017753,
      "learning_rate": 7.283072529405521e-07,
      "loss": 0.6391,
      "step": 10767
    },
    {
      "epoch": 0.9628040057224606,
      "grad_norm": 0.1653297913251167,
      "learning_rate": 7.248225817865884e-07,
      "loss": 0.6286,
      "step": 10768
    },
    {
      "epoch": 0.9628934191702432,
      "grad_norm": 0.14337614535120624,
      "learning_rate": 7.213462367218537e-07,
      "loss": 0.5943,
      "step": 10769
    },
    {
      "epoch": 0.9629828326180258,
      "grad_norm": 0.14974843339709154,
      "learning_rate": 7.17878218037904e-07,
      "loss": 0.6258,
      "step": 10770
    },
    {
      "epoch": 0.9630722460658083,
      "grad_norm": 0.16502386200084912,
      "learning_rate": 7.144185260256175e-07,
      "loss": 0.6269,
      "step": 10771
    },
    {
      "epoch": 0.9631616595135909,
      "grad_norm": 0.1684724673927464,
      "learning_rate": 7.1096716097514e-07,
      "loss": 0.6416,
      "step": 10772
    },
    {
      "epoch": 0.9632510729613734,
      "grad_norm": 0.1636528757759354,
      "learning_rate": 7.075241231759289e-07,
      "loss": 0.6342,
      "step": 10773
    },
    {
      "epoch": 0.9633404864091559,
      "grad_norm": 0.1663159597488157,
      "learning_rate": 7.040894129167641e-07,
      "loss": 0.6549,
      "step": 10774
    },
    {
      "epoch": 0.9634298998569385,
      "grad_norm": 0.129815779243808,
      "learning_rate": 7.006630304856932e-07,
      "loss": 0.6318,
      "step": 10775
    },
    {
      "epoch": 0.9635193133047211,
      "grad_norm": 0.1588131724307756,
      "learning_rate": 6.972449761700861e-07,
      "loss": 0.568,
      "step": 10776
    },
    {
      "epoch": 0.9636087267525035,
      "grad_norm": 0.1765027555237834,
      "learning_rate": 6.938352502566358e-07,
      "loss": 0.6841,
      "step": 10777
    },
    {
      "epoch": 0.9636981402002861,
      "grad_norm": 0.15459507586799873,
      "learning_rate": 6.904338530312693e-07,
      "loss": 0.6757,
      "step": 10778
    },
    {
      "epoch": 0.9637875536480687,
      "grad_norm": 0.16190560051297864,
      "learning_rate": 6.870407847792915e-07,
      "loss": 0.6308,
      "step": 10779
    },
    {
      "epoch": 0.9638769670958512,
      "grad_norm": 0.15440318936738706,
      "learning_rate": 6.836560457852636e-07,
      "loss": 0.5947,
      "step": 10780
    },
    {
      "epoch": 0.9639663805436338,
      "grad_norm": 0.16726193100343986,
      "learning_rate": 6.802796363330588e-07,
      "loss": 0.6335,
      "step": 10781
    },
    {
      "epoch": 0.9640557939914163,
      "grad_norm": 0.16100117264266867,
      "learning_rate": 6.769115567058504e-07,
      "loss": 0.6295,
      "step": 10782
    },
    {
      "epoch": 0.9641452074391988,
      "grad_norm": 0.15550754855724125,
      "learning_rate": 6.735518071861235e-07,
      "loss": 0.6031,
      "step": 10783
    },
    {
      "epoch": 0.9642346208869814,
      "grad_norm": 0.14481533785554185,
      "learning_rate": 6.702003880556418e-07,
      "loss": 0.6259,
      "step": 10784
    },
    {
      "epoch": 0.964324034334764,
      "grad_norm": 0.1491539082365935,
      "learning_rate": 6.668572995955025e-07,
      "loss": 0.6083,
      "step": 10785
    },
    {
      "epoch": 0.9644134477825465,
      "grad_norm": 0.16424094247620427,
      "learning_rate": 6.635225420860702e-07,
      "loss": 0.6151,
      "step": 10786
    },
    {
      "epoch": 0.964502861230329,
      "grad_norm": 0.17093757229652923,
      "learning_rate": 6.601961158070325e-07,
      "loss": 0.6318,
      "step": 10787
    },
    {
      "epoch": 0.9645922746781116,
      "grad_norm": 0.17784410171527135,
      "learning_rate": 6.56878021037377e-07,
      "loss": 0.6254,
      "step": 10788
    },
    {
      "epoch": 0.9646816881258942,
      "grad_norm": 0.15118824287272517,
      "learning_rate": 6.535682580553926e-07,
      "loss": 0.6509,
      "step": 10789
    },
    {
      "epoch": 0.9647711015736766,
      "grad_norm": 0.1481984242359043,
      "learning_rate": 6.502668271386458e-07,
      "loss": 0.6241,
      "step": 10790
    },
    {
      "epoch": 0.9648605150214592,
      "grad_norm": 0.1686269564359828,
      "learning_rate": 6.469737285640487e-07,
      "loss": 0.6071,
      "step": 10791
    },
    {
      "epoch": 0.9649499284692418,
      "grad_norm": 0.16795924641252727,
      "learning_rate": 6.436889626077691e-07,
      "loss": 0.6951,
      "step": 10792
    },
    {
      "epoch": 0.9650393419170243,
      "grad_norm": 0.16471197291637718,
      "learning_rate": 6.40412529545309e-07,
      "loss": 0.6572,
      "step": 10793
    },
    {
      "epoch": 0.9651287553648069,
      "grad_norm": 0.1717558200498196,
      "learning_rate": 6.371444296514484e-07,
      "loss": 0.638,
      "step": 10794
    },
    {
      "epoch": 0.9652181688125894,
      "grad_norm": 0.15489297341496053,
      "learning_rate": 6.338846632002904e-07,
      "loss": 0.6397,
      "step": 10795
    },
    {
      "epoch": 0.9653075822603719,
      "grad_norm": 0.1562030769285789,
      "learning_rate": 6.306332304652273e-07,
      "loss": 0.6278,
      "step": 10796
    },
    {
      "epoch": 0.9653969957081545,
      "grad_norm": 0.1698858334625266,
      "learning_rate": 6.273901317189301e-07,
      "loss": 0.6374,
      "step": 10797
    },
    {
      "epoch": 0.9654864091559371,
      "grad_norm": 0.15065284465986642,
      "learning_rate": 6.241553672334255e-07,
      "loss": 0.6493,
      "step": 10798
    },
    {
      "epoch": 0.9655758226037195,
      "grad_norm": 0.16154625250021934,
      "learning_rate": 6.209289372799854e-07,
      "loss": 0.6535,
      "step": 10799
    },
    {
      "epoch": 0.9656652360515021,
      "grad_norm": 0.16403239414840423,
      "learning_rate": 6.177108421292266e-07,
      "loss": 0.6109,
      "step": 10800
    },
    {
      "epoch": 0.9657546494992847,
      "grad_norm": 0.15499553254667645,
      "learning_rate": 6.145010820510222e-07,
      "loss": 0.662,
      "step": 10801
    },
    {
      "epoch": 0.9658440629470673,
      "grad_norm": 0.15960775002915903,
      "learning_rate": 6.112996573145902e-07,
      "loss": 0.6228,
      "step": 10802
    },
    {
      "epoch": 0.9659334763948498,
      "grad_norm": 0.16572943671264112,
      "learning_rate": 6.081065681884268e-07,
      "loss": 0.6354,
      "step": 10803
    },
    {
      "epoch": 0.9660228898426323,
      "grad_norm": 0.15585501316082445,
      "learning_rate": 6.04921814940329e-07,
      "loss": 0.6419,
      "step": 10804
    },
    {
      "epoch": 0.9661123032904149,
      "grad_norm": 0.16093174335789043,
      "learning_rate": 6.017453978374055e-07,
      "loss": 0.6602,
      "step": 10805
    },
    {
      "epoch": 0.9662017167381974,
      "grad_norm": 0.17489814933699432,
      "learning_rate": 5.985773171460429e-07,
      "loss": 0.6457,
      "step": 10806
    },
    {
      "epoch": 0.96629113018598,
      "grad_norm": 0.1469282858283779,
      "learning_rate": 5.954175731319622e-07,
      "loss": 0.6028,
      "step": 10807
    },
    {
      "epoch": 0.9663805436337625,
      "grad_norm": 0.17477239246483414,
      "learning_rate": 5.922661660601514e-07,
      "loss": 0.6629,
      "step": 10808
    },
    {
      "epoch": 0.966469957081545,
      "grad_norm": 0.1736590345229364,
      "learning_rate": 5.891230961949324e-07,
      "loss": 0.6519,
      "step": 10809
    },
    {
      "epoch": 0.9665593705293276,
      "grad_norm": 0.1598500485948725,
      "learning_rate": 5.859883637998942e-07,
      "loss": 0.6498,
      "step": 10810
    },
    {
      "epoch": 0.9666487839771102,
      "grad_norm": 0.1502186241355659,
      "learning_rate": 5.8286196913796e-07,
      "loss": 0.6224,
      "step": 10811
    },
    {
      "epoch": 0.9667381974248928,
      "grad_norm": 0.1347666275655292,
      "learning_rate": 5.7974391247132e-07,
      "loss": 0.6099,
      "step": 10812
    },
    {
      "epoch": 0.9668276108726752,
      "grad_norm": 0.16435386952981756,
      "learning_rate": 5.766341940614872e-07,
      "loss": 0.6632,
      "step": 10813
    },
    {
      "epoch": 0.9669170243204578,
      "grad_norm": 0.1492498767972995,
      "learning_rate": 5.735328141692642e-07,
      "loss": 0.611,
      "step": 10814
    },
    {
      "epoch": 0.9670064377682404,
      "grad_norm": 0.15243859897840525,
      "learning_rate": 5.704397730547762e-07,
      "loss": 0.6374,
      "step": 10815
    },
    {
      "epoch": 0.9670958512160229,
      "grad_norm": 0.16042035721373774,
      "learning_rate": 5.673550709774267e-07,
      "loss": 0.6355,
      "step": 10816
    },
    {
      "epoch": 0.9671852646638054,
      "grad_norm": 0.16922979466095314,
      "learning_rate": 5.6427870819592e-07,
      "loss": 0.6531,
      "step": 10817
    },
    {
      "epoch": 0.967274678111588,
      "grad_norm": 0.17286532091511084,
      "learning_rate": 5.612106849682719e-07,
      "loss": 0.658,
      "step": 10818
    },
    {
      "epoch": 0.9673640915593705,
      "grad_norm": 0.15702379815168407,
      "learning_rate": 5.581510015517988e-07,
      "loss": 0.5811,
      "step": 10819
    },
    {
      "epoch": 0.9674535050071531,
      "grad_norm": 0.1478916444666412,
      "learning_rate": 5.550996582030954e-07,
      "loss": 0.5936,
      "step": 10820
    },
    {
      "epoch": 0.9675429184549357,
      "grad_norm": 0.16221114786443314,
      "learning_rate": 5.520566551780792e-07,
      "loss": 0.6311,
      "step": 10821
    },
    {
      "epoch": 0.9676323319027181,
      "grad_norm": 0.15189583778083487,
      "learning_rate": 5.490219927319795e-07,
      "loss": 0.5993,
      "step": 10822
    },
    {
      "epoch": 0.9677217453505007,
      "grad_norm": 0.1650902035632553,
      "learning_rate": 5.459956711192926e-07,
      "loss": 0.6432,
      "step": 10823
    },
    {
      "epoch": 0.9678111587982833,
      "grad_norm": 0.14352550663061858,
      "learning_rate": 5.429776905938489e-07,
      "loss": 0.6228,
      "step": 10824
    },
    {
      "epoch": 0.9679005722460658,
      "grad_norm": 0.15432323768120795,
      "learning_rate": 5.399680514087458e-07,
      "loss": 0.6417,
      "step": 10825
    },
    {
      "epoch": 0.9679899856938483,
      "grad_norm": 0.16309681188181974,
      "learning_rate": 5.369667538164036e-07,
      "loss": 0.637,
      "step": 10826
    },
    {
      "epoch": 0.9680793991416309,
      "grad_norm": 0.17310316348829255,
      "learning_rate": 5.339737980685433e-07,
      "loss": 0.6064,
      "step": 10827
    },
    {
      "epoch": 0.9681688125894135,
      "grad_norm": 0.15867262365232612,
      "learning_rate": 5.30989184416164e-07,
      "loss": 0.6703,
      "step": 10828
    },
    {
      "epoch": 0.968258226037196,
      "grad_norm": 0.17762013459293308,
      "learning_rate": 5.28012913109599e-07,
      "loss": 0.6006,
      "step": 10829
    },
    {
      "epoch": 0.9683476394849786,
      "grad_norm": 0.16264844119741398,
      "learning_rate": 5.250449843984706e-07,
      "loss": 0.6603,
      "step": 10830
    },
    {
      "epoch": 0.968437052932761,
      "grad_norm": 0.15805414791426511,
      "learning_rate": 5.220853985316798e-07,
      "loss": 0.632,
      "step": 10831
    },
    {
      "epoch": 0.9685264663805436,
      "grad_norm": 0.15787843559237014,
      "learning_rate": 5.191341557574392e-07,
      "loss": 0.6335,
      "step": 10832
    },
    {
      "epoch": 0.9686158798283262,
      "grad_norm": 0.16951162539527198,
      "learning_rate": 5.16191256323273e-07,
      "loss": 0.6233,
      "step": 10833
    },
    {
      "epoch": 0.9687052932761088,
      "grad_norm": 0.15443333333592993,
      "learning_rate": 5.132567004760169e-07,
      "loss": 0.6508,
      "step": 10834
    },
    {
      "epoch": 0.9687947067238912,
      "grad_norm": 0.1639289926475351,
      "learning_rate": 5.103304884617521e-07,
      "loss": 0.6255,
      "step": 10835
    },
    {
      "epoch": 0.9688841201716738,
      "grad_norm": 0.15861545798163681,
      "learning_rate": 5.074126205259266e-07,
      "loss": 0.6339,
      "step": 10836
    },
    {
      "epoch": 0.9689735336194564,
      "grad_norm": 0.1708401792138829,
      "learning_rate": 5.045030969132447e-07,
      "loss": 0.6,
      "step": 10837
    },
    {
      "epoch": 0.969062947067239,
      "grad_norm": 0.13590998329451562,
      "learning_rate": 5.016019178677333e-07,
      "loss": 0.5498,
      "step": 10838
    },
    {
      "epoch": 0.9691523605150214,
      "grad_norm": 0.15201710276456395,
      "learning_rate": 4.987090836327091e-07,
      "loss": 0.6218,
      "step": 10839
    },
    {
      "epoch": 0.969241773962804,
      "grad_norm": 0.14887843752040272,
      "learning_rate": 4.958245944507777e-07,
      "loss": 0.6239,
      "step": 10840
    },
    {
      "epoch": 0.9693311874105865,
      "grad_norm": 0.15094918366221716,
      "learning_rate": 4.929484505638682e-07,
      "loss": 0.6154,
      "step": 10841
    },
    {
      "epoch": 0.9694206008583691,
      "grad_norm": 0.17207035527693584,
      "learning_rate": 4.900806522131984e-07,
      "loss": 0.6895,
      "step": 10842
    },
    {
      "epoch": 0.9695100143061517,
      "grad_norm": 0.1722514773389875,
      "learning_rate": 4.872211996392872e-07,
      "loss": 0.659,
      "step": 10843
    },
    {
      "epoch": 0.9695994277539342,
      "grad_norm": 0.1701118907089719,
      "learning_rate": 4.843700930819539e-07,
      "loss": 0.6368,
      "step": 10844
    },
    {
      "epoch": 0.9696888412017167,
      "grad_norm": 0.17459165652526087,
      "learning_rate": 4.815273327803182e-07,
      "loss": 0.6418,
      "step": 10845
    },
    {
      "epoch": 0.9697782546494993,
      "grad_norm": 0.15671356492239605,
      "learning_rate": 4.786929189727896e-07,
      "loss": 0.5994,
      "step": 10846
    },
    {
      "epoch": 0.9698676680972819,
      "grad_norm": 0.16321881172526706,
      "learning_rate": 4.758668518970999e-07,
      "loss": 0.6662,
      "step": 10847
    },
    {
      "epoch": 0.9699570815450643,
      "grad_norm": 0.1645974965300174,
      "learning_rate": 4.7304913179025965e-07,
      "loss": 0.6088,
      "step": 10848
    },
    {
      "epoch": 0.9700464949928469,
      "grad_norm": 0.1527923813642033,
      "learning_rate": 4.7023975888859095e-07,
      "loss": 0.6055,
      "step": 10849
    },
    {
      "epoch": 0.9701359084406295,
      "grad_norm": 0.1631322734436805,
      "learning_rate": 4.674387334277164e-07,
      "loss": 0.643,
      "step": 10850
    },
    {
      "epoch": 0.970225321888412,
      "grad_norm": 0.17298930068462773,
      "learning_rate": 4.6464605564254803e-07,
      "loss": 0.6628,
      "step": 10851
    },
    {
      "epoch": 0.9703147353361946,
      "grad_norm": 0.15647906268279171,
      "learning_rate": 4.6186172576730967e-07,
      "loss": 0.6497,
      "step": 10852
    },
    {
      "epoch": 0.9704041487839771,
      "grad_norm": 0.157601366959407,
      "learning_rate": 4.5908574403551454e-07,
      "loss": 0.6655,
      "step": 10853
    },
    {
      "epoch": 0.9704935622317596,
      "grad_norm": 0.16644151947703917,
      "learning_rate": 4.5631811067998743e-07,
      "loss": 0.6492,
      "step": 10854
    },
    {
      "epoch": 0.9705829756795422,
      "grad_norm": 0.15639543221416272,
      "learning_rate": 4.5355882593283163e-07,
      "loss": 0.6226,
      "step": 10855
    },
    {
      "epoch": 0.9706723891273248,
      "grad_norm": 0.1628242318040561,
      "learning_rate": 4.5080789002548417e-07,
      "loss": 0.6369,
      "step": 10856
    },
    {
      "epoch": 0.9707618025751072,
      "grad_norm": 0.16313551698957032,
      "learning_rate": 4.4806530318864945e-07,
      "loss": 0.647,
      "step": 10857
    },
    {
      "epoch": 0.9708512160228898,
      "grad_norm": 0.172606474030222,
      "learning_rate": 4.453310656523435e-07,
      "loss": 0.6012,
      "step": 10858
    },
    {
      "epoch": 0.9709406294706724,
      "grad_norm": 0.16520378494332183,
      "learning_rate": 4.42605177645905e-07,
      "loss": 0.6555,
      "step": 10859
    },
    {
      "epoch": 0.971030042918455,
      "grad_norm": 0.1601977735676407,
      "learning_rate": 4.39887639397929e-07,
      "loss": 0.6332,
      "step": 10860
    },
    {
      "epoch": 0.9711194563662375,
      "grad_norm": 0.19585183082340404,
      "learning_rate": 4.3717845113633307e-07,
      "loss": 0.6448,
      "step": 10861
    },
    {
      "epoch": 0.97120886981402,
      "grad_norm": 0.160008072917659,
      "learning_rate": 4.344776130883466e-07,
      "loss": 0.6601,
      "step": 10862
    },
    {
      "epoch": 0.9712982832618026,
      "grad_norm": 0.16915409625358643,
      "learning_rate": 4.3178512548046613e-07,
      "loss": 0.6139,
      "step": 10863
    },
    {
      "epoch": 0.9713876967095851,
      "grad_norm": 0.15497002432730575,
      "learning_rate": 4.291009885385333e-07,
      "loss": 0.6899,
      "step": 10864
    },
    {
      "epoch": 0.9714771101573677,
      "grad_norm": 0.16721020175509352,
      "learning_rate": 4.264252024876458e-07,
      "loss": 0.6369,
      "step": 10865
    },
    {
      "epoch": 0.9715665236051502,
      "grad_norm": 0.1571961834900675,
      "learning_rate": 4.237577675522131e-07,
      "loss": 0.6374,
      "step": 10866
    },
    {
      "epoch": 0.9716559370529327,
      "grad_norm": 0.14977549786828553,
      "learning_rate": 4.210986839559672e-07,
      "loss": 0.621,
      "step": 10867
    },
    {
      "epoch": 0.9717453505007153,
      "grad_norm": 0.16803968846561676,
      "learning_rate": 4.184479519219187e-07,
      "loss": 0.657,
      "step": 10868
    },
    {
      "epoch": 0.9718347639484979,
      "grad_norm": 0.15907798029827708,
      "learning_rate": 4.1580557167236744e-07,
      "loss": 0.6253,
      "step": 10869
    },
    {
      "epoch": 0.9719241773962805,
      "grad_norm": 0.17357703221391763,
      "learning_rate": 4.131715434289363e-07,
      "loss": 0.6597,
      "step": 10870
    },
    {
      "epoch": 0.9720135908440629,
      "grad_norm": 0.15459646254919732,
      "learning_rate": 4.105458674125373e-07,
      "loss": 0.6359,
      "step": 10871
    },
    {
      "epoch": 0.9721030042918455,
      "grad_norm": 0.17595084889706278,
      "learning_rate": 4.0792854384338333e-07,
      "loss": 0.3755,
      "step": 10872
    },
    {
      "epoch": 0.9721924177396281,
      "grad_norm": 0.1432921800282413,
      "learning_rate": 4.0531957294098755e-07,
      "loss": 0.6309,
      "step": 10873
    },
    {
      "epoch": 0.9722818311874106,
      "grad_norm": 0.15362251858820855,
      "learning_rate": 4.027189549241639e-07,
      "loss": 0.6481,
      "step": 10874
    },
    {
      "epoch": 0.9723712446351931,
      "grad_norm": 0.16239607266951267,
      "learning_rate": 4.001266900110046e-07,
      "loss": 0.6639,
      "step": 10875
    },
    {
      "epoch": 0.9724606580829757,
      "grad_norm": 0.17256323642500598,
      "learning_rate": 3.975427784189467e-07,
      "loss": 0.6611,
      "step": 10876
    },
    {
      "epoch": 0.9725500715307582,
      "grad_norm": 0.13029999823833832,
      "learning_rate": 3.949672203646837e-07,
      "loss": 0.6222,
      "step": 10877
    },
    {
      "epoch": 0.9726394849785408,
      "grad_norm": 0.16006949258605133,
      "learning_rate": 3.924000160642205e-07,
      "loss": 0.6335,
      "step": 10878
    },
    {
      "epoch": 0.9727288984263234,
      "grad_norm": 0.1482361892016683,
      "learning_rate": 3.898411657328849e-07,
      "loss": 0.61,
      "step": 10879
    },
    {
      "epoch": 0.9728183118741058,
      "grad_norm": 0.17306677584348204,
      "learning_rate": 3.872906695852607e-07,
      "loss": 0.6284,
      "step": 10880
    },
    {
      "epoch": 0.9729077253218884,
      "grad_norm": 0.14923580455258903,
      "learning_rate": 3.847485278352658e-07,
      "loss": 0.6334,
      "step": 10881
    },
    {
      "epoch": 0.972997138769671,
      "grad_norm": 0.16641946987743653,
      "learning_rate": 3.8221474069611854e-07,
      "loss": 0.6583,
      "step": 10882
    },
    {
      "epoch": 0.9730865522174535,
      "grad_norm": 0.13932303644769167,
      "learning_rate": 3.7968930838030436e-07,
      "loss": 0.6116,
      "step": 10883
    },
    {
      "epoch": 0.973175965665236,
      "grad_norm": 0.1669135229422499,
      "learning_rate": 3.771722310996428e-07,
      "loss": 0.6589,
      "step": 10884
    },
    {
      "epoch": 0.9732653791130186,
      "grad_norm": 0.16511155732142263,
      "learning_rate": 3.7466350906522065e-07,
      "loss": 0.6568,
      "step": 10885
    },
    {
      "epoch": 0.9733547925608012,
      "grad_norm": 0.17180074183649702,
      "learning_rate": 3.721631424874694e-07,
      "loss": 0.6235,
      "step": 10886
    },
    {
      "epoch": 0.9734442060085837,
      "grad_norm": 0.17918847669164792,
      "learning_rate": 3.696711315760659e-07,
      "loss": 0.3663,
      "step": 10887
    },
    {
      "epoch": 0.9735336194563662,
      "grad_norm": 0.16268923145166572,
      "learning_rate": 3.671874765400207e-07,
      "loss": 0.6448,
      "step": 10888
    },
    {
      "epoch": 0.9736230329041488,
      "grad_norm": 0.15172202043569624,
      "learning_rate": 3.6471217758763387e-07,
      "loss": 0.5946,
      "step": 10889
    },
    {
      "epoch": 0.9737124463519313,
      "grad_norm": 0.15078572485583705,
      "learning_rate": 3.6224523492651706e-07,
      "loss": 0.591,
      "step": 10890
    },
    {
      "epoch": 0.9738018597997139,
      "grad_norm": 0.14301969204627893,
      "learning_rate": 3.5978664876354926e-07,
      "loss": 0.5676,
      "step": 10891
    },
    {
      "epoch": 0.9738912732474965,
      "grad_norm": 0.1708768344657797,
      "learning_rate": 3.573364193049433e-07,
      "loss": 0.676,
      "step": 10892
    },
    {
      "epoch": 0.9739806866952789,
      "grad_norm": 0.15855196748339354,
      "learning_rate": 3.5489454675620147e-07,
      "loss": 0.6488,
      "step": 10893
    },
    {
      "epoch": 0.9740701001430615,
      "grad_norm": 0.16056768711124786,
      "learning_rate": 3.524610313221155e-07,
      "loss": 0.6699,
      "step": 10894
    },
    {
      "epoch": 0.9741595135908441,
      "grad_norm": 0.14861142907389532,
      "learning_rate": 3.5003587320676655e-07,
      "loss": 0.639,
      "step": 10895
    },
    {
      "epoch": 0.9742489270386266,
      "grad_norm": 0.1455491077467617,
      "learning_rate": 3.4761907261356976e-07,
      "loss": 0.6321,
      "step": 10896
    },
    {
      "epoch": 0.9743383404864091,
      "grad_norm": 0.15459209693021614,
      "learning_rate": 3.4521062974520737e-07,
      "loss": 0.6276,
      "step": 10897
    },
    {
      "epoch": 0.9744277539341917,
      "grad_norm": 0.1506958475679363,
      "learning_rate": 3.4281054480368445e-07,
      "loss": 0.596,
      "step": 10898
    },
    {
      "epoch": 0.9745171673819742,
      "grad_norm": 0.1426653919032025,
      "learning_rate": 3.404188179902845e-07,
      "loss": 0.5786,
      "step": 10899
    },
    {
      "epoch": 0.9746065808297568,
      "grad_norm": 0.17182504900979803,
      "learning_rate": 3.380354495055915e-07,
      "loss": 0.603,
      "step": 10900
    },
    {
      "epoch": 0.9746959942775394,
      "grad_norm": 0.17008917852458702,
      "learning_rate": 3.356604395495122e-07,
      "loss": 0.6733,
      "step": 10901
    },
    {
      "epoch": 0.9747854077253219,
      "grad_norm": 0.15196550325552594,
      "learning_rate": 3.332937883212206e-07,
      "loss": 0.6215,
      "step": 10902
    },
    {
      "epoch": 0.9748748211731044,
      "grad_norm": 0.1516546692826302,
      "learning_rate": 3.3093549601921345e-07,
      "loss": 0.6161,
      "step": 10903
    },
    {
      "epoch": 0.974964234620887,
      "grad_norm": 0.15860942068245498,
      "learning_rate": 3.2858556284127704e-07,
      "loss": 0.6143,
      "step": 10904
    },
    {
      "epoch": 0.9750536480686696,
      "grad_norm": 0.16512763719458737,
      "learning_rate": 3.2624398898449814e-07,
      "loss": 0.5899,
      "step": 10905
    },
    {
      "epoch": 0.975143061516452,
      "grad_norm": 0.17625906271502717,
      "learning_rate": 3.239107746452641e-07,
      "loss": 0.662,
      "step": 10906
    },
    {
      "epoch": 0.9752324749642346,
      "grad_norm": 0.16596443413764977,
      "learning_rate": 3.215859200192517e-07,
      "loss": 0.6639,
      "step": 10907
    },
    {
      "epoch": 0.9753218884120172,
      "grad_norm": 0.15024514843669018,
      "learning_rate": 3.1926942530144945e-07,
      "loss": 0.6127,
      "step": 10908
    },
    {
      "epoch": 0.9754113018597997,
      "grad_norm": 0.14301115036019557,
      "learning_rate": 3.1696129068613525e-07,
      "loss": 0.6174,
      "step": 10909
    },
    {
      "epoch": 0.9755007153075823,
      "grad_norm": 0.1545261380503898,
      "learning_rate": 3.1466151636689865e-07,
      "loss": 0.6471,
      "step": 10910
    },
    {
      "epoch": 0.9755901287553648,
      "grad_norm": 0.1696297697786402,
      "learning_rate": 3.1237010253659657e-07,
      "loss": 0.658,
      "step": 10911
    },
    {
      "epoch": 0.9756795422031473,
      "grad_norm": 0.15695472142127828,
      "learning_rate": 3.1008704938743084e-07,
      "loss": 0.6333,
      "step": 10912
    },
    {
      "epoch": 0.9757689556509299,
      "grad_norm": 0.1566942114071421,
      "learning_rate": 3.078123571108704e-07,
      "loss": 0.5965,
      "step": 10913
    },
    {
      "epoch": 0.9758583690987125,
      "grad_norm": 0.15769210680592283,
      "learning_rate": 3.05546025897685e-07,
      "loss": 0.6286,
      "step": 10914
    },
    {
      "epoch": 0.975947782546495,
      "grad_norm": 0.16450314571839156,
      "learning_rate": 3.0328805593795584e-07,
      "loss": 0.6311,
      "step": 10915
    },
    {
      "epoch": 0.9760371959942775,
      "grad_norm": 0.16421267999029063,
      "learning_rate": 3.010384474210537e-07,
      "loss": 0.6392,
      "step": 10916
    },
    {
      "epoch": 0.9761266094420601,
      "grad_norm": 0.170406595316103,
      "learning_rate": 2.987972005356499e-07,
      "loss": 0.625,
      "step": 10917
    },
    {
      "epoch": 0.9762160228898427,
      "grad_norm": 0.16485705946326365,
      "learning_rate": 2.965643154697162e-07,
      "loss": 0.6482,
      "step": 10918
    },
    {
      "epoch": 0.9763054363376252,
      "grad_norm": 0.17710105732751028,
      "learning_rate": 2.943397924105251e-07,
      "loss": 0.3437,
      "step": 10919
    },
    {
      "epoch": 0.9763948497854077,
      "grad_norm": 0.15802492643324048,
      "learning_rate": 2.921236315446385e-07,
      "loss": 0.5949,
      "step": 10920
    },
    {
      "epoch": 0.9764842632331903,
      "grad_norm": 0.14398298058340464,
      "learning_rate": 2.899158330579299e-07,
      "loss": 0.5849,
      "step": 10921
    },
    {
      "epoch": 0.9765736766809728,
      "grad_norm": 0.1598978441566043,
      "learning_rate": 2.877163971355623e-07,
      "loss": 0.6461,
      "step": 10922
    },
    {
      "epoch": 0.9766630901287554,
      "grad_norm": 0.1597448108557176,
      "learning_rate": 2.8552532396198815e-07,
      "loss": 0.6529,
      "step": 10923
    },
    {
      "epoch": 0.9767525035765379,
      "grad_norm": 0.16454668235013453,
      "learning_rate": 2.833426137209938e-07,
      "loss": 0.6633,
      "step": 10924
    },
    {
      "epoch": 0.9768419170243204,
      "grad_norm": 0.16418630497040668,
      "learning_rate": 2.811682665956217e-07,
      "loss": 0.6686,
      "step": 10925
    },
    {
      "epoch": 0.976931330472103,
      "grad_norm": 0.16204390755303513,
      "learning_rate": 2.7900228276823704e-07,
      "loss": 0.6801,
      "step": 10926
    },
    {
      "epoch": 0.9770207439198856,
      "grad_norm": 0.1660729143233906,
      "learning_rate": 2.768446624204946e-07,
      "loss": 0.6162,
      "step": 10927
    },
    {
      "epoch": 0.977110157367668,
      "grad_norm": 0.18523921469762017,
      "learning_rate": 2.746954057333606e-07,
      "loss": 0.6894,
      "step": 10928
    },
    {
      "epoch": 0.9771995708154506,
      "grad_norm": 0.16748804634939068,
      "learning_rate": 2.7255451288707987e-07,
      "loss": 0.6174,
      "step": 10929
    },
    {
      "epoch": 0.9772889842632332,
      "grad_norm": 0.16992975301781763,
      "learning_rate": 2.704219840612199e-07,
      "loss": 0.6431,
      "step": 10930
    },
    {
      "epoch": 0.9773783977110158,
      "grad_norm": 0.16942593951461865,
      "learning_rate": 2.682978194346264e-07,
      "loss": 0.6289,
      "step": 10931
    },
    {
      "epoch": 0.9774678111587983,
      "grad_norm": 0.14772321670666394,
      "learning_rate": 2.661820191854347e-07,
      "loss": 0.6497,
      "step": 10932
    },
    {
      "epoch": 0.9775572246065808,
      "grad_norm": 0.18278727425711194,
      "learning_rate": 2.640745834911251e-07,
      "loss": 0.6852,
      "step": 10933
    },
    {
      "epoch": 0.9776466380543634,
      "grad_norm": 0.18519934738432756,
      "learning_rate": 2.6197551252842287e-07,
      "loss": 0.6412,
      "step": 10934
    },
    {
      "epoch": 0.9777360515021459,
      "grad_norm": 0.17206840551678793,
      "learning_rate": 2.598848064733761e-07,
      "loss": 0.6627,
      "step": 10935
    },
    {
      "epoch": 0.9778254649499285,
      "grad_norm": 0.17177869728808243,
      "learning_rate": 2.5780246550134444e-07,
      "loss": 0.6246,
      "step": 10936
    },
    {
      "epoch": 0.977914878397711,
      "grad_norm": 0.16061551349050376,
      "learning_rate": 2.5572848978695496e-07,
      "loss": 0.6755,
      "step": 10937
    },
    {
      "epoch": 0.9780042918454935,
      "grad_norm": 0.15471788305888323,
      "learning_rate": 2.5366287950415737e-07,
      "loss": 0.6247,
      "step": 10938
    },
    {
      "epoch": 0.9780937052932761,
      "grad_norm": 0.1735691242145956,
      "learning_rate": 2.516056348261908e-07,
      "loss": 0.6736,
      "step": 10939
    },
    {
      "epoch": 0.9781831187410587,
      "grad_norm": 0.16420963760315285,
      "learning_rate": 2.495567559256062e-07,
      "loss": 0.649,
      "step": 10940
    },
    {
      "epoch": 0.9782725321888412,
      "grad_norm": 0.189975292226629,
      "learning_rate": 2.475162429742106e-07,
      "loss": 0.6552,
      "step": 10941
    },
    {
      "epoch": 0.9783619456366237,
      "grad_norm": 0.16642785410387742,
      "learning_rate": 2.45484096143167e-07,
      "loss": 0.6636,
      "step": 10942
    },
    {
      "epoch": 0.9784513590844063,
      "grad_norm": 0.17082194274146503,
      "learning_rate": 2.434603156028947e-07,
      "loss": 0.6671,
      "step": 10943
    },
    {
      "epoch": 0.9785407725321889,
      "grad_norm": 0.16285803940040938,
      "learning_rate": 2.414449015231357e-07,
      "loss": 0.6119,
      "step": 10944
    },
    {
      "epoch": 0.9786301859799714,
      "grad_norm": 0.15801407869412015,
      "learning_rate": 2.394378540729214e-07,
      "loss": 0.6509,
      "step": 10945
    },
    {
      "epoch": 0.9787195994277539,
      "grad_norm": 0.174223469108814,
      "learning_rate": 2.3743917342056166e-07,
      "loss": 0.6558,
      "step": 10946
    },
    {
      "epoch": 0.9788090128755365,
      "grad_norm": 0.15813598845579757,
      "learning_rate": 2.3544885973370012e-07,
      "loss": 0.631,
      "step": 10947
    },
    {
      "epoch": 0.978898426323319,
      "grad_norm": 0.1542202735894919,
      "learning_rate": 2.3346691317924775e-07,
      "loss": 0.6341,
      "step": 10948
    },
    {
      "epoch": 0.9789878397711016,
      "grad_norm": 0.15475139103118732,
      "learning_rate": 2.314933339234493e-07,
      "loss": 0.6417,
      "step": 10949
    },
    {
      "epoch": 0.9790772532188842,
      "grad_norm": 0.1620285996257514,
      "learning_rate": 2.2952812213181684e-07,
      "loss": 0.6522,
      "step": 10950
    },
    {
      "epoch": 0.9791666666666666,
      "grad_norm": 0.1588098386091441,
      "learning_rate": 2.27571277969163e-07,
      "loss": 0.6353,
      "step": 10951
    },
    {
      "epoch": 0.9792560801144492,
      "grad_norm": 0.13893026722045257,
      "learning_rate": 2.2562280159961203e-07,
      "loss": 0.5501,
      "step": 10952
    },
    {
      "epoch": 0.9793454935622318,
      "grad_norm": 0.16670587398488784,
      "learning_rate": 2.2368269318657764e-07,
      "loss": 0.3533,
      "step": 10953
    },
    {
      "epoch": 0.9794349070100143,
      "grad_norm": 0.16568460933365098,
      "learning_rate": 2.2175095289278524e-07,
      "loss": 0.6564,
      "step": 10954
    },
    {
      "epoch": 0.9795243204577968,
      "grad_norm": 0.18846111386424186,
      "learning_rate": 2.1982758088022747e-07,
      "loss": 0.366,
      "step": 10955
    },
    {
      "epoch": 0.9796137339055794,
      "grad_norm": 0.16529373882137244,
      "learning_rate": 2.1791257731024194e-07,
      "loss": 0.6556,
      "step": 10956
    },
    {
      "epoch": 0.979703147353362,
      "grad_norm": 0.14473948737767017,
      "learning_rate": 2.160059423434113e-07,
      "loss": 0.6404,
      "step": 10957
    },
    {
      "epoch": 0.9797925608011445,
      "grad_norm": 0.1466119435157724,
      "learning_rate": 2.141076761396521e-07,
      "loss": 0.6387,
      "step": 10958
    },
    {
      "epoch": 0.9798819742489271,
      "grad_norm": 0.1501446240724477,
      "learning_rate": 2.1221777885817028e-07,
      "loss": 0.6202,
      "step": 10959
    },
    {
      "epoch": 0.9799713876967096,
      "grad_norm": 0.15821835360954248,
      "learning_rate": 2.1033625065747242e-07,
      "loss": 0.6156,
      "step": 10960
    },
    {
      "epoch": 0.9800608011444921,
      "grad_norm": 0.18779718348808963,
      "learning_rate": 2.084630916953656e-07,
      "loss": 0.6762,
      "step": 10961
    },
    {
      "epoch": 0.9801502145922747,
      "grad_norm": 0.17103950983997637,
      "learning_rate": 2.0659830212893527e-07,
      "loss": 0.644,
      "step": 10962
    },
    {
      "epoch": 0.9802396280400573,
      "grad_norm": 0.17679347596601508,
      "learning_rate": 2.0474188211457856e-07,
      "loss": 0.6304,
      "step": 10963
    },
    {
      "epoch": 0.9803290414878397,
      "grad_norm": 0.16346465074574046,
      "learning_rate": 2.0289383180801537e-07,
      "loss": 0.5909,
      "step": 10964
    },
    {
      "epoch": 0.9804184549356223,
      "grad_norm": 0.16637070493878728,
      "learning_rate": 2.0105415136421058e-07,
      "loss": 0.6458,
      "step": 10965
    },
    {
      "epoch": 0.9805078683834049,
      "grad_norm": 0.1666625795970151,
      "learning_rate": 1.9922284093746302e-07,
      "loss": 0.6011,
      "step": 10966
    },
    {
      "epoch": 0.9805972818311874,
      "grad_norm": 0.16004151372211947,
      "learning_rate": 1.9739990068137203e-07,
      "loss": 0.6303,
      "step": 10967
    },
    {
      "epoch": 0.98068669527897,
      "grad_norm": 0.16777134050273418,
      "learning_rate": 1.9558533074882646e-07,
      "loss": 0.6052,
      "step": 10968
    },
    {
      "epoch": 0.9807761087267525,
      "grad_norm": 0.17551299147016525,
      "learning_rate": 1.9377913129199344e-07,
      "loss": 0.6466,
      "step": 10969
    },
    {
      "epoch": 0.980865522174535,
      "grad_norm": 0.17306214528826525,
      "learning_rate": 1.919813024623851e-07,
      "loss": 0.6615,
      "step": 10970
    },
    {
      "epoch": 0.9809549356223176,
      "grad_norm": 0.1547374947036787,
      "learning_rate": 1.9019184441075865e-07,
      "loss": 0.6092,
      "step": 10971
    },
    {
      "epoch": 0.9810443490701002,
      "grad_norm": 0.14347928020267198,
      "learning_rate": 1.8841075728719404e-07,
      "loss": 0.6422,
      "step": 10972
    },
    {
      "epoch": 0.9811337625178826,
      "grad_norm": 0.17782750130423386,
      "learning_rate": 1.8663804124108286e-07,
      "loss": 0.6343,
      "step": 10973
    },
    {
      "epoch": 0.9812231759656652,
      "grad_norm": 0.15594188703135878,
      "learning_rate": 1.848736964211062e-07,
      "loss": 0.6357,
      "step": 10974
    },
    {
      "epoch": 0.9813125894134478,
      "grad_norm": 0.1366817210259277,
      "learning_rate": 1.8311772297521234e-07,
      "loss": 0.6847,
      "step": 10975
    },
    {
      "epoch": 0.9814020028612304,
      "grad_norm": 0.1584778453644806,
      "learning_rate": 1.813701210506946e-07,
      "loss": 0.642,
      "step": 10976
    },
    {
      "epoch": 0.9814914163090128,
      "grad_norm": 0.15212639280410245,
      "learning_rate": 1.7963089079411356e-07,
      "loss": 0.6044,
      "step": 10977
    },
    {
      "epoch": 0.9815808297567954,
      "grad_norm": 0.16059893901271002,
      "learning_rate": 1.7790003235134133e-07,
      "loss": 0.6186,
      "step": 10978
    },
    {
      "epoch": 0.981670243204578,
      "grad_norm": 0.17725888727188033,
      "learning_rate": 1.7617754586752855e-07,
      "loss": 0.6416,
      "step": 10979
    },
    {
      "epoch": 0.9817596566523605,
      "grad_norm": 0.1615389898053165,
      "learning_rate": 1.744634314871485e-07,
      "loss": 0.6032,
      "step": 10980
    },
    {
      "epoch": 0.9818490701001431,
      "grad_norm": 0.17202521980956242,
      "learning_rate": 1.7275768935397507e-07,
      "loss": 0.3467,
      "step": 10981
    },
    {
      "epoch": 0.9819384835479256,
      "grad_norm": 0.15299338959316994,
      "learning_rate": 1.710603196110383e-07,
      "loss": 0.6459,
      "step": 10982
    },
    {
      "epoch": 0.9820278969957081,
      "grad_norm": 0.16781773698676156,
      "learning_rate": 1.693713224007243e-07,
      "loss": 0.649,
      "step": 10983
    },
    {
      "epoch": 0.9821173104434907,
      "grad_norm": 0.18165012675294648,
      "learning_rate": 1.6769069786466418e-07,
      "loss": 0.68,
      "step": 10984
    },
    {
      "epoch": 0.9822067238912733,
      "grad_norm": 0.16926618548295175,
      "learning_rate": 1.66018446143823e-07,
      "loss": 0.6949,
      "step": 10985
    },
    {
      "epoch": 0.9822961373390557,
      "grad_norm": 0.1786819553493348,
      "learning_rate": 1.6435456737843302e-07,
      "loss": 0.6696,
      "step": 10986
    },
    {
      "epoch": 0.9823855507868383,
      "grad_norm": 0.18919930843534633,
      "learning_rate": 1.6269906170807148e-07,
      "loss": 0.6631,
      "step": 10987
    },
    {
      "epoch": 0.9824749642346209,
      "grad_norm": 0.14600376448530192,
      "learning_rate": 1.6105192927154956e-07,
      "loss": 0.6199,
      "step": 10988
    },
    {
      "epoch": 0.9825643776824035,
      "grad_norm": 0.16129751098399964,
      "learning_rate": 1.594131702070345e-07,
      "loss": 0.646,
      "step": 10989
    },
    {
      "epoch": 0.982653791130186,
      "grad_norm": 0.16244922944089998,
      "learning_rate": 1.5778278465197194e-07,
      "loss": 0.6409,
      "step": 10990
    },
    {
      "epoch": 0.9827432045779685,
      "grad_norm": 0.14701787462765203,
      "learning_rate": 1.5616077274307473e-07,
      "loss": 0.5994,
      "step": 10991
    },
    {
      "epoch": 0.9828326180257511,
      "grad_norm": 0.16896664421596588,
      "learning_rate": 1.545471346164007e-07,
      "loss": 0.6262,
      "step": 10992
    },
    {
      "epoch": 0.9829220314735336,
      "grad_norm": 0.17037966816524938,
      "learning_rate": 1.5294187040726382e-07,
      "loss": 0.6828,
      "step": 10993
    },
    {
      "epoch": 0.9830114449213162,
      "grad_norm": 0.15725659885473106,
      "learning_rate": 1.5134498025031196e-07,
      "loss": 0.6723,
      "step": 10994
    },
    {
      "epoch": 0.9831008583690987,
      "grad_norm": 0.1428593786995013,
      "learning_rate": 1.4975646427948244e-07,
      "loss": 0.636,
      "step": 10995
    },
    {
      "epoch": 0.9831902718168812,
      "grad_norm": 0.17053937303679564,
      "learning_rate": 1.4817632262797976e-07,
      "loss": 0.657,
      "step": 10996
    },
    {
      "epoch": 0.9832796852646638,
      "grad_norm": 0.14266280864626504,
      "learning_rate": 1.4660455542833128e-07,
      "loss": 0.6047,
      "step": 10997
    },
    {
      "epoch": 0.9833690987124464,
      "grad_norm": 0.16175672352115797,
      "learning_rate": 1.45041162812376e-07,
      "loss": 0.6006,
      "step": 10998
    },
    {
      "epoch": 0.983458512160229,
      "grad_norm": 0.17302438382565893,
      "learning_rate": 1.4348614491123125e-07,
      "loss": 0.6559,
      "step": 10999
    },
    {
      "epoch": 0.9835479256080114,
      "grad_norm": 0.170305174398647,
      "learning_rate": 1.419395018552927e-07,
      "loss": 0.6201,
      "step": 11000
    },
    {
      "epoch": 0.983637339055794,
      "grad_norm": 0.17933642012786247,
      "learning_rate": 1.4040123377428993e-07,
      "loss": 0.3951,
      "step": 11001
    },
    {
      "epoch": 0.9837267525035766,
      "grad_norm": 0.1594305117949077,
      "learning_rate": 1.3887134079724196e-07,
      "loss": 0.638,
      "step": 11002
    },
    {
      "epoch": 0.9838161659513591,
      "grad_norm": 0.17964448445182987,
      "learning_rate": 1.3734982305245724e-07,
      "loss": 0.6313,
      "step": 11003
    },
    {
      "epoch": 0.9839055793991416,
      "grad_norm": 0.18329312999060643,
      "learning_rate": 1.3583668066753375e-07,
      "loss": 0.3911,
      "step": 11004
    },
    {
      "epoch": 0.9839949928469242,
      "grad_norm": 0.1588826619951883,
      "learning_rate": 1.3433191376938103e-07,
      "loss": 0.6589,
      "step": 11005
    },
    {
      "epoch": 0.9840844062947067,
      "grad_norm": 0.16167986447775148,
      "learning_rate": 1.3283552248420927e-07,
      "loss": 0.626,
      "step": 11006
    },
    {
      "epoch": 0.9841738197424893,
      "grad_norm": 0.1554190396541846,
      "learning_rate": 1.3134750693751806e-07,
      "loss": 0.6509,
      "step": 11007
    },
    {
      "epoch": 0.9842632331902719,
      "grad_norm": 0.16720338887446604,
      "learning_rate": 1.298678672540854e-07,
      "loss": 0.6392,
      "step": 11008
    },
    {
      "epoch": 0.9843526466380543,
      "grad_norm": 0.1730252396653883,
      "learning_rate": 1.2839660355803417e-07,
      "loss": 0.6231,
      "step": 11009
    },
    {
      "epoch": 0.9844420600858369,
      "grad_norm": 0.16000208968880028,
      "learning_rate": 1.2693371597273241e-07,
      "loss": 0.6552,
      "step": 11010
    },
    {
      "epoch": 0.9845314735336195,
      "grad_norm": 0.1537643567245833,
      "learning_rate": 1.2547920462089302e-07,
      "loss": 0.62,
      "step": 11011
    },
    {
      "epoch": 0.984620886981402,
      "grad_norm": 0.15855910239029167,
      "learning_rate": 1.2403306962449624e-07,
      "loss": 0.648,
      "step": 11012
    },
    {
      "epoch": 0.9847103004291845,
      "grad_norm": 0.1522128087950409,
      "learning_rate": 1.225953111048228e-07,
      "loss": 0.6206,
      "step": 11013
    },
    {
      "epoch": 0.9847997138769671,
      "grad_norm": 0.14954680438929177,
      "learning_rate": 1.2116592918246516e-07,
      "loss": 0.6128,
      "step": 11014
    },
    {
      "epoch": 0.9848891273247496,
      "grad_norm": 0.15381502411192652,
      "learning_rate": 1.197449239772941e-07,
      "loss": 0.6352,
      "step": 11015
    },
    {
      "epoch": 0.9849785407725322,
      "grad_norm": 0.18438122609190669,
      "learning_rate": 1.1833229560848092e-07,
      "loss": 0.6468,
      "step": 11016
    },
    {
      "epoch": 0.9850679542203148,
      "grad_norm": 0.1661784046531238,
      "learning_rate": 1.1692804419451975e-07,
      "loss": 0.6578,
      "step": 11017
    },
    {
      "epoch": 0.9851573676680973,
      "grad_norm": 0.1673602242369829,
      "learning_rate": 1.1553216985318305e-07,
      "loss": 0.617,
      "step": 11018
    },
    {
      "epoch": 0.9852467811158798,
      "grad_norm": 0.1674725951814037,
      "learning_rate": 1.1414467270152163e-07,
      "loss": 0.6464,
      "step": 11019
    },
    {
      "epoch": 0.9853361945636624,
      "grad_norm": 0.16247257291095182,
      "learning_rate": 1.1276555285592017e-07,
      "loss": 0.6254,
      "step": 11020
    },
    {
      "epoch": 0.985425608011445,
      "grad_norm": 0.16076608559312233,
      "learning_rate": 1.113948104320417e-07,
      "loss": 0.6229,
      "step": 11021
    },
    {
      "epoch": 0.9855150214592274,
      "grad_norm": 0.13948338690560225,
      "learning_rate": 1.1003244554483871e-07,
      "loss": 0.6269,
      "step": 11022
    },
    {
      "epoch": 0.98560443490701,
      "grad_norm": 0.1794142187989409,
      "learning_rate": 1.0867845830858647e-07,
      "loss": 0.681,
      "step": 11023
    },
    {
      "epoch": 0.9856938483547926,
      "grad_norm": 0.14664242148695394,
      "learning_rate": 1.0733284883682749e-07,
      "loss": 0.607,
      "step": 11024
    },
    {
      "epoch": 0.9857832618025751,
      "grad_norm": 0.1664108844001068,
      "learning_rate": 1.0599561724242702e-07,
      "loss": 0.6727,
      "step": 11025
    },
    {
      "epoch": 0.9858726752503576,
      "grad_norm": 0.16295366895498947,
      "learning_rate": 1.046667636375287e-07,
      "loss": 0.6482,
      "step": 11026
    },
    {
      "epoch": 0.9859620886981402,
      "grad_norm": 0.17218956917955205,
      "learning_rate": 1.0334628813358782e-07,
      "loss": 0.3949,
      "step": 11027
    },
    {
      "epoch": 0.9860515021459227,
      "grad_norm": 0.15452571331677975,
      "learning_rate": 1.0203419084134913e-07,
      "loss": 0.6185,
      "step": 11028
    },
    {
      "epoch": 0.9861409155937053,
      "grad_norm": 0.15835080954499298,
      "learning_rate": 1.0073047187085794e-07,
      "loss": 0.6356,
      "step": 11029
    },
    {
      "epoch": 0.9862303290414879,
      "grad_norm": 0.15452459097439247,
      "learning_rate": 9.9435131331449e-08,
      "loss": 0.6152,
      "step": 11030
    },
    {
      "epoch": 0.9863197424892703,
      "grad_norm": 0.14070847885480883,
      "learning_rate": 9.814816933176874e-08,
      "loss": 0.6,
      "step": 11031
    },
    {
      "epoch": 0.9864091559370529,
      "grad_norm": 0.16032814674659251,
      "learning_rate": 9.686958597975304e-08,
      "loss": 0.6565,
      "step": 11032
    },
    {
      "epoch": 0.9864985693848355,
      "grad_norm": 0.15402218670151213,
      "learning_rate": 9.559938138263836e-08,
      "loss": 0.6164,
      "step": 11033
    },
    {
      "epoch": 0.9865879828326181,
      "grad_norm": 0.1647550050856026,
      "learning_rate": 9.433755564693947e-08,
      "loss": 0.6311,
      "step": 11034
    },
    {
      "epoch": 0.9866773962804005,
      "grad_norm": 0.17674241707962998,
      "learning_rate": 9.308410887849394e-08,
      "loss": 0.6601,
      "step": 11035
    },
    {
      "epoch": 0.9867668097281831,
      "grad_norm": 0.14973886022125357,
      "learning_rate": 9.18390411824288e-08,
      "loss": 0.5813,
      "step": 11036
    },
    {
      "epoch": 0.9868562231759657,
      "grad_norm": 0.17742051031131684,
      "learning_rate": 9.060235266317163e-08,
      "loss": 0.6284,
      "step": 11037
    },
    {
      "epoch": 0.9869456366237482,
      "grad_norm": 0.14503973906096312,
      "learning_rate": 8.937404342442834e-08,
      "loss": 0.6364,
      "step": 11038
    },
    {
      "epoch": 0.9870350500715308,
      "grad_norm": 0.14827930701582007,
      "learning_rate": 8.815411356922764e-08,
      "loss": 0.6379,
      "step": 11039
    },
    {
      "epoch": 0.9871244635193133,
      "grad_norm": 0.15504324135534486,
      "learning_rate": 8.694256319987659e-08,
      "loss": 0.6197,
      "step": 11040
    },
    {
      "epoch": 0.9872138769670958,
      "grad_norm": 0.18468387224667146,
      "learning_rate": 8.573939241798278e-08,
      "loss": 0.6405,
      "step": 11041
    },
    {
      "epoch": 0.9873032904148784,
      "grad_norm": 0.1488435623383017,
      "learning_rate": 8.454460132446552e-08,
      "loss": 0.645,
      "step": 11042
    },
    {
      "epoch": 0.987392703862661,
      "grad_norm": 0.15901232401394674,
      "learning_rate": 8.335819001952239e-08,
      "loss": 0.6731,
      "step": 11043
    },
    {
      "epoch": 0.9874821173104434,
      "grad_norm": 0.16115611985064346,
      "learning_rate": 8.21801586026627e-08,
      "loss": 0.6074,
      "step": 11044
    },
    {
      "epoch": 0.987571530758226,
      "grad_norm": 0.1755592293689717,
      "learning_rate": 8.101050717267411e-08,
      "loss": 0.6753,
      "step": 11045
    },
    {
      "epoch": 0.9876609442060086,
      "grad_norm": 0.1510356966960996,
      "learning_rate": 7.984923582767812e-08,
      "loss": 0.6404,
      "step": 11046
    },
    {
      "epoch": 0.9877503576537912,
      "grad_norm": 0.15357537744315328,
      "learning_rate": 7.869634466504128e-08,
      "loss": 0.6218,
      "step": 11047
    },
    {
      "epoch": 0.9878397711015737,
      "grad_norm": 0.16553258616063785,
      "learning_rate": 7.755183378147512e-08,
      "loss": 0.6714,
      "step": 11048
    },
    {
      "epoch": 0.9879291845493562,
      "grad_norm": 0.16247711711283977,
      "learning_rate": 7.641570327295844e-08,
      "loss": 0.6185,
      "step": 11049
    },
    {
      "epoch": 0.9880185979971388,
      "grad_norm": 0.16841377569292565,
      "learning_rate": 7.528795323477055e-08,
      "loss": 0.643,
      "step": 11050
    },
    {
      "epoch": 0.9881080114449213,
      "grad_norm": 0.15874880685815274,
      "learning_rate": 7.416858376151359e-08,
      "loss": 0.6332,
      "step": 11051
    },
    {
      "epoch": 0.9881974248927039,
      "grad_norm": 0.15806434071223008,
      "learning_rate": 7.305759494705689e-08,
      "loss": 0.6279,
      "step": 11052
    },
    {
      "epoch": 0.9882868383404864,
      "grad_norm": 0.15748561062144809,
      "learning_rate": 7.195498688458147e-08,
      "loss": 0.6634,
      "step": 11053
    },
    {
      "epoch": 0.9883762517882689,
      "grad_norm": 0.1551647324975611,
      "learning_rate": 7.08607596665467e-08,
      "loss": 0.6003,
      "step": 11054
    },
    {
      "epoch": 0.9884656652360515,
      "grad_norm": 0.15788659336755428,
      "learning_rate": 6.977491338474585e-08,
      "loss": 0.6591,
      "step": 11055
    },
    {
      "epoch": 0.9885550786838341,
      "grad_norm": 0.16004892286167466,
      "learning_rate": 6.869744813023937e-08,
      "loss": 0.5942,
      "step": 11056
    },
    {
      "epoch": 0.9886444921316166,
      "grad_norm": 0.17740292325289492,
      "learning_rate": 6.762836399338834e-08,
      "loss": 0.678,
      "step": 11057
    },
    {
      "epoch": 0.9887339055793991,
      "grad_norm": 0.15188490395527698,
      "learning_rate": 6.656766106385436e-08,
      "loss": 0.6343,
      "step": 11058
    },
    {
      "epoch": 0.9888233190271817,
      "grad_norm": 0.15871230610336864,
      "learning_rate": 6.551533943061072e-08,
      "loss": 0.6524,
      "step": 11059
    },
    {
      "epoch": 0.9889127324749643,
      "grad_norm": 0.15548015660441233,
      "learning_rate": 6.447139918189793e-08,
      "loss": 0.6652,
      "step": 11060
    },
    {
      "epoch": 0.9890021459227468,
      "grad_norm": 0.1676898486936848,
      "learning_rate": 6.343584040527927e-08,
      "loss": 0.6209,
      "step": 11061
    },
    {
      "epoch": 0.9890915593705293,
      "grad_norm": 0.17350165080072136,
      "learning_rate": 6.240866318760752e-08,
      "loss": 0.6176,
      "step": 11062
    },
    {
      "epoch": 0.9891809728183119,
      "grad_norm": 0.1487277661731032,
      "learning_rate": 6.138986761502486e-08,
      "loss": 0.6369,
      "step": 11063
    },
    {
      "epoch": 0.9892703862660944,
      "grad_norm": 0.15937401801737205,
      "learning_rate": 6.037945377297405e-08,
      "loss": 0.6764,
      "step": 11064
    },
    {
      "epoch": 0.989359799713877,
      "grad_norm": 0.15425276164394205,
      "learning_rate": 5.9377421746209525e-08,
      "loss": 0.6179,
      "step": 11065
    },
    {
      "epoch": 0.9894492131616596,
      "grad_norm": 0.15901991340582725,
      "learning_rate": 5.838377161875297e-08,
      "loss": 0.6241,
      "step": 11066
    },
    {
      "epoch": 0.989538626609442,
      "grad_norm": 0.16771816333272588,
      "learning_rate": 5.739850347395992e-08,
      "loss": 0.6608,
      "step": 11067
    },
    {
      "epoch": 0.9896280400572246,
      "grad_norm": 0.17949114947191172,
      "learning_rate": 5.642161739445317e-08,
      "loss": 0.6496,
      "step": 11068
    },
    {
      "epoch": 0.9897174535050072,
      "grad_norm": 0.18305174024380333,
      "learning_rate": 5.545311346215609e-08,
      "loss": 0.6298,
      "step": 11069
    },
    {
      "epoch": 0.9898068669527897,
      "grad_norm": 0.16817269550336822,
      "learning_rate": 5.449299175831479e-08,
      "loss": 0.6337,
      "step": 11070
    },
    {
      "epoch": 0.9898962804005722,
      "grad_norm": 0.1590928655841397,
      "learning_rate": 5.354125236343155e-08,
      "loss": 0.6274,
      "step": 11071
    },
    {
      "epoch": 0.9899856938483548,
      "grad_norm": 0.171316146533884,
      "learning_rate": 5.25978953573536e-08,
      "loss": 0.6102,
      "step": 11072
    },
    {
      "epoch": 0.9900751072961373,
      "grad_norm": 0.17941384664882742,
      "learning_rate": 5.166292081917323e-08,
      "loss": 0.6822,
      "step": 11073
    },
    {
      "epoch": 0.9901645207439199,
      "grad_norm": 0.18548122686934285,
      "learning_rate": 5.0736328827316605e-08,
      "loss": 0.6369,
      "step": 11074
    },
    {
      "epoch": 0.9902539341917024,
      "grad_norm": 0.1613278152952608,
      "learning_rate": 4.9818119459499325e-08,
      "loss": 0.625,
      "step": 11075
    },
    {
      "epoch": 0.990343347639485,
      "grad_norm": 0.15634139308214068,
      "learning_rate": 4.890829279272646e-08,
      "loss": 0.6283,
      "step": 11076
    },
    {
      "epoch": 0.9904327610872675,
      "grad_norm": 0.14947417051349537,
      "learning_rate": 4.800684890330365e-08,
      "loss": 0.5799,
      "step": 11077
    },
    {
      "epoch": 0.9905221745350501,
      "grad_norm": 0.16053018121691218,
      "learning_rate": 4.711378786683707e-08,
      "loss": 0.6553,
      "step": 11078
    },
    {
      "epoch": 0.9906115879828327,
      "grad_norm": 0.16387065342385237,
      "learning_rate": 4.6229109758222365e-08,
      "loss": 0.6531,
      "step": 11079
    },
    {
      "epoch": 0.9907010014306151,
      "grad_norm": 0.1641371597541972,
      "learning_rate": 4.535281465165575e-08,
      "loss": 0.6655,
      "step": 11080
    },
    {
      "epoch": 0.9907904148783977,
      "grad_norm": 0.177915613633474,
      "learning_rate": 4.448490262064509e-08,
      "loss": 0.6342,
      "step": 11081
    },
    {
      "epoch": 0.9908798283261803,
      "grad_norm": 0.15599101255249836,
      "learning_rate": 4.362537373795439e-08,
      "loss": 0.6317,
      "step": 11082
    },
    {
      "epoch": 0.9909692417739628,
      "grad_norm": 0.18494237971236768,
      "learning_rate": 4.277422807570375e-08,
      "loss": 0.6705,
      "step": 11083
    },
    {
      "epoch": 0.9910586552217453,
      "grad_norm": 0.1643337900935329,
      "learning_rate": 4.1931465705247195e-08,
      "loss": 0.6296,
      "step": 11084
    },
    {
      "epoch": 0.9911480686695279,
      "grad_norm": 0.17329360551594203,
      "learning_rate": 4.109708669728374e-08,
      "loss": 0.6707,
      "step": 11085
    },
    {
      "epoch": 0.9912374821173104,
      "grad_norm": 0.15353295609577913,
      "learning_rate": 4.027109112179073e-08,
      "loss": 0.6279,
      "step": 11086
    },
    {
      "epoch": 0.991326895565093,
      "grad_norm": 0.1848026338160375,
      "learning_rate": 3.945347904803498e-08,
      "loss": 0.6625,
      "step": 11087
    },
    {
      "epoch": 0.9914163090128756,
      "grad_norm": 0.16723405381921946,
      "learning_rate": 3.8644250544594975e-08,
      "loss": 0.6467,
      "step": 11088
    },
    {
      "epoch": 0.991505722460658,
      "grad_norm": 0.19121176891882902,
      "learning_rate": 3.784340567934974e-08,
      "loss": 0.6937,
      "step": 11089
    },
    {
      "epoch": 0.9915951359084406,
      "grad_norm": 0.1595866481957616,
      "learning_rate": 3.7050944519445576e-08,
      "loss": 0.668,
      "step": 11090
    },
    {
      "epoch": 0.9916845493562232,
      "grad_norm": 0.1563325528704097,
      "learning_rate": 3.626686713135152e-08,
      "loss": 0.625,
      "step": 11091
    },
    {
      "epoch": 0.9917739628040058,
      "grad_norm": 0.1637262294329221,
      "learning_rate": 3.54911735808372e-08,
      "loss": 0.6323,
      "step": 11092
    },
    {
      "epoch": 0.9918633762517882,
      "grad_norm": 0.1742364687563488,
      "learning_rate": 3.472386393293947e-08,
      "loss": 0.6604,
      "step": 11093
    },
    {
      "epoch": 0.9919527896995708,
      "grad_norm": 0.1493099955567443,
      "learning_rate": 3.3964938252040166e-08,
      "loss": 0.6327,
      "step": 11094
    },
    {
      "epoch": 0.9920422031473534,
      "grad_norm": 0.1706250448376037,
      "learning_rate": 3.3214396601766176e-08,
      "loss": 0.6424,
      "step": 11095
    },
    {
      "epoch": 0.9921316165951359,
      "grad_norm": 0.1616973782118961,
      "learning_rate": 3.247223904506713e-08,
      "loss": 0.6437,
      "step": 11096
    },
    {
      "epoch": 0.9922210300429185,
      "grad_norm": 0.1574052965078939,
      "learning_rate": 3.173846564419325e-08,
      "loss": 0.6353,
      "step": 11097
    },
    {
      "epoch": 0.992310443490701,
      "grad_norm": 0.1773859017796553,
      "learning_rate": 3.1013076460684186e-08,
      "loss": 0.6381,
      "step": 11098
    },
    {
      "epoch": 0.9923998569384835,
      "grad_norm": 0.17968882872730513,
      "learning_rate": 3.0296071555369065e-08,
      "loss": 0.6659,
      "step": 11099
    },
    {
      "epoch": 0.9924892703862661,
      "grad_norm": 0.15121342066835214,
      "learning_rate": 2.9587450988399768e-08,
      "loss": 0.5812,
      "step": 11100
    },
    {
      "epoch": 0.9925786838340487,
      "grad_norm": 0.15969680645564127,
      "learning_rate": 2.888721481919543e-08,
      "loss": 0.6356,
      "step": 11101
    },
    {
      "epoch": 0.9926680972818311,
      "grad_norm": 0.15288874781229136,
      "learning_rate": 2.819536310648685e-08,
      "loss": 0.635,
      "step": 11102
    },
    {
      "epoch": 0.9927575107296137,
      "grad_norm": 0.15412178984259814,
      "learning_rate": 2.7511895908294282e-08,
      "loss": 0.6266,
      "step": 11103
    },
    {
      "epoch": 0.9928469241773963,
      "grad_norm": 0.15340293733734794,
      "learning_rate": 2.6836813281938543e-08,
      "loss": 0.6369,
      "step": 11104
    },
    {
      "epoch": 0.9929363376251789,
      "grad_norm": 0.14566847468286348,
      "learning_rate": 2.617011528405211e-08,
      "loss": 0.6472,
      "step": 11105
    },
    {
      "epoch": 0.9930257510729614,
      "grad_norm": 0.157345162146068,
      "learning_rate": 2.551180197053471e-08,
      "loss": 0.5846,
      "step": 11106
    },
    {
      "epoch": 0.9931151645207439,
      "grad_norm": 0.17099375653184495,
      "learning_rate": 2.4861873396608838e-08,
      "loss": 0.6698,
      "step": 11107
    },
    {
      "epoch": 0.9932045779685265,
      "grad_norm": 0.17375533825015183,
      "learning_rate": 2.422032961677534e-08,
      "loss": 0.6571,
      "step": 11108
    },
    {
      "epoch": 0.993293991416309,
      "grad_norm": 0.17403141595173977,
      "learning_rate": 2.3587170684835623e-08,
      "loss": 0.6617,
      "step": 11109
    },
    {
      "epoch": 0.9933834048640916,
      "grad_norm": 0.17315674732153322,
      "learning_rate": 2.2962396653913864e-08,
      "loss": 0.6699,
      "step": 11110
    },
    {
      "epoch": 0.9934728183118741,
      "grad_norm": 0.17474885506664425,
      "learning_rate": 2.234600757637928e-08,
      "loss": 0.6297,
      "step": 11111
    },
    {
      "epoch": 0.9935622317596566,
      "grad_norm": 0.16169177143645125,
      "learning_rate": 2.1738003503946057e-08,
      "loss": 0.6461,
      "step": 11112
    },
    {
      "epoch": 0.9936516452074392,
      "grad_norm": 0.17586989390230906,
      "learning_rate": 2.1138384487606742e-08,
      "loss": 0.6291,
      "step": 11113
    },
    {
      "epoch": 0.9937410586552218,
      "grad_norm": 0.16637615453198018,
      "learning_rate": 2.054715057765444e-08,
      "loss": 0.6615,
      "step": 11114
    },
    {
      "epoch": 0.9938304721030042,
      "grad_norm": 0.1813024506973212,
      "learning_rate": 1.9964301823660604e-08,
      "loss": 0.6499,
      "step": 11115
    },
    {
      "epoch": 0.9939198855507868,
      "grad_norm": 0.1593331934791062,
      "learning_rate": 1.9389838274508355e-08,
      "loss": 0.6163,
      "step": 11116
    },
    {
      "epoch": 0.9940092989985694,
      "grad_norm": 0.1536188469869761,
      "learning_rate": 1.8823759978392474e-08,
      "loss": 0.6349,
      "step": 11117
    },
    {
      "epoch": 0.994098712446352,
      "grad_norm": 0.16725785670443327,
      "learning_rate": 1.8266066982774997e-08,
      "loss": 0.6436,
      "step": 11118
    },
    {
      "epoch": 0.9941881258941345,
      "grad_norm": 0.1416608609376429,
      "learning_rate": 1.7716759334440724e-08,
      "loss": 0.5802,
      "step": 11119
    },
    {
      "epoch": 0.994277539341917,
      "grad_norm": 0.17241780674899349,
      "learning_rate": 1.7175837079452804e-08,
      "loss": 0.6426,
      "step": 11120
    },
    {
      "epoch": 0.9943669527896996,
      "grad_norm": 0.16230390081175988,
      "learning_rate": 1.6643300263186056e-08,
      "loss": 0.6506,
      "step": 11121
    },
    {
      "epoch": 0.9944563662374821,
      "grad_norm": 0.18131172113803,
      "learning_rate": 1.6119148930282546e-08,
      "loss": 0.6247,
      "step": 11122
    },
    {
      "epoch": 0.9945457796852647,
      "grad_norm": 0.1740481067734761,
      "learning_rate": 1.560338312472931e-08,
      "loss": 0.5682,
      "step": 11123
    },
    {
      "epoch": 0.9946351931330472,
      "grad_norm": 0.176438668797062,
      "learning_rate": 1.5096002889758433e-08,
      "loss": 0.3582,
      "step": 11124
    },
    {
      "epoch": 0.9947246065808297,
      "grad_norm": 0.15662866075488355,
      "learning_rate": 1.4597008267935863e-08,
      "loss": 0.624,
      "step": 11125
    },
    {
      "epoch": 0.9948140200286123,
      "grad_norm": 0.16029585880013797,
      "learning_rate": 1.4106399301117012e-08,
      "loss": 0.6286,
      "step": 11126
    },
    {
      "epoch": 0.9949034334763949,
      "grad_norm": 0.15865730376919324,
      "learning_rate": 1.3624176030435642e-08,
      "loss": 0.6372,
      "step": 11127
    },
    {
      "epoch": 0.9949928469241774,
      "grad_norm": 0.15828504544069855,
      "learning_rate": 1.315033849634828e-08,
      "loss": 0.6458,
      "step": 11128
    },
    {
      "epoch": 0.9950822603719599,
      "grad_norm": 0.1491468969394609,
      "learning_rate": 1.2684886738589808e-08,
      "loss": 0.6229,
      "step": 11129
    },
    {
      "epoch": 0.9951716738197425,
      "grad_norm": 0.153498509096051,
      "learning_rate": 1.2227820796184564e-08,
      "loss": 0.6348,
      "step": 11130
    },
    {
      "epoch": 0.995261087267525,
      "grad_norm": 0.16727572447104413,
      "learning_rate": 1.1779140707490755e-08,
      "loss": 0.6173,
      "step": 11131
    },
    {
      "epoch": 0.9953505007153076,
      "grad_norm": 0.17755072504914687,
      "learning_rate": 1.1338846510111633e-08,
      "loss": 0.6655,
      "step": 11132
    },
    {
      "epoch": 0.9954399141630901,
      "grad_norm": 0.17967874430672254,
      "learning_rate": 1.0906938240995423e-08,
      "loss": 0.6769,
      "step": 11133
    },
    {
      "epoch": 0.9955293276108726,
      "grad_norm": 0.18813883454150723,
      "learning_rate": 1.04834159363576e-08,
      "loss": 0.6953,
      "step": 11134
    },
    {
      "epoch": 0.9956187410586552,
      "grad_norm": 0.16417856855202723,
      "learning_rate": 1.0068279631725297e-08,
      "loss": 0.6416,
      "step": 11135
    },
    {
      "epoch": 0.9957081545064378,
      "grad_norm": 0.15409748594274322,
      "learning_rate": 9.661529361892907e-09,
      "loss": 0.625,
      "step": 11136
    },
    {
      "epoch": 0.9957975679542204,
      "grad_norm": 0.1739153862767388,
      "learning_rate": 9.263165160999787e-09,
      "loss": 0.6899,
      "step": 11137
    },
    {
      "epoch": 0.9958869814020028,
      "grad_norm": 0.15254932635519344,
      "learning_rate": 8.873187062452548e-09,
      "loss": 0.6363,
      "step": 11138
    },
    {
      "epoch": 0.9959763948497854,
      "grad_norm": 0.16864100509826793,
      "learning_rate": 8.491595098947258e-09,
      "loss": 0.6521,
      "step": 11139
    },
    {
      "epoch": 0.996065808297568,
      "grad_norm": 0.17585711050498049,
      "learning_rate": 8.118389302491647e-09,
      "loss": 0.6771,
      "step": 11140
    },
    {
      "epoch": 0.9961552217453505,
      "grad_norm": 0.16435113812362884,
      "learning_rate": 7.753569704382902e-09,
      "loss": 0.5929,
      "step": 11141
    },
    {
      "epoch": 0.996244635193133,
      "grad_norm": 0.13075547550580122,
      "learning_rate": 7.397136335229871e-09,
      "loss": 0.624,
      "step": 11142
    },
    {
      "epoch": 0.9963340486409156,
      "grad_norm": 0.16679293225559869,
      "learning_rate": 7.049089224919758e-09,
      "loss": 0.6457,
      "step": 11143
    },
    {
      "epoch": 0.9964234620886981,
      "grad_norm": 0.15528895011033414,
      "learning_rate": 6.709428402629225e-09,
      "loss": 0.639,
      "step": 11144
    },
    {
      "epoch": 0.9965128755364807,
      "grad_norm": 0.16554358247266854,
      "learning_rate": 6.378153896868799e-09,
      "loss": 0.6227,
      "step": 11145
    },
    {
      "epoch": 0.9966022889842633,
      "grad_norm": 0.1535049498633818,
      "learning_rate": 6.055265735405158e-09,
      "loss": 0.6742,
      "step": 11146
    },
    {
      "epoch": 0.9966917024320457,
      "grad_norm": 0.16977466020941465,
      "learning_rate": 5.740763945327743e-09,
      "loss": 0.6503,
      "step": 11147
    },
    {
      "epoch": 0.9967811158798283,
      "grad_norm": 0.1770016815536338,
      "learning_rate": 5.434648553015453e-09,
      "loss": 0.5951,
      "step": 11148
    },
    {
      "epoch": 0.9968705293276109,
      "grad_norm": 0.1754622147276064,
      "learning_rate": 5.136919584125544e-09,
      "loss": 0.6372,
      "step": 11149
    },
    {
      "epoch": 0.9969599427753935,
      "grad_norm": 0.1394607361895271,
      "learning_rate": 4.847577063649133e-09,
      "loss": 0.6233,
      "step": 11150
    },
    {
      "epoch": 0.9970493562231759,
      "grad_norm": 0.15163270114923677,
      "learning_rate": 4.566621015833495e-09,
      "loss": 0.634,
      "step": 11151
    },
    {
      "epoch": 0.9971387696709585,
      "grad_norm": 0.17104255765777052,
      "learning_rate": 4.2940514642597626e-09,
      "loss": 0.6115,
      "step": 11152
    },
    {
      "epoch": 0.9972281831187411,
      "grad_norm": 0.16855688608685002,
      "learning_rate": 4.029868431765227e-09,
      "loss": 0.6556,
      "step": 11153
    },
    {
      "epoch": 0.9973175965665236,
      "grad_norm": 0.17303800445695047,
      "learning_rate": 3.774071940532142e-09,
      "loss": 0.3764,
      "step": 11154
    },
    {
      "epoch": 0.9974070100143062,
      "grad_norm": 0.18506447225882286,
      "learning_rate": 3.526662012010018e-09,
      "loss": 0.5994,
      "step": 11155
    },
    {
      "epoch": 0.9974964234620887,
      "grad_norm": 0.1547910810479377,
      "learning_rate": 3.2876386669267177e-09,
      "loss": 0.6432,
      "step": 11156
    },
    {
      "epoch": 0.9975858369098712,
      "grad_norm": 0.1536504001393728,
      "learning_rate": 3.057001925355074e-09,
      "loss": 0.6416,
      "step": 11157
    },
    {
      "epoch": 0.9976752503576538,
      "grad_norm": 0.1511460324604458,
      "learning_rate": 2.8347518066129675e-09,
      "loss": 0.6182,
      "step": 11158
    },
    {
      "epoch": 0.9977646638054364,
      "grad_norm": 0.1548075828962477,
      "learning_rate": 2.620888329363247e-09,
      "loss": 0.6308,
      "step": 11159
    },
    {
      "epoch": 0.9978540772532188,
      "grad_norm": 0.16500853018815906,
      "learning_rate": 2.4154115115360144e-09,
      "loss": 0.6589,
      "step": 11160
    },
    {
      "epoch": 0.9979434907010014,
      "grad_norm": 0.16002122726834395,
      "learning_rate": 2.218321370361931e-09,
      "loss": 0.6539,
      "step": 11161
    },
    {
      "epoch": 0.998032904148784,
      "grad_norm": 0.16385090918979933,
      "learning_rate": 2.0296179223722176e-09,
      "loss": 0.6337,
      "step": 11162
    },
    {
      "epoch": 0.9981223175965666,
      "grad_norm": 0.14387539933274643,
      "learning_rate": 1.8493011833875529e-09,
      "loss": 0.6203,
      "step": 11163
    },
    {
      "epoch": 0.998211731044349,
      "grad_norm": 0.16103190673455794,
      "learning_rate": 1.6773711685291738e-09,
      "loss": 0.6658,
      "step": 11164
    },
    {
      "epoch": 0.9983011444921316,
      "grad_norm": 0.16516013927693912,
      "learning_rate": 1.5138278922299797e-09,
      "loss": 0.6308,
      "step": 11165
    },
    {
      "epoch": 0.9983905579399142,
      "grad_norm": 0.15135190202485563,
      "learning_rate": 1.3586713681901232e-09,
      "loss": 0.602,
      "step": 11166
    },
    {
      "epoch": 0.9984799713876967,
      "grad_norm": 0.18969912263047564,
      "learning_rate": 1.211901609443622e-09,
      "loss": 0.6805,
      "step": 11167
    },
    {
      "epoch": 0.9985693848354793,
      "grad_norm": 0.14581497652527659,
      "learning_rate": 1.073518628269543e-09,
      "loss": 0.6258,
      "step": 11168
    },
    {
      "epoch": 0.9986587982832618,
      "grad_norm": 0.16085800320852173,
      "learning_rate": 9.435224363030238e-10,
      "loss": 0.6331,
      "step": 11169
    },
    {
      "epoch": 0.9987482117310443,
      "grad_norm": 0.1587509190877702,
      "learning_rate": 8.219130444353518e-10,
      "loss": 0.6149,
      "step": 11170
    },
    {
      "epoch": 0.9988376251788269,
      "grad_norm": 0.15262473564500367,
      "learning_rate": 7.086904628694769e-10,
      "loss": 0.6817,
      "step": 11171
    },
    {
      "epoch": 0.9989270386266095,
      "grad_norm": 0.1553589930181289,
      "learning_rate": 6.038547010867035e-10,
      "loss": 0.6279,
      "step": 11172
    },
    {
      "epoch": 0.9990164520743919,
      "grad_norm": 0.1552213575707258,
      "learning_rate": 5.074057678911004e-10,
      "loss": 0.6727,
      "step": 11173
    },
    {
      "epoch": 0.9991058655221745,
      "grad_norm": 0.15921297440715168,
      "learning_rate": 4.1934367137619334e-10,
      "loss": 0.609,
      "step": 11174
    },
    {
      "epoch": 0.9991952789699571,
      "grad_norm": 0.16473729322889225,
      "learning_rate": 3.396684189249655e-10,
      "loss": 0.6371,
      "step": 11175
    },
    {
      "epoch": 0.9992846924177397,
      "grad_norm": 0.17450515002253741,
      "learning_rate": 2.683800172098572e-10,
      "loss": 0.6629,
      "step": 11176
    },
    {
      "epoch": 0.9993741058655222,
      "grad_norm": 0.14409900222230945,
      "learning_rate": 2.054784722149705e-10,
      "loss": 0.5622,
      "step": 11177
    },
    {
      "epoch": 0.9994635193133047,
      "grad_norm": 0.169710959538377,
      "learning_rate": 1.5096378922496712e-10,
      "loss": 0.6418,
      "step": 11178
    },
    {
      "epoch": 0.9995529327610873,
      "grad_norm": 0.16025850974293906,
      "learning_rate": 1.0483597280286361e-10,
      "loss": 0.664,
      "step": 11179
    },
    {
      "epoch": 0.9996423462088698,
      "grad_norm": 0.17350903816288266,
      "learning_rate": 6.709502681223611e-11,
      "loss": 0.3845,
      "step": 11180
    },
    {
      "epoch": 0.9997317596566524,
      "grad_norm": 0.15416933030678928,
      "learning_rate": 3.774095442832248e-11,
      "loss": 0.643,
      "step": 11181
    },
    {
      "epoch": 0.9998211731044349,
      "grad_norm": 0.15468127253951464,
      "learning_rate": 1.6773758104715597e-11,
      "loss": 0.6208,
      "step": 11182
    },
    {
      "epoch": 0.9999105865522174,
      "grad_norm": 0.18767489961334924,
      "learning_rate": 4.193439617772299e-12,
      "loss": 0.584,
      "step": 11183
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.1654002418106855,
      "learning_rate": 0.0,
      "loss": 0.39,
      "step": 11184
    },
    {
      "epoch": 1.0,
      "step": 11184,
      "total_flos": 3641838377041920.0,
      "train_loss": 0.0,
      "train_runtime": 8.1209,
      "train_samples_per_second": 176275.217,
      "train_steps_per_second": 1377.18
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 11184,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 300,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 3641838377041920.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}