{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9999617985254231,
  "eval_steps": 2182,
  "global_step": 6544,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00015280589830767466,
      "grad_norm": 0.3537859320640564,
      "learning_rate": 8.000000000000001e-07,
      "loss": 0.941,
      "step": 1
    },
    {
      "epoch": 0.00015280589830767466,
      "eval_loss": 0.8535330295562744,
      "eval_runtime": 1566.4875,
      "eval_samples_per_second": 7.119,
      "eval_steps_per_second": 3.56,
      "step": 1
    },
    {
      "epoch": 0.0003056117966153493,
      "grad_norm": 0.3782769739627838,
      "learning_rate": 1.6000000000000001e-06,
      "loss": 0.7386,
      "step": 2
    },
    {
      "epoch": 0.00045841769492302404,
      "grad_norm": 0.475812703371048,
      "learning_rate": 2.4000000000000003e-06,
      "loss": 1.059,
      "step": 3
    },
    {
      "epoch": 0.0006112235932306987,
      "grad_norm": 0.4409228265285492,
      "learning_rate": 3.2000000000000003e-06,
      "loss": 0.825,
      "step": 4
    },
    {
      "epoch": 0.0007640294915383734,
      "grad_norm": 0.39886385202407837,
      "learning_rate": 4.000000000000001e-06,
      "loss": 0.7462,
      "step": 5
    },
    {
      "epoch": 0.0009168353898460481,
      "grad_norm": 0.32536882162094116,
      "learning_rate": 4.800000000000001e-06,
      "loss": 0.9611,
      "step": 6
    },
    {
      "epoch": 0.0010696412881537228,
      "grad_norm": 0.41293051838874817,
      "learning_rate": 5.600000000000001e-06,
      "loss": 0.8808,
      "step": 7
    },
    {
      "epoch": 0.0012224471864613973,
      "grad_norm": 0.3373503088951111,
      "learning_rate": 6.4000000000000006e-06,
      "loss": 0.9459,
      "step": 8
    },
    {
      "epoch": 0.001375253084769072,
      "grad_norm": 0.3855852484703064,
      "learning_rate": 7.2e-06,
      "loss": 0.9431,
      "step": 9
    },
    {
      "epoch": 0.0015280589830767469,
      "grad_norm": 0.3200698792934418,
      "learning_rate": 8.000000000000001e-06,
      "loss": 0.9505,
      "step": 10
    },
    {
      "epoch": 0.0016808648813844214,
      "grad_norm": 0.3219761550426483,
      "learning_rate": 8.8e-06,
      "loss": 0.8947,
      "step": 11
    },
    {
      "epoch": 0.0018336707796920962,
      "grad_norm": 0.28418421745300293,
      "learning_rate": 9.600000000000001e-06,
      "loss": 0.7695,
      "step": 12
    },
    {
      "epoch": 0.001986476677999771,
      "grad_norm": 0.3636055290699005,
      "learning_rate": 1.04e-05,
      "loss": 0.7646,
      "step": 13
    },
    {
      "epoch": 0.0021392825763074455,
      "grad_norm": 0.26769423484802246,
      "learning_rate": 1.1200000000000001e-05,
      "loss": 0.6816,
      "step": 14
    },
    {
      "epoch": 0.00229208847461512,
      "grad_norm": 0.28433066606521606,
      "learning_rate": 1.2e-05,
      "loss": 0.7629,
      "step": 15
    },
    {
      "epoch": 0.0024448943729227946,
      "grad_norm": 0.33892345428466797,
      "learning_rate": 1.2800000000000001e-05,
      "loss": 0.9303,
      "step": 16
    },
    {
      "epoch": 0.0025977002712304696,
      "grad_norm": 0.31409522891044617,
      "learning_rate": 1.3600000000000002e-05,
      "loss": 0.805,
      "step": 17
    },
    {
      "epoch": 0.002750506169538144,
      "grad_norm": 0.34053927659988403,
      "learning_rate": 1.44e-05,
      "loss": 0.7556,
      "step": 18
    },
    {
      "epoch": 0.0029033120678458187,
      "grad_norm": 0.3334382176399231,
      "learning_rate": 1.52e-05,
      "loss": 0.7494,
      "step": 19
    },
    {
      "epoch": 0.0030561179661534937,
      "grad_norm": 0.3846895694732666,
      "learning_rate": 1.6000000000000003e-05,
      "loss": 0.6788,
      "step": 20
    },
    {
      "epoch": 0.0032089238644611683,
      "grad_norm": 0.46727222204208374,
      "learning_rate": 1.6800000000000002e-05,
      "loss": 0.8987,
      "step": 21
    },
    {
      "epoch": 0.003361729762768843,
      "grad_norm": 0.4377021789550781,
      "learning_rate": 1.76e-05,
      "loss": 0.7235,
      "step": 22
    },
    {
      "epoch": 0.0035145356610765174,
      "grad_norm": 0.4573345482349396,
      "learning_rate": 1.84e-05,
      "loss": 0.5838,
      "step": 23
    },
    {
      "epoch": 0.0036673415593841924,
      "grad_norm": 0.3256567716598511,
      "learning_rate": 1.9200000000000003e-05,
      "loss": 0.5452,
      "step": 24
    },
    {
      "epoch": 0.003820147457691867,
      "grad_norm": 0.2252429723739624,
      "learning_rate": 2e-05,
      "loss": 0.7252,
      "step": 25
    },
    {
      "epoch": 0.003972953355999542,
      "grad_norm": 0.22351256012916565,
      "learning_rate": 2.08e-05,
      "loss": 0.7701,
      "step": 26
    },
    {
      "epoch": 0.0041257592543072165,
      "grad_norm": 0.24318568408489227,
      "learning_rate": 2.16e-05,
      "loss": 0.6135,
      "step": 27
    },
    {
      "epoch": 0.004278565152614891,
      "grad_norm": 0.3728923201560974,
      "learning_rate": 2.2400000000000002e-05,
      "loss": 0.7592,
      "step": 28
    },
    {
      "epoch": 0.0044313710509225656,
      "grad_norm": 0.2996881306171417,
      "learning_rate": 2.32e-05,
      "loss": 0.6952,
      "step": 29
    },
    {
      "epoch": 0.00458417694923024,
      "grad_norm": 0.23991453647613525,
      "learning_rate": 2.4e-05,
      "loss": 0.6947,
      "step": 30
    },
    {
      "epoch": 0.004736982847537915,
      "grad_norm": 0.2515174150466919,
      "learning_rate": 2.48e-05,
      "loss": 1.1102,
      "step": 31
    },
    {
      "epoch": 0.004889788745845589,
      "grad_norm": 0.220277339220047,
      "learning_rate": 2.5600000000000002e-05,
      "loss": 0.654,
      "step": 32
    },
    {
      "epoch": 0.005042594644153265,
      "grad_norm": 0.24221166968345642,
      "learning_rate": 2.64e-05,
      "loss": 0.7946,
      "step": 33
    },
    {
      "epoch": 0.005195400542460939,
      "grad_norm": 0.22481025755405426,
      "learning_rate": 2.7200000000000004e-05,
      "loss": 0.646,
      "step": 34
    },
    {
      "epoch": 0.005348206440768614,
      "grad_norm": 0.200043722987175,
      "learning_rate": 2.8000000000000003e-05,
      "loss": 0.8636,
      "step": 35
    },
    {
      "epoch": 0.005501012339076288,
      "grad_norm": 0.3696175217628479,
      "learning_rate": 2.88e-05,
      "loss": 0.8136,
      "step": 36
    },
    {
      "epoch": 0.005653818237383963,
      "grad_norm": 0.2078743427991867,
      "learning_rate": 2.96e-05,
      "loss": 0.76,
      "step": 37
    },
    {
      "epoch": 0.005806624135691637,
      "grad_norm": 0.18780824542045593,
      "learning_rate": 3.04e-05,
      "loss": 0.6602,
      "step": 38
    },
    {
      "epoch": 0.005959430033999312,
      "grad_norm": 0.3369501829147339,
      "learning_rate": 3.12e-05,
      "loss": 0.782,
      "step": 39
    },
    {
      "epoch": 0.006112235932306987,
      "grad_norm": 0.19364964962005615,
      "learning_rate": 3.2000000000000005e-05,
      "loss": 0.617,
      "step": 40
    },
    {
      "epoch": 0.006265041830614662,
      "grad_norm": 0.24052347242832184,
      "learning_rate": 3.2800000000000004e-05,
      "loss": 0.763,
      "step": 41
    },
    {
      "epoch": 0.0064178477289223365,
      "grad_norm": 0.3821535110473633,
      "learning_rate": 3.3600000000000004e-05,
      "loss": 0.7338,
      "step": 42
    },
    {
      "epoch": 0.006570653627230011,
      "grad_norm": 0.25892436504364014,
      "learning_rate": 3.4399999999999996e-05,
      "loss": 0.8406,
      "step": 43
    },
    {
      "epoch": 0.006723459525537686,
      "grad_norm": 0.21732579171657562,
      "learning_rate": 3.52e-05,
      "loss": 0.6437,
      "step": 44
    },
    {
      "epoch": 0.00687626542384536,
      "grad_norm": 0.21630685031414032,
      "learning_rate": 3.6e-05,
      "loss": 0.8539,
      "step": 45
    },
    {
      "epoch": 0.007029071322153035,
      "grad_norm": 0.2213805615901947,
      "learning_rate": 3.68e-05,
      "loss": 0.6046,
      "step": 46
    },
    {
      "epoch": 0.00718187722046071,
      "grad_norm": 0.29060035943984985,
      "learning_rate": 3.76e-05,
      "loss": 0.874,
      "step": 47
    },
    {
      "epoch": 0.007334683118768385,
      "grad_norm": 0.32261431217193604,
      "learning_rate": 3.8400000000000005e-05,
      "loss": 0.6077,
      "step": 48
    },
    {
      "epoch": 0.007487489017076059,
      "grad_norm": 0.3036012053489685,
      "learning_rate": 3.9200000000000004e-05,
      "loss": 0.7008,
      "step": 49
    },
    {
      "epoch": 0.007640294915383734,
      "grad_norm": 0.8190217018127441,
      "learning_rate": 4e-05,
      "loss": 0.9121,
      "step": 50
    },
    {
      "epoch": 0.007793100813691408,
      "grad_norm": 0.19872790575027466,
      "learning_rate": 4.08e-05,
      "loss": 0.6881,
      "step": 51
    },
    {
      "epoch": 0.007945906711999084,
      "grad_norm": 0.2575259506702423,
      "learning_rate": 4.16e-05,
      "loss": 0.6109,
      "step": 52
    },
    {
      "epoch": 0.008098712610306757,
      "grad_norm": 0.19350558519363403,
      "learning_rate": 4.24e-05,
      "loss": 0.7016,
      "step": 53
    },
    {
      "epoch": 0.008251518508614433,
      "grad_norm": 0.23708432912826538,
      "learning_rate": 4.32e-05,
      "loss": 0.7601,
      "step": 54
    },
    {
      "epoch": 0.008404324406922107,
      "grad_norm": 0.42168232798576355,
      "learning_rate": 4.4000000000000006e-05,
      "loss": 0.7687,
      "step": 55
    },
    {
      "epoch": 0.008557130305229782,
      "grad_norm": 0.2412991225719452,
      "learning_rate": 4.4800000000000005e-05,
      "loss": 0.7205,
      "step": 56
    },
    {
      "epoch": 0.008709936203537456,
      "grad_norm": 0.2611636519432068,
      "learning_rate": 4.5600000000000004e-05,
      "loss": 0.9159,
      "step": 57
    },
    {
      "epoch": 0.008862742101845131,
      "grad_norm": 0.4061261713504791,
      "learning_rate": 4.64e-05,
      "loss": 0.9651,
      "step": 58
    },
    {
      "epoch": 0.009015548000152807,
      "grad_norm": 0.2744627892971039,
      "learning_rate": 4.72e-05,
      "loss": 0.8742,
      "step": 59
    },
    {
      "epoch": 0.00916835389846048,
      "grad_norm": 0.19657334685325623,
      "learning_rate": 4.8e-05,
      "loss": 0.5806,
      "step": 60
    },
    {
      "epoch": 0.009321159796768156,
      "grad_norm": 0.24348127841949463,
      "learning_rate": 4.88e-05,
      "loss": 0.6486,
      "step": 61
    },
    {
      "epoch": 0.00947396569507583,
      "grad_norm": 0.21159450709819794,
      "learning_rate": 4.96e-05,
      "loss": 0.7542,
      "step": 62
    },
    {
      "epoch": 0.009626771593383505,
      "grad_norm": 0.23291338980197906,
      "learning_rate": 5.0400000000000005e-05,
      "loss": 0.5697,
      "step": 63
    },
    {
      "epoch": 0.009779577491691178,
      "grad_norm": 0.2656891644001007,
      "learning_rate": 5.1200000000000004e-05,
      "loss": 0.5819,
      "step": 64
    },
    {
      "epoch": 0.009932383389998854,
      "grad_norm": 0.21467728912830353,
      "learning_rate": 5.2000000000000004e-05,
      "loss": 0.7506,
      "step": 65
    },
    {
      "epoch": 0.01008518928830653,
      "grad_norm": 0.25314462184906006,
      "learning_rate": 5.28e-05,
      "loss": 0.4424,
      "step": 66
    },
    {
      "epoch": 0.010237995186614203,
      "grad_norm": 0.2386377453804016,
      "learning_rate": 5.360000000000001e-05,
      "loss": 0.77,
      "step": 67
    },
    {
      "epoch": 0.010390801084921878,
      "grad_norm": 0.24037358164787292,
      "learning_rate": 5.440000000000001e-05,
      "loss": 0.6511,
      "step": 68
    },
    {
      "epoch": 0.010543606983229552,
      "grad_norm": 0.2473539263010025,
      "learning_rate": 5.520000000000001e-05,
      "loss": 0.7086,
      "step": 69
    },
    {
      "epoch": 0.010696412881537228,
      "grad_norm": 0.23620954155921936,
      "learning_rate": 5.6000000000000006e-05,
      "loss": 0.82,
      "step": 70
    },
    {
      "epoch": 0.010849218779844901,
      "grad_norm": 0.20047105848789215,
      "learning_rate": 5.68e-05,
      "loss": 0.6568,
      "step": 71
    },
    {
      "epoch": 0.011002024678152577,
      "grad_norm": 0.21529246866703033,
      "learning_rate": 5.76e-05,
      "loss": 0.5856,
      "step": 72
    },
    {
      "epoch": 0.011154830576460252,
      "grad_norm": 0.2424297332763672,
      "learning_rate": 5.8399999999999997e-05,
      "loss": 0.6073,
      "step": 73
    },
    {
      "epoch": 0.011307636474767926,
      "grad_norm": 0.2489442229270935,
      "learning_rate": 5.92e-05,
      "loss": 0.6807,
      "step": 74
    },
    {
      "epoch": 0.011460442373075601,
      "grad_norm": 0.35431531071662903,
      "learning_rate": 6e-05,
      "loss": 0.6741,
      "step": 75
    },
    {
      "epoch": 0.011613248271383275,
      "grad_norm": 0.24680747091770172,
      "learning_rate": 6.08e-05,
      "loss": 0.7881,
      "step": 76
    },
    {
      "epoch": 0.01176605416969095,
      "grad_norm": 0.2189926654100418,
      "learning_rate": 6.16e-05,
      "loss": 0.4981,
      "step": 77
    },
    {
      "epoch": 0.011918860067998624,
      "grad_norm": 0.29724177718162537,
      "learning_rate": 6.24e-05,
      "loss": 0.8307,
      "step": 78
    },
    {
      "epoch": 0.0120716659663063,
      "grad_norm": 0.2065054178237915,
      "learning_rate": 6.32e-05,
      "loss": 0.8858,
      "step": 79
    },
    {
      "epoch": 0.012224471864613975,
      "grad_norm": 0.21010780334472656,
      "learning_rate": 6.400000000000001e-05,
      "loss": 0.5629,
      "step": 80
    },
    {
      "epoch": 0.012377277762921648,
      "grad_norm": 0.26801830530166626,
      "learning_rate": 6.48e-05,
      "loss": 0.7186,
      "step": 81
    },
    {
      "epoch": 0.012530083661229324,
      "grad_norm": 0.3203904628753662,
      "learning_rate": 6.560000000000001e-05,
      "loss": 0.8373,
      "step": 82
    },
    {
      "epoch": 0.012682889559536998,
      "grad_norm": 0.2379075288772583,
      "learning_rate": 6.64e-05,
      "loss": 0.6567,
      "step": 83
    },
    {
      "epoch": 0.012835695457844673,
      "grad_norm": 0.2070106416940689,
      "learning_rate": 6.720000000000001e-05,
      "loss": 0.5546,
      "step": 84
    },
    {
      "epoch": 0.012988501356152347,
      "grad_norm": 0.27992406487464905,
      "learning_rate": 6.800000000000001e-05,
      "loss": 0.7352,
      "step": 85
    },
    {
      "epoch": 0.013141307254460022,
      "grad_norm": 0.21248190104961395,
      "learning_rate": 6.879999999999999e-05,
      "loss": 0.6147,
      "step": 86
    },
    {
      "epoch": 0.013294113152767698,
      "grad_norm": 0.23391371965408325,
      "learning_rate": 6.96e-05,
      "loss": 0.7289,
      "step": 87
    },
    {
      "epoch": 0.013446919051075371,
      "grad_norm": 0.2129083275794983,
      "learning_rate": 7.04e-05,
      "loss": 0.8087,
      "step": 88
    },
    {
      "epoch": 0.013599724949383047,
      "grad_norm": 0.20840856432914734,
      "learning_rate": 7.12e-05,
      "loss": 0.6256,
      "step": 89
    },
    {
      "epoch": 0.01375253084769072,
      "grad_norm": 0.2114286720752716,
      "learning_rate": 7.2e-05,
      "loss": 0.5729,
      "step": 90
    },
    {
      "epoch": 0.013905336745998396,
      "grad_norm": 0.36645349860191345,
      "learning_rate": 7.280000000000001e-05,
      "loss": 0.7079,
      "step": 91
    },
    {
      "epoch": 0.01405814264430607,
      "grad_norm": 0.25490131974220276,
      "learning_rate": 7.36e-05,
      "loss": 0.7698,
      "step": 92
    },
    {
      "epoch": 0.014210948542613745,
      "grad_norm": 0.3339272141456604,
      "learning_rate": 7.44e-05,
      "loss": 0.6126,
      "step": 93
    },
    {
      "epoch": 0.01436375444092142,
      "grad_norm": 0.23325824737548828,
      "learning_rate": 7.52e-05,
      "loss": 0.8602,
      "step": 94
    },
    {
      "epoch": 0.014516560339229094,
      "grad_norm": 0.2818077504634857,
      "learning_rate": 7.6e-05,
      "loss": 0.7726,
      "step": 95
    },
    {
      "epoch": 0.01466936623753677,
      "grad_norm": 0.23820696771144867,
      "learning_rate": 7.680000000000001e-05,
      "loss": 0.7344,
      "step": 96
    },
    {
      "epoch": 0.014822172135844443,
      "grad_norm": 0.25046974420547485,
      "learning_rate": 7.76e-05,
      "loss": 0.5652,
      "step": 97
    },
    {
      "epoch": 0.014974978034152119,
      "grad_norm": 0.23637717962265015,
      "learning_rate": 7.840000000000001e-05,
      "loss": 0.9834,
      "step": 98
    },
    {
      "epoch": 0.015127783932459792,
      "grad_norm": 0.20385268330574036,
      "learning_rate": 7.920000000000001e-05,
      "loss": 0.685,
      "step": 99
    },
    {
      "epoch": 0.015280589830767468,
      "grad_norm": 0.22909928858280182,
      "learning_rate": 8e-05,
      "loss": 0.8559,
      "step": 100
    },
    {
      "epoch": 0.015433395729075143,
      "grad_norm": 0.22465063631534576,
      "learning_rate": 8.080000000000001e-05,
      "loss": 0.7211,
      "step": 101
    },
    {
      "epoch": 0.015586201627382817,
      "grad_norm": 0.24429404735565186,
      "learning_rate": 8.16e-05,
      "loss": 0.8082,
      "step": 102
    },
    {
      "epoch": 0.01573900752569049,
      "grad_norm": 0.23806914687156677,
      "learning_rate": 8.24e-05,
      "loss": 0.8902,
      "step": 103
    },
    {
      "epoch": 0.015891813423998168,
      "grad_norm": 0.6740613579750061,
      "learning_rate": 8.32e-05,
      "loss": 0.6925,
      "step": 104
    },
    {
      "epoch": 0.01604461932230584,
      "grad_norm": 0.21556046605110168,
      "learning_rate": 8.4e-05,
      "loss": 0.4671,
      "step": 105
    },
    {
      "epoch": 0.016197425220613515,
      "grad_norm": 0.23331165313720703,
      "learning_rate": 8.48e-05,
      "loss": 0.8063,
      "step": 106
    },
    {
      "epoch": 0.01635023111892119,
      "grad_norm": 0.2387675642967224,
      "learning_rate": 8.560000000000001e-05,
      "loss": 0.7208,
      "step": 107
    },
    {
      "epoch": 0.016503037017228866,
      "grad_norm": 0.24151624739170074,
      "learning_rate": 8.64e-05,
      "loss": 0.6599,
      "step": 108
    },
    {
      "epoch": 0.01665584291553654,
      "grad_norm": 0.24208898842334747,
      "learning_rate": 8.72e-05,
      "loss": 0.8813,
      "step": 109
    },
    {
      "epoch": 0.016808648813844213,
      "grad_norm": 0.2825816571712494,
      "learning_rate": 8.800000000000001e-05,
      "loss": 0.6972,
      "step": 110
    },
    {
      "epoch": 0.01696145471215189,
      "grad_norm": 0.20937465131282806,
      "learning_rate": 8.88e-05,
      "loss": 0.6302,
      "step": 111
    },
    {
      "epoch": 0.017114260610459564,
      "grad_norm": 0.5450260043144226,
      "learning_rate": 8.960000000000001e-05,
      "loss": 0.7495,
      "step": 112
    },
    {
      "epoch": 0.017267066508767238,
      "grad_norm": 0.23792274296283722,
      "learning_rate": 9.04e-05,
      "loss": 0.8158,
      "step": 113
    },
    {
      "epoch": 0.01741987240707491,
      "grad_norm": 0.2838549315929413,
      "learning_rate": 9.120000000000001e-05,
      "loss": 0.9494,
      "step": 114
    },
    {
      "epoch": 0.01757267830538259,
      "grad_norm": 0.19924430549144745,
      "learning_rate": 9.200000000000001e-05,
      "loss": 0.7721,
      "step": 115
    },
    {
      "epoch": 0.017725484203690262,
      "grad_norm": 0.18079274892807007,
      "learning_rate": 9.28e-05,
      "loss": 0.5387,
      "step": 116
    },
    {
      "epoch": 0.017878290101997936,
      "grad_norm": 0.20002222061157227,
      "learning_rate": 9.360000000000001e-05,
      "loss": 0.717,
      "step": 117
    },
    {
      "epoch": 0.018031096000305613,
      "grad_norm": 0.193673238158226,
      "learning_rate": 9.44e-05,
      "loss": 0.6842,
      "step": 118
    },
    {
      "epoch": 0.018183901898613287,
      "grad_norm": 0.21627160906791687,
      "learning_rate": 9.52e-05,
      "loss": 0.6531,
      "step": 119
    },
    {
      "epoch": 0.01833670779692096,
      "grad_norm": 0.2337784618139267,
      "learning_rate": 9.6e-05,
      "loss": 0.7066,
      "step": 120
    },
    {
      "epoch": 0.018489513695228634,
      "grad_norm": 0.21653355658054352,
      "learning_rate": 9.680000000000001e-05,
      "loss": 0.7411,
      "step": 121
    },
    {
      "epoch": 0.01864231959353631,
      "grad_norm": 0.26810961961746216,
      "learning_rate": 9.76e-05,
      "loss": 0.6168,
      "step": 122
    },
    {
      "epoch": 0.018795125491843985,
      "grad_norm": 0.21840594708919525,
      "learning_rate": 9.84e-05,
      "loss": 0.6318,
      "step": 123
    },
    {
      "epoch": 0.01894793139015166,
      "grad_norm": 0.26883718371391296,
      "learning_rate": 9.92e-05,
      "loss": 0.7906,
      "step": 124
    },
    {
      "epoch": 0.019100737288459336,
      "grad_norm": 0.40301695466041565,
      "learning_rate": 0.0001,
      "loss": 0.6718,
      "step": 125
    },
    {
      "epoch": 0.01925354318676701,
      "grad_norm": 0.36299192905426025,
      "learning_rate": 0.00010080000000000001,
      "loss": 0.9555,
      "step": 126
    },
    {
      "epoch": 0.019406349085074683,
      "grad_norm": 0.40861931443214417,
      "learning_rate": 0.0001016,
      "loss": 0.7246,
      "step": 127
    },
    {
      "epoch": 0.019559154983382357,
      "grad_norm": 0.2326318919658661,
      "learning_rate": 0.00010240000000000001,
      "loss": 0.7032,
      "step": 128
    },
    {
      "epoch": 0.019711960881690034,
      "grad_norm": 0.22199535369873047,
      "learning_rate": 0.0001032,
      "loss": 0.6007,
      "step": 129
    },
    {
      "epoch": 0.019864766779997708,
      "grad_norm": 0.2680632174015045,
      "learning_rate": 0.00010400000000000001,
      "loss": 0.6804,
      "step": 130
    },
    {
      "epoch": 0.02001757267830538,
      "grad_norm": 0.21533040702342987,
      "learning_rate": 0.00010480000000000001,
      "loss": 0.8305,
      "step": 131
    },
    {
      "epoch": 0.02017037857661306,
      "grad_norm": 0.22990071773529053,
      "learning_rate": 0.0001056,
      "loss": 0.7334,
      "step": 132
    },
    {
      "epoch": 0.020323184474920732,
      "grad_norm": 0.2372717261314392,
      "learning_rate": 0.00010640000000000001,
      "loss": 0.5291,
      "step": 133
    },
    {
      "epoch": 0.020475990373228406,
      "grad_norm": 0.19138963520526886,
      "learning_rate": 0.00010720000000000002,
      "loss": 0.6131,
      "step": 134
    },
    {
      "epoch": 0.02062879627153608,
      "grad_norm": 0.2097582370042801,
      "learning_rate": 0.00010800000000000001,
      "loss": 0.6131,
      "step": 135
    },
    {
      "epoch": 0.020781602169843757,
      "grad_norm": 0.19639591872692108,
      "learning_rate": 0.00010880000000000002,
      "loss": 0.5467,
      "step": 136
    },
    {
      "epoch": 0.02093440806815143,
      "grad_norm": 0.5305817723274231,
      "learning_rate": 0.00010960000000000001,
      "loss": 0.6327,
      "step": 137
    },
    {
      "epoch": 0.021087213966459104,
      "grad_norm": 0.2177964448928833,
      "learning_rate": 0.00011040000000000001,
      "loss": 0.6252,
      "step": 138
    },
    {
      "epoch": 0.02124001986476678,
      "grad_norm": 0.18753781914710999,
      "learning_rate": 0.00011120000000000002,
      "loss": 0.8267,
      "step": 139
    },
    {
      "epoch": 0.021392825763074455,
      "grad_norm": 0.26264771819114685,
      "learning_rate": 0.00011200000000000001,
      "loss": 0.737,
      "step": 140
    },
    {
      "epoch": 0.02154563166138213,
      "grad_norm": 0.2190270870923996,
      "learning_rate": 0.00011279999999999999,
      "loss": 0.6809,
      "step": 141
    },
    {
      "epoch": 0.021698437559689802,
      "grad_norm": 0.21061022579669952,
      "learning_rate": 0.0001136,
      "loss": 0.7108,
      "step": 142
    },
    {
      "epoch": 0.02185124345799748,
      "grad_norm": 0.23190730810165405,
      "learning_rate": 0.0001144,
      "loss": 0.625,
      "step": 143
    },
    {
      "epoch": 0.022004049356305153,
      "grad_norm": 0.21410205960273743,
      "learning_rate": 0.0001152,
      "loss": 0.7908,
      "step": 144
    },
    {
      "epoch": 0.022156855254612827,
      "grad_norm": 0.19211190938949585,
      "learning_rate": 0.000116,
      "loss": 0.6662,
      "step": 145
    },
    {
      "epoch": 0.022309661152920504,
      "grad_norm": 0.43506669998168945,
      "learning_rate": 0.00011679999999999999,
      "loss": 0.7876,
      "step": 146
    },
    {
      "epoch": 0.022462467051228178,
      "grad_norm": 0.24997620284557343,
      "learning_rate": 0.0001176,
      "loss": 0.5589,
      "step": 147
    },
    {
      "epoch": 0.02261527294953585,
      "grad_norm": 0.22067512571811676,
      "learning_rate": 0.0001184,
      "loss": 0.5908,
      "step": 148
    },
    {
      "epoch": 0.022768078847843525,
      "grad_norm": 0.5890689492225647,
      "learning_rate": 0.0001192,
      "loss": 0.7447,
      "step": 149
    },
    {
      "epoch": 0.022920884746151202,
      "grad_norm": 0.2859780192375183,
      "learning_rate": 0.00012,
      "loss": 0.7093,
      "step": 150
    },
    {
      "epoch": 0.023073690644458876,
      "grad_norm": 0.20324255526065826,
      "learning_rate": 0.0001208,
      "loss": 0.6664,
      "step": 151
    },
    {
      "epoch": 0.02322649654276655,
      "grad_norm": 0.2541416883468628,
      "learning_rate": 0.0001216,
      "loss": 0.861,
      "step": 152
    },
    {
      "epoch": 0.023379302441074227,
      "grad_norm": 0.22396203875541687,
      "learning_rate": 0.0001224,
      "loss": 0.6853,
      "step": 153
    },
    {
      "epoch": 0.0235321083393819,
      "grad_norm": 0.3173479735851288,
      "learning_rate": 0.0001232,
      "loss": 0.7057,
      "step": 154
    },
    {
      "epoch": 0.023684914237689574,
      "grad_norm": 0.2800653576850891,
      "learning_rate": 0.000124,
      "loss": 0.9188,
      "step": 155
    },
    {
      "epoch": 0.023837720135997248,
      "grad_norm": 0.18186135590076447,
      "learning_rate": 0.0001248,
      "loss": 0.7335,
      "step": 156
    },
    {
      "epoch": 0.023990526034304925,
      "grad_norm": 0.25458452105522156,
      "learning_rate": 0.00012560000000000002,
      "loss": 0.7153,
      "step": 157
    },
    {
      "epoch": 0.0241433319326126,
      "grad_norm": 0.21995219588279724,
      "learning_rate": 0.0001264,
      "loss": 0.43,
      "step": 158
    },
    {
      "epoch": 0.024296137830920272,
      "grad_norm": 1.0608121156692505,
      "learning_rate": 0.0001272,
      "loss": 0.6193,
      "step": 159
    },
    {
      "epoch": 0.02444894372922795,
      "grad_norm": 0.2779378592967987,
      "learning_rate": 0.00012800000000000002,
      "loss": 0.9149,
      "step": 160
    },
    {
      "epoch": 0.024601749627535623,
      "grad_norm": 0.1996106058359146,
      "learning_rate": 0.00012880000000000001,
      "loss": 0.6259,
      "step": 161
    },
    {
      "epoch": 0.024754555525843297,
      "grad_norm": 0.2813643515110016,
      "learning_rate": 0.0001296,
      "loss": 0.6519,
      "step": 162
    },
    {
      "epoch": 0.02490736142415097,
      "grad_norm": 0.16814516484737396,
      "learning_rate": 0.0001304,
      "loss": 0.736,
      "step": 163
    },
    {
      "epoch": 0.025060167322458648,
      "grad_norm": 0.2353413999080658,
      "learning_rate": 0.00013120000000000002,
      "loss": 0.6421,
      "step": 164
    },
    {
      "epoch": 0.02521297322076632,
      "grad_norm": 0.1907549351453781,
      "learning_rate": 0.000132,
      "loss": 0.6655,
      "step": 165
    },
    {
      "epoch": 0.025365779119073995,
      "grad_norm": 0.20261786878108978,
      "learning_rate": 0.0001328,
      "loss": 0.5768,
      "step": 166
    },
    {
      "epoch": 0.025518585017381672,
      "grad_norm": 0.19534656405448914,
      "learning_rate": 0.00013360000000000002,
      "loss": 0.6831,
      "step": 167
    },
    {
      "epoch": 0.025671390915689346,
      "grad_norm": 0.18376581370830536,
      "learning_rate": 0.00013440000000000001,
      "loss": 0.8075,
      "step": 168
    },
    {
      "epoch": 0.02582419681399702,
      "grad_norm": 0.23888923227787018,
      "learning_rate": 0.0001352,
      "loss": 0.6131,
      "step": 169
    },
    {
      "epoch": 0.025977002712304693,
      "grad_norm": 0.23357047140598297,
      "learning_rate": 0.00013600000000000003,
      "loss": 0.6604,
      "step": 170
    },
    {
      "epoch": 0.02612980861061237,
      "grad_norm": 0.3035596013069153,
      "learning_rate": 0.00013680000000000002,
      "loss": 0.6949,
      "step": 171
    },
    {
      "epoch": 0.026282614508920044,
      "grad_norm": 0.22164690494537354,
      "learning_rate": 0.00013759999999999998,
      "loss": 0.8732,
      "step": 172
    },
    {
      "epoch": 0.026435420407227718,
      "grad_norm": 0.21173541247844696,
      "learning_rate": 0.0001384,
      "loss": 0.7322,
      "step": 173
    },
    {
      "epoch": 0.026588226305535395,
      "grad_norm": 0.20340844988822937,
      "learning_rate": 0.0001392,
      "loss": 0.684,
      "step": 174
    },
    {
      "epoch": 0.02674103220384307,
      "grad_norm": 0.21223647892475128,
      "learning_rate": 0.00014,
      "loss": 0.7615,
      "step": 175
    },
    {
      "epoch": 0.026893838102150742,
      "grad_norm": 0.25785163044929504,
      "learning_rate": 0.0001408,
      "loss": 0.665,
      "step": 176
    },
    {
      "epoch": 0.027046644000458416,
      "grad_norm": 0.2169693559408188,
      "learning_rate": 0.0001416,
      "loss": 0.553,
      "step": 177
    },
    {
      "epoch": 0.027199449898766093,
      "grad_norm": 0.22600002586841583,
      "learning_rate": 0.0001424,
      "loss": 0.5225,
      "step": 178
    },
    {
      "epoch": 0.027352255797073767,
      "grad_norm": 0.21666403114795685,
      "learning_rate": 0.0001432,
      "loss": 0.5592,
      "step": 179
    },
    {
      "epoch": 0.02750506169538144,
      "grad_norm": 0.19408009946346283,
      "learning_rate": 0.000144,
      "loss": 0.6251,
      "step": 180
    },
    {
      "epoch": 0.027657867593689118,
      "grad_norm": 0.22444888949394226,
      "learning_rate": 0.0001448,
      "loss": 0.6119,
      "step": 181
    },
    {
      "epoch": 0.02781067349199679,
      "grad_norm": 0.1960359364748001,
      "learning_rate": 0.00014560000000000002,
      "loss": 0.8866,
      "step": 182
    },
    {
      "epoch": 0.027963479390304465,
      "grad_norm": 0.298685759305954,
      "learning_rate": 0.0001464,
      "loss": 0.5952,
      "step": 183
    },
    {
      "epoch": 0.02811628528861214,
      "grad_norm": 0.21745067834854126,
      "learning_rate": 0.0001472,
      "loss": 1.0509,
      "step": 184
    },
    {
      "epoch": 0.028269091186919816,
      "grad_norm": 0.44042158126831055,
      "learning_rate": 0.000148,
      "loss": 0.8793,
      "step": 185
    },
    {
      "epoch": 0.02842189708522749,
      "grad_norm": 0.22677303850650787,
      "learning_rate": 0.0001488,
      "loss": 0.6196,
      "step": 186
    },
    {
      "epoch": 0.028574702983535163,
      "grad_norm": 0.2111995816230774,
      "learning_rate": 0.0001496,
      "loss": 0.7332,
      "step": 187
    },
    {
      "epoch": 0.02872750888184284,
      "grad_norm": 0.19154132902622223,
      "learning_rate": 0.0001504,
      "loss": 0.766,
      "step": 188
    },
    {
      "epoch": 0.028880314780150514,
      "grad_norm": 0.24843589961528778,
      "learning_rate": 0.00015120000000000002,
      "loss": 0.929,
      "step": 189
    },
    {
      "epoch": 0.029033120678458188,
      "grad_norm": 0.22019292414188385,
      "learning_rate": 0.000152,
      "loss": 0.5629,
      "step": 190
    },
    {
      "epoch": 0.02918592657676586,
      "grad_norm": 0.23560045659542084,
      "learning_rate": 0.0001528,
      "loss": 0.6681,
      "step": 191
    },
    {
      "epoch": 0.02933873247507354,
      "grad_norm": 0.19246064126491547,
      "learning_rate": 0.00015360000000000002,
      "loss": 0.6445,
      "step": 192
    },
    {
      "epoch": 0.029491538373381213,
      "grad_norm": 0.21508120000362396,
      "learning_rate": 0.0001544,
      "loss": 0.6329,
      "step": 193
    },
    {
      "epoch": 0.029644344271688886,
      "grad_norm": 0.2356320321559906,
      "learning_rate": 0.0001552,
      "loss": 0.6302,
      "step": 194
    },
    {
      "epoch": 0.029797150169996563,
      "grad_norm": 0.22980546951293945,
      "learning_rate": 0.00015600000000000002,
      "loss": 0.5325,
      "step": 195
    },
    {
      "epoch": 0.029949956068304237,
      "grad_norm": 0.2617977559566498,
      "learning_rate": 0.00015680000000000002,
      "loss": 0.8915,
      "step": 196
    },
    {
      "epoch": 0.03010276196661191,
      "grad_norm": 0.19717738032341003,
      "learning_rate": 0.0001576,
      "loss": 0.7757,
      "step": 197
    },
    {
      "epoch": 0.030255567864919584,
      "grad_norm": 0.20106296241283417,
      "learning_rate": 0.00015840000000000003,
      "loss": 0.7742,
      "step": 198
    },
    {
      "epoch": 0.03040837376322726,
      "grad_norm": 0.226706400513649,
      "learning_rate": 0.00015920000000000002,
      "loss": 0.5814,
      "step": 199
    },
    {
      "epoch": 0.030561179661534935,
      "grad_norm": 0.18126145005226135,
      "learning_rate": 0.00016,
      "loss": 0.6697,
      "step": 200
    },
    {
      "epoch": 0.03071398555984261,
      "grad_norm": 1.2944668531417847,
      "learning_rate": 0.0001608,
      "loss": 0.6111,
      "step": 201
    },
    {
      "epoch": 0.030866791458150286,
      "grad_norm": 0.19455716013908386,
      "learning_rate": 0.00016160000000000002,
      "loss": 0.6571,
      "step": 202
    },
    {
      "epoch": 0.03101959735645796,
      "grad_norm": 0.23030945658683777,
      "learning_rate": 0.00016240000000000002,
      "loss": 0.8378,
      "step": 203
    },
    {
      "epoch": 0.031172403254765634,
      "grad_norm": 0.22586551308631897,
      "learning_rate": 0.0001632,
      "loss": 0.8245,
      "step": 204
    },
    {
      "epoch": 0.03132520915307331,
      "grad_norm": 0.2673279643058777,
      "learning_rate": 0.000164,
      "loss": 0.701,
      "step": 205
    },
    {
      "epoch": 0.03147801505138098,
      "grad_norm": 0.22940319776535034,
      "learning_rate": 0.0001648,
      "loss": 1.0499,
      "step": 206
    },
    {
      "epoch": 0.03163082094968866,
      "grad_norm": 0.33147504925727844,
      "learning_rate": 0.0001656,
      "loss": 0.6698,
      "step": 207
    },
    {
      "epoch": 0.031783626847996335,
      "grad_norm": 0.22897526621818542,
      "learning_rate": 0.0001664,
      "loss": 0.7872,
      "step": 208
    },
    {
      "epoch": 0.031936432746304005,
      "grad_norm": 0.23269681632518768,
      "learning_rate": 0.0001672,
      "loss": 0.6758,
      "step": 209
    },
    {
      "epoch": 0.03208923864461168,
      "grad_norm": 0.25892311334609985,
      "learning_rate": 0.000168,
      "loss": 0.6459,
      "step": 210
    },
    {
      "epoch": 0.03224204454291936,
      "grad_norm": 0.2470550239086151,
      "learning_rate": 0.0001688,
      "loss": 0.6778,
      "step": 211
    },
    {
      "epoch": 0.03239485044122703,
      "grad_norm": 0.23179148137569427,
      "learning_rate": 0.0001696,
      "loss": 0.6098,
      "step": 212
    },
    {
      "epoch": 0.03254765633953471,
      "grad_norm": 0.23430663347244263,
      "learning_rate": 0.0001704,
      "loss": 0.7551,
      "step": 213
    },
    {
      "epoch": 0.03270046223784238,
      "grad_norm": 0.18951766192913055,
      "learning_rate": 0.00017120000000000001,
      "loss": 0.7198,
      "step": 214
    },
    {
      "epoch": 0.032853268136150054,
      "grad_norm": 0.2654738128185272,
      "learning_rate": 0.000172,
      "loss": 0.6006,
      "step": 215
    },
    {
      "epoch": 0.03300607403445773,
      "grad_norm": 0.22690650820732117,
      "learning_rate": 0.0001728,
      "loss": 0.7018,
      "step": 216
    },
    {
      "epoch": 0.0331588799327654,
      "grad_norm": 0.22692647576332092,
      "learning_rate": 0.00017360000000000002,
      "loss": 0.7319,
      "step": 217
    },
    {
      "epoch": 0.03331168583107308,
      "grad_norm": 0.20025219023227692,
      "learning_rate": 0.0001744,
      "loss": 0.6302,
      "step": 218
    },
    {
      "epoch": 0.033464491729380756,
      "grad_norm": 0.19332559406757355,
      "learning_rate": 0.0001752,
      "loss": 0.6756,
      "step": 219
    },
    {
      "epoch": 0.033617297627688426,
      "grad_norm": 0.25213485956192017,
      "learning_rate": 0.00017600000000000002,
      "loss": 0.865,
      "step": 220
    },
    {
      "epoch": 0.033770103525996104,
      "grad_norm": 0.2248384654521942,
      "learning_rate": 0.00017680000000000001,
      "loss": 0.6936,
      "step": 221
    },
    {
      "epoch": 0.03392290942430378,
      "grad_norm": 0.23252415657043457,
      "learning_rate": 0.0001776,
      "loss": 0.8629,
      "step": 222
    },
    {
      "epoch": 0.03407571532261145,
      "grad_norm": 0.2784040570259094,
      "learning_rate": 0.0001784,
      "loss": 0.9894,
      "step": 223
    },
    {
      "epoch": 0.03422852122091913,
      "grad_norm": 0.23547817766666412,
      "learning_rate": 0.00017920000000000002,
      "loss": 0.6912,
      "step": 224
    },
    {
      "epoch": 0.034381327119226805,
      "grad_norm": 0.22327569127082825,
      "learning_rate": 0.00018,
      "loss": 0.7035,
      "step": 225
    },
    {
      "epoch": 0.034534133017534475,
      "grad_norm": 0.22189348936080933,
      "learning_rate": 0.0001808,
      "loss": 0.5271,
      "step": 226
    },
    {
      "epoch": 0.03468693891584215,
      "grad_norm": 0.19266308844089508,
      "learning_rate": 0.00018160000000000002,
      "loss": 0.4573,
      "step": 227
    },
    {
      "epoch": 0.03483974481414982,
      "grad_norm": 0.23664893209934235,
      "learning_rate": 0.00018240000000000002,
      "loss": 0.6511,
      "step": 228
    },
    {
      "epoch": 0.0349925507124575,
      "grad_norm": 0.20202231407165527,
      "learning_rate": 0.0001832,
      "loss": 0.6569,
      "step": 229
    },
    {
      "epoch": 0.03514535661076518,
      "grad_norm": 0.23481759428977966,
      "learning_rate": 0.00018400000000000003,
      "loss": 0.6054,
      "step": 230
    },
    {
      "epoch": 0.03529816250907285,
      "grad_norm": 0.2738634943962097,
      "learning_rate": 0.00018480000000000002,
      "loss": 0.8319,
      "step": 231
    },
    {
      "epoch": 0.035450968407380525,
      "grad_norm": 0.24060329794883728,
      "learning_rate": 0.0001856,
      "loss": 0.6179,
      "step": 232
    },
    {
      "epoch": 0.0356037743056882,
      "grad_norm": 0.2128535658121109,
      "learning_rate": 0.00018640000000000003,
      "loss": 0.7123,
      "step": 233
    },
    {
      "epoch": 0.03575658020399587,
      "grad_norm": 0.1951960027217865,
      "learning_rate": 0.00018720000000000002,
      "loss": 0.5302,
      "step": 234
    },
    {
      "epoch": 0.03590938610230355,
      "grad_norm": 0.3803926110267639,
      "learning_rate": 0.000188,
      "loss": 0.6614,
      "step": 235
    },
    {
      "epoch": 0.036062192000611226,
      "grad_norm": 0.19294387102127075,
      "learning_rate": 0.0001888,
      "loss": 0.6031,
      "step": 236
    },
    {
      "epoch": 0.036214997898918896,
      "grad_norm": 0.24113322794437408,
      "learning_rate": 0.0001896,
      "loss": 0.8938,
      "step": 237
    },
    {
      "epoch": 0.036367803797226574,
      "grad_norm": 0.19767731428146362,
      "learning_rate": 0.0001904,
      "loss": 0.5464,
      "step": 238
    },
    {
      "epoch": 0.03652060969553425,
      "grad_norm": 0.2186284363269806,
      "learning_rate": 0.0001912,
      "loss": 0.7288,
      "step": 239
    },
    {
      "epoch": 0.03667341559384192,
      "grad_norm": 0.5541898608207703,
      "learning_rate": 0.000192,
      "loss": 0.6484,
      "step": 240
    },
    {
      "epoch": 0.0368262214921496,
      "grad_norm": 0.22552861273288727,
      "learning_rate": 0.0001928,
      "loss": 0.834,
      "step": 241
    },
    {
      "epoch": 0.03697902739045727,
      "grad_norm": 0.3038541078567505,
      "learning_rate": 0.00019360000000000002,
      "loss": 0.6914,
      "step": 242
    },
    {
      "epoch": 0.037131833288764945,
      "grad_norm": 0.27954229712486267,
      "learning_rate": 0.0001944,
      "loss": 0.6588,
      "step": 243
    },
    {
      "epoch": 0.03728463918707262,
      "grad_norm": 0.5024107098579407,
      "learning_rate": 0.0001952,
      "loss": 0.688,
      "step": 244
    },
    {
      "epoch": 0.03743744508538029,
      "grad_norm": 0.23389217257499695,
      "learning_rate": 0.000196,
      "loss": 0.7403,
      "step": 245
    },
    {
      "epoch": 0.03759025098368797,
      "grad_norm": 0.22935818135738373,
      "learning_rate": 0.0001968,
      "loss": 0.6697,
      "step": 246
    },
    {
      "epoch": 0.03774305688199565,
      "grad_norm": 0.2132337987422943,
      "learning_rate": 0.0001976,
      "loss": 0.7081,
      "step": 247
    },
    {
      "epoch": 0.03789586278030332,
      "grad_norm": 0.22637519240379333,
      "learning_rate": 0.0001984,
      "loss": 0.5676,
      "step": 248
    },
    {
      "epoch": 0.038048668678610995,
      "grad_norm": 0.2421012669801712,
      "learning_rate": 0.00019920000000000002,
      "loss": 0.6939,
      "step": 249
    },
    {
      "epoch": 0.03820147457691867,
      "grad_norm": 0.36056315898895264,
      "learning_rate": 0.0002,
      "loss": 0.7907,
      "step": 250
    },
    {
      "epoch": 0.03835428047522634,
      "grad_norm": 0.2190164178609848,
      "learning_rate": 0.00019999998754291972,
      "loss": 0.6726,
      "step": 251
    },
    {
      "epoch": 0.03850708637353402,
      "grad_norm": 0.2309923619031906,
      "learning_rate": 0.00019999995017168197,
      "loss": 0.6444,
      "step": 252
    },
    {
      "epoch": 0.038659892271841696,
      "grad_norm": 0.32520991563796997,
      "learning_rate": 0.00019999988788629606,
      "loss": 0.748,
      "step": 253
    },
    {
      "epoch": 0.038812698170149366,
      "grad_norm": 0.2230103313922882,
      "learning_rate": 0.00019999980068677745,
      "loss": 0.5999,
      "step": 254
    },
    {
      "epoch": 0.038965504068457044,
      "grad_norm": 0.21019278466701508,
      "learning_rate": 0.00019999968857314798,
      "loss": 0.6995,
      "step": 255
    },
    {
      "epoch": 0.039118309966764714,
      "grad_norm": 1.701196312904358,
      "learning_rate": 0.00019999955154543554,
      "loss": 0.7642,
      "step": 256
    },
    {
      "epoch": 0.03927111586507239,
      "grad_norm": 0.3295258581638336,
      "learning_rate": 0.0001999993896036742,
      "loss": 0.6656,
      "step": 257
    },
    {
      "epoch": 0.03942392176338007,
      "grad_norm": 0.2401845008134842,
      "learning_rate": 0.00019999920274790437,
      "loss": 0.6979,
      "step": 258
    },
    {
      "epoch": 0.03957672766168774,
      "grad_norm": 0.2586315870285034,
      "learning_rate": 0.00019999899097817263,
      "loss": 0.6154,
      "step": 259
    },
    {
      "epoch": 0.039729533559995416,
      "grad_norm": 0.2295006513595581,
      "learning_rate": 0.00019999875429453168,
      "loss": 0.7028,
      "step": 260
    },
    {
      "epoch": 0.03988233945830309,
      "grad_norm": 0.23985904455184937,
      "learning_rate": 0.0001999984926970405,
      "loss": 0.553,
      "step": 261
    },
    {
      "epoch": 0.04003514535661076,
      "grad_norm": 0.20894868671894073,
      "learning_rate": 0.00019999820618576427,
      "loss": 0.5702,
      "step": 262
    },
    {
      "epoch": 0.04018795125491844,
      "grad_norm": 0.257010817527771,
      "learning_rate": 0.00019999789476077441,
      "loss": 0.9264,
      "step": 263
    },
    {
      "epoch": 0.04034075715322612,
      "grad_norm": 0.28151246905326843,
      "learning_rate": 0.00019999755842214846,
      "loss": 0.6658,
      "step": 264
    },
    {
      "epoch": 0.04049356305153379,
      "grad_norm": 0.22812116146087646,
      "learning_rate": 0.00019999719716997025,
      "loss": 0.736,
      "step": 265
    },
    {
      "epoch": 0.040646368949841465,
      "grad_norm": 0.20041359961032867,
      "learning_rate": 0.00019999681100432977,
      "loss": 0.7334,
      "step": 266
    },
    {
      "epoch": 0.04079917484814914,
      "grad_norm": 0.2387220561504364,
      "learning_rate": 0.0001999963999253232,
      "loss": 0.5816,
      "step": 267
    },
    {
      "epoch": 0.04095198074645681,
      "grad_norm": 0.23148131370544434,
      "learning_rate": 0.00019999596393305296,
      "loss": 0.7564,
      "step": 268
    },
    {
      "epoch": 0.04110478664476449,
      "grad_norm": 0.30160292983055115,
      "learning_rate": 0.00019999550302762776,
      "loss": 0.805,
      "step": 269
    },
    {
      "epoch": 0.04125759254307216,
      "grad_norm": 0.25093773007392883,
      "learning_rate": 0.0001999950172091623,
      "loss": 0.7729,
      "step": 270
    },
    {
      "epoch": 0.041410398441379836,
      "grad_norm": 0.18739305436611176,
      "learning_rate": 0.00019999450647777774,
      "loss": 0.6284,
      "step": 271
    },
    {
      "epoch": 0.041563204339687514,
      "grad_norm": 0.2319766879081726,
      "learning_rate": 0.00019999397083360126,
      "loss": 0.4766,
      "step": 272
    },
    {
      "epoch": 0.041716010237995184,
      "grad_norm": 0.24282965064048767,
      "learning_rate": 0.0001999934102767663,
      "loss": 0.8018,
      "step": 273
    },
    {
      "epoch": 0.04186881613630286,
      "grad_norm": 0.21952565014362335,
      "learning_rate": 0.00019999282480741255,
      "loss": 0.6215,
      "step": 274
    },
    {
      "epoch": 0.04202162203461054,
      "grad_norm": 0.5087156295776367,
      "learning_rate": 0.00019999221442568586,
      "loss": 0.7481,
      "step": 275
    },
    {
      "epoch": 0.04217442793291821,
      "grad_norm": 0.2524573504924774,
      "learning_rate": 0.00019999157913173828,
      "loss": 0.7204,
      "step": 276
    },
    {
      "epoch": 0.042327233831225886,
      "grad_norm": 0.2968989312648773,
      "learning_rate": 0.00019999091892572817,
      "loss": 0.5803,
      "step": 277
    },
    {
      "epoch": 0.04248003972953356,
      "grad_norm": 0.24576199054718018,
      "learning_rate": 0.0001999902338078199,
      "loss": 0.8048,
      "step": 278
    },
    {
      "epoch": 0.04263284562784123,
      "grad_norm": 0.2983776032924652,
      "learning_rate": 0.00019998952377818426,
      "loss": 0.6864,
      "step": 279
    },
    {
      "epoch": 0.04278565152614891,
      "grad_norm": 0.21671080589294434,
      "learning_rate": 0.0001999887888369981,
      "loss": 0.7487,
      "step": 280
    },
    {
      "epoch": 0.04293845742445659,
      "grad_norm": 0.24726702272891998,
      "learning_rate": 0.00019998802898444452,
      "loss": 0.6923,
      "step": 281
    },
    {
      "epoch": 0.04309126332276426,
      "grad_norm": 0.20502431690692902,
      "learning_rate": 0.00019998724422071282,
      "loss": 0.8878,
      "step": 282
    },
    {
      "epoch": 0.043244069221071935,
      "grad_norm": 0.20872731506824493,
      "learning_rate": 0.00019998643454599856,
      "loss": 0.7725,
      "step": 283
    },
    {
      "epoch": 0.043396875119379605,
      "grad_norm": 0.3191676139831543,
      "learning_rate": 0.00019998559996050347,
      "loss": 0.8263,
      "step": 284
    },
    {
      "epoch": 0.04354968101768728,
      "grad_norm": 0.39908912777900696,
      "learning_rate": 0.00019998474046443546,
      "loss": 0.6558,
      "step": 285
    },
    {
      "epoch": 0.04370248691599496,
      "grad_norm": 0.1785343736410141,
      "learning_rate": 0.0001999838560580086,
      "loss": 0.8449,
      "step": 286
    },
    {
      "epoch": 0.04385529281430263,
      "grad_norm": 0.2524084448814392,
      "learning_rate": 0.00019998294674144332,
      "loss": 0.7286,
      "step": 287
    },
    {
      "epoch": 0.04400809871261031,
      "grad_norm": 0.22183741629123688,
      "learning_rate": 0.00019998201251496617,
      "loss": 0.6983,
      "step": 288
    },
    {
      "epoch": 0.044160904610917984,
      "grad_norm": 0.351361483335495,
      "learning_rate": 0.00019998105337880984,
      "loss": 0.8788,
      "step": 289
    },
    {
      "epoch": 0.044313710509225654,
      "grad_norm": 0.28324541449546814,
      "learning_rate": 0.00019998006933321332,
      "loss": 0.6135,
      "step": 290
    },
    {
      "epoch": 0.04446651640753333,
      "grad_norm": 0.2555903494358063,
      "learning_rate": 0.00019997906037842183,
      "loss": 0.5704,
      "step": 291
    },
    {
      "epoch": 0.04461932230584101,
      "grad_norm": 0.2161989063024521,
      "learning_rate": 0.00019997802651468665,
      "loss": 0.6411,
      "step": 292
    },
    {
      "epoch": 0.04477212820414868,
      "grad_norm": 0.2448689043521881,
      "learning_rate": 0.00019997696774226543,
      "loss": 0.6679,
      "step": 293
    },
    {
      "epoch": 0.044924934102456356,
      "grad_norm": 0.2556678056716919,
      "learning_rate": 0.00019997588406142188,
      "loss": 0.8401,
      "step": 294
    },
    {
      "epoch": 0.04507774000076403,
      "grad_norm": 0.24582459032535553,
      "learning_rate": 0.00019997477547242608,
      "loss": 0.8461,
      "step": 295
    },
    {
      "epoch": 0.0452305458990717,
      "grad_norm": 0.19258172810077667,
      "learning_rate": 0.0001999736419755542,
      "loss": 0.749,
      "step": 296
    },
    {
      "epoch": 0.04538335179737938,
      "grad_norm": 0.2357243299484253,
      "learning_rate": 0.0001999724835710886,
      "loss": 0.6866,
      "step": 297
    },
    {
      "epoch": 0.04553615769568705,
      "grad_norm": 0.2215932160615921,
      "learning_rate": 0.00019997130025931788,
      "loss": 0.7202,
      "step": 298
    },
    {
      "epoch": 0.04568896359399473,
      "grad_norm": 0.22760243713855743,
      "learning_rate": 0.00019997009204053695,
      "loss": 0.7233,
      "step": 299
    },
    {
      "epoch": 0.045841769492302405,
      "grad_norm": 0.23306317627429962,
      "learning_rate": 0.00019996885891504672,
      "loss": 0.6106,
      "step": 300
    },
    {
      "epoch": 0.045994575390610075,
      "grad_norm": 0.2906085252761841,
      "learning_rate": 0.00019996760088315444,
      "loss": 0.8809,
      "step": 301
    },
    {
      "epoch": 0.04614738128891775,
      "grad_norm": 0.21287627518177032,
      "learning_rate": 0.0001999663179451736,
      "loss": 0.7354,
      "step": 302
    },
    {
      "epoch": 0.04630018718722543,
      "grad_norm": 0.25051966309547424,
      "learning_rate": 0.00019996501010142377,
      "loss": 0.5903,
      "step": 303
    },
    {
      "epoch": 0.0464529930855331,
      "grad_norm": 0.2597728371620178,
      "learning_rate": 0.00019996367735223078,
      "loss": 0.9319,
      "step": 304
    },
    {
      "epoch": 0.04660579898384078,
      "grad_norm": 0.26951470971107483,
      "learning_rate": 0.00019996231969792672,
      "loss": 0.7461,
      "step": 305
    },
    {
      "epoch": 0.046758604882148454,
      "grad_norm": 1.7744990587234497,
      "learning_rate": 0.00019996093713884981,
      "loss": 0.768,
      "step": 306
    },
    {
      "epoch": 0.046911410780456124,
      "grad_norm": 0.3497793674468994,
      "learning_rate": 0.0001999595296753445,
      "loss": 0.9868,
      "step": 307
    },
    {
      "epoch": 0.0470642166787638,
      "grad_norm": 0.25556042790412903,
      "learning_rate": 0.00019995809730776146,
      "loss": 0.7797,
      "step": 308
    },
    {
      "epoch": 0.04721702257707148,
      "grad_norm": 0.21160712838172913,
      "learning_rate": 0.00019995664003645756,
      "loss": 0.5969,
      "step": 309
    },
    {
      "epoch": 0.04736982847537915,
      "grad_norm": 0.20472657680511475,
      "learning_rate": 0.00019995515786179583,
      "loss": 0.4852,
      "step": 310
    },
    {
      "epoch": 0.047522634373686826,
      "grad_norm": 0.2371402531862259,
      "learning_rate": 0.0001999536507841456,
      "loss": 0.6591,
      "step": 311
    },
    {
      "epoch": 0.047675440271994496,
      "grad_norm": 0.2097351998090744,
      "learning_rate": 0.0001999521188038823,
      "loss": 0.6849,
      "step": 312
    },
    {
      "epoch": 0.04782824617030217,
      "grad_norm": 0.27859818935394287,
      "learning_rate": 0.0001999505619213876,
      "loss": 0.6925,
      "step": 313
    },
    {
      "epoch": 0.04798105206860985,
      "grad_norm": 0.22203388810157776,
      "learning_rate": 0.0001999489801370494,
      "loss": 0.8685,
      "step": 314
    },
    {
      "epoch": 0.04813385796691752,
      "grad_norm": 0.23791301250457764,
      "learning_rate": 0.00019994737345126185,
      "loss": 0.7374,
      "step": 315
    },
    {
      "epoch": 0.0482866638652252,
      "grad_norm": 0.21271444857120514,
      "learning_rate": 0.00019994574186442513,
      "loss": 0.6281,
      "step": 316
    },
    {
      "epoch": 0.048439469763532875,
      "grad_norm": 0.24780192971229553,
      "learning_rate": 0.00019994408537694585,
      "loss": 0.6525,
      "step": 317
    },
    {
      "epoch": 0.048592275661840545,
      "grad_norm": 0.2564080059528351,
      "learning_rate": 0.0001999424039892366,
      "loss": 0.8889,
      "step": 318
    },
    {
      "epoch": 0.04874508156014822,
      "grad_norm": 0.22218337655067444,
      "learning_rate": 0.00019994069770171637,
      "loss": 0.7197,
      "step": 319
    },
    {
      "epoch": 0.0488978874584559,
      "grad_norm": 0.2387639880180359,
      "learning_rate": 0.00019993896651481022,
      "loss": 0.7767,
      "step": 320
    },
    {
      "epoch": 0.04905069335676357,
      "grad_norm": 0.2468961626291275,
      "learning_rate": 0.0001999372104289495,
      "loss": 0.568,
      "step": 321
    },
    {
      "epoch": 0.04920349925507125,
      "grad_norm": 0.23209026455879211,
      "learning_rate": 0.00019993542944457166,
      "loss": 0.7791,
      "step": 322
    },
    {
      "epoch": 0.049356305153378924,
      "grad_norm": 0.21054136753082275,
      "learning_rate": 0.0001999336235621205,
      "loss": 0.5405,
      "step": 323
    },
    {
      "epoch": 0.049509111051686594,
      "grad_norm": 0.36745285987854004,
      "learning_rate": 0.00019993179278204583,
      "loss": 0.604,
      "step": 324
    },
    {
      "epoch": 0.04966191694999427,
      "grad_norm": 0.25285887718200684,
      "learning_rate": 0.0001999299371048039,
      "loss": 0.6,
      "step": 325
    },
    {
      "epoch": 0.04981472284830194,
      "grad_norm": 0.25242236256599426,
      "learning_rate": 0.00019992805653085697,
      "loss": 0.5935,
      "step": 326
    },
    {
      "epoch": 0.04996752874660962,
      "grad_norm": 0.2363995909690857,
      "learning_rate": 0.00019992615106067353,
      "loss": 0.9162,
      "step": 327
    },
    {
      "epoch": 0.050120334644917296,
      "grad_norm": 0.24165673553943634,
      "learning_rate": 0.0001999242206947284,
      "loss": 0.7828,
      "step": 328
    },
    {
      "epoch": 0.050273140543224966,
      "grad_norm": 0.23123116791248322,
      "learning_rate": 0.00019992226543350246,
      "loss": 0.7983,
      "step": 329
    },
    {
      "epoch": 0.05042594644153264,
      "grad_norm": 0.26019179821014404,
      "learning_rate": 0.00019992028527748287,
      "loss": 0.7911,
      "step": 330
    },
    {
      "epoch": 0.05057875233984032,
      "grad_norm": 0.22852295637130737,
      "learning_rate": 0.00019991828022716295,
      "loss": 0.6836,
      "step": 331
    },
    {
      "epoch": 0.05073155823814799,
      "grad_norm": 0.313212513923645,
      "learning_rate": 0.00019991625028304224,
      "loss": 0.7387,
      "step": 332
    },
    {
      "epoch": 0.05088436413645567,
      "grad_norm": 0.22907061874866486,
      "learning_rate": 0.00019991419544562652,
      "loss": 0.7111,
      "step": 333
    },
    {
      "epoch": 0.051037170034763345,
      "grad_norm": 0.24576780200004578,
      "learning_rate": 0.0001999121157154277,
      "loss": 0.6846,
      "step": 334
    },
    {
      "epoch": 0.051189975933071015,
      "grad_norm": 0.27017709612846375,
      "learning_rate": 0.00019991001109296392,
      "loss": 0.6491,
      "step": 335
    },
    {
      "epoch": 0.05134278183137869,
      "grad_norm": 0.30673229694366455,
      "learning_rate": 0.00019990788157875955,
      "loss": 0.7643,
      "step": 336
    },
    {
      "epoch": 0.05149558772968637,
      "grad_norm": 0.26924848556518555,
      "learning_rate": 0.00019990572717334514,
      "loss": 0.6674,
      "step": 337
    },
    {
      "epoch": 0.05164839362799404,
      "grad_norm": 0.23859171569347382,
      "learning_rate": 0.00019990354787725742,
      "loss": 0.6755,
      "step": 338
    },
    {
      "epoch": 0.05180119952630172,
      "grad_norm": 0.3418155312538147,
      "learning_rate": 0.00019990134369103938,
      "loss": 0.9036,
      "step": 339
    },
    {
      "epoch": 0.05195400542460939,
      "grad_norm": 0.22035688161849976,
      "learning_rate": 0.00019989911461524017,
      "loss": 0.7557,
      "step": 340
    },
    {
      "epoch": 0.052106811322917064,
      "grad_norm": 0.21298931539058685,
      "learning_rate": 0.0001998968606504151,
      "loss": 0.7085,
      "step": 341
    },
    {
      "epoch": 0.05225961722122474,
      "grad_norm": 0.30325108766555786,
      "learning_rate": 0.0001998945817971258,
      "loss": 0.6753,
      "step": 342
    },
    {
      "epoch": 0.05241242311953241,
      "grad_norm": 0.20659081637859344,
      "learning_rate": 0.00019989227805593994,
      "loss": 0.7346,
      "step": 343
    },
    {
      "epoch": 0.05256522901784009,
      "grad_norm": 0.18806900084018707,
      "learning_rate": 0.00019988994942743153,
      "loss": 0.6469,
      "step": 344
    },
    {
      "epoch": 0.052718034916147766,
      "grad_norm": 0.23977094888687134,
      "learning_rate": 0.00019988759591218073,
      "loss": 0.6006,
      "step": 345
    },
    {
      "epoch": 0.052870840814455436,
      "grad_norm": 0.19300661981105804,
      "learning_rate": 0.0001998852175107739,
      "loss": 0.6165,
      "step": 346
    },
    {
      "epoch": 0.05302364671276311,
      "grad_norm": 0.2542365491390228,
      "learning_rate": 0.00019988281422380358,
      "loss": 0.615,
      "step": 347
    },
    {
      "epoch": 0.05317645261107079,
      "grad_norm": 0.33191439509391785,
      "learning_rate": 0.00019988038605186855,
      "loss": 0.5821,
      "step": 348
    },
    {
      "epoch": 0.05332925850937846,
      "grad_norm": 0.2477361559867859,
      "learning_rate": 0.0001998779329955737,
      "loss": 0.7881,
      "step": 349
    },
    {
      "epoch": 0.05348206440768614,
      "grad_norm": 0.45226341485977173,
      "learning_rate": 0.00019987545505553028,
      "loss": 0.8674,
      "step": 350
    },
    {
      "epoch": 0.053634870305993815,
      "grad_norm": 0.2678709030151367,
      "learning_rate": 0.00019987295223235566,
      "loss": 0.9906,
      "step": 351
    },
    {
      "epoch": 0.053787676204301485,
      "grad_norm": 0.2115461379289627,
      "learning_rate": 0.00019987042452667328,
      "loss": 0.5388,
      "step": 352
    },
    {
      "epoch": 0.05394048210260916,
      "grad_norm": 0.24016588926315308,
      "learning_rate": 0.00019986787193911298,
      "loss": 0.6162,
      "step": 353
    },
    {
      "epoch": 0.05409328800091683,
      "grad_norm": 0.20813335478305817,
      "learning_rate": 0.00019986529447031074,
      "loss": 0.7018,
      "step": 354
    },
    {
      "epoch": 0.05424609389922451,
      "grad_norm": 0.22935904562473297,
      "learning_rate": 0.00019986269212090863,
      "loss": 0.6674,
      "step": 355
    },
    {
      "epoch": 0.05439889979753219,
      "grad_norm": 0.210664764046669,
      "learning_rate": 0.00019986006489155508,
      "loss": 0.8623,
      "step": 356
    },
    {
      "epoch": 0.05455170569583986,
      "grad_norm": 0.25411364436149597,
      "learning_rate": 0.00019985741278290457,
      "loss": 0.6944,
      "step": 357
    },
    {
      "epoch": 0.054704511594147534,
      "grad_norm": 0.21661530435085297,
      "learning_rate": 0.00019985473579561794,
      "loss": 0.7631,
      "step": 358
    },
    {
      "epoch": 0.05485731749245521,
      "grad_norm": 0.21907834708690643,
      "learning_rate": 0.00019985203393036206,
      "loss": 0.751,
      "step": 359
    },
    {
      "epoch": 0.05501012339076288,
      "grad_norm": 0.24429883062839508,
      "learning_rate": 0.00019984930718781012,
      "loss": 0.4515,
      "step": 360
    },
    {
      "epoch": 0.05516292928907056,
      "grad_norm": 0.23436371982097626,
      "learning_rate": 0.00019984655556864146,
      "loss": 0.6239,
      "step": 361
    },
    {
      "epoch": 0.055315735187378236,
      "grad_norm": 0.2612748146057129,
      "learning_rate": 0.0001998437790735416,
      "loss": 0.6628,
      "step": 362
    },
    {
      "epoch": 0.055468541085685906,
      "grad_norm": 0.2066948562860489,
      "learning_rate": 0.00019984097770320235,
      "loss": 0.6496,
      "step": 363
    },
    {
      "epoch": 0.05562134698399358,
      "grad_norm": 0.2153586447238922,
      "learning_rate": 0.00019983815145832153,
      "loss": 0.5914,
      "step": 364
    },
    {
      "epoch": 0.05577415288230126,
      "grad_norm": 0.2186291664838791,
      "learning_rate": 0.00019983530033960335,
      "loss": 0.7676,
      "step": 365
    },
    {
      "epoch": 0.05592695878060893,
      "grad_norm": 0.26197004318237305,
      "learning_rate": 0.00019983242434775815,
      "loss": 0.637,
      "step": 366
    },
    {
      "epoch": 0.05607976467891661,
      "grad_norm": 0.2043347805738449,
      "learning_rate": 0.00019982952348350245,
      "loss": 0.6166,
      "step": 367
    },
    {
      "epoch": 0.05623257057722428,
      "grad_norm": 0.29757070541381836,
      "learning_rate": 0.00019982659774755895,
      "loss": 0.7035,
      "step": 368
    },
    {
      "epoch": 0.056385376475531955,
      "grad_norm": 0.20683075487613678,
      "learning_rate": 0.0001998236471406566,
      "loss": 0.7503,
      "step": 369
    },
    {
      "epoch": 0.05653818237383963,
      "grad_norm": 0.25130218267440796,
      "learning_rate": 0.0001998206716635305,
      "loss": 0.6997,
      "step": 370
    },
    {
      "epoch": 0.0566909882721473,
      "grad_norm": 0.23295602202415466,
      "learning_rate": 0.00019981767131692198,
      "loss": 0.6887,
      "step": 371
    },
    {
      "epoch": 0.05684379417045498,
      "grad_norm": 0.3534759283065796,
      "learning_rate": 0.00019981464610157855,
      "loss": 0.8564,
      "step": 372
    },
    {
      "epoch": 0.05699660006876266,
      "grad_norm": 0.25935059785842896,
      "learning_rate": 0.0001998115960182539,
      "loss": 0.7129,
      "step": 373
    },
    {
      "epoch": 0.05714940596707033,
      "grad_norm": 0.2791917026042938,
      "learning_rate": 0.00019980852106770797,
      "loss": 0.6854,
      "step": 374
    },
    {
      "epoch": 0.057302211865378004,
      "grad_norm": 0.31893056631088257,
      "learning_rate": 0.0001998054212507068,
      "loss": 0.8234,
      "step": 375
    },
    {
      "epoch": 0.05745501776368568,
      "grad_norm": 0.24410194158554077,
      "learning_rate": 0.00019980229656802273,
      "loss": 0.5903,
      "step": 376
    },
    {
      "epoch": 0.05760782366199335,
      "grad_norm": 0.21686480939388275,
      "learning_rate": 0.00019979914702043423,
      "loss": 0.7364,
      "step": 377
    },
    {
      "epoch": 0.05776062956030103,
      "grad_norm": 0.25539955496788025,
      "learning_rate": 0.00019979597260872601,
      "loss": 0.8077,
      "step": 378
    },
    {
      "epoch": 0.0579134354586087,
      "grad_norm": 0.26865750551223755,
      "learning_rate": 0.00019979277333368888,
      "loss": 0.742,
      "step": 379
    },
    {
      "epoch": 0.058066241356916376,
      "grad_norm": 0.20411862432956696,
      "learning_rate": 0.00019978954919612,
      "loss": 0.6866,
      "step": 380
    },
    {
      "epoch": 0.05821904725522405,
      "grad_norm": 0.21796946227550507,
      "learning_rate": 0.0001997863001968226,
      "loss": 0.7437,
      "step": 381
    },
    {
      "epoch": 0.05837185315353172,
      "grad_norm": 0.27662667632102966,
      "learning_rate": 0.0001997830263366061,
      "loss": 0.6777,
      "step": 382
    },
    {
      "epoch": 0.0585246590518394,
      "grad_norm": 0.283934623003006,
      "learning_rate": 0.0001997797276162862,
      "loss": 0.6572,
      "step": 383
    },
    {
      "epoch": 0.05867746495014708,
      "grad_norm": 0.22082751989364624,
      "learning_rate": 0.00019977640403668476,
      "loss": 0.6067,
      "step": 384
    },
    {
      "epoch": 0.05883027084845475,
      "grad_norm": 0.2193826287984848,
      "learning_rate": 0.00019977305559862977,
      "loss": 0.7021,
      "step": 385
    },
    {
      "epoch": 0.058983076746762425,
      "grad_norm": 0.24583138525485992,
      "learning_rate": 0.00019976968230295554,
      "loss": 0.8803,
      "step": 386
    },
    {
      "epoch": 0.0591358826450701,
      "grad_norm": 0.30317580699920654,
      "learning_rate": 0.00019976628415050246,
      "loss": 0.8169,
      "step": 387
    },
    {
      "epoch": 0.05928868854337777,
      "grad_norm": 0.2399805188179016,
      "learning_rate": 0.0001997628611421171,
      "loss": 0.8351,
      "step": 388
    },
    {
      "epoch": 0.05944149444168545,
      "grad_norm": 0.2840050458908081,
      "learning_rate": 0.00019975941327865233,
      "loss": 0.738,
      "step": 389
    },
    {
      "epoch": 0.05959430033999313,
      "grad_norm": 0.3253823518753052,
      "learning_rate": 0.00019975594056096717,
      "loss": 0.5278,
      "step": 390
    },
    {
      "epoch": 0.0597471062383008,
      "grad_norm": 0.24650508165359497,
      "learning_rate": 0.00019975244298992676,
      "loss": 0.7123,
      "step": 391
    },
    {
      "epoch": 0.059899912136608474,
      "grad_norm": 0.3255913555622101,
      "learning_rate": 0.00019974892056640257,
      "loss": 0.6411,
      "step": 392
    },
    {
      "epoch": 0.060052718034916144,
      "grad_norm": 0.21249303221702576,
      "learning_rate": 0.00019974537329127209,
      "loss": 0.9045,
      "step": 393
    },
    {
      "epoch": 0.06020552393322382,
      "grad_norm": 0.23240245878696442,
      "learning_rate": 0.0001997418011654192,
      "loss": 0.7853,
      "step": 394
    },
    {
      "epoch": 0.0603583298315315,
      "grad_norm": 0.42459583282470703,
      "learning_rate": 0.00019973820418973376,
      "loss": 0.7974,
      "step": 395
    },
    {
      "epoch": 0.06051113572983917,
      "grad_norm": 0.34886565804481506,
      "learning_rate": 0.000199734582365112,
      "loss": 0.7454,
      "step": 396
    },
    {
      "epoch": 0.060663941628146846,
      "grad_norm": 0.23069462180137634,
      "learning_rate": 0.0001997309356924562,
      "loss": 0.7182,
      "step": 397
    },
    {
      "epoch": 0.06081674752645452,
      "grad_norm": 0.25458237528800964,
      "learning_rate": 0.00019972726417267497,
      "loss": 0.6431,
      "step": 398
    },
    {
      "epoch": 0.06096955342476219,
      "grad_norm": 0.21412460505962372,
      "learning_rate": 0.000199723567806683,
      "loss": 0.6802,
      "step": 399
    },
    {
      "epoch": 0.06112235932306987,
      "grad_norm": 0.21457818150520325,
      "learning_rate": 0.0001997198465954012,
      "loss": 0.6221,
      "step": 400
    },
    {
      "epoch": 0.06127516522137755,
      "grad_norm": 0.2284182459115982,
      "learning_rate": 0.0001997161005397567,
      "loss": 0.6552,
      "step": 401
    },
    {
      "epoch": 0.06142797111968522,
      "grad_norm": 0.2774156630039215,
      "learning_rate": 0.00019971232964068283,
      "loss": 0.647,
      "step": 402
    },
    {
      "epoch": 0.061580777017992895,
      "grad_norm": 0.2522546350955963,
      "learning_rate": 0.000199708533899119,
      "loss": 0.5137,
      "step": 403
    },
    {
      "epoch": 0.06173358291630057,
      "grad_norm": 0.28763559460639954,
      "learning_rate": 0.00019970471331601095,
      "loss": 0.6016,
      "step": 404
    },
    {
      "epoch": 0.06188638881460824,
      "grad_norm": 0.7510436177253723,
      "learning_rate": 0.0001997008678923105,
      "loss": 0.533,
      "step": 405
    },
    {
      "epoch": 0.06203919471291592,
      "grad_norm": 0.25240078568458557,
      "learning_rate": 0.00019969699762897576,
      "loss": 0.6597,
      "step": 406
    },
    {
      "epoch": 0.06219200061122359,
      "grad_norm": 0.2635802924633026,
      "learning_rate": 0.0001996931025269709,
      "loss": 0.7477,
      "step": 407
    },
    {
      "epoch": 0.06234480650953127,
      "grad_norm": 0.3156009614467621,
      "learning_rate": 0.00019968918258726642,
      "loss": 0.8181,
      "step": 408
    },
    {
      "epoch": 0.062497612407838944,
      "grad_norm": 0.24074885249137878,
      "learning_rate": 0.0001996852378108389,
      "loss": 0.6273,
      "step": 409
    },
    {
      "epoch": 0.06265041830614662,
      "grad_norm": 0.336436003446579,
      "learning_rate": 0.00019968126819867117,
      "loss": 0.7783,
      "step": 410
    },
    {
      "epoch": 0.06280322420445429,
      "grad_norm": 0.24637262523174286,
      "learning_rate": 0.00019967727375175222,
      "loss": 0.68,
      "step": 411
    },
    {
      "epoch": 0.06295603010276196,
      "grad_norm": 0.3599497377872467,
      "learning_rate": 0.00019967325447107722,
      "loss": 0.6744,
      "step": 412
    },
    {
      "epoch": 0.06310883600106965,
      "grad_norm": 0.29385289549827576,
      "learning_rate": 0.00019966921035764756,
      "loss": 0.9023,
      "step": 413
    },
    {
      "epoch": 0.06326164189937732,
      "grad_norm": 0.2313985675573349,
      "learning_rate": 0.00019966514141247078,
      "loss": 0.6057,
      "step": 414
    },
    {
      "epoch": 0.06341444779768499,
      "grad_norm": 0.27997085452079773,
      "learning_rate": 0.00019966104763656064,
      "loss": 0.6791,
      "step": 415
    },
    {
      "epoch": 0.06356725369599267,
      "grad_norm": 0.2614806890487671,
      "learning_rate": 0.00019965692903093705,
      "loss": 0.6162,
      "step": 416
    },
    {
      "epoch": 0.06372005959430034,
      "grad_norm": 0.2659102976322174,
      "learning_rate": 0.00019965278559662614,
      "loss": 0.5931,
      "step": 417
    },
    {
      "epoch": 0.06387286549260801,
      "grad_norm": 0.22393980622291565,
      "learning_rate": 0.0001996486173346602,
      "loss": 0.6176,
      "step": 418
    },
    {
      "epoch": 0.0640256713909157,
      "grad_norm": 0.25976166129112244,
      "learning_rate": 0.00019964442424607774,
      "loss": 0.6612,
      "step": 419
    },
    {
      "epoch": 0.06417847728922337,
      "grad_norm": 0.3219575881958008,
      "learning_rate": 0.00019964020633192342,
      "loss": 0.8215,
      "step": 420
    },
    {
      "epoch": 0.06433128318753104,
      "grad_norm": 0.24153344333171844,
      "learning_rate": 0.0001996359635932481,
      "loss": 0.8909,
      "step": 421
    },
    {
      "epoch": 0.06448408908583872,
      "grad_norm": 0.7330458760261536,
      "learning_rate": 0.00019963169603110878,
      "loss": 0.743,
      "step": 422
    },
    {
      "epoch": 0.06463689498414639,
      "grad_norm": 0.278689980506897,
      "learning_rate": 0.00019962740364656874,
      "loss": 0.7773,
      "step": 423
    },
    {
      "epoch": 0.06478970088245406,
      "grad_norm": 0.23073042929172516,
      "learning_rate": 0.00019962308644069744,
      "loss": 0.6776,
      "step": 424
    },
    {
      "epoch": 0.06494250678076174,
      "grad_norm": 0.20433606207370758,
      "learning_rate": 0.00019961874441457034,
      "loss": 0.6218,
      "step": 425
    },
    {
      "epoch": 0.06509531267906941,
      "grad_norm": 0.25579705834388733,
      "learning_rate": 0.00019961437756926934,
      "loss": 0.7303,
      "step": 426
    },
    {
      "epoch": 0.06524811857737708,
      "grad_norm": 0.2474275529384613,
      "learning_rate": 0.00019960998590588233,
      "loss": 0.6678,
      "step": 427
    },
    {
      "epoch": 0.06540092447568475,
      "grad_norm": 0.29059740900993347,
      "learning_rate": 0.0001996055694255035,
      "loss": 0.738,
      "step": 428
    },
    {
      "epoch": 0.06555373037399244,
      "grad_norm": 0.21308888494968414,
      "learning_rate": 0.00019960112812923312,
      "loss": 0.5843,
      "step": 429
    },
    {
      "epoch": 0.06570653627230011,
      "grad_norm": 0.27173516154289246,
      "learning_rate": 0.00019959666201817776,
      "loss": 0.6929,
      "step": 430
    },
    {
      "epoch": 0.06585934217060778,
      "grad_norm": 0.29579100012779236,
      "learning_rate": 0.00019959217109345013,
      "loss": 0.7057,
      "step": 431
    },
    {
      "epoch": 0.06601214806891546,
      "grad_norm": 0.22613683342933655,
      "learning_rate": 0.00019958765535616906,
      "loss": 0.7614,
      "step": 432
    },
    {
      "epoch": 0.06616495396722313,
      "grad_norm": 0.24712997674942017,
      "learning_rate": 0.0001995831148074596,
      "loss": 0.6486,
      "step": 433
    },
    {
      "epoch": 0.0663177598655308,
      "grad_norm": 0.26547789573669434,
      "learning_rate": 0.00019957854944845305,
      "loss": 0.8202,
      "step": 434
    },
    {
      "epoch": 0.06647056576383849,
      "grad_norm": 0.20722678303718567,
      "learning_rate": 0.00019957395928028675,
      "loss": 0.6485,
      "step": 435
    },
    {
      "epoch": 0.06662337166214616,
      "grad_norm": 0.2522684335708618,
      "learning_rate": 0.00019956934430410438,
      "loss": 0.7336,
      "step": 436
    },
    {
      "epoch": 0.06677617756045383,
      "grad_norm": 0.22733008861541748,
      "learning_rate": 0.00019956470452105562,
      "loss": 0.7137,
      "step": 437
    },
    {
      "epoch": 0.06692898345876151,
      "grad_norm": 0.29240962862968445,
      "learning_rate": 0.00019956003993229656,
      "loss": 0.733,
      "step": 438
    },
    {
      "epoch": 0.06708178935706918,
      "grad_norm": 0.3016742467880249,
      "learning_rate": 0.00019955535053898927,
      "loss": 0.7911,
      "step": 439
    },
    {
      "epoch": 0.06723459525537685,
      "grad_norm": 0.28375834226608276,
      "learning_rate": 0.0001995506363423021,
      "loss": 0.6417,
      "step": 440
    },
    {
      "epoch": 0.06738740115368454,
      "grad_norm": 0.2522740066051483,
      "learning_rate": 0.00019954589734340949,
      "loss": 0.9409,
      "step": 441
    },
    {
      "epoch": 0.06754020705199221,
      "grad_norm": 0.23570802807807922,
      "learning_rate": 0.0001995411335434922,
      "loss": 0.6449,
      "step": 442
    },
    {
      "epoch": 0.06769301295029988,
      "grad_norm": 0.4781047999858856,
      "learning_rate": 0.00019953634494373706,
      "loss": 0.8234,
      "step": 443
    },
    {
      "epoch": 0.06784581884860756,
      "grad_norm": 0.21055488288402557,
      "learning_rate": 0.0001995315315453371,
      "loss": 0.5919,
      "step": 444
    },
    {
      "epoch": 0.06799862474691523,
      "grad_norm": 0.24888089299201965,
      "learning_rate": 0.00019952669334949156,
      "loss": 0.823,
      "step": 445
    },
    {
      "epoch": 0.0681514306452229,
      "grad_norm": 0.4187317192554474,
      "learning_rate": 0.0001995218303574058,
      "loss": 0.691,
      "step": 446
    },
    {
      "epoch": 0.06830423654353059,
      "grad_norm": 0.23317286372184753,
      "learning_rate": 0.00019951694257029146,
      "loss": 0.6538,
      "step": 447
    },
    {
      "epoch": 0.06845704244183826,
      "grad_norm": 0.2808625400066376,
      "learning_rate": 0.0001995120299893662,
      "loss": 0.5262,
      "step": 448
    },
    {
      "epoch": 0.06860984834014593,
      "grad_norm": 0.2186977118253708,
      "learning_rate": 0.00019950709261585403,
      "loss": 0.7305,
      "step": 449
    },
    {
      "epoch": 0.06876265423845361,
      "grad_norm": 0.25195857882499695,
      "learning_rate": 0.00019950213045098503,
      "loss": 0.7507,
      "step": 450
    },
    {
      "epoch": 0.06891546013676128,
      "grad_norm": 0.2823186218738556,
      "learning_rate": 0.00019949714349599545,
      "loss": 0.6588,
      "step": 451
    },
    {
      "epoch": 0.06906826603506895,
      "grad_norm": 0.27081775665283203,
      "learning_rate": 0.00019949213175212774,
      "loss": 0.5578,
      "step": 452
    },
    {
      "epoch": 0.06922107193337663,
      "grad_norm": 0.2283964604139328,
      "learning_rate": 0.00019948709522063063,
      "loss": 0.575,
      "step": 453
    },
    {
      "epoch": 0.0693738778316843,
      "grad_norm": 0.24064375460147858,
      "learning_rate": 0.00019948203390275884,
      "loss": 0.7462,
      "step": 454
    },
    {
      "epoch": 0.06952668372999198,
      "grad_norm": 0.4738442599773407,
      "learning_rate": 0.00019947694779977337,
      "loss": 0.6016,
      "step": 455
    },
    {
      "epoch": 0.06967948962829965,
      "grad_norm": 0.2238418608903885,
      "learning_rate": 0.0001994718369129414,
      "loss": 0.7509,
      "step": 456
    },
    {
      "epoch": 0.06983229552660733,
      "grad_norm": 0.2132556140422821,
      "learning_rate": 0.00019946670124353622,
      "loss": 0.7021,
      "step": 457
    },
    {
      "epoch": 0.069985101424915,
      "grad_norm": 0.249766007065773,
      "learning_rate": 0.00019946154079283744,
      "loss": 0.7264,
      "step": 458
    },
    {
      "epoch": 0.07013790732322267,
      "grad_norm": 0.23593513667583466,
      "learning_rate": 0.00019945635556213064,
      "loss": 0.559,
      "step": 459
    },
    {
      "epoch": 0.07029071322153035,
      "grad_norm": 0.21727308630943298,
      "learning_rate": 0.00019945114555270768,
      "loss": 0.6118,
      "step": 460
    },
    {
      "epoch": 0.07044351911983802,
      "grad_norm": 0.5952973365783691,
      "learning_rate": 0.00019944591076586664,
      "loss": 0.7533,
      "step": 461
    },
    {
      "epoch": 0.0705963250181457,
      "grad_norm": 0.43861424922943115,
      "learning_rate": 0.00019944065120291175,
      "loss": 0.6487,
      "step": 462
    },
    {
      "epoch": 0.07074913091645338,
      "grad_norm": 0.2683066725730896,
      "learning_rate": 0.0001994353668651533,
      "loss": 0.7177,
      "step": 463
    },
    {
      "epoch": 0.07090193681476105,
      "grad_norm": 0.23011751472949982,
      "learning_rate": 0.0001994300577539079,
      "loss": 0.5861,
      "step": 464
    },
    {
      "epoch": 0.07105474271306872,
      "grad_norm": 0.20545627176761627,
      "learning_rate": 0.00019942472387049823,
      "loss": 0.5209,
      "step": 465
    },
    {
      "epoch": 0.0712075486113764,
      "grad_norm": 0.28548967838287354,
      "learning_rate": 0.0001994193652162532,
      "loss": 0.6958,
      "step": 466
    },
    {
      "epoch": 0.07136035450968407,
      "grad_norm": 0.26860255002975464,
      "learning_rate": 0.0001994139817925079,
      "loss": 0.5867,
      "step": 467
    },
    {
      "epoch": 0.07151316040799174,
      "grad_norm": 0.2905493676662445,
      "learning_rate": 0.00019940857360060355,
      "loss": 0.6942,
      "step": 468
    },
    {
      "epoch": 0.07166596630629943,
      "grad_norm": 0.31361424922943115,
      "learning_rate": 0.00019940314064188753,
      "loss": 0.6028,
      "step": 469
    },
    {
      "epoch": 0.0718187722046071,
      "grad_norm": 0.2557202875614166,
      "learning_rate": 0.0001993976829177134,
      "loss": 0.7129,
      "step": 470
    },
    {
      "epoch": 0.07197157810291477,
      "grad_norm": 0.22137515246868134,
      "learning_rate": 0.00019939220042944098,
      "loss": 0.8681,
      "step": 471
    },
    {
      "epoch": 0.07212438400122245,
      "grad_norm": 0.18441233038902283,
      "learning_rate": 0.00019938669317843614,
      "loss": 0.5655,
      "step": 472
    },
    {
      "epoch": 0.07227718989953012,
      "grad_norm": 0.22239898145198822,
      "learning_rate": 0.00019938116116607096,
      "loss": 0.7098,
      "step": 473
    },
    {
      "epoch": 0.07242999579783779,
      "grad_norm": 0.25017887353897095,
      "learning_rate": 0.00019937560439372372,
      "loss": 0.8911,
      "step": 474
    },
    {
      "epoch": 0.07258280169614548,
      "grad_norm": 0.25843703746795654,
      "learning_rate": 0.00019937002286277882,
      "loss": 0.7774,
      "step": 475
    },
    {
      "epoch": 0.07273560759445315,
      "grad_norm": 0.24830183386802673,
      "learning_rate": 0.00019936441657462687,
      "loss": 0.5783,
      "step": 476
    },
    {
      "epoch": 0.07288841349276082,
      "grad_norm": 0.6441646814346313,
      "learning_rate": 0.00019935878553066462,
      "loss": 0.7754,
      "step": 477
    },
    {
      "epoch": 0.0730412193910685,
      "grad_norm": 0.2400471419095993,
      "learning_rate": 0.000199353129732295,
      "loss": 0.7317,
      "step": 478
    },
    {
      "epoch": 0.07319402528937617,
      "grad_norm": 0.3065354824066162,
      "learning_rate": 0.00019934744918092707,
      "loss": 0.6605,
      "step": 479
    },
    {
      "epoch": 0.07334683118768384,
      "grad_norm": 0.2226812243461609,
      "learning_rate": 0.00019934174387797613,
      "loss": 0.6817,
      "step": 480
    },
    {
      "epoch": 0.07349963708599153,
      "grad_norm": 0.224257230758667,
      "learning_rate": 0.00019933601382486363,
      "loss": 0.7232,
      "step": 481
    },
    {
      "epoch": 0.0736524429842992,
      "grad_norm": 0.2415555864572525,
      "learning_rate": 0.0001993302590230171,
      "loss": 0.8262,
      "step": 482
    },
    {
      "epoch": 0.07380524888260687,
      "grad_norm": 0.20679202675819397,
      "learning_rate": 0.00019932447947387037,
      "loss": 0.5378,
      "step": 483
    },
    {
      "epoch": 0.07395805478091454,
      "grad_norm": 0.1950317621231079,
      "learning_rate": 0.00019931867517886332,
      "loss": 0.6533,
      "step": 484
    },
    {
      "epoch": 0.07411086067922222,
      "grad_norm": 0.2754247486591339,
      "learning_rate": 0.00019931284613944206,
      "loss": 0.8435,
      "step": 485
    },
    {
      "epoch": 0.07426366657752989,
      "grad_norm": 0.23111988604068756,
      "learning_rate": 0.00019930699235705884,
      "loss": 0.6632,
      "step": 486
    },
    {
      "epoch": 0.07441647247583756,
      "grad_norm": 0.2312602549791336,
      "learning_rate": 0.00019930111383317204,
      "loss": 0.6834,
      "step": 487
    },
    {
      "epoch": 0.07456927837414525,
      "grad_norm": 0.24196754395961761,
      "learning_rate": 0.00019929521056924633,
      "loss": 0.7273,
      "step": 488
    },
    {
      "epoch": 0.07472208427245292,
      "grad_norm": 0.26272863149642944,
      "learning_rate": 0.00019928928256675242,
      "loss": 0.7997,
      "step": 489
    },
    {
      "epoch": 0.07487489017076059,
      "grad_norm": 0.22681844234466553,
      "learning_rate": 0.0001992833298271672,
      "loss": 0.6919,
      "step": 490
    },
    {
      "epoch": 0.07502769606906827,
      "grad_norm": 0.22710320353507996,
      "learning_rate": 0.00019927735235197375,
      "loss": 0.7005,
      "step": 491
    },
    {
      "epoch": 0.07518050196737594,
      "grad_norm": 0.27059561014175415,
      "learning_rate": 0.00019927135014266134,
      "loss": 0.8002,
      "step": 492
    },
    {
      "epoch": 0.07533330786568361,
      "grad_norm": 0.2716640532016754,
      "learning_rate": 0.00019926532320072536,
      "loss": 0.7222,
      "step": 493
    },
    {
      "epoch": 0.0754861137639913,
      "grad_norm": 0.2126639038324356,
      "learning_rate": 0.00019925927152766735,
      "loss": 0.5937,
      "step": 494
    },
    {
      "epoch": 0.07563891966229896,
      "grad_norm": 0.24724045395851135,
      "learning_rate": 0.00019925319512499506,
      "loss": 0.552,
      "step": 495
    },
    {
      "epoch": 0.07579172556060663,
      "grad_norm": 0.2463061362504959,
      "learning_rate": 0.00019924709399422232,
      "loss": 0.7636,
      "step": 496
    },
    {
      "epoch": 0.07594453145891432,
      "grad_norm": 0.24597403407096863,
      "learning_rate": 0.00019924096813686923,
      "loss": 0.8017,
      "step": 497
    },
    {
      "epoch": 0.07609733735722199,
      "grad_norm": 0.37607234716415405,
      "learning_rate": 0.000199234817554462,
      "loss": 0.7762,
      "step": 498
    },
    {
      "epoch": 0.07625014325552966,
      "grad_norm": 0.2166277915239334,
      "learning_rate": 0.00019922864224853297,
      "loss": 0.8255,
      "step": 499
    },
    {
      "epoch": 0.07640294915383734,
      "grad_norm": 0.3068873882293701,
      "learning_rate": 0.00019922244222062067,
      "loss": 0.8931,
      "step": 500
    },
    {
      "epoch": 0.07655575505214501,
      "grad_norm": 0.2293839454650879,
      "learning_rate": 0.00019921621747226976,
      "loss": 0.5859,
      "step": 501
    },
    {
      "epoch": 0.07670856095045268,
      "grad_norm": 0.20842444896697998,
      "learning_rate": 0.0001992099680050312,
      "loss": 0.6295,
      "step": 502
    },
    {
      "epoch": 0.07686136684876037,
      "grad_norm": 0.24702678620815277,
      "learning_rate": 0.00019920369382046181,
      "loss": 0.8282,
      "step": 503
    },
    {
      "epoch": 0.07701417274706804,
      "grad_norm": 0.3347665071487427,
      "learning_rate": 0.0001991973949201249,
      "loss": 0.696,
      "step": 504
    },
    {
      "epoch": 0.07716697864537571,
      "grad_norm": 0.21844464540481567,
      "learning_rate": 0.0001991910713055897,
      "loss": 0.6584,
      "step": 505
    },
    {
      "epoch": 0.07731978454368339,
      "grad_norm": 0.2989930808544159,
      "learning_rate": 0.00019918472297843174,
      "loss": 0.7263,
      "step": 506
    },
    {
      "epoch": 0.07747259044199106,
      "grad_norm": 0.22986134886741638,
      "learning_rate": 0.0001991783499402326,
      "loss": 0.6598,
      "step": 507
    },
    {
      "epoch": 0.07762539634029873,
      "grad_norm": 0.2555464506149292,
      "learning_rate": 0.00019917195219258012,
      "loss": 0.6093,
      "step": 508
    },
    {
      "epoch": 0.07777820223860642,
      "grad_norm": 0.22957204282283783,
      "learning_rate": 0.00019916552973706824,
      "loss": 0.872,
      "step": 509
    },
    {
      "epoch": 0.07793100813691409,
      "grad_norm": 0.2668517529964447,
      "learning_rate": 0.00019915908257529702,
      "loss": 0.907,
      "step": 510
    },
    {
      "epoch": 0.07808381403522176,
      "grad_norm": 0.29907530546188354,
      "learning_rate": 0.00019915261070887276,
      "loss": 0.7975,
      "step": 511
    },
    {
      "epoch": 0.07823661993352943,
      "grad_norm": 0.2448134869337082,
      "learning_rate": 0.00019914611413940784,
      "loss": 0.9981,
      "step": 512
    },
    {
      "epoch": 0.07838942583183711,
      "grad_norm": 0.5516696572303772,
      "learning_rate": 0.00019913959286852083,
      "loss": 0.6658,
      "step": 513
    },
    {
      "epoch": 0.07854223173014478,
      "grad_norm": 0.30436620116233826,
      "learning_rate": 0.00019913304689783646,
      "loss": 0.6571,
      "step": 514
    },
    {
      "epoch": 0.07869503762845245,
      "grad_norm": 0.24897192418575287,
      "learning_rate": 0.00019912647622898563,
      "loss": 0.7244,
      "step": 515
    },
    {
      "epoch": 0.07884784352676014,
      "grad_norm": 0.25573232769966125,
      "learning_rate": 0.00019911988086360533,
      "loss": 0.6093,
      "step": 516
    },
    {
      "epoch": 0.0790006494250678,
      "grad_norm": 0.21649105846881866,
      "learning_rate": 0.00019911326080333875,
      "loss": 0.6002,
      "step": 517
    },
    {
      "epoch": 0.07915345532337548,
      "grad_norm": 0.3718641996383667,
      "learning_rate": 0.0001991066160498352,
      "loss": 0.6874,
      "step": 518
    },
    {
      "epoch": 0.07930626122168316,
      "grad_norm": 0.23083071410655975,
      "learning_rate": 0.00019909994660475023,
      "loss": 0.7339,
      "step": 519
    },
    {
      "epoch": 0.07945906711999083,
      "grad_norm": 0.2922573983669281,
      "learning_rate": 0.0001990932524697454,
      "loss": 0.7994,
      "step": 520
    },
    {
      "epoch": 0.0796118730182985,
      "grad_norm": 0.26380589604377747,
      "learning_rate": 0.00019908653364648853,
      "loss": 0.7204,
      "step": 521
    },
    {
      "epoch": 0.07976467891660619,
      "grad_norm": 0.28353989124298096,
      "learning_rate": 0.00019907979013665357,
      "loss": 0.5284,
      "step": 522
    },
    {
      "epoch": 0.07991748481491386,
      "grad_norm": 0.2280929833650589,
      "learning_rate": 0.00019907302194192058,
      "loss": 0.7311,
      "step": 523
    },
    {
      "epoch": 0.08007029071322153,
      "grad_norm": 0.24350902438163757,
      "learning_rate": 0.00019906622906397582,
      "loss": 0.6621,
      "step": 524
    },
    {
      "epoch": 0.08022309661152921,
      "grad_norm": 0.22378286719322205,
      "learning_rate": 0.0001990594115045117,
      "loss": 0.6478,
      "step": 525
    },
    {
      "epoch": 0.08037590250983688,
      "grad_norm": 0.2641814053058624,
      "learning_rate": 0.00019905256926522672,
      "loss": 0.6855,
      "step": 526
    },
    {
      "epoch": 0.08052870840814455,
      "grad_norm": 0.2580053210258484,
      "learning_rate": 0.00019904570234782556,
      "loss": 0.6963,
      "step": 527
    },
    {
      "epoch": 0.08068151430645223,
      "grad_norm": 0.37223583459854126,
      "learning_rate": 0.00019903881075401908,
      "loss": 0.8338,
      "step": 528
    },
    {
      "epoch": 0.0808343202047599,
      "grad_norm": 0.3101045489311218,
      "learning_rate": 0.0001990318944855243,
      "loss": 0.6928,
      "step": 529
    },
    {
      "epoch": 0.08098712610306757,
      "grad_norm": 0.22513873875141144,
      "learning_rate": 0.00019902495354406425,
      "loss": 0.7592,
      "step": 530
    },
    {
      "epoch": 0.08113993200137526,
      "grad_norm": 0.2204548716545105,
      "learning_rate": 0.00019901798793136829,
      "loss": 0.6437,
      "step": 531
    },
    {
      "epoch": 0.08129273789968293,
      "grad_norm": 0.30862662196159363,
      "learning_rate": 0.0001990109976491718,
      "loss": 0.7737,
      "step": 532
    },
    {
      "epoch": 0.0814455437979906,
      "grad_norm": 0.2376687079668045,
      "learning_rate": 0.00019900398269921636,
      "loss": 0.7087,
      "step": 533
    },
    {
      "epoch": 0.08159834969629828,
      "grad_norm": 0.3251570463180542,
      "learning_rate": 0.0001989969430832497,
      "loss": 0.688,
      "step": 534
    },
    {
      "epoch": 0.08175115559460595,
      "grad_norm": 0.2662348449230194,
      "learning_rate": 0.00019898987880302574,
      "loss": 0.8595,
      "step": 535
    },
    {
      "epoch": 0.08190396149291362,
      "grad_norm": 0.21960824728012085,
      "learning_rate": 0.00019898278986030436,
      "loss": 0.6531,
      "step": 536
    },
    {
      "epoch": 0.08205676739122131,
      "grad_norm": 0.34016793966293335,
      "learning_rate": 0.00019897567625685176,
      "loss": 0.6846,
      "step": 537
    },
    {
      "epoch": 0.08220957328952898,
      "grad_norm": 0.23817062377929688,
      "learning_rate": 0.00019896853799444028,
      "loss": 0.7138,
      "step": 538
    },
    {
      "epoch": 0.08236237918783665,
      "grad_norm": 0.31925493478775024,
      "learning_rate": 0.00019896137507484834,
      "loss": 0.76,
      "step": 539
    },
    {
      "epoch": 0.08251518508614432,
      "grad_norm": 0.2391405999660492,
      "learning_rate": 0.00019895418749986047,
      "loss": 0.6027,
      "step": 540
    },
    {
      "epoch": 0.082667990984452,
      "grad_norm": 0.21949400007724762,
      "learning_rate": 0.00019894697527126742,
      "loss": 0.6997,
      "step": 541
    },
    {
      "epoch": 0.08282079688275967,
      "grad_norm": 0.23678694665431976,
      "learning_rate": 0.00019893973839086608,
      "loss": 0.5587,
      "step": 542
    },
    {
      "epoch": 0.08297360278106734,
      "grad_norm": 0.2640646994113922,
      "learning_rate": 0.00019893247686045946,
      "loss": 0.6838,
      "step": 543
    },
    {
      "epoch": 0.08312640867937503,
      "grad_norm": 0.2536254823207855,
      "learning_rate": 0.0001989251906818567,
      "loss": 0.5734,
      "step": 544
    },
    {
      "epoch": 0.0832792145776827,
      "grad_norm": 0.20557624101638794,
      "learning_rate": 0.00019891787985687308,
      "loss": 0.7211,
      "step": 545
    },
    {
      "epoch": 0.08343202047599037,
      "grad_norm": 0.26330065727233887,
      "learning_rate": 0.00019891054438732998,
      "loss": 0.704,
      "step": 546
    },
    {
      "epoch": 0.08358482637429805,
      "grad_norm": 0.2792690396308899,
      "learning_rate": 0.0001989031842750551,
      "loss": 0.5176,
      "step": 547
    },
    {
      "epoch": 0.08373763227260572,
      "grad_norm": 0.3036953806877136,
      "learning_rate": 0.00019889579952188204,
      "loss": 0.858,
      "step": 548
    },
    {
      "epoch": 0.08389043817091339,
      "grad_norm": 0.2737971842288971,
      "learning_rate": 0.00019888839012965068,
      "loss": 0.6229,
      "step": 549
    },
    {
      "epoch": 0.08404324406922108,
      "grad_norm": 0.2955757975578308,
      "learning_rate": 0.000198880956100207,
      "loss": 0.7844,
      "step": 550
    },
    {
      "epoch": 0.08419604996752875,
      "grad_norm": 0.31220605969429016,
      "learning_rate": 0.0001988734974354032,
      "loss": 0.9625,
      "step": 551
    },
    {
      "epoch": 0.08434885586583642,
      "grad_norm": 0.23816774785518646,
      "learning_rate": 0.0001988660141370974,
      "loss": 0.6341,
      "step": 552
    },
    {
      "epoch": 0.0845016617641441,
      "grad_norm": 0.26111069321632385,
      "learning_rate": 0.00019885850620715413,
      "loss": 0.7624,
      "step": 553
    },
    {
      "epoch": 0.08465446766245177,
      "grad_norm": 0.2268974632024765,
      "learning_rate": 0.0001988509736474439,
      "loss": 0.68,
      "step": 554
    },
    {
      "epoch": 0.08480727356075944,
      "grad_norm": 0.24625982344150543,
      "learning_rate": 0.00019884341645984332,
      "loss": 0.7494,
      "step": 555
    },
    {
      "epoch": 0.08496007945906713,
      "grad_norm": 0.2776056230068207,
      "learning_rate": 0.00019883583464623525,
      "loss": 0.6182,
      "step": 556
    },
    {
      "epoch": 0.0851128853573748,
      "grad_norm": 0.3328106105327606,
      "learning_rate": 0.00019882822820850866,
      "loss": 0.663,
      "step": 557
    },
    {
      "epoch": 0.08526569125568247,
      "grad_norm": 0.2359543889760971,
      "learning_rate": 0.00019882059714855857,
      "loss": 0.9259,
      "step": 558
    },
    {
      "epoch": 0.08541849715399015,
      "grad_norm": 0.2494177222251892,
      "learning_rate": 0.00019881294146828626,
      "loss": 0.6741,
      "step": 559
    },
    {
      "epoch": 0.08557130305229782,
      "grad_norm": 0.25279515981674194,
      "learning_rate": 0.000198805261169599,
      "loss": 0.824,
      "step": 560
    },
    {
      "epoch": 0.08572410895060549,
      "grad_norm": 0.2714499235153198,
      "learning_rate": 0.00019879755625441033,
      "loss": 0.6873,
      "step": 561
    },
    {
      "epoch": 0.08587691484891317,
      "grad_norm": 0.26827186346054077,
      "learning_rate": 0.0001987898267246399,
      "loss": 0.7667,
      "step": 562
    },
    {
      "epoch": 0.08602972074722084,
      "grad_norm": 0.2813642621040344,
      "learning_rate": 0.00019878207258221332,
      "loss": 0.7336,
      "step": 563
    },
    {
      "epoch": 0.08618252664552851,
      "grad_norm": 0.25137224793434143,
      "learning_rate": 0.00019877429382906262,
      "loss": 0.838,
      "step": 564
    },
    {
      "epoch": 0.08633533254383619,
      "grad_norm": 0.23676906526088715,
      "learning_rate": 0.00019876649046712572,
      "loss": 0.8007,
      "step": 565
    },
    {
      "epoch": 0.08648813844214387,
      "grad_norm": 0.25752153992652893,
      "learning_rate": 0.00019875866249834681,
      "loss": 0.8034,
      "step": 566
    },
    {
      "epoch": 0.08664094434045154,
      "grad_norm": 0.4326179027557373,
      "learning_rate": 0.0001987508099246761,
      "loss": 0.6266,
      "step": 567
    },
    {
      "epoch": 0.08679375023875921,
      "grad_norm": 0.23379918932914734,
      "learning_rate": 0.0001987429327480701,
      "loss": 0.7669,
      "step": 568
    },
    {
      "epoch": 0.0869465561370669,
      "grad_norm": 1.726702094078064,
      "learning_rate": 0.00019873503097049124,
      "loss": 0.6843,
      "step": 569
    },
    {
      "epoch": 0.08709936203537456,
      "grad_norm": 0.39877480268478394,
      "learning_rate": 0.0001987271045939082,
      "loss": 0.7714,
      "step": 570
    },
    {
      "epoch": 0.08725216793368223,
      "grad_norm": 0.46413347125053406,
      "learning_rate": 0.00019871915362029583,
      "loss": 0.8398,
      "step": 571
    },
    {
      "epoch": 0.08740497383198992,
      "grad_norm": 0.2789576053619385,
      "learning_rate": 0.000198711178051635,
      "loss": 0.677,
      "step": 572
    },
    {
      "epoch": 0.08755777973029759,
      "grad_norm": 0.47314074635505676,
      "learning_rate": 0.00019870317788991276,
      "loss": 0.6724,
      "step": 573
    },
    {
      "epoch": 0.08771058562860526,
      "grad_norm": 0.3048081398010254,
      "learning_rate": 0.0001986951531371223,
      "loss": 0.5743,
      "step": 574
    },
    {
      "epoch": 0.08786339152691294,
      "grad_norm": 0.2774280309677124,
      "learning_rate": 0.00019868710379526287,
      "loss": 0.6805,
      "step": 575
    },
    {
      "epoch": 0.08801619742522061,
      "grad_norm": 0.24514140188694,
      "learning_rate": 0.00019867902986633995,
      "loss": 0.5526,
      "step": 576
    },
    {
      "epoch": 0.08816900332352828,
      "grad_norm": 0.279148131608963,
      "learning_rate": 0.0001986709313523651,
      "loss": 0.7963,
      "step": 577
    },
    {
      "epoch": 0.08832180922183597,
      "grad_norm": 0.23727378249168396,
      "learning_rate": 0.00019866280825535593,
      "loss": 0.6729,
      "step": 578
    },
    {
      "epoch": 0.08847461512014364,
      "grad_norm": 0.3538941442966461,
      "learning_rate": 0.0001986546605773363,
      "loss": 0.7902,
      "step": 579
    },
    {
      "epoch": 0.08862742101845131,
      "grad_norm": 0.521626353263855,
      "learning_rate": 0.00019864648832033612,
      "loss": 0.7231,
      "step": 580
    },
    {
      "epoch": 0.08878022691675899,
      "grad_norm": 0.23101353645324707,
      "learning_rate": 0.00019863829148639142,
      "loss": 0.6654,
      "step": 581
    },
    {
      "epoch": 0.08893303281506666,
      "grad_norm": 0.23713093996047974,
      "learning_rate": 0.00019863007007754445,
      "loss": 0.7468,
      "step": 582
    },
    {
      "epoch": 0.08908583871337433,
      "grad_norm": 0.2619504928588867,
      "learning_rate": 0.00019862182409584339,
      "loss": 0.676,
      "step": 583
    },
    {
      "epoch": 0.08923864461168202,
      "grad_norm": 0.24824418127536774,
      "learning_rate": 0.00019861355354334272,
      "loss": 0.7717,
      "step": 584
    },
    {
      "epoch": 0.08939145050998969,
      "grad_norm": 0.35055017471313477,
      "learning_rate": 0.00019860525842210297,
      "loss": 0.7893,
      "step": 585
    },
    {
      "epoch": 0.08954425640829736,
      "grad_norm": 0.3420320153236389,
      "learning_rate": 0.00019859693873419082,
      "loss": 0.8808,
      "step": 586
    },
    {
      "epoch": 0.08969706230660504,
      "grad_norm": 0.2500932812690735,
      "learning_rate": 0.000198588594481679,
      "loss": 0.6469,
      "step": 587
    },
    {
      "epoch": 0.08984986820491271,
      "grad_norm": 0.24625301361083984,
      "learning_rate": 0.00019858022566664646,
      "loss": 0.7172,
      "step": 588
    },
    {
      "epoch": 0.09000267410322038,
      "grad_norm": 0.2936956286430359,
      "learning_rate": 0.0001985718322911782,
      "loss": 0.7343,
      "step": 589
    },
    {
      "epoch": 0.09015548000152807,
      "grad_norm": 0.2684360146522522,
      "learning_rate": 0.00019856341435736538,
      "loss": 0.8843,
      "step": 590
    },
    {
      "epoch": 0.09030828589983574,
      "grad_norm": 0.39383620023727417,
      "learning_rate": 0.0001985549718673052,
      "loss": 0.6679,
      "step": 591
    },
    {
      "epoch": 0.0904610917981434,
      "grad_norm": 0.26644083857536316,
      "learning_rate": 0.00019854650482310112,
      "loss": 0.8205,
      "step": 592
    },
    {
      "epoch": 0.09061389769645108,
      "grad_norm": 0.2587982416152954,
      "learning_rate": 0.00019853801322686256,
      "loss": 0.6291,
      "step": 593
    },
    {
      "epoch": 0.09076670359475876,
      "grad_norm": 0.24516814947128296,
      "learning_rate": 0.00019852949708070515,
      "loss": 0.8208,
      "step": 594
    },
    {
      "epoch": 0.09091950949306643,
      "grad_norm": 0.2279921919107437,
      "learning_rate": 0.00019852095638675063,
      "loss": 0.5292,
      "step": 595
    },
    {
      "epoch": 0.0910723153913741,
      "grad_norm": 0.2680317461490631,
      "learning_rate": 0.00019851239114712684,
      "loss": 0.6363,
      "step": 596
    },
    {
      "epoch": 0.09122512128968178,
      "grad_norm": 0.2625105679035187,
      "learning_rate": 0.00019850380136396774,
      "loss": 0.7019,
      "step": 597
    },
    {
      "epoch": 0.09137792718798946,
      "grad_norm": 0.2557803690433502,
      "learning_rate": 0.00019849518703941337,
      "loss": 0.6597,
      "step": 598
    },
    {
      "epoch": 0.09153073308629713,
      "grad_norm": 0.2681661546230316,
      "learning_rate": 0.00019848654817560996,
      "loss": 0.5386,
      "step": 599
    },
    {
      "epoch": 0.09168353898460481,
      "grad_norm": 0.2524600028991699,
      "learning_rate": 0.0001984778847747098,
      "loss": 0.8093,
      "step": 600
    },
    {
      "epoch": 0.09183634488291248,
      "grad_norm": 0.23507723212242126,
      "learning_rate": 0.00019846919683887127,
      "loss": 0.7312,
      "step": 601
    },
    {
      "epoch": 0.09198915078122015,
      "grad_norm": 0.2504274845123291,
      "learning_rate": 0.00019846048437025893,
      "loss": 0.5854,
      "step": 602
    },
    {
      "epoch": 0.09214195667952783,
      "grad_norm": 0.26254919171333313,
      "learning_rate": 0.0001984517473710434,
      "loss": 0.5426,
      "step": 603
    },
    {
      "epoch": 0.0922947625778355,
      "grad_norm": 0.26088839769363403,
      "learning_rate": 0.00019844298584340147,
      "loss": 0.8402,
      "step": 604
    },
    {
      "epoch": 0.09244756847614317,
      "grad_norm": 0.22979454696178436,
      "learning_rate": 0.00019843419978951595,
      "loss": 0.8721,
      "step": 605
    },
    {
      "epoch": 0.09260037437445086,
      "grad_norm": 0.2631072700023651,
      "learning_rate": 0.00019842538921157585,
      "loss": 0.7218,
      "step": 606
    },
    {
      "epoch": 0.09275318027275853,
      "grad_norm": 0.2664624750614166,
      "learning_rate": 0.00019841655411177622,
      "loss": 0.704,
      "step": 607
    },
    {
      "epoch": 0.0929059861710662,
      "grad_norm": 0.28286877274513245,
      "learning_rate": 0.00019840769449231828,
      "loss": 0.6592,
      "step": 608
    },
    {
      "epoch": 0.09305879206937388,
      "grad_norm": 0.24730631709098816,
      "learning_rate": 0.00019839881035540931,
      "loss": 0.8396,
      "step": 609
    },
    {
      "epoch": 0.09321159796768155,
      "grad_norm": 0.30036839842796326,
      "learning_rate": 0.00019838990170326272,
      "loss": 0.6895,
      "step": 610
    },
    {
      "epoch": 0.09336440386598922,
      "grad_norm": 0.394290030002594,
      "learning_rate": 0.000198380968538098,
      "loss": 0.8033,
      "step": 611
    },
    {
      "epoch": 0.09351720976429691,
      "grad_norm": 0.22317548096179962,
      "learning_rate": 0.00019837201086214085,
      "loss": 0.5986,
      "step": 612
    },
    {
      "epoch": 0.09367001566260458,
      "grad_norm": 0.3567953109741211,
      "learning_rate": 0.00019836302867762292,
      "loss": 0.5609,
      "step": 613
    },
    {
      "epoch": 0.09382282156091225,
      "grad_norm": 0.26048195362091064,
      "learning_rate": 0.0001983540219867821,
      "loss": 0.74,
      "step": 614
    },
    {
      "epoch": 0.09397562745921993,
      "grad_norm": 7.787338733673096,
      "learning_rate": 0.0001983449907918623,
      "loss": 0.6336,
      "step": 615
    },
    {
      "epoch": 0.0941284333575276,
      "grad_norm": 0.41731297969818115,
      "learning_rate": 0.0001983359350951136,
      "loss": 0.8691,
      "step": 616
    },
    {
      "epoch": 0.09428123925583527,
      "grad_norm": 0.30972158908843994,
      "learning_rate": 0.00019832685489879208,
      "loss": 0.8158,
      "step": 617
    },
    {
      "epoch": 0.09443404515414296,
      "grad_norm": 0.2101728767156601,
      "learning_rate": 0.00019831775020516008,
      "loss": 0.6214,
      "step": 618
    },
    {
      "epoch": 0.09458685105245063,
      "grad_norm": 0.24316735565662384,
      "learning_rate": 0.00019830862101648592,
      "loss": 0.4683,
      "step": 619
    },
    {
      "epoch": 0.0947396569507583,
      "grad_norm": 0.6694328784942627,
      "learning_rate": 0.00019829946733504402,
      "loss": 0.7519,
      "step": 620
    },
    {
      "epoch": 0.09489246284906597,
      "grad_norm": 0.22268711030483246,
      "learning_rate": 0.000198290289163115,
      "loss": 0.7029,
      "step": 621
    },
    {
      "epoch": 0.09504526874737365,
      "grad_norm": 0.30094367265701294,
      "learning_rate": 0.00019828108650298554,
      "loss": 0.9032,
      "step": 622
    },
    {
      "epoch": 0.09519807464568132,
      "grad_norm": 0.24041011929512024,
      "learning_rate": 0.00019827185935694834,
      "loss": 0.7073,
      "step": 623
    },
    {
      "epoch": 0.09535088054398899,
      "grad_norm": 0.261481910943985,
      "learning_rate": 0.00019826260772730229,
      "loss": 0.6153,
      "step": 624
    },
    {
      "epoch": 0.09550368644229668,
      "grad_norm": 0.249210923910141,
      "learning_rate": 0.0001982533316163524,
      "loss": 0.7851,
      "step": 625
    },
    {
      "epoch": 0.09565649234060435,
      "grad_norm": 0.31153637170791626,
      "learning_rate": 0.00019824403102640967,
      "loss": 0.7585,
      "step": 626
    },
    {
      "epoch": 0.09580929823891202,
      "grad_norm": 0.2985265851020813,
      "learning_rate": 0.00019823470595979132,
      "loss": 0.679,
      "step": 627
    },
    {
      "epoch": 0.0959621041372197,
      "grad_norm": 0.275879830121994,
      "learning_rate": 0.00019822535641882057,
      "loss": 0.6026,
      "step": 628
    },
    {
      "epoch": 0.09611491003552737,
      "grad_norm": 0.2656649053096771,
      "learning_rate": 0.0001982159824058268,
      "loss": 1.0268,
      "step": 629
    },
    {
      "epoch": 0.09626771593383504,
      "grad_norm": 0.3071416914463043,
      "learning_rate": 0.00019820658392314547,
      "loss": 0.9391,
      "step": 630
    },
    {
      "epoch": 0.09642052183214272,
      "grad_norm": 0.22628253698349,
      "learning_rate": 0.0001981971609731181,
      "loss": 0.6201,
      "step": 631
    },
    {
      "epoch": 0.0965733277304504,
      "grad_norm": 0.33724674582481384,
      "learning_rate": 0.0001981877135580924,
      "loss": 0.6772,
      "step": 632
    },
    {
      "epoch": 0.09672613362875807,
      "grad_norm": 0.2758637070655823,
      "learning_rate": 0.00019817824168042204,
      "loss": 0.6861,
      "step": 633
    },
    {
      "epoch": 0.09687893952706575,
      "grad_norm": 0.25251737236976624,
      "learning_rate": 0.00019816874534246695,
      "loss": 0.6669,
      "step": 634
    },
    {
      "epoch": 0.09703174542537342,
      "grad_norm": 0.3473283350467682,
      "learning_rate": 0.00019815922454659296,
      "loss": 0.8036,
      "step": 635
    },
    {
      "epoch": 0.09718455132368109,
      "grad_norm": 0.2671525776386261,
      "learning_rate": 0.00019814967929517217,
      "loss": 0.7605,
      "step": 636
    },
    {
      "epoch": 0.09733735722198877,
      "grad_norm": 0.3378095328807831,
      "learning_rate": 0.0001981401095905827,
      "loss": 0.6923,
      "step": 637
    },
    {
      "epoch": 0.09749016312029644,
      "grad_norm": 0.3124406635761261,
      "learning_rate": 0.00019813051543520868,
      "loss": 0.8457,
      "step": 638
    },
    {
      "epoch": 0.09764296901860411,
      "grad_norm": 0.26661619544029236,
      "learning_rate": 0.0001981208968314405,
      "loss": 0.638,
      "step": 639
    },
    {
      "epoch": 0.0977957749169118,
      "grad_norm": 0.23887763917446136,
      "learning_rate": 0.00019811125378167452,
      "loss": 0.7673,
      "step": 640
    },
    {
      "epoch": 0.09794858081521947,
      "grad_norm": 0.24191714823246002,
      "learning_rate": 0.00019810158628831323,
      "loss": 0.6071,
      "step": 641
    },
    {
      "epoch": 0.09810138671352714,
      "grad_norm": 0.2351231426000595,
      "learning_rate": 0.0001980918943537652,
      "loss": 0.8946,
      "step": 642
    },
    {
      "epoch": 0.09825419261183482,
      "grad_norm": 0.2848316729068756,
      "learning_rate": 0.00019808217798044514,
      "loss": 0.5735,
      "step": 643
    },
    {
      "epoch": 0.0984069985101425,
      "grad_norm": 0.256180077791214,
      "learning_rate": 0.0001980724371707737,
      "loss": 0.7506,
      "step": 644
    },
    {
      "epoch": 0.09855980440845016,
      "grad_norm": 0.22801616787910461,
      "learning_rate": 0.0001980626719271778,
      "loss": 0.6963,
      "step": 645
    },
    {
      "epoch": 0.09871261030675785,
      "grad_norm": 0.2420923262834549,
      "learning_rate": 0.0001980528822520904,
      "loss": 0.6607,
      "step": 646
    },
    {
      "epoch": 0.09886541620506552,
      "grad_norm": 0.2945977747440338,
      "learning_rate": 0.0001980430681479504,
      "loss": 0.7108,
      "step": 647
    },
    {
      "epoch": 0.09901822210337319,
      "grad_norm": 0.26587414741516113,
      "learning_rate": 0.00019803322961720304,
      "loss": 0.7858,
      "step": 648
    },
    {
      "epoch": 0.09917102800168086,
      "grad_norm": 0.2787562608718872,
      "learning_rate": 0.0001980233666622994,
      "loss": 0.5684,
      "step": 649
    },
    {
      "epoch": 0.09932383389998854,
      "grad_norm": 0.22903244197368622,
      "learning_rate": 0.00019801347928569677,
      "loss": 0.6586,
      "step": 650
    },
    {
      "epoch": 0.09947663979829621,
      "grad_norm": 0.22474107146263123,
      "learning_rate": 0.00019800356748985853,
      "loss": 0.5589,
      "step": 651
    },
    {
      "epoch": 0.09962944569660388,
      "grad_norm": 0.26125723123550415,
      "learning_rate": 0.00019799363127725412,
      "loss": 0.7504,
      "step": 652
    },
    {
      "epoch": 0.09978225159491157,
      "grad_norm": 0.263163685798645,
      "learning_rate": 0.00019798367065035905,
      "loss": 0.6425,
      "step": 653
    },
    {
      "epoch": 0.09993505749321924,
      "grad_norm": 0.3001612424850464,
      "learning_rate": 0.00019797368561165496,
      "loss": 0.64,
      "step": 654
    },
    {
      "epoch": 0.10008786339152691,
      "grad_norm": 0.425373911857605,
      "learning_rate": 0.0001979636761636295,
      "loss": 0.8331,
      "step": 655
    },
    {
      "epoch": 0.10024066928983459,
      "grad_norm": 0.358103483915329,
      "learning_rate": 0.00019795364230877649,
      "loss": 0.7991,
      "step": 656
    },
    {
      "epoch": 0.10039347518814226,
      "grad_norm": 0.26949623227119446,
      "learning_rate": 0.0001979435840495957,
      "loss": 0.7071,
      "step": 657
    },
    {
      "epoch": 0.10054628108644993,
      "grad_norm": 0.41269823908805847,
      "learning_rate": 0.00019793350138859312,
      "loss": 0.6083,
      "step": 658
    },
    {
      "epoch": 0.10069908698475762,
      "grad_norm": 0.25923144817352295,
      "learning_rate": 0.00019792339432828074,
      "loss": 0.6838,
      "step": 659
    },
    {
      "epoch": 0.10085189288306529,
      "grad_norm": 0.2757934331893921,
      "learning_rate": 0.00019791326287117668,
      "loss": 0.7489,
      "step": 660
    },
    {
      "epoch": 0.10100469878137296,
      "grad_norm": 0.2592853605747223,
      "learning_rate": 0.00019790310701980505,
      "loss": 0.6127,
      "step": 661
    },
    {
      "epoch": 0.10115750467968064,
      "grad_norm": 0.23879987001419067,
      "learning_rate": 0.00019789292677669615,
      "loss": 0.6025,
      "step": 662
    },
    {
      "epoch": 0.10131031057798831,
      "grad_norm": 0.23186875879764557,
      "learning_rate": 0.00019788272214438628,
      "loss": 0.7529,
      "step": 663
    },
    {
      "epoch": 0.10146311647629598,
      "grad_norm": 0.2402157485485077,
      "learning_rate": 0.00019787249312541784,
      "loss": 0.9017,
      "step": 664
    },
    {
      "epoch": 0.10161592237460366,
      "grad_norm": 0.24203136563301086,
      "learning_rate": 0.0001978622397223393,
      "loss": 0.764,
      "step": 665
    },
    {
      "epoch": 0.10176872827291134,
      "grad_norm": 0.3500341773033142,
      "learning_rate": 0.00019785196193770522,
      "loss": 0.7858,
      "step": 666
    },
    {
      "epoch": 0.101921534171219,
      "grad_norm": 0.357442170381546,
      "learning_rate": 0.0001978416597740762,
      "loss": 0.6913,
      "step": 667
    },
    {
      "epoch": 0.10207434006952669,
      "grad_norm": 0.33510905504226685,
      "learning_rate": 0.00019783133323401898,
      "loss": 0.6076,
      "step": 668
    },
    {
      "epoch": 0.10222714596783436,
      "grad_norm": 0.2523046135902405,
      "learning_rate": 0.00019782098232010625,
      "loss": 0.6613,
      "step": 669
    },
    {
      "epoch": 0.10237995186614203,
      "grad_norm": 1.28267240524292,
      "learning_rate": 0.00019781060703491697,
      "loss": 0.8129,
      "step": 670
    },
    {
      "epoch": 0.10253275776444971,
      "grad_norm": 0.31581321358680725,
      "learning_rate": 0.00019780020738103594,
      "loss": 0.7189,
      "step": 671
    },
    {
      "epoch": 0.10268556366275738,
      "grad_norm": 0.22247004508972168,
      "learning_rate": 0.00019778978336105425,
      "loss": 0.6889,
      "step": 672
    },
    {
      "epoch": 0.10283836956106505,
      "grad_norm": 0.28829923272132874,
      "learning_rate": 0.00019777933497756885,
      "loss": 0.7773,
      "step": 673
    },
    {
      "epoch": 0.10299117545937274,
      "grad_norm": 0.4107334017753601,
      "learning_rate": 0.00019776886223318299,
      "loss": 0.7051,
      "step": 674
    },
    {
      "epoch": 0.10314398135768041,
      "grad_norm": 0.2262149304151535,
      "learning_rate": 0.00019775836513050577,
      "loss": 0.6931,
      "step": 675
    },
    {
      "epoch": 0.10329678725598808,
      "grad_norm": 0.24012112617492676,
      "learning_rate": 0.0001977478436721525,
      "loss": 0.6046,
      "step": 676
    },
    {
      "epoch": 0.10344959315429575,
      "grad_norm": 0.2686203122138977,
      "learning_rate": 0.00019773729786074447,
      "loss": 0.6899,
      "step": 677
    },
    {
      "epoch": 0.10360239905260343,
      "grad_norm": 0.25490519404411316,
      "learning_rate": 0.00019772672769890912,
      "loss": 0.8237,
      "step": 678
    },
    {
      "epoch": 0.1037552049509111,
      "grad_norm": 0.2543981671333313,
      "learning_rate": 0.00019771613318927988,
      "loss": 0.669,
      "step": 679
    },
    {
      "epoch": 0.10390801084921877,
      "grad_norm": 0.2917165756225586,
      "learning_rate": 0.00019770551433449636,
      "loss": 0.8482,
      "step": 680
    },
    {
      "epoch": 0.10406081674752646,
      "grad_norm": 0.25636452436447144,
      "learning_rate": 0.00019769487113720406,
      "loss": 0.8233,
      "step": 681
    },
    {
      "epoch": 0.10421362264583413,
      "grad_norm": 0.3934386670589447,
      "learning_rate": 0.00019768420360005473,
      "loss": 0.6585,
      "step": 682
    },
    {
      "epoch": 0.1043664285441418,
      "grad_norm": 0.2667856514453888,
      "learning_rate": 0.00019767351172570602,
      "loss": 0.6018,
      "step": 683
    },
    {
      "epoch": 0.10451923444244948,
      "grad_norm": 0.5042889714241028,
      "learning_rate": 0.0001976627955168218,
      "loss": 0.7258,
      "step": 684
    },
    {
      "epoch": 0.10467204034075715,
      "grad_norm": 0.274236261844635,
      "learning_rate": 0.00019765205497607186,
      "loss": 0.7307,
      "step": 685
    },
    {
      "epoch": 0.10482484623906482,
      "grad_norm": 0.24832475185394287,
      "learning_rate": 0.00019764129010613215,
      "loss": 0.8898,
      "step": 686
    },
    {
      "epoch": 0.1049776521373725,
      "grad_norm": 0.3612132668495178,
      "learning_rate": 0.00019763050090968462,
      "loss": 0.7601,
      "step": 687
    },
    {
      "epoch": 0.10513045803568018,
      "grad_norm": 0.22813887894153595,
      "learning_rate": 0.00019761968738941734,
      "loss": 0.6691,
      "step": 688
    },
    {
      "epoch": 0.10528326393398785,
      "grad_norm": 0.23167642951011658,
      "learning_rate": 0.00019760884954802437,
      "loss": 0.8389,
      "step": 689
    },
    {
      "epoch": 0.10543606983229553,
      "grad_norm": 0.2619309723377228,
      "learning_rate": 0.0001975979873882059,
      "loss": 0.8322,
      "step": 690
    },
    {
      "epoch": 0.1055888757306032,
      "grad_norm": 0.30721771717071533,
      "learning_rate": 0.00019758710091266813,
      "loss": 0.7664,
      "step": 691
    },
    {
      "epoch": 0.10574168162891087,
      "grad_norm": 0.2530481517314911,
      "learning_rate": 0.00019757619012412332,
      "loss": 0.5927,
      "step": 692
    },
    {
      "epoch": 0.10589448752721856,
      "grad_norm": 0.29600846767425537,
      "learning_rate": 0.00019756525502528986,
      "loss": 0.6524,
      "step": 693
    },
    {
      "epoch": 0.10604729342552623,
      "grad_norm": 0.25071632862091064,
      "learning_rate": 0.00019755429561889204,
      "loss": 0.7333,
      "step": 694
    },
    {
      "epoch": 0.1062000993238339,
      "grad_norm": 0.27111098170280457,
      "learning_rate": 0.0001975433119076604,
      "loss": 0.8976,
      "step": 695
    },
    {
      "epoch": 0.10635290522214158,
      "grad_norm": 0.2631009519100189,
      "learning_rate": 0.0001975323038943314,
      "loss": 0.6236,
      "step": 696
    },
    {
      "epoch": 0.10650571112044925,
      "grad_norm": 0.25254061818122864,
      "learning_rate": 0.0001975212715816476,
      "loss": 0.6441,
      "step": 697
    },
    {
      "epoch": 0.10665851701875692,
      "grad_norm": 0.3293875753879547,
      "learning_rate": 0.0001975102149723576,
      "loss": 0.7531,
      "step": 698
    },
    {
      "epoch": 0.1068113229170646,
      "grad_norm": 0.2423682063817978,
      "learning_rate": 0.00019749913406921606,
      "loss": 0.8024,
      "step": 699
    },
    {
      "epoch": 0.10696412881537228,
      "grad_norm": 0.2802618145942688,
      "learning_rate": 0.00019748802887498368,
      "loss": 0.6301,
      "step": 700
    },
    {
      "epoch": 0.10711693471367995,
      "grad_norm": 0.21464811265468597,
      "learning_rate": 0.00019747689939242726,
      "loss": 0.5926,
      "step": 701
    },
    {
      "epoch": 0.10726974061198763,
      "grad_norm": 0.2736561894416809,
      "learning_rate": 0.00019746574562431958,
      "loss": 0.6572,
      "step": 702
    },
    {
      "epoch": 0.1074225465102953,
      "grad_norm": 0.3253500759601593,
      "learning_rate": 0.00019745456757343957,
      "loss": 0.7262,
      "step": 703
    },
    {
      "epoch": 0.10757535240860297,
      "grad_norm": 0.39992064237594604,
      "learning_rate": 0.00019744336524257208,
      "loss": 0.9614,
      "step": 704
    },
    {
      "epoch": 0.10772815830691064,
      "grad_norm": 0.5507543683052063,
      "learning_rate": 0.0001974321386345081,
      "loss": 0.7267,
      "step": 705
    },
    {
      "epoch": 0.10788096420521832,
      "grad_norm": 0.2982296049594879,
      "learning_rate": 0.00019742088775204466,
      "loss": 0.7433,
      "step": 706
    },
    {
      "epoch": 0.108033770103526,
      "grad_norm": 0.32143905758857727,
      "learning_rate": 0.0001974096125979848,
      "loss": 0.5736,
      "step": 707
    },
    {
      "epoch": 0.10818657600183366,
      "grad_norm": 0.26693427562713623,
      "learning_rate": 0.00019739831317513767,
      "loss": 0.6675,
      "step": 708
    },
    {
      "epoch": 0.10833938190014135,
      "grad_norm": 0.2991078794002533,
      "learning_rate": 0.00019738698948631837,
      "loss": 0.7309,
      "step": 709
    },
    {
      "epoch": 0.10849218779844902,
      "grad_norm": 0.3002963066101074,
      "learning_rate": 0.00019737564153434812,
      "loss": 0.6062,
      "step": 710
    },
    {
      "epoch": 0.10864499369675669,
      "grad_norm": 0.254621684551239,
      "learning_rate": 0.00019736426932205422,
      "loss": 0.6951,
      "step": 711
    },
    {
      "epoch": 0.10879779959506437,
      "grad_norm": 0.25402164459228516,
      "learning_rate": 0.00019735287285226988,
      "loss": 0.6384,
      "step": 712
    },
    {
      "epoch": 0.10895060549337204,
      "grad_norm": 0.31595587730407715,
      "learning_rate": 0.0001973414521278345,
      "loss": 0.6293,
      "step": 713
    },
    {
      "epoch": 0.10910341139167971,
      "grad_norm": 0.3181349039077759,
      "learning_rate": 0.00019733000715159337,
      "loss": 0.7432,
      "step": 714
    },
    {
      "epoch": 0.1092562172899874,
      "grad_norm": 0.24884875118732452,
      "learning_rate": 0.00019731853792639802,
      "loss": 0.633,
      "step": 715
    },
    {
      "epoch": 0.10940902318829507,
      "grad_norm": 0.23580528795719147,
      "learning_rate": 0.00019730704445510586,
      "loss": 0.7396,
      "step": 716
    },
    {
      "epoch": 0.10956182908660274,
      "grad_norm": 0.33131417632102966,
      "learning_rate": 0.0001972955267405804,
      "loss": 0.6598,
      "step": 717
    },
    {
      "epoch": 0.10971463498491042,
      "grad_norm": 0.21372541785240173,
      "learning_rate": 0.00019728398478569115,
      "loss": 0.5871,
      "step": 718
    },
    {
      "epoch": 0.10986744088321809,
      "grad_norm": 0.34117481112480164,
      "learning_rate": 0.00019727241859331373,
      "loss": 0.903,
      "step": 719
    },
    {
      "epoch": 0.11002024678152576,
      "grad_norm": 0.4706093668937683,
      "learning_rate": 0.00019726082816632975,
      "loss": 0.605,
      "step": 720
    },
    {
      "epoch": 0.11017305267983345,
      "grad_norm": 0.23070864379405975,
      "learning_rate": 0.00019724921350762684,
      "loss": 0.7316,
      "step": 721
    },
    {
      "epoch": 0.11032585857814112,
      "grad_norm": 0.4054511487483978,
      "learning_rate": 0.00019723757462009875,
      "loss": 0.6363,
      "step": 722
    },
    {
      "epoch": 0.11047866447644879,
      "grad_norm": 0.23478427529335022,
      "learning_rate": 0.00019722591150664518,
      "loss": 0.755,
      "step": 723
    },
    {
      "epoch": 0.11063147037475647,
      "grad_norm": 0.5467060804367065,
      "learning_rate": 0.00019721422417017185,
      "loss": 0.6103,
      "step": 724
    },
    {
      "epoch": 0.11078427627306414,
      "grad_norm": 0.32651928067207336,
      "learning_rate": 0.00019720251261359065,
      "loss": 0.9015,
      "step": 725
    },
    {
      "epoch": 0.11093708217137181,
      "grad_norm": 0.33620190620422363,
      "learning_rate": 0.00019719077683981936,
      "loss": 0.766,
      "step": 726
    },
    {
      "epoch": 0.1110898880696795,
      "grad_norm": 0.22980651259422302,
      "learning_rate": 0.0001971790168517819,
      "loss": 0.551,
      "step": 727
    },
    {
      "epoch": 0.11124269396798717,
      "grad_norm": 0.2606430649757385,
      "learning_rate": 0.00019716723265240807,
      "loss": 0.5819,
      "step": 728
    },
    {
      "epoch": 0.11139549986629484,
      "grad_norm": 0.25085484981536865,
      "learning_rate": 0.00019715542424463388,
      "loss": 0.734,
      "step": 729
    },
    {
      "epoch": 0.11154830576460252,
      "grad_norm": 0.29355061054229736,
      "learning_rate": 0.00019714359163140133,
      "loss": 0.7688,
      "step": 730
    },
    {
      "epoch": 0.11170111166291019,
      "grad_norm": 0.34205570816993713,
      "learning_rate": 0.00019713173481565837,
      "loss": 0.976,
      "step": 731
    },
    {
      "epoch": 0.11185391756121786,
      "grad_norm": 0.29738330841064453,
      "learning_rate": 0.000197119853800359,
      "loss": 0.7573,
      "step": 732
    },
    {
      "epoch": 0.11200672345952553,
      "grad_norm": 0.26155439019203186,
      "learning_rate": 0.0001971079485884633,
      "loss": 0.7691,
      "step": 733
    },
    {
      "epoch": 0.11215952935783322,
      "grad_norm": 0.3583777844905853,
      "learning_rate": 0.00019709601918293737,
      "loss": 0.7932,
      "step": 734
    },
    {
      "epoch": 0.11231233525614089,
      "grad_norm": 0.285895437002182,
      "learning_rate": 0.00019708406558675333,
      "loss": 0.7157,
      "step": 735
    },
    {
      "epoch": 0.11246514115444856,
      "grad_norm": 0.26534533500671387,
      "learning_rate": 0.00019707208780288924,
      "loss": 0.6047,
      "step": 736
    },
    {
      "epoch": 0.11261794705275624,
      "grad_norm": 0.3675645887851715,
      "learning_rate": 0.00019706008583432935,
      "loss": 0.6816,
      "step": 737
    },
    {
      "epoch": 0.11277075295106391,
      "grad_norm": 0.2926788926124573,
      "learning_rate": 0.00019704805968406383,
      "loss": 0.794,
      "step": 738
    },
    {
      "epoch": 0.11292355884937158,
      "grad_norm": 0.31891047954559326,
      "learning_rate": 0.00019703600935508888,
      "loss": 0.856,
      "step": 739
    },
    {
      "epoch": 0.11307636474767926,
      "grad_norm": 0.32140710949897766,
      "learning_rate": 0.00019702393485040672,
      "loss": 0.6825,
      "step": 740
    },
    {
      "epoch": 0.11322917064598693,
      "grad_norm": 0.2733408212661743,
      "learning_rate": 0.00019701183617302568,
      "loss": 0.7611,
      "step": 741
    },
    {
      "epoch": 0.1133819765442946,
      "grad_norm": 0.22607572376728058,
      "learning_rate": 0.00019699971332595996,
      "loss": 0.5884,
      "step": 742
    },
    {
      "epoch": 0.11353478244260229,
      "grad_norm": 0.29300516843795776,
      "learning_rate": 0.00019698756631222994,
      "loss": 0.6787,
      "step": 743
    },
    {
      "epoch": 0.11368758834090996,
      "grad_norm": 0.39608168601989746,
      "learning_rate": 0.0001969753951348619,
      "loss": 0.6543,
      "step": 744
    },
    {
      "epoch": 0.11384039423921763,
      "grad_norm": 0.2555294632911682,
      "learning_rate": 0.00019696319979688816,
      "loss": 0.5899,
      "step": 745
    },
    {
      "epoch": 0.11399320013752531,
      "grad_norm": 0.2862085700035095,
      "learning_rate": 0.00019695098030134717,
      "loss": 0.7661,
      "step": 746
    },
    {
      "epoch": 0.11414600603583298,
      "grad_norm": 0.2918783128261566,
      "learning_rate": 0.00019693873665128323,
      "loss": 0.6101,
      "step": 747
    },
    {
      "epoch": 0.11429881193414065,
      "grad_norm": 0.22642338275909424,
      "learning_rate": 0.0001969264688497468,
      "loss": 0.7746,
      "step": 748
    },
    {
      "epoch": 0.11445161783244834,
      "grad_norm": 0.3243122398853302,
      "learning_rate": 0.00019691417689979428,
      "loss": 0.8686,
      "step": 749
    },
    {
      "epoch": 0.11460442373075601,
      "grad_norm": 0.45320257544517517,
      "learning_rate": 0.0001969018608044881,
      "loss": 0.8523,
      "step": 750
    },
    {
      "epoch": 0.11475722962906368,
      "grad_norm": 0.27918341755867004,
      "learning_rate": 0.00019688952056689672,
      "loss": 0.6287,
      "step": 751
    },
    {
      "epoch": 0.11491003552737136,
      "grad_norm": 0.24446265399456024,
      "learning_rate": 0.0001968771561900946,
      "loss": 0.7383,
      "step": 752
    },
    {
      "epoch": 0.11506284142567903,
      "grad_norm": 0.24698710441589355,
      "learning_rate": 0.00019686476767716225,
      "loss": 0.5625,
      "step": 753
    },
    {
      "epoch": 0.1152156473239867,
      "grad_norm": 0.2587762773036957,
      "learning_rate": 0.00019685235503118614,
      "loss": 0.6205,
      "step": 754
    },
    {
      "epoch": 0.11536845322229439,
      "grad_norm": 0.2515849769115448,
      "learning_rate": 0.00019683991825525875,
      "loss": 0.5296,
      "step": 755
    },
    {
      "epoch": 0.11552125912060206,
      "grad_norm": 0.2782059907913208,
      "learning_rate": 0.00019682745735247862,
      "loss": 0.7873,
      "step": 756
    },
    {
      "epoch": 0.11567406501890973,
      "grad_norm": 0.2650497257709503,
      "learning_rate": 0.0001968149723259503,
      "loss": 0.5342,
      "step": 757
    },
    {
      "epoch": 0.1158268709172174,
      "grad_norm": 0.2502165138721466,
      "learning_rate": 0.00019680246317878433,
      "loss": 0.6457,
      "step": 758
    },
    {
      "epoch": 0.11597967681552508,
      "grad_norm": 0.2777862250804901,
      "learning_rate": 0.00019678992991409723,
      "loss": 0.6767,
      "step": 759
    },
    {
      "epoch": 0.11613248271383275,
      "grad_norm": 0.213461235165596,
      "learning_rate": 0.00019677737253501155,
      "loss": 0.6959,
      "step": 760
    },
    {
      "epoch": 0.11628528861214042,
      "grad_norm": 0.28070124983787537,
      "learning_rate": 0.0001967647910446559,
      "loss": 0.6332,
      "step": 761
    },
    {
      "epoch": 0.1164380945104481,
      "grad_norm": 0.38696399331092834,
      "learning_rate": 0.00019675218544616482,
      "loss": 0.7009,
      "step": 762
    },
    {
      "epoch": 0.11659090040875578,
      "grad_norm": 0.24727767705917358,
      "learning_rate": 0.00019673955574267887,
      "loss": 0.6498,
      "step": 763
    },
    {
      "epoch": 0.11674370630706345,
      "grad_norm": 0.3144250810146332,
      "learning_rate": 0.00019672690193734468,
      "loss": 0.6396,
      "step": 764
    },
    {
      "epoch": 0.11689651220537113,
      "grad_norm": 0.30413126945495605,
      "learning_rate": 0.00019671422403331486,
      "loss": 0.6576,
      "step": 765
    },
    {
      "epoch": 0.1170493181036788,
      "grad_norm": 0.2549903392791748,
      "learning_rate": 0.00019670152203374793,
      "loss": 0.6948,
      "step": 766
    },
    {
      "epoch": 0.11720212400198647,
      "grad_norm": 0.2680594325065613,
      "learning_rate": 0.00019668879594180854,
      "loss": 0.9079,
      "step": 767
    },
    {
      "epoch": 0.11735492990029416,
      "grad_norm": 0.3748367726802826,
      "learning_rate": 0.00019667604576066724,
      "loss": 0.6821,
      "step": 768
    },
    {
      "epoch": 0.11750773579860183,
      "grad_norm": 0.2943898141384125,
      "learning_rate": 0.00019666327149350067,
      "loss": 0.6278,
      "step": 769
    },
    {
      "epoch": 0.1176605416969095,
      "grad_norm": 0.30158859491348267,
      "learning_rate": 0.00019665047314349146,
      "loss": 0.7498,
      "step": 770
    },
    {
      "epoch": 0.11781334759521718,
      "grad_norm": 0.2680291533470154,
      "learning_rate": 0.0001966376507138281,
      "loss": 0.6299,
      "step": 771
    },
    {
      "epoch": 0.11796615349352485,
      "grad_norm": 0.24324612319469452,
      "learning_rate": 0.00019662480420770532,
      "loss": 0.6987,
      "step": 772
    },
    {
      "epoch": 0.11811895939183252,
      "grad_norm": 0.2948191463947296,
      "learning_rate": 0.00019661193362832365,
      "loss": 0.6614,
      "step": 773
    },
    {
      "epoch": 0.1182717652901402,
      "grad_norm": 0.3052494525909424,
      "learning_rate": 0.00019659903897888972,
      "loss": 0.7924,
      "step": 774
    },
    {
      "epoch": 0.11842457118844787,
      "grad_norm": 0.2476397603750229,
      "learning_rate": 0.00019658612026261606,
      "loss": 0.7096,
      "step": 775
    },
    {
      "epoch": 0.11857737708675554,
      "grad_norm": 0.21247366070747375,
      "learning_rate": 0.00019657317748272135,
      "loss": 0.8716,
      "step": 776
    },
    {
      "epoch": 0.11873018298506323,
      "grad_norm": 0.32150718569755554,
      "learning_rate": 0.00019656021064243012,
      "loss": 0.647,
      "step": 777
    },
    {
      "epoch": 0.1188829888833709,
      "grad_norm": 0.3018513321876526,
      "learning_rate": 0.00019654721974497294,
      "loss": 0.6629,
      "step": 778
    },
    {
      "epoch": 0.11903579478167857,
      "grad_norm": 0.27877163887023926,
      "learning_rate": 0.00019653420479358639,
      "loss": 0.6738,
      "step": 779
    },
    {
      "epoch": 0.11918860067998625,
      "grad_norm": 0.2589527368545532,
      "learning_rate": 0.0001965211657915131,
      "loss": 0.6804,
      "step": 780
    },
    {
      "epoch": 0.11934140657829392,
      "grad_norm": 0.4898923337459564,
      "learning_rate": 0.00019650810274200153,
      "loss": 0.6081,
      "step": 781
    },
    {
      "epoch": 0.1194942124766016,
      "grad_norm": 0.30923375487327576,
      "learning_rate": 0.0001964950156483063,
      "loss": 0.7302,
      "step": 782
    },
    {
      "epoch": 0.11964701837490928,
      "grad_norm": 0.26589056849479675,
      "learning_rate": 0.0001964819045136879,
      "loss": 0.797,
      "step": 783
    },
    {
      "epoch": 0.11979982427321695,
      "grad_norm": 0.24651648104190826,
      "learning_rate": 0.00019646876934141289,
      "loss": 0.6002,
      "step": 784
    },
    {
      "epoch": 0.11995263017152462,
      "grad_norm": 0.2416309118270874,
      "learning_rate": 0.0001964556101347538,
      "loss": 0.68,
      "step": 785
    },
    {
      "epoch": 0.12010543606983229,
      "grad_norm": 0.23243074119091034,
      "learning_rate": 0.00019644242689698907,
      "loss": 0.9595,
      "step": 786
    },
    {
      "epoch": 0.12025824196813997,
      "grad_norm": 0.3017045557498932,
      "learning_rate": 0.00019642921963140331,
      "loss": 0.7222,
      "step": 787
    },
    {
      "epoch": 0.12041104786644764,
      "grad_norm": 0.2747794985771179,
      "learning_rate": 0.00019641598834128687,
      "loss": 0.7744,
      "step": 788
    },
    {
      "epoch": 0.12056385376475531,
      "grad_norm": 0.24212798476219177,
      "learning_rate": 0.0001964027330299363,
      "loss": 0.7895,
      "step": 789
    },
    {
      "epoch": 0.120716659663063,
      "grad_norm": 0.2827460467815399,
      "learning_rate": 0.000196389453700654,
      "loss": 0.551,
      "step": 790
    },
    {
      "epoch": 0.12086946556137067,
      "grad_norm": 0.25816842913627625,
      "learning_rate": 0.00019637615035674846,
      "loss": 0.7097,
      "step": 791
    },
    {
      "epoch": 0.12102227145967834,
      "grad_norm": 0.2768022119998932,
      "learning_rate": 0.00019636282300153406,
      "loss": 0.4357,
      "step": 792
    },
    {
      "epoch": 0.12117507735798602,
      "grad_norm": 0.31443700194358826,
      "learning_rate": 0.00019634947163833116,
      "loss": 0.587,
      "step": 793
    },
    {
      "epoch": 0.12132788325629369,
      "grad_norm": 0.3125135898590088,
      "learning_rate": 0.00019633609627046623,
      "loss": 0.707,
      "step": 794
    },
    {
      "epoch": 0.12148068915460136,
      "grad_norm": 0.2981247007846832,
      "learning_rate": 0.00019632269690127158,
      "loss": 0.792,
      "step": 795
    },
    {
      "epoch": 0.12163349505290905,
      "grad_norm": 0.23891063034534454,
      "learning_rate": 0.00019630927353408553,
      "loss": 0.6062,
      "step": 796
    },
    {
      "epoch": 0.12178630095121672,
      "grad_norm": 0.2705599069595337,
      "learning_rate": 0.0001962958261722524,
      "loss": 0.6864,
      "step": 797
    },
    {
      "epoch": 0.12193910684952439,
      "grad_norm": 0.25540342926979065,
      "learning_rate": 0.00019628235481912256,
      "loss": 0.6466,
      "step": 798
    },
    {
      "epoch": 0.12209191274783207,
      "grad_norm": 0.2492659091949463,
      "learning_rate": 0.00019626885947805222,
      "loss": 0.8629,
      "step": 799
    },
    {
      "epoch": 0.12224471864613974,
      "grad_norm": 0.23218658566474915,
      "learning_rate": 0.00019625534015240366,
      "loss": 0.5242,
      "step": 800
    },
    {
      "epoch": 0.12239752454444741,
      "grad_norm": 0.2692018151283264,
      "learning_rate": 0.00019624179684554505,
      "loss": 0.6638,
      "step": 801
    },
    {
      "epoch": 0.1225503304427551,
      "grad_norm": 0.3122497498989105,
      "learning_rate": 0.00019622822956085067,
      "loss": 0.8883,
      "step": 802
    },
    {
      "epoch": 0.12270313634106277,
      "grad_norm": 0.46200600266456604,
      "learning_rate": 0.00019621463830170064,
      "loss": 0.8743,
      "step": 803
    },
    {
      "epoch": 0.12285594223937044,
      "grad_norm": 0.28840991854667664,
      "learning_rate": 0.00019620102307148113,
      "loss": 0.6618,
      "step": 804
    },
    {
      "epoch": 0.12300874813767812,
      "grad_norm": 0.23175831139087677,
      "learning_rate": 0.00019618738387358424,
      "loss": 0.6825,
      "step": 805
    },
    {
      "epoch": 0.12316155403598579,
      "grad_norm": 0.35594913363456726,
      "learning_rate": 0.0001961737207114081,
      "loss": 0.6265,
      "step": 806
    },
    {
      "epoch": 0.12331435993429346,
      "grad_norm": 0.27673351764678955,
      "learning_rate": 0.00019616003358835675,
      "loss": 0.6526,
      "step": 807
    },
    {
      "epoch": 0.12346716583260114,
      "grad_norm": 0.26578110456466675,
      "learning_rate": 0.00019614632250784022,
      "loss": 0.7232,
      "step": 808
    },
    {
      "epoch": 0.12361997173090881,
      "grad_norm": 0.22138115763664246,
      "learning_rate": 0.0001961325874732745,
      "loss": 0.4978,
      "step": 809
    },
    {
      "epoch": 0.12377277762921648,
      "grad_norm": 0.34308725595474243,
      "learning_rate": 0.0001961188284880816,
      "loss": 0.7385,
      "step": 810
    },
    {
      "epoch": 0.12392558352752417,
      "grad_norm": 0.2813006341457367,
      "learning_rate": 0.0001961050455556894,
      "loss": 0.8297,
      "step": 811
    },
    {
      "epoch": 0.12407838942583184,
      "grad_norm": 0.24640551209449768,
      "learning_rate": 0.00019609123867953186,
      "loss": 0.9333,
      "step": 812
    },
    {
      "epoch": 0.12423119532413951,
      "grad_norm": 0.2680695652961731,
      "learning_rate": 0.00019607740786304877,
      "loss": 0.9178,
      "step": 813
    },
    {
      "epoch": 0.12438400122244718,
      "grad_norm": 0.2718088924884796,
      "learning_rate": 0.00019606355310968602,
      "loss": 0.6672,
      "step": 814
    },
    {
      "epoch": 0.12453680712075486,
      "grad_norm": 0.24112388491630554,
      "learning_rate": 0.0001960496744228954,
      "loss": 0.7025,
      "step": 815
    },
    {
      "epoch": 0.12468961301906253,
      "grad_norm": 0.30206188559532166,
      "learning_rate": 0.00019603577180613468,
      "loss": 0.6088,
      "step": 816
    },
    {
      "epoch": 0.1248424189173702,
      "grad_norm": 0.24509626626968384,
      "learning_rate": 0.00019602184526286757,
      "loss": 0.7398,
      "step": 817
    },
    {
      "epoch": 0.12499522481567789,
      "grad_norm": 0.2561381757259369,
      "learning_rate": 0.0001960078947965637,
      "loss": 0.6174,
      "step": 818
    },
    {
      "epoch": 0.12514803071398556,
      "grad_norm": 0.26984918117523193,
      "learning_rate": 0.00019599392041069877,
      "loss": 0.7665,
      "step": 819
    },
    {
      "epoch": 0.12530083661229324,
      "grad_norm": 0.3260689675807953,
      "learning_rate": 0.00019597992210875439,
      "loss": 0.8475,
      "step": 820
    },
    {
      "epoch": 0.1254536425106009,
      "grad_norm": 0.3531004786491394,
      "learning_rate": 0.00019596589989421807,
      "loss": 0.5424,
      "step": 821
    },
    {
      "epoch": 0.12560644840890858,
      "grad_norm": 0.281276673078537,
      "learning_rate": 0.00019595185377058337,
      "loss": 0.7901,
      "step": 822
    },
    {
      "epoch": 0.12575925430721627,
      "grad_norm": 0.32058045268058777,
      "learning_rate": 0.00019593778374134974,
      "loss": 0.5447,
      "step": 823
    },
    {
      "epoch": 0.12591206020552392,
      "grad_norm": 0.2947518229484558,
      "learning_rate": 0.0001959236898100226,
      "loss": 0.598,
      "step": 824
    },
    {
      "epoch": 0.1260648661038316,
      "grad_norm": 0.2923273742198944,
      "learning_rate": 0.0001959095719801134,
      "loss": 0.6917,
      "step": 825
    },
    {
      "epoch": 0.1262176720021393,
      "grad_norm": 0.42918404936790466,
      "learning_rate": 0.00019589543025513937,
      "loss": 0.7697,
      "step": 826
    },
    {
      "epoch": 0.12637047790044695,
      "grad_norm": 0.24322174489498138,
      "learning_rate": 0.00019588126463862388,
      "loss": 0.6655,
      "step": 827
    },
    {
      "epoch": 0.12652328379875463,
      "grad_norm": 0.2740302085876465,
      "learning_rate": 0.00019586707513409617,
      "loss": 0.8133,
      "step": 828
    },
    {
      "epoch": 0.12667608969706232,
      "grad_norm": 0.32025039196014404,
      "learning_rate": 0.00019585286174509143,
      "loss": 0.7481,
      "step": 829
    },
    {
      "epoch": 0.12682889559536997,
      "grad_norm": 0.27031072974205017,
      "learning_rate": 0.00019583862447515075,
      "loss": 0.7403,
      "step": 830
    },
    {
      "epoch": 0.12698170149367766,
      "grad_norm": 0.2899322807788849,
      "learning_rate": 0.00019582436332782132,
      "loss": 0.7497,
      "step": 831
    },
    {
      "epoch": 0.12713450739198534,
      "grad_norm": 0.2485196739435196,
      "learning_rate": 0.00019581007830665615,
      "loss": 0.7895,
      "step": 832
    },
    {
      "epoch": 0.127287313290293,
      "grad_norm": 0.3776859939098358,
      "learning_rate": 0.00019579576941521418,
      "loss": 0.8331,
      "step": 833
    },
    {
      "epoch": 0.12744011918860068,
      "grad_norm": 0.3224911391735077,
      "learning_rate": 0.0001957814366570604,
      "loss": 0.7736,
      "step": 834
    },
    {
      "epoch": 0.12759292508690837,
      "grad_norm": 0.23357270658016205,
      "learning_rate": 0.0001957670800357657,
      "loss": 0.7477,
      "step": 835
    },
    {
      "epoch": 0.12774573098521602,
      "grad_norm": 0.327070415019989,
      "learning_rate": 0.00019575269955490691,
      "loss": 0.9605,
      "step": 836
    },
    {
      "epoch": 0.1278985368835237,
      "grad_norm": 0.3038500249385834,
      "learning_rate": 0.0001957382952180668,
      "loss": 0.7441,
      "step": 837
    },
    {
      "epoch": 0.1280513427818314,
      "grad_norm": 0.25796058773994446,
      "learning_rate": 0.0001957238670288341,
      "loss": 0.622,
      "step": 838
    },
    {
      "epoch": 0.12820414868013905,
      "grad_norm": 0.41515886783599854,
      "learning_rate": 0.00019570941499080343,
      "loss": 1.0251,
      "step": 839
    },
    {
      "epoch": 0.12835695457844673,
      "grad_norm": 0.276103138923645,
      "learning_rate": 0.00019569493910757542,
      "loss": 0.7484,
      "step": 840
    },
    {
      "epoch": 0.12850976047675441,
      "grad_norm": 0.2659948170185089,
      "learning_rate": 0.00019568043938275663,
      "loss": 0.6444,
      "step": 841
    },
    {
      "epoch": 0.12866256637506207,
      "grad_norm": 0.3016412854194641,
      "learning_rate": 0.00019566591581995953,
      "loss": 0.6506,
      "step": 842
    },
    {
      "epoch": 0.12881537227336975,
      "grad_norm": 0.29354071617126465,
      "learning_rate": 0.00019565136842280255,
      "loss": 0.7701,
      "step": 843
    },
    {
      "epoch": 0.12896817817167744,
      "grad_norm": 0.2543928325176239,
      "learning_rate": 0.00019563679719491003,
      "loss": 0.6735,
      "step": 844
    },
    {
      "epoch": 0.1291209840699851,
      "grad_norm": 0.31514692306518555,
      "learning_rate": 0.00019562220213991232,
      "loss": 0.7412,
      "step": 845
    },
    {
      "epoch": 0.12927378996829278,
      "grad_norm": 0.26890984177589417,
      "learning_rate": 0.00019560758326144558,
      "loss": 0.737,
      "step": 846
    },
    {
      "epoch": 0.12942659586660046,
      "grad_norm": 0.41693368554115295,
      "learning_rate": 0.00019559294056315207,
      "loss": 0.9572,
      "step": 847
    },
    {
      "epoch": 0.12957940176490812,
      "grad_norm": 0.24249470233917236,
      "learning_rate": 0.00019557827404867984,
      "loss": 0.5923,
      "step": 848
    },
    {
      "epoch": 0.1297322076632158,
      "grad_norm": 0.2592976689338684,
      "learning_rate": 0.00019556358372168294,
      "loss": 0.6565,
      "step": 849
    },
    {
      "epoch": 0.1298850135615235,
      "grad_norm": 0.25530290603637695,
      "learning_rate": 0.0001955488695858213,
      "loss": 0.6702,
      "step": 850
    },
    {
      "epoch": 0.13003781945983114,
      "grad_norm": 0.27146828174591064,
      "learning_rate": 0.00019553413164476088,
      "loss": 0.7106,
      "step": 851
    },
    {
      "epoch": 0.13019062535813883,
      "grad_norm": 0.24907812476158142,
      "learning_rate": 0.00019551936990217352,
      "loss": 0.5918,
      "step": 852
    },
    {
      "epoch": 0.1303434312564465,
      "grad_norm": 0.4307115972042084,
      "learning_rate": 0.00019550458436173694,
      "loss": 0.6567,
      "step": 853
    },
    {
      "epoch": 0.13049623715475417,
      "grad_norm": 0.2714177966117859,
      "learning_rate": 0.0001954897750271349,
      "loss": 0.6176,
      "step": 854
    },
    {
      "epoch": 0.13064904305306185,
      "grad_norm": 0.2820480167865753,
      "learning_rate": 0.0001954749419020569,
      "loss": 0.6196,
      "step": 855
    },
    {
      "epoch": 0.1308018489513695,
      "grad_norm": 0.34379422664642334,
      "learning_rate": 0.00019546008499019864,
      "loss": 0.6766,
      "step": 856
    },
    {
      "epoch": 0.1309546548496772,
      "grad_norm": 0.3182826638221741,
      "learning_rate": 0.00019544520429526146,
      "loss": 0.7375,
      "step": 857
    },
    {
      "epoch": 0.13110746074798488,
      "grad_norm": 0.2834482789039612,
      "learning_rate": 0.00019543029982095286,
      "loss": 0.6309,
      "step": 858
    },
    {
      "epoch": 0.13126026664629253,
      "grad_norm": 0.2880655527114868,
      "learning_rate": 0.0001954153715709861,
      "loss": 0.835,
      "step": 859
    },
    {
      "epoch": 0.13141307254460022,
      "grad_norm": 0.2973262667655945,
      "learning_rate": 0.0001954004195490805,
      "loss": 0.614,
      "step": 860
    },
    {
      "epoch": 0.1315658784429079,
      "grad_norm": 0.2937520742416382,
      "learning_rate": 0.0001953854437589611,
      "loss": 0.6843,
      "step": 861
    },
    {
      "epoch": 0.13171868434121556,
      "grad_norm": 0.24553163349628448,
      "learning_rate": 0.00019537044420435914,
      "loss": 0.716,
      "step": 862
    },
    {
      "epoch": 0.13187149023952324,
      "grad_norm": 0.2711239755153656,
      "learning_rate": 0.00019535542088901155,
      "loss": 0.6589,
      "step": 863
    },
    {
      "epoch": 0.13202429613783093,
      "grad_norm": 0.29433688521385193,
      "learning_rate": 0.0001953403738166613,
      "loss": 0.71,
      "step": 864
    },
    {
      "epoch": 0.13217710203613858,
      "grad_norm": 0.34008464217185974,
      "learning_rate": 0.0001953253029910572,
      "loss": 0.7356,
      "step": 865
    },
    {
      "epoch": 0.13232990793444627,
      "grad_norm": 0.26458245515823364,
      "learning_rate": 0.00019531020841595406,
      "loss": 0.708,
      "step": 866
    },
    {
      "epoch": 0.13248271383275395,
      "grad_norm": 0.3054756224155426,
      "learning_rate": 0.00019529509009511253,
      "loss": 0.5603,
      "step": 867
    },
    {
      "epoch": 0.1326355197310616,
      "grad_norm": 0.26879218220710754,
      "learning_rate": 0.00019527994803229926,
      "loss": 0.7848,
      "step": 868
    },
    {
      "epoch": 0.1327883256293693,
      "grad_norm": 0.29384973645210266,
      "learning_rate": 0.0001952647822312867,
      "loss": 0.6419,
      "step": 869
    },
    {
      "epoch": 0.13294113152767698,
      "grad_norm": 0.2679104208946228,
      "learning_rate": 0.00019524959269585337,
      "loss": 0.6762,
      "step": 870
    },
    {
      "epoch": 0.13309393742598463,
      "grad_norm": 0.21466131508350372,
      "learning_rate": 0.00019523437942978357,
      "loss": 0.6237,
      "step": 871
    },
    {
      "epoch": 0.13324674332429232,
      "grad_norm": 0.34037020802497864,
      "learning_rate": 0.0001952191424368675,
      "loss": 0.6994,
      "step": 872
    },
    {
      "epoch": 0.1333995492226,
      "grad_norm": 0.2652078866958618,
      "learning_rate": 0.00019520388172090142,
      "loss": 0.6774,
      "step": 873
    },
    {
      "epoch": 0.13355235512090766,
      "grad_norm": 0.2574101686477661,
      "learning_rate": 0.00019518859728568736,
      "loss": 0.6073,
      "step": 874
    },
    {
      "epoch": 0.13370516101921534,
      "grad_norm": 0.2610401213169098,
      "learning_rate": 0.00019517328913503334,
      "loss": 0.6916,
      "step": 875
    },
    {
      "epoch": 0.13385796691752302,
      "grad_norm": 0.23128172755241394,
      "learning_rate": 0.00019515795727275323,
      "loss": 0.7244,
      "step": 876
    },
    {
      "epoch": 0.13401077281583068,
      "grad_norm": 0.2592519521713257,
      "learning_rate": 0.00019514260170266687,
      "loss": 0.6513,
      "step": 877
    },
    {
      "epoch": 0.13416357871413837,
      "grad_norm": 0.23765848577022552,
      "learning_rate": 0.00019512722242859992,
      "loss": 0.7319,
      "step": 878
    },
    {
      "epoch": 0.13431638461244605,
      "grad_norm": 0.2605260908603668,
      "learning_rate": 0.00019511181945438402,
      "loss": 0.7414,
      "step": 879
    },
    {
      "epoch": 0.1344691905107537,
      "grad_norm": 0.2504040002822876,
      "learning_rate": 0.00019509639278385673,
      "loss": 0.6466,
      "step": 880
    },
    {
      "epoch": 0.1346219964090614,
      "grad_norm": 0.37945783138275146,
      "learning_rate": 0.00019508094242086138,
      "loss": 0.6494,
      "step": 881
    },
    {
      "epoch": 0.13477480230736907,
      "grad_norm": 0.2152481973171234,
      "learning_rate": 0.0001950654683692474,
      "loss": 0.7879,
      "step": 882
    },
    {
      "epoch": 0.13492760820567673,
      "grad_norm": 0.24629932641983032,
      "learning_rate": 0.00019504997063286999,
      "loss": 0.7656,
      "step": 883
    },
    {
      "epoch": 0.13508041410398441,
      "grad_norm": 0.3862961530685425,
      "learning_rate": 0.00019503444921559023,
      "loss": 1.0176,
      "step": 884
    },
    {
      "epoch": 0.1352332200022921,
      "grad_norm": 0.30331647396087646,
      "learning_rate": 0.0001950189041212752,
      "loss": 0.8648,
      "step": 885
    },
    {
      "epoch": 0.13538602590059975,
      "grad_norm": 0.24901315569877625,
      "learning_rate": 0.00019500333535379783,
      "loss": 0.6522,
      "step": 886
    },
    {
      "epoch": 0.13553883179890744,
      "grad_norm": 0.24886654317378998,
      "learning_rate": 0.00019498774291703695,
      "loss": 0.6478,
      "step": 887
    },
    {
      "epoch": 0.13569163769721512,
      "grad_norm": 0.7643476724624634,
      "learning_rate": 0.00019497212681487725,
      "loss": 0.6913,
      "step": 888
    },
    {
      "epoch": 0.13584444359552278,
      "grad_norm": 0.2617362141609192,
      "learning_rate": 0.00019495648705120938,
      "loss": 0.7314,
      "step": 889
    },
    {
      "epoch": 0.13599724949383046,
      "grad_norm": 0.2617185711860657,
      "learning_rate": 0.00019494082362992986,
      "loss": 0.6769,
      "step": 890
    },
    {
      "epoch": 0.13615005539213815,
      "grad_norm": 0.2449088990688324,
      "learning_rate": 0.00019492513655494106,
      "loss": 0.6773,
      "step": 891
    },
    {
      "epoch": 0.1363028612904458,
      "grad_norm": 0.27909642457962036,
      "learning_rate": 0.00019490942583015133,
      "loss": 0.8005,
      "step": 892
    },
    {
      "epoch": 0.1364556671887535,
      "grad_norm": 0.269930899143219,
      "learning_rate": 0.00019489369145947487,
      "loss": 0.8991,
      "step": 893
    },
    {
      "epoch": 0.13660847308706117,
      "grad_norm": 0.27242738008499146,
      "learning_rate": 0.00019487793344683172,
      "loss": 0.6498,
      "step": 894
    },
    {
      "epoch": 0.13676127898536883,
      "grad_norm": 0.23424513638019562,
      "learning_rate": 0.00019486215179614788,
      "loss": 0.6458,
      "step": 895
    },
    {
      "epoch": 0.1369140848836765,
      "grad_norm": 0.367795467376709,
      "learning_rate": 0.0001948463465113552,
      "loss": 0.6949,
      "step": 896
    },
    {
      "epoch": 0.1370668907819842,
      "grad_norm": 0.31714144349098206,
      "learning_rate": 0.00019483051759639148,
      "loss": 0.5297,
      "step": 897
    },
    {
      "epoch": 0.13721969668029185,
      "grad_norm": 0.2915026545524597,
      "learning_rate": 0.00019481466505520033,
      "loss": 0.7198,
      "step": 898
    },
    {
      "epoch": 0.13737250257859954,
      "grad_norm": 0.25416919589042664,
      "learning_rate": 0.00019479878889173128,
      "loss": 0.7209,
      "step": 899
    },
    {
      "epoch": 0.13752530847690722,
      "grad_norm": 0.26738041639328003,
      "learning_rate": 0.0001947828891099397,
      "loss": 0.6714,
      "step": 900
    },
    {
      "epoch": 0.13767811437521488,
      "grad_norm": 0.28009870648384094,
      "learning_rate": 0.00019476696571378699,
      "loss": 0.7738,
      "step": 901
    },
    {
      "epoch": 0.13783092027352256,
      "grad_norm": 0.26281601190567017,
      "learning_rate": 0.00019475101870724024,
      "loss": 0.6876,
      "step": 902
    },
    {
      "epoch": 0.13798372617183025,
      "grad_norm": 0.3066788613796234,
      "learning_rate": 0.00019473504809427254,
      "loss": 0.6356,
      "step": 903
    },
    {
      "epoch": 0.1381365320701379,
      "grad_norm": 0.2479480803012848,
      "learning_rate": 0.00019471905387886281,
      "loss": 0.6844,
      "step": 904
    },
    {
      "epoch": 0.13828933796844559,
      "grad_norm": 0.2982046902179718,
      "learning_rate": 0.00019470303606499597,
      "loss": 0.6945,
      "step": 905
    },
    {
      "epoch": 0.13844214386675327,
      "grad_norm": 0.2929346263408661,
      "learning_rate": 0.0001946869946566626,
      "loss": 0.9048,
      "step": 906
    },
    {
      "epoch": 0.13859494976506093,
      "grad_norm": 0.2749553322792053,
      "learning_rate": 0.00019467092965785933,
      "loss": 0.6481,
      "step": 907
    },
    {
      "epoch": 0.1387477556633686,
      "grad_norm": 0.25245675444602966,
      "learning_rate": 0.00019465484107258866,
      "loss": 0.5614,
      "step": 908
    },
    {
      "epoch": 0.1389005615616763,
      "grad_norm": 0.278685599565506,
      "learning_rate": 0.00019463872890485888,
      "loss": 0.6961,
      "step": 909
    },
    {
      "epoch": 0.13905336745998395,
      "grad_norm": 0.27726492285728455,
      "learning_rate": 0.0001946225931586842,
      "loss": 0.8507,
      "step": 910
    },
    {
      "epoch": 0.13920617335829163,
      "grad_norm": 0.23716701567173004,
      "learning_rate": 0.00019460643383808473,
      "loss": 0.658,
      "step": 911
    },
    {
      "epoch": 0.1393589792565993,
      "grad_norm": 0.22296889126300812,
      "learning_rate": 0.00019459025094708645,
      "loss": 0.57,
      "step": 912
    },
    {
      "epoch": 0.13951178515490698,
      "grad_norm": 0.2558571696281433,
      "learning_rate": 0.0001945740444897211,
      "loss": 0.767,
      "step": 913
    },
    {
      "epoch": 0.13966459105321466,
      "grad_norm": 0.2778489291667938,
      "learning_rate": 0.0001945578144700265,
      "loss": 0.573,
      "step": 914
    },
    {
      "epoch": 0.13981739695152232,
      "grad_norm": 0.26163187623023987,
      "learning_rate": 0.00019454156089204614,
      "loss": 0.6999,
      "step": 915
    },
    {
      "epoch": 0.13997020284983,
      "grad_norm": 0.21346476674079895,
      "learning_rate": 0.0001945252837598295,
      "loss": 0.7107,
      "step": 916
    },
    {
      "epoch": 0.14012300874813768,
      "grad_norm": 0.30867141485214233,
      "learning_rate": 0.00019450898307743185,
      "loss": 0.7034,
      "step": 917
    },
    {
      "epoch": 0.14027581464644534,
      "grad_norm": 0.31402018666267395,
      "learning_rate": 0.00019449265884891444,
      "loss": 0.7362,
      "step": 918
    },
    {
      "epoch": 0.14042862054475302,
      "grad_norm": 0.2718082666397095,
      "learning_rate": 0.00019447631107834422,
      "loss": 0.6461,
      "step": 919
    },
    {
      "epoch": 0.1405814264430607,
      "grad_norm": 0.2874963581562042,
      "learning_rate": 0.0001944599397697942,
      "loss": 0.7703,
      "step": 920
    },
    {
      "epoch": 0.14073423234136836,
      "grad_norm": 0.2410213053226471,
      "learning_rate": 0.00019444354492734308,
      "loss": 0.8031,
      "step": 921
    },
    {
      "epoch": 0.14088703823967605,
      "grad_norm": 0.3052217960357666,
      "learning_rate": 0.00019442712655507553,
      "loss": 0.6492,
      "step": 922
    },
    {
      "epoch": 0.14103984413798373,
      "grad_norm": 0.253045916557312,
      "learning_rate": 0.00019441068465708204,
      "loss": 0.7135,
      "step": 923
    },
    {
      "epoch": 0.1411926500362914,
      "grad_norm": 0.26858294010162354,
      "learning_rate": 0.00019439421923745897,
      "loss": 0.6473,
      "step": 924
    },
    {
      "epoch": 0.14134545593459907,
      "grad_norm": 0.2760922610759735,
      "learning_rate": 0.00019437773030030856,
      "loss": 0.6578,
      "step": 925
    },
    {
      "epoch": 0.14149826183290676,
      "grad_norm": 0.27938711643218994,
      "learning_rate": 0.00019436121784973886,
      "loss": 0.6319,
      "step": 926
    },
    {
      "epoch": 0.14165106773121441,
      "grad_norm": 0.2972564399242401,
      "learning_rate": 0.00019434468188986385,
      "loss": 0.521,
      "step": 927
    },
    {
      "epoch": 0.1418038736295221,
      "grad_norm": 0.24861447513103485,
      "learning_rate": 0.00019432812242480327,
      "loss": 0.5922,
      "step": 928
    },
    {
      "epoch": 0.14195667952782978,
      "grad_norm": 0.2783520221710205,
      "learning_rate": 0.00019431153945868282,
      "loss": 0.6947,
      "step": 929
    },
    {
      "epoch": 0.14210948542613744,
      "grad_norm": 0.27699464559555054,
      "learning_rate": 0.00019429493299563398,
      "loss": 0.7188,
      "step": 930
    },
    {
      "epoch": 0.14226229132444512,
      "grad_norm": 0.272151380777359,
      "learning_rate": 0.00019427830303979412,
      "loss": 0.7472,
      "step": 931
    },
    {
      "epoch": 0.1424150972227528,
      "grad_norm": 0.2500142753124237,
      "learning_rate": 0.00019426164959530646,
      "loss": 0.699,
      "step": 932
    },
    {
      "epoch": 0.14256790312106046,
      "grad_norm": 0.22516344487667084,
      "learning_rate": 0.00019424497266632,
      "loss": 0.6416,
      "step": 933
    },
    {
      "epoch": 0.14272070901936815,
      "grad_norm": 0.24510562419891357,
      "learning_rate": 0.00019422827225698978,
      "loss": 0.6235,
      "step": 934
    },
    {
      "epoch": 0.14287351491767583,
      "grad_norm": 0.26091206073760986,
      "learning_rate": 0.0001942115483714765,
      "loss": 0.5763,
      "step": 935
    },
    {
      "epoch": 0.1430263208159835,
      "grad_norm": 0.27128151059150696,
      "learning_rate": 0.00019419480101394679,
      "loss": 0.6988,
      "step": 936
    },
    {
      "epoch": 0.14317912671429117,
      "grad_norm": 0.24952010810375214,
      "learning_rate": 0.00019417803018857306,
      "loss": 0.5557,
      "step": 937
    },
    {
      "epoch": 0.14333193261259886,
      "grad_norm": 0.24118371307849884,
      "learning_rate": 0.00019416123589953367,
      "loss": 0.7261,
      "step": 938
    },
    {
      "epoch": 0.1434847385109065,
      "grad_norm": 0.2614760994911194,
      "learning_rate": 0.00019414441815101277,
      "loss": 0.9327,
      "step": 939
    },
    {
      "epoch": 0.1436375444092142,
      "grad_norm": 0.23340481519699097,
      "learning_rate": 0.00019412757694720038,
      "loss": 0.7183,
      "step": 940
    },
    {
      "epoch": 0.14379035030752188,
      "grad_norm": 0.2697202265262604,
      "learning_rate": 0.0001941107122922923,
      "loss": 0.8033,
      "step": 941
    },
    {
      "epoch": 0.14394315620582954,
      "grad_norm": 0.2892836630344391,
      "learning_rate": 0.00019409382419049024,
      "loss": 0.7165,
      "step": 942
    },
    {
      "epoch": 0.14409596210413722,
      "grad_norm": 0.2863559126853943,
      "learning_rate": 0.00019407691264600177,
      "loss": 0.9925,
      "step": 943
    },
    {
      "epoch": 0.1442487680024449,
      "grad_norm": 0.4062459170818329,
      "learning_rate": 0.00019405997766304019,
      "loss": 0.98,
      "step": 944
    },
    {
      "epoch": 0.14440157390075256,
      "grad_norm": 0.2909787595272064,
      "learning_rate": 0.00019404301924582474,
      "loss": 0.7841,
      "step": 945
    },
    {
      "epoch": 0.14455437979906025,
      "grad_norm": 0.22701376676559448,
      "learning_rate": 0.00019402603739858046,
      "loss": 0.6955,
      "step": 946
    },
    {
      "epoch": 0.14470718569736793,
      "grad_norm": 0.24725738167762756,
      "learning_rate": 0.00019400903212553824,
      "loss": 0.542,
      "step": 947
    },
    {
      "epoch": 0.14485999159567559,
      "grad_norm": 0.6905295252799988,
      "learning_rate": 0.00019399200343093477,
      "loss": 0.7339,
      "step": 948
    },
    {
      "epoch": 0.14501279749398327,
      "grad_norm": 0.25552988052368164,
      "learning_rate": 0.00019397495131901268,
      "loss": 0.6901,
      "step": 949
    },
    {
      "epoch": 0.14516560339229095,
      "grad_norm": 0.26159825921058655,
      "learning_rate": 0.0001939578757940203,
      "loss": 0.5935,
      "step": 950
    },
    {
      "epoch": 0.1453184092905986,
      "grad_norm": 0.3060537278652191,
      "learning_rate": 0.0001939407768602119,
      "loss": 0.8255,
      "step": 951
    },
    {
      "epoch": 0.1454712151889063,
      "grad_norm": 0.3530615270137787,
      "learning_rate": 0.00019392365452184745,
      "loss": 0.8534,
      "step": 952
    },
    {
      "epoch": 0.14562402108721398,
      "grad_norm": 0.28947460651397705,
      "learning_rate": 0.00019390650878319297,
      "loss": 0.7282,
      "step": 953
    },
    {
      "epoch": 0.14577682698552163,
      "grad_norm": 0.29009896516799927,
      "learning_rate": 0.00019388933964852004,
      "loss": 0.8321,
      "step": 954
    },
    {
      "epoch": 0.14592963288382932,
      "grad_norm": 0.3136522173881531,
      "learning_rate": 0.0001938721471221063,
      "loss": 0.7298,
      "step": 955
    },
    {
      "epoch": 0.146082438782137,
      "grad_norm": 0.28415438532829285,
      "learning_rate": 0.00019385493120823507,
      "loss": 0.587,
      "step": 956
    },
    {
      "epoch": 0.14623524468044466,
      "grad_norm": 0.3139444887638092,
      "learning_rate": 0.00019383769191119556,
      "loss": 0.6301,
      "step": 957
    },
    {
      "epoch": 0.14638805057875234,
      "grad_norm": 0.25484979152679443,
      "learning_rate": 0.0001938204292352828,
      "loss": 0.7721,
      "step": 958
    },
    {
      "epoch": 0.14654085647706003,
      "grad_norm": 0.3041636049747467,
      "learning_rate": 0.00019380314318479772,
      "loss": 0.6631,
      "step": 959
    },
    {
      "epoch": 0.14669366237536768,
      "grad_norm": 0.24949052929878235,
      "learning_rate": 0.00019378583376404685,
      "loss": 0.7336,
      "step": 960
    },
    {
      "epoch": 0.14684646827367537,
      "grad_norm": 0.265553742647171,
      "learning_rate": 0.00019376850097734276,
      "loss": 0.5804,
      "step": 961
    },
    {
      "epoch": 0.14699927417198305,
      "grad_norm": 0.3368707597255707,
      "learning_rate": 0.0001937511448290038,
      "loss": 0.658,
      "step": 962
    },
    {
      "epoch": 0.1471520800702907,
      "grad_norm": 0.253562331199646,
      "learning_rate": 0.00019373376532335406,
      "loss": 0.5727,
      "step": 963
    },
    {
      "epoch": 0.1473048859685984,
      "grad_norm": 0.3116651475429535,
      "learning_rate": 0.00019371636246472355,
      "loss": 0.7254,
      "step": 964
    },
    {
      "epoch": 0.14745769186690605,
      "grad_norm": 0.31176048517227173,
      "learning_rate": 0.00019369893625744794,
      "loss": 0.5388,
      "step": 965
    },
    {
      "epoch": 0.14761049776521373,
      "grad_norm": 0.29979297518730164,
      "learning_rate": 0.00019368148670586893,
      "loss": 0.8029,
      "step": 966
    },
    {
      "epoch": 0.14776330366352142,
      "grad_norm": 0.2583375573158264,
      "learning_rate": 0.0001936640138143339,
      "loss": 0.7159,
      "step": 967
    },
    {
      "epoch": 0.14791610956182907,
      "grad_norm": 0.3004581034183502,
      "learning_rate": 0.00019364651758719607,
      "loss": 0.7379,
      "step": 968
    },
    {
      "epoch": 0.14806891546013676,
      "grad_norm": 0.27960747480392456,
      "learning_rate": 0.00019362899802881446,
      "loss": 0.6646,
      "step": 969
    },
    {
      "epoch": 0.14822172135844444,
      "grad_norm": 0.36117398738861084,
      "learning_rate": 0.00019361145514355395,
      "loss": 0.6869,
      "step": 970
    },
    {
      "epoch": 0.1483745272567521,
      "grad_norm": 0.22343234717845917,
      "learning_rate": 0.00019359388893578516,
      "loss": 0.7267,
      "step": 971
    },
    {
      "epoch": 0.14852733315505978,
      "grad_norm": 0.2645972967147827,
      "learning_rate": 0.00019357629940988463,
      "loss": 0.6949,
      "step": 972
    },
    {
      "epoch": 0.14868013905336747,
      "grad_norm": 0.3185739517211914,
      "learning_rate": 0.00019355868657023456,
      "loss": 0.8325,
      "step": 973
    },
    {
      "epoch": 0.14883294495167512,
      "grad_norm": 0.29953569173812866,
      "learning_rate": 0.00019354105042122311,
      "loss": 0.7535,
      "step": 974
    },
    {
      "epoch": 0.1489857508499828,
      "grad_norm": 0.3828144967556,
      "learning_rate": 0.00019352339096724417,
      "loss": 0.7962,
      "step": 975
    },
    {
      "epoch": 0.1491385567482905,
      "grad_norm": 0.370355486869812,
      "learning_rate": 0.0001935057082126974,
      "loss": 0.7899,
      "step": 976
    },
    {
      "epoch": 0.14929136264659815,
      "grad_norm": 0.3137153685092926,
      "learning_rate": 0.00019348800216198835,
      "loss": 0.681,
      "step": 977
    },
    {
      "epoch": 0.14944416854490583,
      "grad_norm": 0.25897806882858276,
      "learning_rate": 0.00019347027281952834,
      "loss": 0.7847,
      "step": 978
    },
    {
      "epoch": 0.14959697444321352,
      "grad_norm": 0.23975513875484467,
      "learning_rate": 0.00019345252018973446,
      "loss": 0.7329,
      "step": 979
    },
    {
      "epoch": 0.14974978034152117,
      "grad_norm": 0.5979729294776917,
      "learning_rate": 0.0001934347442770296,
      "loss": 0.7083,
      "step": 980
    },
    {
      "epoch": 0.14990258623982886,
      "grad_norm": 0.29401421546936035,
      "learning_rate": 0.00019341694508584256,
      "loss": 0.5436,
      "step": 981
    },
    {
      "epoch": 0.15005539213813654,
      "grad_norm": 0.31312599778175354,
      "learning_rate": 0.0001933991226206078,
      "loss": 0.7891,
      "step": 982
    },
    {
      "epoch": 0.1502081980364442,
      "grad_norm": 0.2626318633556366,
      "learning_rate": 0.00019338127688576566,
      "loss": 0.8839,
      "step": 983
    },
    {
      "epoch": 0.15036100393475188,
      "grad_norm": 0.3405093848705292,
      "learning_rate": 0.00019336340788576225,
      "loss": 0.6524,
      "step": 984
    },
    {
      "epoch": 0.15051380983305956,
      "grad_norm": 0.23463614284992218,
      "learning_rate": 0.00019334551562504948,
      "loss": 0.6376,
      "step": 985
    },
    {
      "epoch": 0.15066661573136722,
      "grad_norm": 0.2980091869831085,
      "learning_rate": 0.00019332760010808505,
      "loss": 0.5438,
      "step": 986
    },
    {
      "epoch": 0.1508194216296749,
      "grad_norm": 0.4364708662033081,
      "learning_rate": 0.00019330966133933246,
      "loss": 0.8847,
      "step": 987
    },
    {
      "epoch": 0.1509722275279826,
      "grad_norm": 0.3133280575275421,
      "learning_rate": 0.00019329169932326103,
      "loss": 0.8726,
      "step": 988
    },
    {
      "epoch": 0.15112503342629025,
      "grad_norm": 0.24318096041679382,
      "learning_rate": 0.0001932737140643458,
      "loss": 0.8904,
      "step": 989
    },
    {
      "epoch": 0.15127783932459793,
      "grad_norm": 0.2581064999103546,
      "learning_rate": 0.00019325570556706772,
      "loss": 0.7594,
      "step": 990
    },
    {
      "epoch": 0.1514306452229056,
      "grad_norm": 0.2756637930870056,
      "learning_rate": 0.00019323767383591338,
      "loss": 0.7214,
      "step": 991
    },
    {
      "epoch": 0.15158345112121327,
      "grad_norm": 0.2461249828338623,
      "learning_rate": 0.00019321961887537524,
      "loss": 0.5824,
      "step": 992
    },
    {
      "epoch": 0.15173625701952095,
      "grad_norm": 0.2575419247150421,
      "learning_rate": 0.00019320154068995163,
      "loss": 0.9961,
      "step": 993
    },
    {
      "epoch": 0.15188906291782864,
      "grad_norm": 0.28650832176208496,
      "learning_rate": 0.00019318343928414645,
      "loss": 0.7662,
      "step": 994
    },
    {
      "epoch": 0.1520418688161363,
      "grad_norm": 0.29323071241378784,
      "learning_rate": 0.00019316531466246964,
      "loss": 0.8253,
      "step": 995
    },
    {
      "epoch": 0.15219467471444398,
      "grad_norm": 0.2523307502269745,
      "learning_rate": 0.00019314716682943667,
      "loss": 0.7602,
      "step": 996
    },
    {
      "epoch": 0.15234748061275166,
      "grad_norm": 0.2807372212409973,
      "learning_rate": 0.000193128995789569,
      "loss": 0.671,
      "step": 997
    },
    {
      "epoch": 0.15250028651105932,
      "grad_norm": 0.3469073176383972,
      "learning_rate": 0.0001931108015473938,
      "loss": 0.6428,
      "step": 998
    },
    {
      "epoch": 0.152653092409367,
      "grad_norm": 0.2644406855106354,
      "learning_rate": 0.00019309258410744399,
      "loss": 0.7001,
      "step": 999
    },
    {
      "epoch": 0.1528058983076747,
      "grad_norm": 0.23576515913009644,
      "learning_rate": 0.00019307434347425826,
      "loss": 0.8893,
      "step": 1000
    },
    {
      "epoch": 0.15295870420598234,
      "grad_norm": 0.25611841678619385,
      "learning_rate": 0.00019305607965238117,
      "loss": 0.7812,
      "step": 1001
    },
    {
      "epoch": 0.15311151010429003,
      "grad_norm": 0.29595786333084106,
      "learning_rate": 0.00019303779264636295,
      "loss": 0.8537,
      "step": 1002
    },
    {
      "epoch": 0.1532643160025977,
      "grad_norm": 0.26146572828292847,
      "learning_rate": 0.00019301948246075966,
      "loss": 0.6906,
      "step": 1003
    },
    {
      "epoch": 0.15341712190090537,
      "grad_norm": 0.23449784517288208,
      "learning_rate": 0.00019300114910013322,
      "loss": 0.804,
      "step": 1004
    },
    {
      "epoch": 0.15356992779921305,
      "grad_norm": 0.2150595486164093,
      "learning_rate": 0.00019298279256905107,
      "loss": 0.6666,
      "step": 1005
    },
    {
      "epoch": 0.15372273369752074,
      "grad_norm": 0.28082481026649475,
      "learning_rate": 0.0001929644128720867,
      "loss": 0.6423,
      "step": 1006
    },
    {
      "epoch": 0.1538755395958284,
      "grad_norm": 0.2738368809223175,
      "learning_rate": 0.00019294601001381925,
      "loss": 0.685,
      "step": 1007
    },
    {
      "epoch": 0.15402834549413608,
      "grad_norm": 0.39818379282951355,
      "learning_rate": 0.0001929275839988336,
      "loss": 0.8979,
      "step": 1008
    },
    {
      "epoch": 0.15418115139244376,
      "grad_norm": 0.28069233894348145,
      "learning_rate": 0.00019290913483172045,
      "loss": 0.7443,
      "step": 1009
    },
    {
      "epoch": 0.15433395729075142,
      "grad_norm": 0.4211709797382355,
      "learning_rate": 0.00019289066251707625,
      "loss": 0.8838,
      "step": 1010
    },
    {
      "epoch": 0.1544867631890591,
      "grad_norm": 0.25366538763046265,
      "learning_rate": 0.00019287216705950324,
      "loss": 0.672,
      "step": 1011
    },
    {
      "epoch": 0.15463956908736678,
      "grad_norm": 0.2813873589038849,
      "learning_rate": 0.00019285364846360943,
      "loss": 0.9237,
      "step": 1012
    },
    {
      "epoch": 0.15479237498567444,
      "grad_norm": 0.30833756923675537,
      "learning_rate": 0.0001928351067340085,
      "loss": 0.876,
      "step": 1013
    },
    {
      "epoch": 0.15494518088398213,
      "grad_norm": 0.3030012547969818,
      "learning_rate": 0.00019281654187532,
      "loss": 0.6612,
      "step": 1014
    },
    {
      "epoch": 0.1550979867822898,
      "grad_norm": 0.28833889961242676,
      "learning_rate": 0.00019279795389216922,
      "loss": 0.8364,
      "step": 1015
    },
    {
      "epoch": 0.15525079268059747,
      "grad_norm": 0.2622121274471283,
      "learning_rate": 0.00019277934278918725,
      "loss": 0.6571,
      "step": 1016
    },
    {
      "epoch": 0.15540359857890515,
      "grad_norm": 0.30616286396980286,
      "learning_rate": 0.0001927607085710108,
      "loss": 0.5411,
      "step": 1017
    },
    {
      "epoch": 0.15555640447721283,
      "grad_norm": 0.28586992621421814,
      "learning_rate": 0.00019274205124228245,
      "loss": 0.608,
      "step": 1018
    },
    {
      "epoch": 0.1557092103755205,
      "grad_norm": 0.26837894320487976,
      "learning_rate": 0.00019272337080765057,
      "loss": 0.8362,
      "step": 1019
    },
    {
      "epoch": 0.15586201627382817,
      "grad_norm": 0.3665698170661926,
      "learning_rate": 0.00019270466727176917,
      "loss": 0.5847,
      "step": 1020
    },
    {
      "epoch": 0.15601482217213583,
      "grad_norm": 0.2480638474225998,
      "learning_rate": 0.0001926859406392981,
      "loss": 0.6732,
      "step": 1021
    },
    {
      "epoch": 0.15616762807044351,
      "grad_norm": 0.3244907259941101,
      "learning_rate": 0.00019266719091490296,
      "loss": 0.8776,
      "step": 1022
    },
    {
      "epoch": 0.1563204339687512,
      "grad_norm": 0.39362213015556335,
      "learning_rate": 0.00019264841810325508,
      "loss": 0.7167,
      "step": 1023
    },
    {
      "epoch": 0.15647323986705886,
      "grad_norm": 0.2980339229106903,
      "learning_rate": 0.00019262962220903152,
      "loss": 0.8809,
      "step": 1024
    },
    {
      "epoch": 0.15662604576536654,
      "grad_norm": 0.32592833042144775,
      "learning_rate": 0.00019261080323691517,
      "loss": 0.9097,
      "step": 1025
    },
    {
      "epoch": 0.15677885166367422,
      "grad_norm": 0.30861401557922363,
      "learning_rate": 0.00019259196119159454,
      "loss": 0.5337,
      "step": 1026
    },
    {
      "epoch": 0.15693165756198188,
      "grad_norm": 0.3252275884151459,
      "learning_rate": 0.00019257309607776407,
      "loss": 0.8202,
      "step": 1027
    },
    {
      "epoch": 0.15708446346028956,
      "grad_norm": 0.3161613345146179,
      "learning_rate": 0.00019255420790012377,
      "loss": 0.5353,
      "step": 1028
    },
    {
      "epoch": 0.15723726935859725,
      "grad_norm": 0.22845958173274994,
      "learning_rate": 0.00019253529666337952,
      "loss": 0.6994,
      "step": 1029
    },
    {
      "epoch": 0.1573900752569049,
      "grad_norm": 0.2573609948158264,
      "learning_rate": 0.00019251636237224283,
      "loss": 0.7671,
      "step": 1030
    },
    {
      "epoch": 0.1575428811552126,
      "grad_norm": 0.24271678924560547,
      "learning_rate": 0.0001924974050314311,
      "loss": 0.5922,
      "step": 1031
    },
    {
      "epoch": 0.15769568705352027,
      "grad_norm": 0.2551860213279724,
      "learning_rate": 0.00019247842464566734,
      "loss": 0.7007,
      "step": 1032
    },
    {
      "epoch": 0.15784849295182793,
      "grad_norm": 0.2626005709171295,
      "learning_rate": 0.00019245942121968036,
      "loss": 0.7403,
      "step": 1033
    },
    {
      "epoch": 0.1580012988501356,
      "grad_norm": 0.30448147654533386,
      "learning_rate": 0.0001924403947582047,
      "loss": 0.7081,
      "step": 1034
    },
    {
      "epoch": 0.1581541047484433,
      "grad_norm": 0.24635186791419983,
      "learning_rate": 0.00019242134526598067,
      "loss": 0.5654,
      "step": 1035
    },
    {
      "epoch": 0.15830691064675095,
      "grad_norm": 0.29201674461364746,
      "learning_rate": 0.00019240227274775425,
      "loss": 0.7182,
      "step": 1036
    },
    {
      "epoch": 0.15845971654505864,
      "grad_norm": 0.46213141083717346,
      "learning_rate": 0.00019238317720827729,
      "loss": 0.8169,
      "step": 1037
    },
    {
      "epoch": 0.15861252244336632,
      "grad_norm": 0.2620154321193695,
      "learning_rate": 0.00019236405865230712,
      "loss": 0.9387,
      "step": 1038
    },
    {
      "epoch": 0.15876532834167398,
      "grad_norm": 0.26951172947883606,
      "learning_rate": 0.00019234491708460712,
      "loss": 0.511,
      "step": 1039
    },
    {
      "epoch": 0.15891813423998166,
      "grad_norm": 0.22812886536121368,
      "learning_rate": 0.0001923257525099462,
      "loss": 0.7245,
      "step": 1040
    },
    {
      "epoch": 0.15907094013828935,
      "grad_norm": 0.27627134323120117,
      "learning_rate": 0.00019230656493309902,
      "loss": 0.5724,
      "step": 1041
    },
    {
      "epoch": 0.159223746036597,
      "grad_norm": 0.26270973682403564,
      "learning_rate": 0.00019228735435884606,
      "loss": 0.6993,
      "step": 1042
    },
    {
      "epoch": 0.1593765519349047,
      "grad_norm": 0.27464500069618225,
      "learning_rate": 0.0001922681207919734,
      "loss": 0.8781,
      "step": 1043
    },
    {
      "epoch": 0.15952935783321237,
      "grad_norm": 0.3051292300224304,
      "learning_rate": 0.000192248864237273,
      "loss": 0.7656,
      "step": 1044
    },
    {
      "epoch": 0.15968216373152003,
      "grad_norm": 0.2516727149486542,
      "learning_rate": 0.00019222958469954242,
      "loss": 0.8011,
      "step": 1045
    },
    {
      "epoch": 0.1598349696298277,
      "grad_norm": 0.2554173767566681,
      "learning_rate": 0.00019221028218358504,
      "loss": 0.6839,
      "step": 1046
    },
    {
      "epoch": 0.1599877755281354,
      "grad_norm": 0.23768573999404907,
      "learning_rate": 0.00019219095669420984,
      "loss": 0.6611,
      "step": 1047
    },
    {
      "epoch": 0.16014058142644305,
      "grad_norm": 0.28291550278663635,
      "learning_rate": 0.00019217160823623169,
      "loss": 0.7191,
      "step": 1048
    },
    {
      "epoch": 0.16029338732475074,
      "grad_norm": 0.3698108494281769,
      "learning_rate": 0.00019215223681447104,
      "loss": 0.7449,
      "step": 1049
    },
    {
      "epoch": 0.16044619322305842,
      "grad_norm": 0.24757391214370728,
      "learning_rate": 0.00019213284243375415,
      "loss": 0.676,
      "step": 1050
    },
    {
      "epoch": 0.16059899912136608,
      "grad_norm": 0.25702232122421265,
      "learning_rate": 0.00019211342509891293,
      "loss": 0.7301,
      "step": 1051
    },
    {
      "epoch": 0.16075180501967376,
      "grad_norm": 0.37221047282218933,
      "learning_rate": 0.0001920939848147851,
      "loss": 0.667,
      "step": 1052
    },
    {
      "epoch": 0.16090461091798144,
      "grad_norm": 0.5544690489768982,
      "learning_rate": 0.000192074521586214,
      "loss": 0.8024,
      "step": 1053
    },
    {
      "epoch": 0.1610574168162891,
      "grad_norm": 0.2715505361557007,
      "learning_rate": 0.00019205503541804873,
      "loss": 0.6859,
      "step": 1054
    },
    {
      "epoch": 0.16121022271459678,
      "grad_norm": 0.2992199957370758,
      "learning_rate": 0.00019203552631514415,
      "loss": 0.8794,
      "step": 1055
    },
    {
      "epoch": 0.16136302861290447,
      "grad_norm": 0.2541504502296448,
      "learning_rate": 0.00019201599428236073,
      "loss": 0.6467,
      "step": 1056
    },
    {
      "epoch": 0.16151583451121213,
      "grad_norm": 0.27539893984794617,
      "learning_rate": 0.00019199643932456476,
      "loss": 0.6035,
      "step": 1057
    },
    {
      "epoch": 0.1616686404095198,
      "grad_norm": 0.261419415473938,
      "learning_rate": 0.00019197686144662815,
      "loss": 0.7197,
      "step": 1058
    },
    {
      "epoch": 0.1618214463078275,
      "grad_norm": 0.2520885765552521,
      "learning_rate": 0.00019195726065342856,
      "loss": 0.5276,
      "step": 1059
    },
    {
      "epoch": 0.16197425220613515,
      "grad_norm": 0.29256707429885864,
      "learning_rate": 0.00019193763694984943,
      "loss": 0.6546,
      "step": 1060
    },
    {
      "epoch": 0.16212705810444283,
      "grad_norm": 0.5188539624214172,
      "learning_rate": 0.00019191799034077981,
      "loss": 0.759,
      "step": 1061
    },
    {
      "epoch": 0.16227986400275052,
      "grad_norm": 0.23571883141994476,
      "learning_rate": 0.00019189832083111444,
      "loss": 0.6998,
      "step": 1062
    },
    {
      "epoch": 0.16243266990105817,
      "grad_norm": 0.24244998395442963,
      "learning_rate": 0.00019187862842575388,
      "loss": 0.6818,
      "step": 1063
    },
    {
      "epoch": 0.16258547579936586,
      "grad_norm": 0.29776662588119507,
      "learning_rate": 0.0001918589131296043,
      "loss": 0.6825,
      "step": 1064
    },
    {
      "epoch": 0.16273828169767354,
      "grad_norm": 0.29446274042129517,
      "learning_rate": 0.0001918391749475776,
      "loss": 0.7367,
      "step": 1065
    },
    {
      "epoch": 0.1628910875959812,
      "grad_norm": 0.27736350893974304,
      "learning_rate": 0.00019181941388459137,
      "loss": 0.7743,
      "step": 1066
    },
    {
      "epoch": 0.16304389349428888,
      "grad_norm": 0.2596782147884369,
      "learning_rate": 0.00019179962994556892,
      "loss": 0.6474,
      "step": 1067
    },
    {
      "epoch": 0.16319669939259657,
      "grad_norm": 0.2921583652496338,
      "learning_rate": 0.0001917798231354393,
      "loss": 0.7613,
      "step": 1068
    },
    {
      "epoch": 0.16334950529090422,
      "grad_norm": 0.24355407059192657,
      "learning_rate": 0.00019175999345913712,
      "loss": 0.5877,
      "step": 1069
    },
    {
      "epoch": 0.1635023111892119,
      "grad_norm": 0.2529122531414032,
      "learning_rate": 0.00019174014092160287,
      "loss": 0.7902,
      "step": 1070
    },
    {
      "epoch": 0.1636551170875196,
      "grad_norm": 0.3269736170768738,
      "learning_rate": 0.00019172026552778256,
      "loss": 0.8058,
      "step": 1071
    },
    {
      "epoch": 0.16380792298582725,
      "grad_norm": 0.2811448574066162,
      "learning_rate": 0.00019170036728262803,
      "loss": 1.0175,
      "step": 1072
    },
    {
      "epoch": 0.16396072888413493,
      "grad_norm": 0.23702581226825714,
      "learning_rate": 0.00019168044619109672,
      "loss": 0.5767,
      "step": 1073
    },
    {
      "epoch": 0.16411353478244262,
      "grad_norm": 0.36616837978363037,
      "learning_rate": 0.00019166050225815186,
      "loss": 0.7306,
      "step": 1074
    },
    {
      "epoch": 0.16426634068075027,
      "grad_norm": 0.24824507534503937,
      "learning_rate": 0.00019164053548876227,
      "loss": 0.6099,
      "step": 1075
    },
    {
      "epoch": 0.16441914657905796,
      "grad_norm": 0.24607868492603302,
      "learning_rate": 0.00019162054588790252,
      "loss": 0.7479,
      "step": 1076
    },
    {
      "epoch": 0.1645719524773656,
      "grad_norm": 0.2548847794532776,
      "learning_rate": 0.00019160053346055285,
      "loss": 0.6783,
      "step": 1077
    },
    {
      "epoch": 0.1647247583756733,
      "grad_norm": 0.3230314254760742,
      "learning_rate": 0.0001915804982116992,
      "loss": 0.8464,
      "step": 1078
    },
    {
      "epoch": 0.16487756427398098,
      "grad_norm": 0.2413746565580368,
      "learning_rate": 0.00019156044014633316,
      "loss": 0.7222,
      "step": 1079
    },
    {
      "epoch": 0.16503037017228864,
      "grad_norm": 0.2753642797470093,
      "learning_rate": 0.00019154035926945202,
      "loss": 0.8344,
      "step": 1080
    },
    {
      "epoch": 0.16518317607059632,
      "grad_norm": 1.3304742574691772,
      "learning_rate": 0.0001915202555860588,
      "loss": 0.715,
      "step": 1081
    },
    {
      "epoch": 0.165335981968904,
      "grad_norm": 0.3043583035469055,
      "learning_rate": 0.00019150012910116213,
      "loss": 0.6851,
      "step": 1082
    },
    {
      "epoch": 0.16548878786721166,
      "grad_norm": 0.252945214509964,
      "learning_rate": 0.00019147997981977638,
      "loss": 0.8384,
      "step": 1083
    },
    {
      "epoch": 0.16564159376551935,
      "grad_norm": 0.2606157064437866,
      "learning_rate": 0.00019145980774692157,
      "loss": 0.7957,
      "step": 1084
    },
    {
      "epoch": 0.16579439966382703,
      "grad_norm": 0.3107841908931732,
      "learning_rate": 0.00019143961288762336,
      "loss": 0.7824,
      "step": 1085
    },
    {
      "epoch": 0.1659472055621347,
      "grad_norm": 0.35161152482032776,
      "learning_rate": 0.0001914193952469132,
      "loss": 0.7518,
      "step": 1086
    },
    {
      "epoch": 0.16610001146044237,
      "grad_norm": 0.30095744132995605,
      "learning_rate": 0.0001913991548298281,
      "loss": 0.5003,
      "step": 1087
    },
    {
      "epoch": 0.16625281735875005,
      "grad_norm": 0.27559372782707214,
      "learning_rate": 0.0001913788916414108,
      "loss": 0.742,
      "step": 1088
    },
    {
      "epoch": 0.1664056232570577,
      "grad_norm": 0.2867778241634369,
      "learning_rate": 0.00019135860568670972,
      "loss": 0.7433,
      "step": 1089
    },
    {
      "epoch": 0.1665584291553654,
      "grad_norm": 0.30389800667762756,
      "learning_rate": 0.0001913382969707789,
      "loss": 0.7928,
      "step": 1090
    },
    {
      "epoch": 0.16671123505367308,
      "grad_norm": 0.2673511803150177,
      "learning_rate": 0.00019131796549867812,
      "loss": 0.7581,
      "step": 1091
    },
    {
      "epoch": 0.16686404095198074,
      "grad_norm": 0.3299412131309509,
      "learning_rate": 0.00019129761127547275,
      "loss": 0.7698,
      "step": 1092
    },
    {
      "epoch": 0.16701684685028842,
      "grad_norm": 0.33078551292419434,
      "learning_rate": 0.00019127723430623395,
      "loss": 0.6046,
      "step": 1093
    },
    {
      "epoch": 0.1671696527485961,
      "grad_norm": 0.28574293851852417,
      "learning_rate": 0.00019125683459603838,
      "loss": 0.5757,
      "step": 1094
    },
    {
      "epoch": 0.16732245864690376,
      "grad_norm": 0.32351842522621155,
      "learning_rate": 0.00019123641214996852,
      "loss": 0.5831,
      "step": 1095
    },
    {
      "epoch": 0.16747526454521144,
      "grad_norm": 0.2723073363304138,
      "learning_rate": 0.00019121596697311245,
      "loss": 0.8194,
      "step": 1096
    },
    {
      "epoch": 0.16762807044351913,
      "grad_norm": 0.32978907227516174,
      "learning_rate": 0.00019119549907056392,
      "loss": 0.6952,
      "step": 1097
    },
    {
      "epoch": 0.16778087634182678,
      "grad_norm": 0.30837321281433105,
      "learning_rate": 0.00019117500844742223,
      "loss": 0.7523,
      "step": 1098
    },
    {
      "epoch": 0.16793368224013447,
      "grad_norm": 0.24898523092269897,
      "learning_rate": 0.0001911544951087926,
      "loss": 0.7134,
      "step": 1099
    },
    {
      "epoch": 0.16808648813844215,
      "grad_norm": 0.27018532156944275,
      "learning_rate": 0.00019113395905978568,
      "loss": 0.613,
      "step": 1100
    },
    {
      "epoch": 0.1682392940367498,
      "grad_norm": 0.3074743151664734,
      "learning_rate": 0.00019111340030551784,
      "loss": 0.7982,
      "step": 1101
    },
    {
      "epoch": 0.1683920999350575,
      "grad_norm": 0.29580435156822205,
      "learning_rate": 0.00019109281885111115,
      "loss": 0.7358,
      "step": 1102
    },
    {
      "epoch": 0.16854490583336518,
      "grad_norm": 0.48277348279953003,
      "learning_rate": 0.00019107221470169333,
      "loss": 0.6511,
      "step": 1103
    },
    {
      "epoch": 0.16869771173167283,
      "grad_norm": 0.26997652649879456,
      "learning_rate": 0.00019105158786239765,
      "loss": 0.7542,
      "step": 1104
    },
    {
      "epoch": 0.16885051762998052,
      "grad_norm": 0.3362867832183838,
      "learning_rate": 0.0001910309383383632,
      "loss": 0.7393,
      "step": 1105
    },
    {
      "epoch": 0.1690033235282882,
      "grad_norm": 0.27180519700050354,
      "learning_rate": 0.00019101026613473456,
      "loss": 0.6968,
      "step": 1106
    },
    {
      "epoch": 0.16915612942659586,
      "grad_norm": 0.3135218918323517,
      "learning_rate": 0.00019098957125666212,
      "loss": 0.6301,
      "step": 1107
    },
    {
      "epoch": 0.16930893532490354,
      "grad_norm": 0.2497778683900833,
      "learning_rate": 0.00019096885370930173,
      "loss": 0.6232,
      "step": 1108
    },
    {
      "epoch": 0.16946174122321123,
      "grad_norm": 0.2656903862953186,
      "learning_rate": 0.0001909481134978151,
      "loss": 0.8126,
      "step": 1109
    },
    {
      "epoch": 0.16961454712151888,
      "grad_norm": 0.28536882996559143,
      "learning_rate": 0.00019092735062736945,
      "loss": 0.8282,
      "step": 1110
    },
    {
      "epoch": 0.16976735301982657,
      "grad_norm": 0.27915704250335693,
      "learning_rate": 0.00019090656510313762,
      "loss": 0.5578,
      "step": 1111
    },
    {
      "epoch": 0.16992015891813425,
      "grad_norm": 0.9698283672332764,
      "learning_rate": 0.00019088575693029818,
      "loss": 0.5033,
      "step": 1112
    },
    {
      "epoch": 0.1700729648164419,
      "grad_norm": 0.2613937258720398,
      "learning_rate": 0.00019086492611403535,
      "loss": 0.6993,
      "step": 1113
    },
    {
      "epoch": 0.1702257707147496,
      "grad_norm": 0.35720980167388916,
      "learning_rate": 0.00019084407265953889,
      "loss": 0.7705,
      "step": 1114
    },
    {
      "epoch": 0.17037857661305728,
      "grad_norm": 0.3005627691745758,
      "learning_rate": 0.0001908231965720043,
      "loss": 0.7495,
      "step": 1115
    },
    {
      "epoch": 0.17053138251136493,
      "grad_norm": 0.3507259488105774,
      "learning_rate": 0.00019080229785663268,
      "loss": 0.5559,
      "step": 1116
    },
    {
      "epoch": 0.17068418840967262,
      "grad_norm": 0.31441208720207214,
      "learning_rate": 0.00019078137651863078,
      "loss": 0.794,
      "step": 1117
    },
    {
      "epoch": 0.1708369943079803,
      "grad_norm": 0.2835392355918884,
      "learning_rate": 0.00019076043256321094,
      "loss": 0.6644,
      "step": 1118
    },
    {
      "epoch": 0.17098980020628796,
      "grad_norm": 0.2525550127029419,
      "learning_rate": 0.00019073946599559123,
      "loss": 0.7448,
      "step": 1119
    },
    {
      "epoch": 0.17114260610459564,
      "grad_norm": 0.3684603273868561,
      "learning_rate": 0.00019071847682099522,
      "loss": 0.8866,
      "step": 1120
    },
    {
      "epoch": 0.17129541200290332,
      "grad_norm": 0.350276917219162,
      "learning_rate": 0.00019069746504465224,
      "loss": 0.6864,
      "step": 1121
    },
    {
      "epoch": 0.17144821790121098,
      "grad_norm": 0.2641281187534332,
      "learning_rate": 0.00019067643067179714,
      "loss": 0.712,
      "step": 1122
    },
    {
      "epoch": 0.17160102379951866,
      "grad_norm": 0.28185218572616577,
      "learning_rate": 0.00019065537370767055,
      "loss": 0.6921,
      "step": 1123
    },
    {
      "epoch": 0.17175382969782635,
      "grad_norm": 0.2548218071460724,
      "learning_rate": 0.00019063429415751857,
      "loss": 0.7298,
      "step": 1124
    },
    {
      "epoch": 0.171906635596134,
      "grad_norm": 0.2929299473762512,
      "learning_rate": 0.000190613192026593,
      "loss": 0.6102,
      "step": 1125
    },
    {
      "epoch": 0.1720594414944417,
      "grad_norm": 0.22588087618350983,
      "learning_rate": 0.00019059206732015128,
      "loss": 0.6073,
      "step": 1126
    },
    {
      "epoch": 0.17221224739274937,
      "grad_norm": 0.2806350588798523,
      "learning_rate": 0.00019057092004345642,
      "loss": 0.7085,
      "step": 1127
    },
    {
      "epoch": 0.17236505329105703,
      "grad_norm": 0.2670913338661194,
      "learning_rate": 0.0001905497502017771,
      "loss": 0.6747,
      "step": 1128
    },
    {
      "epoch": 0.17251785918936471,
      "grad_norm": 0.2958940267562866,
      "learning_rate": 0.00019052855780038764,
      "loss": 0.5434,
      "step": 1129
    },
    {
      "epoch": 0.17267066508767237,
      "grad_norm": 0.46745023131370544,
      "learning_rate": 0.00019050734284456792,
      "loss": 0.7918,
      "step": 1130
    },
    {
      "epoch": 0.17282347098598005,
      "grad_norm": 0.24842768907546997,
      "learning_rate": 0.00019048610533960346,
      "loss": 0.7636,
      "step": 1131
    },
    {
      "epoch": 0.17297627688428774,
      "grad_norm": 0.23693160712718964,
      "learning_rate": 0.00019046484529078542,
      "loss": 0.5601,
      "step": 1132
    },
    {
      "epoch": 0.1731290827825954,
      "grad_norm": 0.27304303646087646,
      "learning_rate": 0.00019044356270341055,
      "loss": 0.6694,
      "step": 1133
    },
    {
      "epoch": 0.17328188868090308,
      "grad_norm": 0.28675514459609985,
      "learning_rate": 0.00019042225758278124,
      "loss": 0.7822,
      "step": 1134
    },
    {
      "epoch": 0.17343469457921076,
      "grad_norm": 0.32071536779403687,
      "learning_rate": 0.0001904009299342055,
      "loss": 0.6399,
      "step": 1135
    },
    {
      "epoch": 0.17358750047751842,
      "grad_norm": 0.2699134945869446,
      "learning_rate": 0.0001903795797629969,
      "loss": 0.699,
      "step": 1136
    },
    {
      "epoch": 0.1737403063758261,
      "grad_norm": 0.27211201190948486,
      "learning_rate": 0.00019035820707447468,
      "loss": 0.6925,
      "step": 1137
    },
    {
      "epoch": 0.1738931122741338,
      "grad_norm": 0.27725741267204285,
      "learning_rate": 0.00019033681187396364,
      "loss": 0.544,
      "step": 1138
    },
    {
      "epoch": 0.17404591817244144,
      "grad_norm": 0.3354361951351166,
      "learning_rate": 0.0001903153941667942,
      "loss": 0.65,
      "step": 1139
    },
    {
      "epoch": 0.17419872407074913,
      "grad_norm": 0.2714371383190155,
      "learning_rate": 0.0001902939539583025,
      "loss": 0.679,
      "step": 1140
    },
    {
      "epoch": 0.1743515299690568,
      "grad_norm": 0.33846041560173035,
      "learning_rate": 0.00019027249125383008,
      "loss": 0.7282,
      "step": 1141
    },
    {
      "epoch": 0.17450433586736447,
      "grad_norm": 0.29932504892349243,
      "learning_rate": 0.00019025100605872425,
      "loss": 0.7207,
      "step": 1142
    },
    {
      "epoch": 0.17465714176567215,
      "grad_norm": 0.26585763692855835,
      "learning_rate": 0.00019022949837833782,
      "loss": 0.5864,
      "step": 1143
    },
    {
      "epoch": 0.17480994766397984,
      "grad_norm": 0.2552662789821625,
      "learning_rate": 0.00019020796821802934,
      "loss": 0.6423,
      "step": 1144
    },
    {
      "epoch": 0.1749627535622875,
      "grad_norm": 0.333668977022171,
      "learning_rate": 0.00019018641558316276,
      "loss": 0.7273,
      "step": 1145
    },
    {
      "epoch": 0.17511555946059518,
      "grad_norm": 0.27263349294662476,
      "learning_rate": 0.0001901648404791078,
      "loss": 0.8035,
      "step": 1146
    },
    {
      "epoch": 0.17526836535890286,
      "grad_norm": 0.3458087146282196,
      "learning_rate": 0.00019014324291123966,
      "loss": 0.7291,
      "step": 1147
    },
    {
      "epoch": 0.17542117125721052,
      "grad_norm": 0.27026665210723877,
      "learning_rate": 0.00019012162288493926,
      "loss": 0.7192,
      "step": 1148
    },
    {
      "epoch": 0.1755739771555182,
      "grad_norm": 0.4639197587966919,
      "learning_rate": 0.00019009998040559305,
      "loss": 0.7784,
      "step": 1149
    },
    {
      "epoch": 0.17572678305382589,
      "grad_norm": 0.4161984324455261,
      "learning_rate": 0.000190078315478593,
      "loss": 0.6429,
      "step": 1150
    },
    {
      "epoch": 0.17587958895213354,
      "grad_norm": 0.33808472752571106,
      "learning_rate": 0.0001900566281093368,
      "loss": 0.8773,
      "step": 1151
    },
    {
      "epoch": 0.17603239485044123,
      "grad_norm": 0.3738580346107483,
      "learning_rate": 0.00019003491830322768,
      "loss": 0.7163,
      "step": 1152
    },
    {
      "epoch": 0.1761852007487489,
      "grad_norm": 0.2702450752258301,
      "learning_rate": 0.00019001318606567442,
      "loss": 0.7637,
      "step": 1153
    },
    {
      "epoch": 0.17633800664705657,
      "grad_norm": 0.2791532278060913,
      "learning_rate": 0.00018999143140209146,
      "loss": 0.7335,
      "step": 1154
    },
    {
      "epoch": 0.17649081254536425,
      "grad_norm": 0.29353049397468567,
      "learning_rate": 0.00018996965431789878,
      "loss": 0.7129,
      "step": 1155
    },
    {
      "epoch": 0.17664361844367193,
      "grad_norm": 0.2993602752685547,
      "learning_rate": 0.00018994785481852192,
      "loss": 1.0111,
      "step": 1156
    },
    {
      "epoch": 0.1767964243419796,
      "grad_norm": 0.24442961812019348,
      "learning_rate": 0.0001899260329093921,
      "loss": 0.7211,
      "step": 1157
    },
    {
      "epoch": 0.17694923024028728,
      "grad_norm": 0.3937727212905884,
      "learning_rate": 0.00018990418859594606,
      "loss": 0.8264,
      "step": 1158
    },
    {
      "epoch": 0.17710203613859496,
      "grad_norm": 0.34876108169555664,
      "learning_rate": 0.00018988232188362609,
      "loss": 0.6769,
      "step": 1159
    },
    {
      "epoch": 0.17725484203690262,
      "grad_norm": 0.26349112391471863,
      "learning_rate": 0.00018986043277788013,
      "loss": 0.6767,
      "step": 1160
    },
    {
      "epoch": 0.1774076479352103,
      "grad_norm": 0.30671951174736023,
      "learning_rate": 0.00018983852128416162,
      "loss": 0.6329,
      "step": 1161
    },
    {
      "epoch": 0.17756045383351798,
      "grad_norm": 0.30573394894599915,
      "learning_rate": 0.00018981658740792968,
      "loss": 0.7471,
      "step": 1162
    },
    {
      "epoch": 0.17771325973182564,
      "grad_norm": 0.30444616079330444,
      "learning_rate": 0.00018979463115464894,
      "loss": 0.6539,
      "step": 1163
    },
    {
      "epoch": 0.17786606563013332,
      "grad_norm": 0.26366716623306274,
      "learning_rate": 0.00018977265252978959,
      "loss": 0.7571,
      "step": 1164
    },
    {
      "epoch": 0.178018871528441,
      "grad_norm": 0.3460015058517456,
      "learning_rate": 0.00018975065153882745,
      "loss": 0.8319,
      "step": 1165
    },
    {
      "epoch": 0.17817167742674866,
      "grad_norm": 0.2514568269252777,
      "learning_rate": 0.00018972862818724385,
      "loss": 0.879,
      "step": 1166
    },
    {
      "epoch": 0.17832448332505635,
      "grad_norm": 0.30631664395332336,
      "learning_rate": 0.00018970658248052574,
      "loss": 0.7929,
      "step": 1167
    },
    {
      "epoch": 0.17847728922336403,
      "grad_norm": 0.3222149908542633,
      "learning_rate": 0.00018968451442416564,
      "loss": 0.698,
      "step": 1168
    },
    {
      "epoch": 0.1786300951216717,
      "grad_norm": 0.26225098967552185,
      "learning_rate": 0.00018966242402366162,
      "loss": 0.7549,
      "step": 1169
    },
    {
      "epoch": 0.17878290101997937,
      "grad_norm": 0.4013647139072418,
      "learning_rate": 0.00018964031128451727,
      "loss": 0.6848,
      "step": 1170
    },
    {
      "epoch": 0.17893570691828706,
      "grad_norm": 0.36838847398757935,
      "learning_rate": 0.00018961817621224186,
      "loss": 0.8063,
      "step": 1171
    },
    {
      "epoch": 0.1790885128165947,
      "grad_norm": 0.2401532381772995,
      "learning_rate": 0.00018959601881235008,
      "loss": 0.5936,
      "step": 1172
    },
    {
      "epoch": 0.1792413187149024,
      "grad_norm": 0.24672825634479523,
      "learning_rate": 0.00018957383909036233,
      "loss": 0.6094,
      "step": 1173
    },
    {
      "epoch": 0.17939412461321008,
      "grad_norm": 0.31857630610466003,
      "learning_rate": 0.00018955163705180444,
      "loss": 0.7282,
      "step": 1174
    },
    {
      "epoch": 0.17954693051151774,
      "grad_norm": 0.2470935434103012,
      "learning_rate": 0.00018952941270220793,
      "loss": 0.7646,
      "step": 1175
    },
    {
      "epoch": 0.17969973640982542,
      "grad_norm": 0.24539603292942047,
      "learning_rate": 0.00018950716604710982,
      "loss": 0.7425,
      "step": 1176
    },
    {
      "epoch": 0.1798525423081331,
      "grad_norm": 0.2655848562717438,
      "learning_rate": 0.00018948489709205254,
      "loss": 0.6178,
      "step": 1177
    },
    {
      "epoch": 0.18000534820644076,
      "grad_norm": 0.27233999967575073,
      "learning_rate": 0.00018946260584258438,
      "loss": 0.7679,
      "step": 1178
    },
    {
      "epoch": 0.18015815410474845,
      "grad_norm": 0.24756716191768646,
      "learning_rate": 0.0001894402923042589,
      "loss": 0.6063,
      "step": 1179
    },
    {
      "epoch": 0.18031096000305613,
      "grad_norm": 0.24609854817390442,
      "learning_rate": 0.0001894179564826354,
      "loss": 0.8233,
      "step": 1180
    },
    {
      "epoch": 0.1804637659013638,
      "grad_norm": 0.2573990523815155,
      "learning_rate": 0.00018939559838327866,
      "loss": 0.6456,
      "step": 1181
    },
    {
      "epoch": 0.18061657179967147,
      "grad_norm": 0.27310270071029663,
      "learning_rate": 0.00018937321801175896,
      "loss": 0.8405,
      "step": 1182
    },
    {
      "epoch": 0.18076937769797916,
      "grad_norm": 0.3258965313434601,
      "learning_rate": 0.0001893508153736522,
      "loss": 0.8264,
      "step": 1183
    },
    {
      "epoch": 0.1809221835962868,
      "grad_norm": 0.246231347322464,
      "learning_rate": 0.00018932839047453986,
      "loss": 0.892,
      "step": 1184
    },
    {
      "epoch": 0.1810749894945945,
      "grad_norm": 0.43690553307533264,
      "learning_rate": 0.00018930594332000885,
      "loss": 0.649,
      "step": 1185
    },
    {
      "epoch": 0.18122779539290215,
      "grad_norm": 0.25682827830314636,
      "learning_rate": 0.00018928347391565173,
      "loss": 0.7664,
      "step": 1186
    },
    {
      "epoch": 0.18138060129120984,
      "grad_norm": 0.3272826075553894,
      "learning_rate": 0.00018926098226706655,
      "loss": 0.7886,
      "step": 1187
    },
    {
      "epoch": 0.18153340718951752,
      "grad_norm": 0.2642538845539093,
      "learning_rate": 0.00018923846837985692,
      "loss": 0.8355,
      "step": 1188
    },
    {
      "epoch": 0.18168621308782518,
      "grad_norm": 0.2583806812763214,
      "learning_rate": 0.000189215932259632,
      "loss": 0.6944,
      "step": 1189
    },
    {
      "epoch": 0.18183901898613286,
      "grad_norm": 0.3627317249774933,
      "learning_rate": 0.00018919337391200644,
      "loss": 0.625,
      "step": 1190
    },
    {
      "epoch": 0.18199182488444055,
      "grad_norm": 0.2754598557949066,
      "learning_rate": 0.00018917079334260044,
      "loss": 0.7383,
      "step": 1191
    },
    {
      "epoch": 0.1821446307827482,
      "grad_norm": 0.2284909188747406,
      "learning_rate": 0.00018914819055703986,
      "loss": 0.68,
      "step": 1192
    },
    {
      "epoch": 0.18229743668105589,
      "grad_norm": 0.22947415709495544,
      "learning_rate": 0.0001891255655609559,
      "loss": 0.5471,
      "step": 1193
    },
    {
      "epoch": 0.18245024257936357,
      "grad_norm": 0.32317447662353516,
      "learning_rate": 0.0001891029183599854,
      "loss": 0.8991,
      "step": 1194
    },
    {
      "epoch": 0.18260304847767123,
      "grad_norm": 0.24611344933509827,
      "learning_rate": 0.0001890802489597708,
      "loss": 0.8077,
      "step": 1195
    },
    {
      "epoch": 0.1827558543759789,
      "grad_norm": 0.27162209153175354,
      "learning_rate": 0.0001890575573659599,
      "loss": 0.8446,
      "step": 1196
    },
    {
      "epoch": 0.1829086602742866,
      "grad_norm": 0.2550401985645294,
      "learning_rate": 0.00018903484358420616,
      "loss": 0.5734,
      "step": 1197
    },
    {
      "epoch": 0.18306146617259425,
      "grad_norm": 0.2913491427898407,
      "learning_rate": 0.0001890121076201685,
      "loss": 0.6703,
      "step": 1198
    },
    {
      "epoch": 0.18321427207090193,
      "grad_norm": 0.2754542827606201,
      "learning_rate": 0.00018898934947951147,
      "loss": 0.7495,
      "step": 1199
    },
    {
      "epoch": 0.18336707796920962,
      "grad_norm": 0.2145329713821411,
      "learning_rate": 0.00018896656916790497,
      "loss": 0.6425,
      "step": 1200
    },
    {
      "epoch": 0.18351988386751728,
      "grad_norm": 0.2763057053089142,
      "learning_rate": 0.0001889437666910246,
      "loss": 0.7225,
      "step": 1201
    },
    {
      "epoch": 0.18367268976582496,
      "grad_norm": 0.3310900926589966,
      "learning_rate": 0.00018892094205455134,
      "loss": 0.5517,
      "step": 1202
    },
    {
      "epoch": 0.18382549566413264,
      "grad_norm": 0.2717922031879425,
      "learning_rate": 0.0001888980952641718,
      "loss": 0.7338,
      "step": 1203
    },
    {
      "epoch": 0.1839783015624403,
      "grad_norm": 0.2514691650867462,
      "learning_rate": 0.00018887522632557807,
      "loss": 0.5614,
      "step": 1204
    },
    {
      "epoch": 0.18413110746074798,
      "grad_norm": 0.24077638983726501,
      "learning_rate": 0.00018885233524446773,
      "loss": 0.6412,
      "step": 1205
    },
    {
      "epoch": 0.18428391335905567,
      "grad_norm": 0.2869153320789337,
      "learning_rate": 0.00018882942202654392,
      "loss": 0.6988,
      "step": 1206
    },
    {
      "epoch": 0.18443671925736332,
      "grad_norm": 0.2389439344406128,
      "learning_rate": 0.00018880648667751526,
      "loss": 0.6581,
      "step": 1207
    },
    {
      "epoch": 0.184589525155671,
      "grad_norm": 0.2723163962364197,
      "learning_rate": 0.00018878352920309593,
      "loss": 0.942,
      "step": 1208
    },
    {
      "epoch": 0.1847423310539787,
      "grad_norm": 0.26030030846595764,
      "learning_rate": 0.00018876054960900555,
      "loss": 0.6953,
      "step": 1209
    },
    {
      "epoch": 0.18489513695228635,
      "grad_norm": 0.3128332793712616,
      "learning_rate": 0.00018873754790096932,
      "loss": 0.6775,
      "step": 1210
    },
    {
      "epoch": 0.18504794285059403,
      "grad_norm": 0.29483985900878906,
      "learning_rate": 0.0001887145240847179,
      "loss": 0.7367,
      "step": 1211
    },
    {
      "epoch": 0.18520074874890172,
      "grad_norm": 0.30954962968826294,
      "learning_rate": 0.00018869147816598752,
      "loss": 0.6747,
      "step": 1212
    },
    {
      "epoch": 0.18535355464720937,
      "grad_norm": 0.2984929084777832,
      "learning_rate": 0.00018866841015051985,
      "loss": 0.847,
      "step": 1213
    },
    {
      "epoch": 0.18550636054551706,
      "grad_norm": 0.26335060596466064,
      "learning_rate": 0.00018864532004406206,
      "loss": 0.6406,
      "step": 1214
    },
    {
      "epoch": 0.18565916644382474,
      "grad_norm": 0.3111365735530853,
      "learning_rate": 0.0001886222078523669,
      "loss": 0.8221,
      "step": 1215
    },
    {
      "epoch": 0.1858119723421324,
      "grad_norm": 0.2846592664718628,
      "learning_rate": 0.00018859907358119259,
      "loss": 0.6764,
      "step": 1216
    },
    {
      "epoch": 0.18596477824044008,
      "grad_norm": 0.3108222186565399,
      "learning_rate": 0.00018857591723630282,
      "loss": 0.6278,
      "step": 1217
    },
    {
      "epoch": 0.18611758413874777,
      "grad_norm": 0.27452194690704346,
      "learning_rate": 0.0001885527388234668,
      "loss": 0.6661,
      "step": 1218
    },
    {
      "epoch": 0.18627039003705542,
      "grad_norm": 0.3490808308124542,
      "learning_rate": 0.00018852953834845923,
      "loss": 0.7153,
      "step": 1219
    },
    {
      "epoch": 0.1864231959353631,
      "grad_norm": 0.4313880503177643,
      "learning_rate": 0.00018850631581706032,
      "loss": 0.5908,
      "step": 1220
    },
    {
      "epoch": 0.1865760018336708,
      "grad_norm": 0.2889242470264435,
      "learning_rate": 0.00018848307123505578,
      "loss": 0.796,
      "step": 1221
    },
    {
      "epoch": 0.18672880773197845,
      "grad_norm": 0.37442779541015625,
      "learning_rate": 0.00018845980460823676,
      "loss": 0.6915,
      "step": 1222
    },
    {
      "epoch": 0.18688161363028613,
      "grad_norm": 0.3386521339416504,
      "learning_rate": 0.00018843651594239997,
      "loss": 0.8258,
      "step": 1223
    },
    {
      "epoch": 0.18703441952859381,
      "grad_norm": 0.29641178250312805,
      "learning_rate": 0.0001884132052433476,
      "loss": 0.6895,
      "step": 1224
    },
    {
      "epoch": 0.18718722542690147,
      "grad_norm": 0.29515600204467773,
      "learning_rate": 0.00018838987251688734,
      "loss": 0.6559,
      "step": 1225
    },
    {
      "epoch": 0.18734003132520916,
      "grad_norm": 0.2754113972187042,
      "learning_rate": 0.0001883665177688323,
      "loss": 0.9137,
      "step": 1226
    },
    {
      "epoch": 0.18749283722351684,
      "grad_norm": 0.32451876997947693,
      "learning_rate": 0.0001883431410050011,
      "loss": 0.5024,
      "step": 1227
    },
    {
      "epoch": 0.1876456431218245,
      "grad_norm": 0.2551485300064087,
      "learning_rate": 0.00018831974223121792,
      "loss": 0.7639,
      "step": 1228
    },
    {
      "epoch": 0.18779844902013218,
      "grad_norm": 0.2415483593940735,
      "learning_rate": 0.0001882963214533123,
      "loss": 0.5037,
      "step": 1229
    },
    {
      "epoch": 0.18795125491843986,
      "grad_norm": 0.2431052029132843,
      "learning_rate": 0.00018827287867711942,
      "loss": 0.7216,
      "step": 1230
    },
    {
      "epoch": 0.18810406081674752,
      "grad_norm": 0.27758708596229553,
      "learning_rate": 0.00018824941390847976,
      "loss": 0.7804,
      "step": 1231
    },
    {
      "epoch": 0.1882568667150552,
      "grad_norm": 0.3011445105075836,
      "learning_rate": 0.00018822592715323944,
      "loss": 0.7279,
      "step": 1232
    },
    {
      "epoch": 0.1884096726133629,
      "grad_norm": 0.36954644322395325,
      "learning_rate": 0.00018820241841724996,
      "loss": 0.7755,
      "step": 1233
    },
    {
      "epoch": 0.18856247851167054,
      "grad_norm": 0.29674795269966125,
      "learning_rate": 0.0001881788877063683,
      "loss": 0.7926,
      "step": 1234
    },
    {
      "epoch": 0.18871528440997823,
      "grad_norm": 0.29829445481300354,
      "learning_rate": 0.00018815533502645698,
      "loss": 0.7176,
      "step": 1235
    },
    {
      "epoch": 0.1888680903082859,
      "grad_norm": 0.29634547233581543,
      "learning_rate": 0.00018813176038338393,
      "loss": 0.5793,
      "step": 1236
    },
    {
      "epoch": 0.18902089620659357,
      "grad_norm": 0.2551177442073822,
      "learning_rate": 0.00018810816378302258,
      "loss": 0.8047,
      "step": 1237
    },
    {
      "epoch": 0.18917370210490125,
      "grad_norm": 0.3007522225379944,
      "learning_rate": 0.00018808454523125184,
      "loss": 0.646,
      "step": 1238
    },
    {
      "epoch": 0.18932650800320894,
      "grad_norm": 0.32498404383659363,
      "learning_rate": 0.00018806090473395603,
      "loss": 0.9898,
      "step": 1239
    },
    {
      "epoch": 0.1894793139015166,
      "grad_norm": 0.28269198536872864,
      "learning_rate": 0.00018803724229702503,
      "loss": 0.6897,
      "step": 1240
    },
    {
      "epoch": 0.18963211979982428,
      "grad_norm": 0.4503588080406189,
      "learning_rate": 0.00018801355792635413,
      "loss": 0.5233,
      "step": 1241
    },
    {
      "epoch": 0.18978492569813193,
      "grad_norm": 0.29055777192115784,
      "learning_rate": 0.00018798985162784404,
      "loss": 0.7211,
      "step": 1242
    },
    {
      "epoch": 0.18993773159643962,
      "grad_norm": 0.27893009781837463,
      "learning_rate": 0.00018796612340740105,
      "loss": 0.7643,
      "step": 1243
    },
    {
      "epoch": 0.1900905374947473,
      "grad_norm": 0.335261732339859,
      "learning_rate": 0.00018794237327093684,
      "loss": 0.6786,
      "step": 1244
    },
    {
      "epoch": 0.19024334339305496,
      "grad_norm": 0.5441724061965942,
      "learning_rate": 0.0001879186012243685,
      "loss": 0.6558,
      "step": 1245
    },
    {
      "epoch": 0.19039614929136264,
      "grad_norm": 0.25137859582901,
      "learning_rate": 0.00018789480727361872,
      "loss": 0.6788,
      "step": 1246
    },
    {
      "epoch": 0.19054895518967033,
      "grad_norm": 0.293194055557251,
      "learning_rate": 0.00018787099142461547,
      "loss": 0.7672,
      "step": 1247
    },
    {
      "epoch": 0.19070176108797798,
      "grad_norm": 0.3383125066757202,
      "learning_rate": 0.00018784715368329235,
      "loss": 0.8097,
      "step": 1248
    },
    {
      "epoch": 0.19085456698628567,
      "grad_norm": 0.3129540979862213,
      "learning_rate": 0.0001878232940555883,
      "loss": 0.7752,
      "step": 1249
    },
    {
      "epoch": 0.19100737288459335,
      "grad_norm": 0.31098222732543945,
      "learning_rate": 0.00018779941254744772,
      "loss": 0.599,
      "step": 1250
    },
    {
      "epoch": 0.191160178782901,
      "grad_norm": 0.37664595246315,
      "learning_rate": 0.00018777550916482055,
      "loss": 0.8034,
      "step": 1251
    },
    {
      "epoch": 0.1913129846812087,
      "grad_norm": 0.3807818293571472,
      "learning_rate": 0.00018775158391366205,
      "loss": 0.6301,
      "step": 1252
    },
    {
      "epoch": 0.19146579057951638,
      "grad_norm": 0.2625497281551361,
      "learning_rate": 0.00018772763679993304,
      "loss": 0.6262,
      "step": 1253
    },
    {
      "epoch": 0.19161859647782403,
      "grad_norm": 0.43181419372558594,
      "learning_rate": 0.00018770366782959973,
      "loss": 0.6965,
      "step": 1254
    },
    {
      "epoch": 0.19177140237613172,
      "grad_norm": 0.25425031781196594,
      "learning_rate": 0.00018767967700863378,
      "loss": 0.7899,
      "step": 1255
    },
    {
      "epoch": 0.1919242082744394,
      "grad_norm": 0.28523769974708557,
      "learning_rate": 0.0001876556643430123,
      "loss": 0.6635,
      "step": 1256
    },
    {
      "epoch": 0.19207701417274706,
      "grad_norm": 0.2615564167499542,
      "learning_rate": 0.00018763162983871786,
      "loss": 0.5732,
      "step": 1257
    },
    {
      "epoch": 0.19222982007105474,
      "grad_norm": 0.2762017250061035,
      "learning_rate": 0.00018760757350173846,
      "loss": 0.6992,
      "step": 1258
    },
    {
      "epoch": 0.19238262596936243,
      "grad_norm": 0.3806002140045166,
      "learning_rate": 0.00018758349533806753,
      "loss": 0.7677,
      "step": 1259
    },
    {
      "epoch": 0.19253543186767008,
      "grad_norm": 0.5583063960075378,
      "learning_rate": 0.00018755939535370391,
      "loss": 0.6268,
      "step": 1260
    },
    {
      "epoch": 0.19268823776597777,
      "grad_norm": 0.3534693121910095,
      "learning_rate": 0.00018753527355465193,
      "loss": 0.8504,
      "step": 1261
    },
    {
      "epoch": 0.19284104366428545,
      "grad_norm": 0.26150697469711304,
      "learning_rate": 0.00018751112994692132,
      "loss": 0.7045,
      "step": 1262
    },
    {
      "epoch": 0.1929938495625931,
      "grad_norm": 0.28841933608055115,
      "learning_rate": 0.0001874869645365273,
      "loss": 0.8777,
      "step": 1263
    },
    {
      "epoch": 0.1931466554609008,
      "grad_norm": 0.32916703820228577,
      "learning_rate": 0.00018746277732949044,
      "loss": 0.7558,
      "step": 1264
    },
    {
      "epoch": 0.19329946135920847,
      "grad_norm": 0.24659676849842072,
      "learning_rate": 0.0001874385683318368,
      "loss": 0.6683,
      "step": 1265
    },
    {
      "epoch": 0.19345226725751613,
      "grad_norm": 0.21921052038669586,
      "learning_rate": 0.00018741433754959784,
      "loss": 0.6024,
      "step": 1266
    },
    {
      "epoch": 0.19360507315582381,
      "grad_norm": 0.3062233328819275,
      "learning_rate": 0.00018739008498881048,
      "loss": 0.5211,
      "step": 1267
    },
    {
      "epoch": 0.1937578790541315,
      "grad_norm": 0.414465993642807,
      "learning_rate": 0.000187365810655517,
      "loss": 0.6716,
      "step": 1268
    },
    {
      "epoch": 0.19391068495243916,
      "grad_norm": 0.41710537672042847,
      "learning_rate": 0.00018734151455576515,
      "loss": 0.7272,
      "step": 1269
    },
    {
      "epoch": 0.19406349085074684,
      "grad_norm": 0.29968956112861633,
      "learning_rate": 0.00018731719669560812,
      "loss": 0.8149,
      "step": 1270
    },
    {
      "epoch": 0.19421629674905452,
      "grad_norm": 0.6653236746788025,
      "learning_rate": 0.0001872928570811045,
      "loss": 0.5673,
      "step": 1271
    },
    {
      "epoch": 0.19436910264736218,
      "grad_norm": 0.24293015897274017,
      "learning_rate": 0.0001872684957183183,
      "loss": 0.6617,
      "step": 1272
    },
    {
      "epoch": 0.19452190854566986,
      "grad_norm": 0.2898862659931183,
      "learning_rate": 0.00018724411261331896,
      "loss": 0.8086,
      "step": 1273
    },
    {
      "epoch": 0.19467471444397755,
      "grad_norm": 0.26143643260002136,
      "learning_rate": 0.00018721970777218127,
      "loss": 0.6261,
      "step": 1274
    },
    {
      "epoch": 0.1948275203422852,
      "grad_norm": 0.3271556496620178,
      "learning_rate": 0.00018719528120098556,
      "loss": 0.7828,
      "step": 1275
    },
    {
      "epoch": 0.1949803262405929,
      "grad_norm": 0.2851318418979645,
      "learning_rate": 0.00018717083290581746,
      "loss": 0.6906,
      "step": 1276
    },
    {
      "epoch": 0.19513313213890057,
      "grad_norm": 0.26299694180488586,
      "learning_rate": 0.0001871463628927681,
      "loss": 0.7106,
      "step": 1277
    },
    {
      "epoch": 0.19528593803720823,
      "grad_norm": 0.4681147634983063,
      "learning_rate": 0.00018712187116793393,
      "loss": 0.7675,
      "step": 1278
    },
    {
      "epoch": 0.1954387439355159,
      "grad_norm": 0.2557898461818695,
      "learning_rate": 0.0001870973577374169,
      "loss": 0.7315,
      "step": 1279
    },
    {
      "epoch": 0.1955915498338236,
      "grad_norm": 0.31964412331581116,
      "learning_rate": 0.0001870728226073243,
      "loss": 0.8681,
      "step": 1280
    },
    {
      "epoch": 0.19574435573213125,
      "grad_norm": 0.25558051466941833,
      "learning_rate": 0.00018704826578376884,
      "loss": 0.7058,
      "step": 1281
    },
    {
      "epoch": 0.19589716163043894,
      "grad_norm": 0.28534409403800964,
      "learning_rate": 0.0001870236872728687,
      "loss": 0.7881,
      "step": 1282
    },
    {
      "epoch": 0.19604996752874662,
      "grad_norm": 0.24193304777145386,
      "learning_rate": 0.00018699908708074735,
      "loss": 0.7273,
      "step": 1283
    },
    {
      "epoch": 0.19620277342705428,
      "grad_norm": 0.28959253430366516,
      "learning_rate": 0.00018697446521353375,
      "loss": 0.6541,
      "step": 1284
    },
    {
      "epoch": 0.19635557932536196,
      "grad_norm": 0.263320654630661,
      "learning_rate": 0.00018694982167736222,
      "loss": 0.5601,
      "step": 1285
    },
    {
      "epoch": 0.19650838522366965,
      "grad_norm": 0.23733118176460266,
      "learning_rate": 0.0001869251564783725,
      "loss": 0.554,
      "step": 1286
    },
    {
      "epoch": 0.1966611911219773,
      "grad_norm": 0.2797900140285492,
      "learning_rate": 0.00018690046962270974,
      "loss": 0.9695,
      "step": 1287
    },
    {
      "epoch": 0.196813997020285,
      "grad_norm": 0.28238213062286377,
      "learning_rate": 0.00018687576111652438,
      "loss": 0.6728,
      "step": 1288
    },
    {
      "epoch": 0.19696680291859267,
      "grad_norm": 0.3334377408027649,
      "learning_rate": 0.00018685103096597244,
      "loss": 0.6607,
      "step": 1289
    },
    {
      "epoch": 0.19711960881690033,
      "grad_norm": 0.2756267488002777,
      "learning_rate": 0.00018682627917721516,
      "loss": 0.6685,
      "step": 1290
    },
    {
      "epoch": 0.197272414715208,
      "grad_norm": 0.25085243582725525,
      "learning_rate": 0.00018680150575641928,
      "loss": 0.6337,
      "step": 1291
    },
    {
      "epoch": 0.1974252206135157,
      "grad_norm": 0.2986142039299011,
      "learning_rate": 0.00018677671070975688,
      "loss": 0.6334,
      "step": 1292
    },
    {
      "epoch": 0.19757802651182335,
      "grad_norm": 0.35889819264411926,
      "learning_rate": 0.00018675189404340542,
      "loss": 0.6769,
      "step": 1293
    },
    {
      "epoch": 0.19773083241013104,
      "grad_norm": 0.35846251249313354,
      "learning_rate": 0.00018672705576354775,
      "loss": 0.7592,
      "step": 1294
    },
    {
      "epoch": 0.19788363830843872,
      "grad_norm": 0.2749708592891693,
      "learning_rate": 0.00018670219587637219,
      "loss": 0.6868,
      "step": 1295
    },
    {
      "epoch": 0.19803644420674638,
      "grad_norm": 0.31376180052757263,
      "learning_rate": 0.0001866773143880723,
      "loss": 0.5926,
      "step": 1296
    },
    {
      "epoch": 0.19818925010505406,
      "grad_norm": 0.400387167930603,
      "learning_rate": 0.00018665241130484713,
      "loss": 0.8536,
      "step": 1297
    },
    {
      "epoch": 0.19834205600336172,
      "grad_norm": 0.3091200590133667,
      "learning_rate": 0.00018662748663290105,
      "loss": 0.7177,
      "step": 1298
    },
    {
      "epoch": 0.1984948619016694,
      "grad_norm": 0.27559390664100647,
      "learning_rate": 0.00018660254037844388,
      "loss": 0.8545,
      "step": 1299
    },
    {
      "epoch": 0.19864766779997708,
      "grad_norm": 0.2838318943977356,
      "learning_rate": 0.00018657757254769074,
      "loss": 0.758,
      "step": 1300
    },
    {
      "epoch": 0.19880047369828474,
      "grad_norm": 0.2726922035217285,
      "learning_rate": 0.0001865525831468621,
      "loss": 0.549,
      "step": 1301
    },
    {
      "epoch": 0.19895327959659243,
      "grad_norm": 0.6867300271987915,
      "learning_rate": 0.00018652757218218396,
      "loss": 0.6198,
      "step": 1302
    },
    {
      "epoch": 0.1991060854949001,
      "grad_norm": 0.32437586784362793,
      "learning_rate": 0.0001865025396598875,
      "loss": 0.7343,
      "step": 1303
    },
    {
      "epoch": 0.19925889139320777,
      "grad_norm": 0.29952913522720337,
      "learning_rate": 0.00018647748558620942,
      "loss": 0.8007,
      "step": 1304
    },
    {
      "epoch": 0.19941169729151545,
      "grad_norm": 1.2307347059249878,
      "learning_rate": 0.00018645240996739175,
      "loss": 0.6912,
      "step": 1305
    },
    {
      "epoch": 0.19956450318982313,
      "grad_norm": 0.3030075430870056,
      "learning_rate": 0.00018642731280968185,
      "loss": 0.6624,
      "step": 1306
    },
    {
      "epoch": 0.1997173090881308,
      "grad_norm": 0.35436445474624634,
      "learning_rate": 0.0001864021941193324,
      "loss": 0.7682,
      "step": 1307
    },
    {
      "epoch": 0.19987011498643847,
      "grad_norm": 0.33362630009651184,
      "learning_rate": 0.00018637705390260161,
      "loss": 0.6417,
      "step": 1308
    },
    {
      "epoch": 0.20002292088474616,
      "grad_norm": 0.4144555628299713,
      "learning_rate": 0.00018635189216575291,
      "loss": 0.7121,
      "step": 1309
    },
    {
      "epoch": 0.20017572678305381,
      "grad_norm": 0.2980126738548279,
      "learning_rate": 0.0001863267089150551,
      "loss": 0.6802,
      "step": 1310
    },
    {
      "epoch": 0.2003285326813615,
      "grad_norm": 0.27214938402175903,
      "learning_rate": 0.00018630150415678242,
      "loss": 0.5862,
      "step": 1311
    },
    {
      "epoch": 0.20048133857966918,
      "grad_norm": 0.2389996200799942,
      "learning_rate": 0.00018627627789721444,
      "loss": 0.6268,
      "step": 1312
    },
    {
      "epoch": 0.20063414447797684,
      "grad_norm": 0.26987066864967346,
      "learning_rate": 0.00018625103014263602,
      "loss": 0.7125,
      "step": 1313
    },
    {
      "epoch": 0.20078695037628452,
      "grad_norm": 0.4237341582775116,
      "learning_rate": 0.0001862257608993375,
      "loss": 0.3945,
      "step": 1314
    },
    {
      "epoch": 0.2009397562745922,
      "grad_norm": 0.30507996678352356,
      "learning_rate": 0.00018620047017361442,
      "loss": 0.7114,
      "step": 1315
    },
    {
      "epoch": 0.20109256217289986,
      "grad_norm": 0.3909916281700134,
      "learning_rate": 0.00018617515797176776,
      "loss": 0.8767,
      "step": 1316
    },
    {
      "epoch": 0.20124536807120755,
      "grad_norm": 0.3162682056427002,
      "learning_rate": 0.00018614982430010388,
      "loss": 0.6625,
      "step": 1317
    },
    {
      "epoch": 0.20139817396951523,
      "grad_norm": 0.2585495412349701,
      "learning_rate": 0.00018612446916493444,
      "loss": 0.7066,
      "step": 1318
    },
    {
      "epoch": 0.2015509798678229,
      "grad_norm": 0.27862757444381714,
      "learning_rate": 0.00018609909257257648,
      "loss": 0.9383,
      "step": 1319
    },
    {
      "epoch": 0.20170378576613057,
      "grad_norm": 0.2943915128707886,
      "learning_rate": 0.00018607369452935233,
      "loss": 0.7859,
      "step": 1320
    },
    {
      "epoch": 0.20185659166443826,
      "grad_norm": 0.4653479754924774,
      "learning_rate": 0.00018604827504158967,
      "loss": 0.8381,
      "step": 1321
    },
    {
      "epoch": 0.2020093975627459,
      "grad_norm": 0.23425963521003723,
      "learning_rate": 0.00018602283411562164,
      "loss": 0.7697,
      "step": 1322
    },
    {
      "epoch": 0.2021622034610536,
      "grad_norm": 0.2639780640602112,
      "learning_rate": 0.0001859973717577866,
      "loss": 0.8266,
      "step": 1323
    },
    {
      "epoch": 0.20231500935936128,
      "grad_norm": 0.3585314154624939,
      "learning_rate": 0.00018597188797442823,
      "loss": 0.5168,
      "step": 1324
    },
    {
      "epoch": 0.20246781525766894,
      "grad_norm": 0.3173975646495819,
      "learning_rate": 0.00018594638277189568,
      "loss": 0.7392,
      "step": 1325
    },
    {
      "epoch": 0.20262062115597662,
      "grad_norm": 0.24657927453517914,
      "learning_rate": 0.0001859208561565433,
      "loss": 0.7558,
      "step": 1326
    },
    {
      "epoch": 0.2027734270542843,
      "grad_norm": 0.26230642199516296,
      "learning_rate": 0.0001858953081347308,
      "loss": 0.7076,
      "step": 1327
    },
    {
      "epoch": 0.20292623295259196,
      "grad_norm": 0.3020760416984558,
      "learning_rate": 0.00018586973871282338,
      "loss": 0.6357,
      "step": 1328
    },
    {
      "epoch": 0.20307903885089965,
      "grad_norm": 0.27209731936454773,
      "learning_rate": 0.00018584414789719132,
      "loss": 0.6761,
      "step": 1329
    },
    {
      "epoch": 0.20323184474920733,
      "grad_norm": 0.24238243699073792,
      "learning_rate": 0.00018581853569421043,
      "loss": 0.6273,
      "step": 1330
    },
    {
      "epoch": 0.203384650647515,
      "grad_norm": 0.3168526589870453,
      "learning_rate": 0.00018579290211026173,
      "loss": 0.556,
      "step": 1331
    },
    {
      "epoch": 0.20353745654582267,
      "grad_norm": 0.2814149856567383,
      "learning_rate": 0.00018576724715173168,
      "loss": 0.5308,
      "step": 1332
    },
    {
      "epoch": 0.20369026244413035,
      "grad_norm": 0.3175278604030609,
      "learning_rate": 0.00018574157082501194,
      "loss": 0.8015,
      "step": 1333
    },
    {
      "epoch": 0.203843068342438,
      "grad_norm": 0.32856446504592896,
      "learning_rate": 0.00018571587313649955,
      "loss": 0.7576,
      "step": 1334
    },
    {
      "epoch": 0.2039958742407457,
      "grad_norm": 0.3181629180908203,
      "learning_rate": 0.00018569015409259688,
      "loss": 0.7387,
      "step": 1335
    },
    {
      "epoch": 0.20414868013905338,
      "grad_norm": 0.2765921652317047,
      "learning_rate": 0.00018566441369971166,
      "loss": 0.7357,
      "step": 1336
    },
    {
      "epoch": 0.20430148603736104,
      "grad_norm": 0.30099403858184814,
      "learning_rate": 0.00018563865196425682,
      "loss": 0.6671,
      "step": 1337
    },
    {
      "epoch": 0.20445429193566872,
      "grad_norm": 0.37037232518196106,
      "learning_rate": 0.00018561286889265074,
      "loss": 0.6421,
      "step": 1338
    },
    {
      "epoch": 0.2046070978339764,
      "grad_norm": 0.32211172580718994,
      "learning_rate": 0.000185587064491317,
      "loss": 0.6952,
      "step": 1339
    },
    {
      "epoch": 0.20475990373228406,
      "grad_norm": 0.31535395979881287,
      "learning_rate": 0.00018556123876668459,
      "loss": 0.5887,
      "step": 1340
    },
    {
      "epoch": 0.20491270963059174,
      "grad_norm": 0.4243486523628235,
      "learning_rate": 0.00018553539172518776,
      "loss": 0.8713,
      "step": 1341
    },
    {
      "epoch": 0.20506551552889943,
      "grad_norm": 0.2893839478492737,
      "learning_rate": 0.00018550952337326607,
      "loss": 0.5753,
      "step": 1342
    },
    {
      "epoch": 0.20521832142720708,
      "grad_norm": 0.24352984130382538,
      "learning_rate": 0.00018548363371736449,
      "loss": 0.6823,
      "step": 1343
    },
    {
      "epoch": 0.20537112732551477,
      "grad_norm": 0.2798251509666443,
      "learning_rate": 0.00018545772276393308,
      "loss": 0.5801,
      "step": 1344
    },
    {
      "epoch": 0.20552393322382245,
      "grad_norm": 0.2867914140224457,
      "learning_rate": 0.0001854317905194274,
      "loss": 0.7108,
      "step": 1345
    },
    {
      "epoch": 0.2056767391221301,
      "grad_norm": 0.3005681335926056,
      "learning_rate": 0.00018540583699030826,
      "loss": 0.7227,
      "step": 1346
    },
    {
      "epoch": 0.2058295450204378,
      "grad_norm": 0.26915448904037476,
      "learning_rate": 0.00018537986218304176,
      "loss": 0.6557,
      "step": 1347
    },
    {
      "epoch": 0.20598235091874548,
      "grad_norm": 0.2573173940181732,
      "learning_rate": 0.00018535386610409927,
      "loss": 0.6926,
      "step": 1348
    },
    {
      "epoch": 0.20613515681705313,
      "grad_norm": 0.3156735897064209,
      "learning_rate": 0.00018532784875995755,
      "loss": 0.7268,
      "step": 1349
    },
    {
      "epoch": 0.20628796271536082,
      "grad_norm": 0.35365813970565796,
      "learning_rate": 0.00018530181015709855,
      "loss": 0.7369,
      "step": 1350
    },
    {
      "epoch": 0.20644076861366847,
      "grad_norm": 0.33551231026649475,
      "learning_rate": 0.0001852757503020096,
      "loss": 0.6259,
      "step": 1351
    },
    {
      "epoch": 0.20659357451197616,
      "grad_norm": 0.3139243423938751,
      "learning_rate": 0.0001852496692011833,
      "loss": 0.7496,
      "step": 1352
    },
    {
      "epoch": 0.20674638041028384,
      "grad_norm": 0.2617344260215759,
      "learning_rate": 0.00018522356686111752,
      "loss": 0.8014,
      "step": 1353
    },
    {
      "epoch": 0.2068991863085915,
      "grad_norm": 0.27416306734085083,
      "learning_rate": 0.00018519744328831543,
      "loss": 0.7364,
      "step": 1354
    },
    {
      "epoch": 0.20705199220689918,
      "grad_norm": 0.2772804796695709,
      "learning_rate": 0.00018517129848928554,
      "loss": 0.7281,
      "step": 1355
    },
    {
      "epoch": 0.20720479810520687,
      "grad_norm": 0.39369335770606995,
      "learning_rate": 0.00018514513247054154,
      "loss": 0.7729,
      "step": 1356
    },
    {
      "epoch": 0.20735760400351452,
      "grad_norm": 0.2790491282939911,
      "learning_rate": 0.00018511894523860254,
      "loss": 0.8568,
      "step": 1357
    },
    {
      "epoch": 0.2075104099018222,
      "grad_norm": 0.3145041763782501,
      "learning_rate": 0.00018509273679999283,
      "loss": 0.9169,
      "step": 1358
    },
    {
      "epoch": 0.2076632158001299,
      "grad_norm": 0.2785448729991913,
      "learning_rate": 0.00018506650716124207,
      "loss": 0.7077,
      "step": 1359
    },
    {
      "epoch": 0.20781602169843755,
      "grad_norm": 0.3012505769729614,
      "learning_rate": 0.0001850402563288851,
      "loss": 0.6312,
      "step": 1360
    },
    {
      "epoch": 0.20796882759674523,
      "grad_norm": 0.28249379992485046,
      "learning_rate": 0.00018501398430946207,
      "loss": 0.7125,
      "step": 1361
    },
    {
      "epoch": 0.20812163349505292,
      "grad_norm": 0.30596253275871277,
      "learning_rate": 0.00018498769110951855,
      "loss": 0.837,
      "step": 1362
    },
    {
      "epoch": 0.20827443939336057,
      "grad_norm": 0.2675941288471222,
      "learning_rate": 0.00018496137673560518,
      "loss": 0.7414,
      "step": 1363
    },
    {
      "epoch": 0.20842724529166826,
      "grad_norm": 0.248866006731987,
      "learning_rate": 0.00018493504119427795,
      "loss": 0.749,
      "step": 1364
    },
    {
      "epoch": 0.20858005118997594,
      "grad_norm": 0.2572340667247772,
      "learning_rate": 0.0001849086844920982,
      "loss": 0.5073,
      "step": 1365
    },
    {
      "epoch": 0.2087328570882836,
      "grad_norm": 0.2993871569633484,
      "learning_rate": 0.00018488230663563242,
      "loss": 0.6901,
      "step": 1366
    },
    {
      "epoch": 0.20888566298659128,
      "grad_norm": 0.2996583878993988,
      "learning_rate": 0.0001848559076314525,
      "loss": 1.0662,
      "step": 1367
    },
    {
      "epoch": 0.20903846888489896,
      "grad_norm": 0.2594098448753357,
      "learning_rate": 0.00018482948748613546,
      "loss": 0.7223,
      "step": 1368
    },
    {
      "epoch": 0.20919127478320662,
      "grad_norm": 0.2878977060317993,
      "learning_rate": 0.0001848030462062637,
      "loss": 0.7029,
      "step": 1369
    },
    {
      "epoch": 0.2093440806815143,
      "grad_norm": 0.23204508423805237,
      "learning_rate": 0.00018477658379842485,
      "loss": 0.7097,
      "step": 1370
    },
    {
      "epoch": 0.209496886579822,
      "grad_norm": 0.29869040846824646,
      "learning_rate": 0.0001847501002692118,
      "loss": 0.5587,
      "step": 1371
    },
    {
      "epoch": 0.20964969247812965,
      "grad_norm": 0.4763803780078888,
      "learning_rate": 0.00018472359562522267,
      "loss": 0.7924,
      "step": 1372
    },
    {
      "epoch": 0.20980249837643733,
      "grad_norm": 0.29070818424224854,
      "learning_rate": 0.00018469706987306087,
      "loss": 0.7127,
      "step": 1373
    },
    {
      "epoch": 0.209955304274745,
      "grad_norm": 0.4801795184612274,
      "learning_rate": 0.00018467052301933507,
      "loss": 0.7563,
      "step": 1374
    },
    {
      "epoch": 0.21010811017305267,
      "grad_norm": 0.2665102481842041,
      "learning_rate": 0.0001846439550706592,
      "loss": 0.6069,
      "step": 1375
    },
    {
      "epoch": 0.21026091607136035,
      "grad_norm": 0.4513528048992157,
      "learning_rate": 0.00018461736603365248,
      "loss": 0.5492,
      "step": 1376
    },
    {
      "epoch": 0.21041372196966804,
      "grad_norm": 0.23580753803253174,
      "learning_rate": 0.0001845907559149393,
      "loss": 0.7941,
      "step": 1377
    },
    {
      "epoch": 0.2105665278679757,
      "grad_norm": 0.29539886116981506,
      "learning_rate": 0.00018456412472114936,
      "loss": 0.6216,
      "step": 1378
    },
    {
      "epoch": 0.21071933376628338,
      "grad_norm": 0.25598660111427307,
      "learning_rate": 0.00018453747245891758,
      "loss": 0.7376,
      "step": 1379
    },
    {
      "epoch": 0.21087213966459106,
      "grad_norm": 0.30273643136024475,
      "learning_rate": 0.0001845107991348842,
      "loss": 0.6537,
      "step": 1380
    },
    {
      "epoch": 0.21102494556289872,
      "grad_norm": 0.28207799792289734,
      "learning_rate": 0.00018448410475569457,
      "loss": 0.7825,
      "step": 1381
    },
    {
      "epoch": 0.2111777514612064,
      "grad_norm": 0.42546963691711426,
      "learning_rate": 0.00018445738932799946,
      "loss": 0.8203,
      "step": 1382
    },
    {
      "epoch": 0.2113305573595141,
      "grad_norm": 0.3256038427352905,
      "learning_rate": 0.00018443065285845474,
      "loss": 0.7071,
      "step": 1383
    },
    {
      "epoch": 0.21148336325782174,
      "grad_norm": 0.2774294912815094,
      "learning_rate": 0.0001844038953537216,
      "loss": 0.6433,
      "step": 1384
    },
    {
      "epoch": 0.21163616915612943,
      "grad_norm": 0.3057693541049957,
      "learning_rate": 0.0001843771168204664,
      "loss": 0.6704,
      "step": 1385
    },
    {
      "epoch": 0.2117889750544371,
      "grad_norm": 0.2981327176094055,
      "learning_rate": 0.00018435031726536088,
      "loss": 0.7927,
      "step": 1386
    },
    {
      "epoch": 0.21194178095274477,
      "grad_norm": 0.3180636763572693,
      "learning_rate": 0.00018432349669508184,
      "loss": 0.6058,
      "step": 1387
    },
    {
      "epoch": 0.21209458685105245,
      "grad_norm": 0.34904152154922485,
      "learning_rate": 0.00018429665511631143,
      "loss": 0.9131,
      "step": 1388
    },
    {
      "epoch": 0.21224739274936014,
      "grad_norm": 0.3404789865016937,
      "learning_rate": 0.00018426979253573702,
      "loss": 0.5944,
      "step": 1389
    },
    {
      "epoch": 0.2124001986476678,
      "grad_norm": 0.24927204847335815,
      "learning_rate": 0.00018424290896005118,
      "loss": 0.6979,
      "step": 1390
    },
    {
      "epoch": 0.21255300454597548,
      "grad_norm": 0.29016798734664917,
      "learning_rate": 0.00018421600439595171,
      "loss": 0.7192,
      "step": 1391
    },
    {
      "epoch": 0.21270581044428316,
      "grad_norm": 0.27132588624954224,
      "learning_rate": 0.0001841890788501417,
      "loss": 0.7125,
      "step": 1392
    },
    {
      "epoch": 0.21285861634259082,
      "grad_norm": 0.2970811128616333,
      "learning_rate": 0.00018416213232932938,
      "loss": 0.8096,
      "step": 1393
    },
    {
      "epoch": 0.2130114222408985,
      "grad_norm": 0.3342524766921997,
      "learning_rate": 0.00018413516484022826,
      "loss": 0.716,
      "step": 1394
    },
    {
      "epoch": 0.21316422813920619,
      "grad_norm": 0.24330155551433563,
      "learning_rate": 0.0001841081763895571,
      "loss": 0.6139,
      "step": 1395
    },
    {
      "epoch": 0.21331703403751384,
      "grad_norm": 0.26770898699760437,
      "learning_rate": 0.0001840811669840398,
      "loss": 0.6953,
      "step": 1396
    },
    {
      "epoch": 0.21346983993582153,
      "grad_norm": 0.3319970965385437,
      "learning_rate": 0.0001840541366304055,
      "loss": 0.6463,
      "step": 1397
    },
    {
      "epoch": 0.2136226458341292,
      "grad_norm": 0.2949671447277069,
      "learning_rate": 0.0001840270853353887,
      "loss": 0.7701,
      "step": 1398
    },
    {
      "epoch": 0.21377545173243687,
      "grad_norm": 0.2886238396167755,
      "learning_rate": 0.0001840000131057289,
      "loss": 0.6067,
      "step": 1399
    },
    {
      "epoch": 0.21392825763074455,
      "grad_norm": 0.3149748146533966,
      "learning_rate": 0.00018397291994817097,
      "loss": 0.7083,
      "step": 1400
    },
    {
      "epoch": 0.21408106352905223,
      "grad_norm": 0.27100950479507446,
      "learning_rate": 0.0001839458058694649,
      "loss": 0.705,
      "step": 1401
    },
    {
      "epoch": 0.2142338694273599,
      "grad_norm": 0.29059261083602905,
      "learning_rate": 0.00018391867087636597,
      "loss": 0.6235,
      "step": 1402
    },
    {
      "epoch": 0.21438667532566758,
      "grad_norm": 0.30112072825431824,
      "learning_rate": 0.0001838915149756346,
      "loss": 1.0977,
      "step": 1403
    },
    {
      "epoch": 0.21453948122397526,
      "grad_norm": 0.31074076890945435,
      "learning_rate": 0.00018386433817403654,
      "loss": 0.7036,
      "step": 1404
    },
    {
      "epoch": 0.21469228712228292,
      "grad_norm": 0.2648938000202179,
      "learning_rate": 0.00018383714047834256,
      "loss": 0.6831,
      "step": 1405
    },
    {
      "epoch": 0.2148450930205906,
      "grad_norm": 0.33752021193504333,
      "learning_rate": 0.00018380992189532877,
      "loss": 0.6442,
      "step": 1406
    },
    {
      "epoch": 0.21499789891889826,
      "grad_norm": 0.25656089186668396,
      "learning_rate": 0.0001837826824317765,
      "loss": 0.7233,
      "step": 1407
    },
    {
      "epoch": 0.21515070481720594,
      "grad_norm": 0.3936312198638916,
      "learning_rate": 0.00018375542209447216,
      "loss": 0.9006,
      "step": 1408
    },
    {
      "epoch": 0.21530351071551362,
      "grad_norm": 0.2963830530643463,
      "learning_rate": 0.0001837281408902075,
      "loss": 0.7601,
      "step": 1409
    },
    {
      "epoch": 0.21545631661382128,
      "grad_norm": 0.39272454380989075,
      "learning_rate": 0.00018370083882577934,
      "loss": 0.6576,
      "step": 1410
    },
    {
      "epoch": 0.21560912251212896,
      "grad_norm": 0.28687795996665955,
      "learning_rate": 0.00018367351590798978,
      "loss": 0.7846,
      "step": 1411
    },
    {
      "epoch": 0.21576192841043665,
      "grad_norm": 0.2810840904712677,
      "learning_rate": 0.00018364617214364614,
      "loss": 0.684,
      "step": 1412
    },
    {
      "epoch": 0.2159147343087443,
      "grad_norm": 0.26902371644973755,
      "learning_rate": 0.00018361880753956083,
      "loss": 0.6707,
      "step": 1413
    },
    {
      "epoch": 0.216067540207052,
      "grad_norm": 0.27405041456222534,
      "learning_rate": 0.00018359142210255154,
      "loss": 0.6771,
      "step": 1414
    },
    {
      "epoch": 0.21622034610535967,
      "grad_norm": 0.25500932335853577,
      "learning_rate": 0.00018356401583944116,
      "loss": 0.7029,
      "step": 1415
    },
    {
      "epoch": 0.21637315200366733,
      "grad_norm": 0.2854139506816864,
      "learning_rate": 0.00018353658875705766,
      "loss": 0.6724,
      "step": 1416
    },
    {
      "epoch": 0.216525957901975,
      "grad_norm": 0.26606252789497375,
      "learning_rate": 0.0001835091408622343,
      "loss": 0.7796,
      "step": 1417
    },
    {
      "epoch": 0.2166787638002827,
      "grad_norm": 0.3369622826576233,
      "learning_rate": 0.00018348167216180952,
      "loss": 0.643,
      "step": 1418
    },
    {
      "epoch": 0.21683156969859035,
      "grad_norm": 0.40154317021369934,
      "learning_rate": 0.00018345418266262683,
      "loss": 0.6505,
      "step": 1419
    },
    {
      "epoch": 0.21698437559689804,
      "grad_norm": 0.2847399413585663,
      "learning_rate": 0.0001834266723715351,
      "loss": 0.8197,
      "step": 1420
    },
    {
      "epoch": 0.21713718149520572,
      "grad_norm": 0.3490140438079834,
      "learning_rate": 0.00018339914129538826,
      "loss": 0.655,
      "step": 1421
    },
    {
      "epoch": 0.21728998739351338,
      "grad_norm": 0.3189198076725006,
      "learning_rate": 0.0001833715894410454,
      "loss": 0.6823,
      "step": 1422
    },
    {
      "epoch": 0.21744279329182106,
      "grad_norm": 0.27019596099853516,
      "learning_rate": 0.00018334401681537093,
      "loss": 0.7576,
      "step": 1423
    },
    {
      "epoch": 0.21759559919012875,
      "grad_norm": 0.25581902265548706,
      "learning_rate": 0.00018331642342523424,
      "loss": 0.5591,
      "step": 1424
    },
    {
      "epoch": 0.2177484050884364,
      "grad_norm": 0.32097867131233215,
      "learning_rate": 0.00018328880927751003,
      "loss": 0.8419,
      "step": 1425
    },
    {
      "epoch": 0.2179012109867441,
      "grad_norm": 0.29979658126831055,
      "learning_rate": 0.00018326117437907815,
      "loss": 0.6647,
      "step": 1426
    },
    {
      "epoch": 0.21805401688505177,
      "grad_norm": 0.2586615979671478,
      "learning_rate": 0.00018323351873682358,
      "loss": 0.7442,
      "step": 1427
    },
    {
      "epoch": 0.21820682278335943,
      "grad_norm": 0.25333309173583984,
      "learning_rate": 0.0001832058423576365,
      "loss": 0.5425,
      "step": 1428
    },
    {
      "epoch": 0.2183596286816671,
      "grad_norm": 0.33443784713745117,
      "learning_rate": 0.00018317814524841224,
      "loss": 0.7438,
      "step": 1429
    },
    {
      "epoch": 0.2185124345799748,
      "grad_norm": 0.28940871357917786,
      "learning_rate": 0.00018315042741605132,
      "loss": 0.7608,
      "step": 1430
    },
    {
      "epoch": 0.21866524047828245,
      "grad_norm": 0.2656884491443634,
      "learning_rate": 0.0001831226888674594,
      "loss": 0.8089,
      "step": 1431
    },
    {
      "epoch": 0.21881804637659014,
      "grad_norm": 0.24992115795612335,
      "learning_rate": 0.0001830949296095473,
      "loss": 0.7727,
      "step": 1432
    },
    {
      "epoch": 0.21897085227489782,
      "grad_norm": 0.2528163492679596,
      "learning_rate": 0.00018306714964923097,
      "loss": 0.7669,
      "step": 1433
    },
    {
      "epoch": 0.21912365817320548,
      "grad_norm": 0.2704116106033325,
      "learning_rate": 0.00018303934899343161,
      "loss": 0.6762,
      "step": 1434
    },
    {
      "epoch": 0.21927646407151316,
      "grad_norm": 0.36805975437164307,
      "learning_rate": 0.00018301152764907554,
      "loss": 0.7063,
      "step": 1435
    },
    {
      "epoch": 0.21942926996982084,
      "grad_norm": 0.47136247158050537,
      "learning_rate": 0.00018298368562309414,
      "loss": 0.6372,
      "step": 1436
    },
    {
      "epoch": 0.2195820758681285,
      "grad_norm": 0.2653694152832031,
      "learning_rate": 0.00018295582292242405,
      "loss": 0.5245,
      "step": 1437
    },
    {
      "epoch": 0.21973488176643619,
      "grad_norm": 0.3250346779823303,
      "learning_rate": 0.00018292793955400702,
      "loss": 0.8513,
      "step": 1438
    },
    {
      "epoch": 0.21988768766474387,
      "grad_norm": 0.282665878534317,
      "learning_rate": 0.00018290003552479003,
      "loss": 0.712,
      "step": 1439
    },
    {
      "epoch": 0.22004049356305153,
      "grad_norm": 0.41571080684661865,
      "learning_rate": 0.000182872110841725,
      "loss": 0.7272,
      "step": 1440
    },
    {
      "epoch": 0.2201932994613592,
      "grad_norm": 0.27098286151885986,
      "learning_rate": 0.00018284416551176923,
      "loss": 0.775,
      "step": 1441
    },
    {
      "epoch": 0.2203461053596669,
      "grad_norm": 0.29416346549987793,
      "learning_rate": 0.00018281619954188506,
      "loss": 0.9036,
      "step": 1442
    },
    {
      "epoch": 0.22049891125797455,
      "grad_norm": 0.5548993349075317,
      "learning_rate": 0.0001827882129390399,
      "loss": 0.7088,
      "step": 1443
    },
    {
      "epoch": 0.22065171715628223,
      "grad_norm": 0.3549652695655823,
      "learning_rate": 0.00018276020571020646,
      "loss": 0.7721,
      "step": 1444
    },
    {
      "epoch": 0.22080452305458992,
      "grad_norm": 0.27087077498435974,
      "learning_rate": 0.0001827321778623625,
      "loss": 0.784,
      "step": 1445
    },
    {
      "epoch": 0.22095732895289757,
      "grad_norm": 0.2703557312488556,
      "learning_rate": 0.00018270412940249087,
      "loss": 0.7766,
      "step": 1446
    },
    {
      "epoch": 0.22111013485120526,
      "grad_norm": 0.2888347804546356,
      "learning_rate": 0.00018267606033757966,
      "loss": 0.5097,
      "step": 1447
    },
    {
      "epoch": 0.22126294074951294,
      "grad_norm": 0.2999062240123749,
      "learning_rate": 0.00018264797067462198,
      "loss": 0.7807,
      "step": 1448
    },
    {
      "epoch": 0.2214157466478206,
      "grad_norm": 0.27103522419929504,
      "learning_rate": 0.0001826198604206162,
      "loss": 0.6412,
      "step": 1449
    },
    {
      "epoch": 0.22156855254612828,
      "grad_norm": 0.3419981300830841,
      "learning_rate": 0.00018259172958256574,
      "loss": 0.6001,
      "step": 1450
    },
    {
      "epoch": 0.22172135844443597,
      "grad_norm": 0.2843935191631317,
      "learning_rate": 0.00018256357816747912,
      "loss": 0.6716,
      "step": 1451
    },
    {
      "epoch": 0.22187416434274362,
      "grad_norm": 0.30148133635520935,
      "learning_rate": 0.00018253540618237007,
      "loss": 0.5867,
      "step": 1452
    },
    {
      "epoch": 0.2220269702410513,
      "grad_norm": 0.30288344621658325,
      "learning_rate": 0.0001825072136342574,
      "loss": 0.8905,
      "step": 1453
    },
    {
      "epoch": 0.222179776139359,
      "grad_norm": 0.26123127341270447,
      "learning_rate": 0.00018247900053016504,
      "loss": 1.0583,
      "step": 1454
    },
    {
      "epoch": 0.22233258203766665,
      "grad_norm": 0.32431039214134216,
      "learning_rate": 0.00018245076687712204,
      "loss": 0.734,
      "step": 1455
    },
    {
      "epoch": 0.22248538793597433,
      "grad_norm": 0.3419983386993408,
      "learning_rate": 0.0001824225126821626,
      "loss": 0.6659,
      "step": 1456
    },
    {
      "epoch": 0.22263819383428202,
      "grad_norm": 0.2817968428134918,
      "learning_rate": 0.00018239423795232598,
      "loss": 0.8019,
      "step": 1457
    },
    {
      "epoch": 0.22279099973258967,
      "grad_norm": 0.287589430809021,
      "learning_rate": 0.0001823659426946566,
      "loss": 0.6464,
      "step": 1458
    },
    {
      "epoch": 0.22294380563089736,
      "grad_norm": 0.29114627838134766,
      "learning_rate": 0.00018233762691620403,
      "loss": 0.7999,
      "step": 1459
    },
    {
      "epoch": 0.22309661152920504,
      "grad_norm": 0.2640954852104187,
      "learning_rate": 0.00018230929062402286,
      "loss": 0.7596,
      "step": 1460
    },
    {
      "epoch": 0.2232494174275127,
      "grad_norm": 0.28602683544158936,
      "learning_rate": 0.00018228093382517284,
      "loss": 0.5454,
      "step": 1461
    },
    {
      "epoch": 0.22340222332582038,
      "grad_norm": 0.27925559878349304,
      "learning_rate": 0.00018225255652671888,
      "loss": 0.6297,
      "step": 1462
    },
    {
      "epoch": 0.22355502922412804,
      "grad_norm": 0.2729659676551819,
      "learning_rate": 0.0001822241587357309,
      "loss": 0.698,
      "step": 1463
    },
    {
      "epoch": 0.22370783512243572,
      "grad_norm": 0.2541782557964325,
      "learning_rate": 0.00018219574045928396,
      "loss": 0.701,
      "step": 1464
    },
    {
      "epoch": 0.2238606410207434,
      "grad_norm": 0.24185724556446075,
      "learning_rate": 0.00018216730170445827,
      "loss": 0.6249,
      "step": 1465
    },
    {
      "epoch": 0.22401344691905106,
      "grad_norm": 0.2809990346431732,
      "learning_rate": 0.00018213884247833908,
      "loss": 0.6986,
      "step": 1466
    },
    {
      "epoch": 0.22416625281735875,
      "grad_norm": 0.25735121965408325,
      "learning_rate": 0.00018211036278801678,
      "loss": 0.6038,
      "step": 1467
    },
    {
      "epoch": 0.22431905871566643,
      "grad_norm": 0.24584175646305084,
      "learning_rate": 0.00018208186264058687,
      "loss": 0.7304,
      "step": 1468
    },
    {
      "epoch": 0.2244718646139741,
      "grad_norm": 0.27485716342926025,
      "learning_rate": 0.00018205334204314988,
      "loss": 0.6952,
      "step": 1469
    },
    {
      "epoch": 0.22462467051228177,
      "grad_norm": 0.254385769367218,
      "learning_rate": 0.00018202480100281147,
      "loss": 0.7846,
      "step": 1470
    },
    {
      "epoch": 0.22477747641058946,
      "grad_norm": 0.263438880443573,
      "learning_rate": 0.00018199623952668245,
      "loss": 0.5202,
      "step": 1471
    },
    {
      "epoch": 0.2249302823088971,
      "grad_norm": 0.31777387857437134,
      "learning_rate": 0.0001819676576218787,
      "loss": 0.6698,
      "step": 1472
    },
    {
      "epoch": 0.2250830882072048,
      "grad_norm": 0.2673969566822052,
      "learning_rate": 0.00018193905529552103,
      "loss": 0.6729,
      "step": 1473
    },
    {
      "epoch": 0.22523589410551248,
      "grad_norm": 0.2530229091644287,
      "learning_rate": 0.0001819104325547356,
      "loss": 0.8424,
      "step": 1474
    },
    {
      "epoch": 0.22538870000382014,
      "grad_norm": 0.255082368850708,
      "learning_rate": 0.00018188178940665344,
      "loss": 0.7186,
      "step": 1475
    },
    {
      "epoch": 0.22554150590212782,
      "grad_norm": 0.2778492271900177,
      "learning_rate": 0.00018185312585841082,
      "loss": 0.6712,
      "step": 1476
    },
    {
      "epoch": 0.2256943118004355,
      "grad_norm": 0.29443123936653137,
      "learning_rate": 0.00018182444191714895,
      "loss": 0.6747,
      "step": 1477
    },
    {
      "epoch": 0.22584711769874316,
      "grad_norm": 0.2747706472873688,
      "learning_rate": 0.00018179573759001424,
      "loss": 0.6845,
      "step": 1478
    },
    {
      "epoch": 0.22599992359705084,
      "grad_norm": 0.24506688117980957,
      "learning_rate": 0.00018176701288415817,
      "loss": 0.6688,
      "step": 1479
    },
    {
      "epoch": 0.22615272949535853,
      "grad_norm": 0.29049941897392273,
      "learning_rate": 0.00018173826780673715,
      "loss": 0.9259,
      "step": 1480
    },
    {
      "epoch": 0.22630553539366619,
      "grad_norm": 0.2717791795730591,
      "learning_rate": 0.00018170950236491286,
      "loss": 0.6171,
      "step": 1481
    },
    {
      "epoch": 0.22645834129197387,
      "grad_norm": 0.504237174987793,
      "learning_rate": 0.00018168071656585194,
      "loss": 0.9517,
      "step": 1482
    },
    {
      "epoch": 0.22661114719028155,
      "grad_norm": 0.4023924171924591,
      "learning_rate": 0.00018165191041672615,
      "loss": 0.8926,
      "step": 1483
    },
    {
      "epoch": 0.2267639530885892,
      "grad_norm": 0.2688741683959961,
      "learning_rate": 0.0001816230839247123,
      "loss": 0.7705,
      "step": 1484
    },
    {
      "epoch": 0.2269167589868969,
      "grad_norm": 0.28259801864624023,
      "learning_rate": 0.00018159423709699222,
      "loss": 0.6859,
      "step": 1485
    },
    {
      "epoch": 0.22706956488520458,
      "grad_norm": 0.28821465373039246,
      "learning_rate": 0.00018156536994075288,
      "loss": 0.5653,
      "step": 1486
    },
    {
      "epoch": 0.22722237078351223,
      "grad_norm": 0.35280266404151917,
      "learning_rate": 0.00018153648246318634,
      "loss": 0.7832,
      "step": 1487
    },
    {
      "epoch": 0.22737517668181992,
      "grad_norm": 0.4009726345539093,
      "learning_rate": 0.0001815075746714896,
      "loss": 0.8749,
      "step": 1488
    },
    {
      "epoch": 0.2275279825801276,
      "grad_norm": 0.2834427058696747,
      "learning_rate": 0.00018147864657286483,
      "loss": 0.8026,
      "step": 1489
    },
    {
      "epoch": 0.22768078847843526,
      "grad_norm": 0.325809121131897,
      "learning_rate": 0.00018144969817451923,
      "loss": 0.6645,
      "step": 1490
    },
    {
      "epoch": 0.22783359437674294,
      "grad_norm": 0.273645281791687,
      "learning_rate": 0.00018142072948366505,
      "loss": 0.7575,
      "step": 1491
    },
    {
      "epoch": 0.22798640027505063,
      "grad_norm": 0.3082992434501648,
      "learning_rate": 0.00018139174050751957,
      "loss": 0.7648,
      "step": 1492
    },
    {
      "epoch": 0.22813920617335828,
      "grad_norm": 0.28899475932121277,
      "learning_rate": 0.00018136273125330513,
      "loss": 0.7882,
      "step": 1493
    },
    {
      "epoch": 0.22829201207166597,
      "grad_norm": 0.31805676221847534,
      "learning_rate": 0.0001813337017282492,
      "loss": 0.7491,
      "step": 1494
    },
    {
      "epoch": 0.22844481796997365,
      "grad_norm": 0.2605206072330475,
      "learning_rate": 0.00018130465193958424,
      "loss": 0.7592,
      "step": 1495
    },
    {
      "epoch": 0.2285976238682813,
      "grad_norm": 0.5190498232841492,
      "learning_rate": 0.00018127558189454774,
      "loss": 0.6756,
      "step": 1496
    },
    {
      "epoch": 0.228750429766589,
      "grad_norm": 0.286194384098053,
      "learning_rate": 0.00018124649160038226,
      "loss": 0.5045,
      "step": 1497
    },
    {
      "epoch": 0.22890323566489668,
      "grad_norm": 0.2897211015224457,
      "learning_rate": 0.00018121738106433537,
      "loss": 0.611,
      "step": 1498
    },
    {
      "epoch": 0.22905604156320433,
      "grad_norm": 0.26120197772979736,
      "learning_rate": 0.00018118825029365975,
      "loss": 0.6519,
      "step": 1499
    },
    {
      "epoch": 0.22920884746151202,
      "grad_norm": 0.32554882764816284,
      "learning_rate": 0.0001811590992956131,
      "loss": 0.9085,
      "step": 1500
    },
    {
      "epoch": 0.2293616533598197,
      "grad_norm": 0.26989874243736267,
      "learning_rate": 0.00018112992807745815,
      "loss": 0.7141,
      "step": 1501
    },
    {
      "epoch": 0.22951445925812736,
      "grad_norm": 0.28747060894966125,
      "learning_rate": 0.00018110073664646262,
      "loss": 0.7211,
      "step": 1502
    },
    {
      "epoch": 0.22966726515643504,
      "grad_norm": 0.22999897599220276,
      "learning_rate": 0.0001810715250098993,
      "loss": 0.6093,
      "step": 1503
    },
    {
      "epoch": 0.22982007105474273,
      "grad_norm": 0.31016895174980164,
      "learning_rate": 0.00018104229317504614,
      "loss": 0.715,
      "step": 1504
    },
    {
      "epoch": 0.22997287695305038,
      "grad_norm": 0.2531152069568634,
      "learning_rate": 0.00018101304114918583,
      "loss": 0.5904,
      "step": 1505
    },
    {
      "epoch": 0.23012568285135807,
      "grad_norm": 0.3257233798503876,
      "learning_rate": 0.00018098376893960642,
      "loss": 0.7489,
      "step": 1506
    },
    {
      "epoch": 0.23027848874966575,
      "grad_norm": 0.2525555491447449,
      "learning_rate": 0.00018095447655360077,
      "loss": 0.7849,
      "step": 1507
    },
    {
      "epoch": 0.2304312946479734,
      "grad_norm": 0.24588941037654877,
      "learning_rate": 0.00018092516399846682,
      "loss": 0.6703,
      "step": 1508
    },
    {
      "epoch": 0.2305841005462811,
      "grad_norm": 0.30313611030578613,
      "learning_rate": 0.0001808958312815076,
      "loss": 0.8151,
      "step": 1509
    },
    {
      "epoch": 0.23073690644458877,
      "grad_norm": 0.29434850811958313,
      "learning_rate": 0.00018086647841003103,
      "loss": 0.7981,
      "step": 1510
    },
    {
      "epoch": 0.23088971234289643,
      "grad_norm": 0.24499566853046417,
      "learning_rate": 0.0001808371053913502,
      "loss": 0.8504,
      "step": 1511
    },
    {
      "epoch": 0.23104251824120411,
      "grad_norm": 0.2640714943408966,
      "learning_rate": 0.00018080771223278315,
      "loss": 0.6601,
      "step": 1512
    },
    {
      "epoch": 0.2311953241395118,
      "grad_norm": 0.23578722774982452,
      "learning_rate": 0.00018077829894165288,
      "loss": 0.6778,
      "step": 1513
    },
    {
      "epoch": 0.23134813003781945,
      "grad_norm": 0.47748589515686035,
      "learning_rate": 0.00018074886552528753,
      "loss": 0.7285,
      "step": 1514
    },
    {
      "epoch": 0.23150093593612714,
      "grad_norm": 0.27540603280067444,
      "learning_rate": 0.00018071941199102013,
      "loss": 0.9043,
      "step": 1515
    },
    {
      "epoch": 0.2316537418344348,
      "grad_norm": 0.2582077980041504,
      "learning_rate": 0.00018068993834618883,
      "loss": 0.6843,
      "step": 1516
    },
    {
      "epoch": 0.23180654773274248,
      "grad_norm": 0.2842862010002136,
      "learning_rate": 0.0001806604445981367,
      "loss": 0.7826,
      "step": 1517
    },
    {
      "epoch": 0.23195935363105016,
      "grad_norm": 0.3156132698059082,
      "learning_rate": 0.0001806309307542119,
      "loss": 0.6503,
      "step": 1518
    },
    {
      "epoch": 0.23211215952935782,
      "grad_norm": 0.29756492376327515,
      "learning_rate": 0.00018060139682176754,
      "loss": 0.7223,
      "step": 1519
    },
    {
      "epoch": 0.2322649654276655,
      "grad_norm": 0.26929807662963867,
      "learning_rate": 0.00018057184280816175,
      "loss": 0.6358,
      "step": 1520
    },
    {
      "epoch": 0.2324177713259732,
      "grad_norm": 0.3058578670024872,
      "learning_rate": 0.00018054226872075768,
      "loss": 0.6521,
      "step": 1521
    },
    {
      "epoch": 0.23257057722428084,
      "grad_norm": 0.3043581247329712,
      "learning_rate": 0.00018051267456692345,
      "loss": 0.6487,
      "step": 1522
    },
    {
      "epoch": 0.23272338312258853,
      "grad_norm": 0.2621524930000305,
      "learning_rate": 0.00018048306035403216,
      "loss": 0.7336,
      "step": 1523
    },
    {
      "epoch": 0.2328761890208962,
      "grad_norm": 0.2857302129268646,
      "learning_rate": 0.000180453426089462,
      "loss": 0.7413,
      "step": 1524
    },
    {
      "epoch": 0.23302899491920387,
      "grad_norm": 0.3124992847442627,
      "learning_rate": 0.00018042377178059606,
      "loss": 0.83,
      "step": 1525
    },
    {
      "epoch": 0.23318180081751155,
      "grad_norm": 0.24599871039390564,
      "learning_rate": 0.0001803940974348225,
      "loss": 0.658,
      "step": 1526
    },
    {
      "epoch": 0.23333460671581924,
      "grad_norm": 0.2612040042877197,
      "learning_rate": 0.0001803644030595344,
      "loss": 0.6338,
      "step": 1527
    },
    {
      "epoch": 0.2334874126141269,
      "grad_norm": 0.3595271110534668,
      "learning_rate": 0.00018033468866212986,
      "loss": 0.6995,
      "step": 1528
    },
    {
      "epoch": 0.23364021851243458,
      "grad_norm": 0.32448646426200867,
      "learning_rate": 0.00018030495425001202,
      "loss": 0.6831,
      "step": 1529
    },
    {
      "epoch": 0.23379302441074226,
      "grad_norm": 0.3007851243019104,
      "learning_rate": 0.0001802751998305889,
      "loss": 0.6032,
      "step": 1530
    },
    {
      "epoch": 0.23394583030904992,
      "grad_norm": 0.2284546047449112,
      "learning_rate": 0.00018024542541127358,
      "loss": 0.6778,
      "step": 1531
    },
    {
      "epoch": 0.2340986362073576,
      "grad_norm": 0.24730284512043,
      "learning_rate": 0.00018021563099948414,
      "loss": 0.5785,
      "step": 1532
    },
    {
      "epoch": 0.2342514421056653,
      "grad_norm": 0.31631672382354736,
      "learning_rate": 0.0001801858166026436,
      "loss": 0.749,
      "step": 1533
    },
    {
      "epoch": 0.23440424800397294,
      "grad_norm": 0.30484116077423096,
      "learning_rate": 0.00018015598222817996,
      "loss": 0.656,
      "step": 1534
    },
    {
      "epoch": 0.23455705390228063,
      "grad_norm": 0.24168114364147186,
      "learning_rate": 0.00018012612788352616,
      "loss": 0.6987,
      "step": 1535
    },
    {
      "epoch": 0.2347098598005883,
      "grad_norm": 0.33276891708374023,
      "learning_rate": 0.00018009625357612023,
      "loss": 0.7676,
      "step": 1536
    },
    {
      "epoch": 0.23486266569889597,
      "grad_norm": 0.25853464007377625,
      "learning_rate": 0.00018006635931340506,
      "loss": 0.6653,
      "step": 1537
    },
    {
      "epoch": 0.23501547159720365,
      "grad_norm": 0.3082162141799927,
      "learning_rate": 0.00018003644510282855,
      "loss": 0.557,
      "step": 1538
    },
    {
      "epoch": 0.23516827749551134,
      "grad_norm": 0.4157916307449341,
      "learning_rate": 0.00018000651095184358,
      "loss": 0.5726,
      "step": 1539
    },
    {
      "epoch": 0.235321083393819,
      "grad_norm": 0.24570941925048828,
      "learning_rate": 0.00017997655686790803,
      "loss": 0.7184,
      "step": 1540
    },
    {
      "epoch": 0.23547388929212668,
      "grad_norm": 0.269633024930954,
      "learning_rate": 0.00017994658285848465,
      "loss": 0.5958,
      "step": 1541
    },
    {
      "epoch": 0.23562669519043436,
      "grad_norm": 0.24222281575202942,
      "learning_rate": 0.00017991658893104124,
      "loss": 0.7112,
      "step": 1542
    },
    {
      "epoch": 0.23577950108874202,
      "grad_norm": 0.26471802592277527,
      "learning_rate": 0.00017988657509305055,
      "loss": 0.799,
      "step": 1543
    },
    {
      "epoch": 0.2359323069870497,
      "grad_norm": 0.26221612095832825,
      "learning_rate": 0.00017985654135199027,
      "loss": 0.6478,
      "step": 1544
    },
    {
      "epoch": 0.23608511288535738,
      "grad_norm": 0.47572270035743713,
      "learning_rate": 0.00017982648771534306,
      "loss": 0.8253,
      "step": 1545
    },
    {
      "epoch": 0.23623791878366504,
      "grad_norm": 0.4527488052845001,
      "learning_rate": 0.00017979641419059648,
      "loss": 0.7986,
      "step": 1546
    },
    {
      "epoch": 0.23639072468197272,
      "grad_norm": 0.25220146775245667,
      "learning_rate": 0.0001797663207852432,
      "loss": 0.5181,
      "step": 1547
    },
    {
      "epoch": 0.2365435305802804,
      "grad_norm": 0.2821711599826813,
      "learning_rate": 0.00017973620750678059,
      "loss": 0.7455,
      "step": 1548
    },
    {
      "epoch": 0.23669633647858807,
      "grad_norm": 0.32014167308807373,
      "learning_rate": 0.00017970607436271126,
      "loss": 0.6829,
      "step": 1549
    },
    {
      "epoch": 0.23684914237689575,
      "grad_norm": 0.2855893671512604,
      "learning_rate": 0.00017967592136054257,
      "loss": 0.6884,
      "step": 1550
    },
    {
      "epoch": 0.23700194827520343,
      "grad_norm": 0.2573263645172119,
      "learning_rate": 0.00017964574850778687,
      "loss": 0.7325,
      "step": 1551
    },
    {
      "epoch": 0.2371547541735111,
      "grad_norm": 0.3028693199157715,
      "learning_rate": 0.0001796155558119615,
      "loss": 0.7879,
      "step": 1552
    },
    {
      "epoch": 0.23730756007181877,
      "grad_norm": 0.26804086565971375,
      "learning_rate": 0.00017958534328058872,
      "loss": 0.7159,
      "step": 1553
    },
    {
      "epoch": 0.23746036597012646,
      "grad_norm": 0.2625160217285156,
      "learning_rate": 0.0001795551109211957,
      "loss": 0.8026,
      "step": 1554
    },
    {
      "epoch": 0.23761317186843411,
      "grad_norm": 0.34923064708709717,
      "learning_rate": 0.00017952485874131463,
      "loss": 0.7361,
      "step": 1555
    },
    {
      "epoch": 0.2377659777667418,
      "grad_norm": 0.23876674473285675,
      "learning_rate": 0.00017949458674848255,
      "loss": 0.6431,
      "step": 1556
    },
    {
      "epoch": 0.23791878366504948,
      "grad_norm": 0.3087947964668274,
      "learning_rate": 0.00017946429495024145,
      "loss": 0.7473,
      "step": 1557
    },
    {
      "epoch": 0.23807158956335714,
      "grad_norm": 0.24753893911838531,
      "learning_rate": 0.00017943398335413835,
      "loss": 0.6258,
      "step": 1558
    },
    {
      "epoch": 0.23822439546166482,
      "grad_norm": 0.3573136627674103,
      "learning_rate": 0.00017940365196772508,
      "loss": 0.6592,
      "step": 1559
    },
    {
      "epoch": 0.2383772013599725,
      "grad_norm": 0.2909756004810333,
      "learning_rate": 0.00017937330079855843,
      "loss": 0.7145,
      "step": 1560
    },
    {
      "epoch": 0.23853000725828016,
      "grad_norm": 0.29025787115097046,
      "learning_rate": 0.00017934292985420015,
      "loss": 0.4892,
      "step": 1561
    },
    {
      "epoch": 0.23868281315658785,
      "grad_norm": 0.27839645743370056,
      "learning_rate": 0.00017931253914221698,
      "loss": 0.6972,
      "step": 1562
    },
    {
      "epoch": 0.23883561905489553,
      "grad_norm": 0.3256765604019165,
      "learning_rate": 0.00017928212867018042,
      "loss": 0.8926,
      "step": 1563
    },
    {
      "epoch": 0.2389884249532032,
      "grad_norm": 0.40683630108833313,
      "learning_rate": 0.000179251698445667,
      "loss": 0.7542,
      "step": 1564
    },
    {
      "epoch": 0.23914123085151087,
      "grad_norm": 0.3646388053894043,
      "learning_rate": 0.00017922124847625818,
      "loss": 0.6908,
      "step": 1565
    },
    {
      "epoch": 0.23929403674981856,
      "grad_norm": 0.30164778232574463,
      "learning_rate": 0.00017919077876954028,
      "loss": 0.7484,
      "step": 1566
    },
    {
      "epoch": 0.2394468426481262,
      "grad_norm": 0.2960456609725952,
      "learning_rate": 0.00017916028933310463,
      "loss": 0.5881,
      "step": 1567
    },
    {
      "epoch": 0.2395996485464339,
      "grad_norm": 0.3058547079563141,
      "learning_rate": 0.00017912978017454737,
      "loss": 0.6527,
      "step": 1568
    },
    {
      "epoch": 0.23975245444474158,
      "grad_norm": 0.286178857088089,
      "learning_rate": 0.00017909925130146962,
      "loss": 0.6846,
      "step": 1569
    },
    {
      "epoch": 0.23990526034304924,
      "grad_norm": 0.30656570196151733,
      "learning_rate": 0.00017906870272147742,
      "loss": 0.8488,
      "step": 1570
    },
    {
      "epoch": 0.24005806624135692,
      "grad_norm": 0.2935943305492401,
      "learning_rate": 0.0001790381344421816,
      "loss": 0.5972,
      "step": 1571
    },
    {
      "epoch": 0.24021087213966458,
      "grad_norm": 0.2677885591983795,
      "learning_rate": 0.0001790075464711981,
      "loss": 0.687,
      "step": 1572
    },
    {
      "epoch": 0.24036367803797226,
      "grad_norm": 0.34668412804603577,
      "learning_rate": 0.00017897693881614756,
      "loss": 0.6028,
      "step": 1573
    },
    {
      "epoch": 0.24051648393627995,
      "grad_norm": 0.2729659974575043,
      "learning_rate": 0.0001789463114846557,
      "loss": 0.5658,
      "step": 1574
    },
    {
      "epoch": 0.2406692898345876,
      "grad_norm": 0.30905503034591675,
      "learning_rate": 0.00017891566448435302,
      "loss": 0.7118,
      "step": 1575
    },
    {
      "epoch": 0.24082209573289529,
      "grad_norm": 0.3494151830673218,
      "learning_rate": 0.00017888499782287495,
      "loss": 0.6256,
      "step": 1576
    },
    {
      "epoch": 0.24097490163120297,
      "grad_norm": 0.2960670590400696,
      "learning_rate": 0.00017885431150786187,
      "loss": 0.6596,
      "step": 1577
    },
    {
      "epoch": 0.24112770752951063,
      "grad_norm": 0.3915059268474579,
      "learning_rate": 0.00017882360554695898,
      "loss": 0.7953,
      "step": 1578
    },
    {
      "epoch": 0.2412805134278183,
      "grad_norm": 0.2618657946586609,
      "learning_rate": 0.00017879287994781645,
      "loss": 0.5848,
      "step": 1579
    },
    {
      "epoch": 0.241433319326126,
      "grad_norm": 0.2906941771507263,
      "learning_rate": 0.0001787621347180893,
      "loss": 0.6394,
      "step": 1580
    },
    {
      "epoch": 0.24158612522443365,
      "grad_norm": 0.3870331346988678,
      "learning_rate": 0.00017873136986543744,
      "loss": 0.694,
      "step": 1581
    },
    {
      "epoch": 0.24173893112274134,
      "grad_norm": 0.3732616603374481,
      "learning_rate": 0.00017870058539752565,
      "loss": 0.9083,
      "step": 1582
    },
    {
      "epoch": 0.24189173702104902,
      "grad_norm": 0.2706056535243988,
      "learning_rate": 0.00017866978132202363,
      "loss": 0.6549,
      "step": 1583
    },
    {
      "epoch": 0.24204454291935668,
      "grad_norm": 0.24143487215042114,
      "learning_rate": 0.00017863895764660596,
      "loss": 0.5494,
      "step": 1584
    },
    {
      "epoch": 0.24219734881766436,
      "grad_norm": 0.3117775321006775,
      "learning_rate": 0.00017860811437895216,
      "loss": 0.5881,
      "step": 1585
    },
    {
      "epoch": 0.24235015471597204,
      "grad_norm": 0.31304922699928284,
      "learning_rate": 0.00017857725152674645,
      "loss": 0.7791,
      "step": 1586
    },
    {
      "epoch": 0.2425029606142797,
      "grad_norm": 0.2848494350910187,
      "learning_rate": 0.00017854636909767817,
      "loss": 0.755,
      "step": 1587
    },
    {
      "epoch": 0.24265576651258738,
      "grad_norm": 0.32094806432724,
      "learning_rate": 0.00017851546709944134,
      "loss": 0.9501,
      "step": 1588
    },
    {
      "epoch": 0.24280857241089507,
      "grad_norm": 0.3411880433559418,
      "learning_rate": 0.00017848454553973496,
      "loss": 0.839,
      "step": 1589
    },
    {
      "epoch": 0.24296137830920272,
      "grad_norm": 0.2581973969936371,
      "learning_rate": 0.00017845360442626289,
      "loss": 0.7196,
      "step": 1590
    },
    {
      "epoch": 0.2431141842075104,
      "grad_norm": 0.3275444507598877,
      "learning_rate": 0.00017842264376673384,
      "loss": 0.7177,
      "step": 1591
    },
    {
      "epoch": 0.2432669901058181,
      "grad_norm": 0.3695151209831238,
      "learning_rate": 0.0001783916635688614,
      "loss": 0.5328,
      "step": 1592
    },
    {
      "epoch": 0.24341979600412575,
      "grad_norm": 0.30539098381996155,
      "learning_rate": 0.000178360663840364,
      "loss": 0.7765,
      "step": 1593
    },
    {
      "epoch": 0.24357260190243343,
      "grad_norm": 1.2540498971939087,
      "learning_rate": 0.00017832964458896496,
      "loss": 0.5908,
      "step": 1594
    },
    {
      "epoch": 0.24372540780074112,
      "grad_norm": 0.33404994010925293,
      "learning_rate": 0.00017829860582239252,
      "loss": 0.6183,
      "step": 1595
    },
    {
      "epoch": 0.24387821369904877,
      "grad_norm": 0.2673746347427368,
      "learning_rate": 0.0001782675475483797,
      "loss": 0.6918,
      "step": 1596
    },
    {
      "epoch": 0.24403101959735646,
      "grad_norm": 0.3022105395793915,
      "learning_rate": 0.0001782364697746644,
      "loss": 0.7363,
      "step": 1597
    },
    {
      "epoch": 0.24418382549566414,
      "grad_norm": 0.28759056329727173,
      "learning_rate": 0.00017820537250898939,
      "loss": 0.6314,
      "step": 1598
    },
    {
      "epoch": 0.2443366313939718,
      "grad_norm": 0.30016323924064636,
      "learning_rate": 0.00017817425575910228,
      "loss": 0.5981,
      "step": 1599
    },
    {
      "epoch": 0.24448943729227948,
      "grad_norm": 0.3459952771663666,
      "learning_rate": 0.0001781431195327556,
      "loss": 0.6946,
      "step": 1600
    },
    {
      "epoch": 0.24464224319058717,
      "grad_norm": 0.2870761752128601,
      "learning_rate": 0.0001781119638377066,
      "loss": 1.0651,
      "step": 1601
    },
    {
      "epoch": 0.24479504908889482,
      "grad_norm": 0.2479250431060791,
      "learning_rate": 0.0001780807886817175,
      "loss": 0.7689,
      "step": 1602
    },
    {
      "epoch": 0.2449478549872025,
      "grad_norm": 0.28080081939697266,
      "learning_rate": 0.00017804959407255537,
      "loss": 0.7588,
      "step": 1603
    },
    {
      "epoch": 0.2451006608855102,
      "grad_norm": 0.36210957169532776,
      "learning_rate": 0.00017801838001799204,
      "loss": 0.9166,
      "step": 1604
    },
    {
      "epoch": 0.24525346678381785,
      "grad_norm": 0.3351990282535553,
      "learning_rate": 0.0001779871465258042,
      "loss": 0.5861,
      "step": 1605
    },
    {
      "epoch": 0.24540627268212553,
      "grad_norm": 0.2677927613258362,
      "learning_rate": 0.00017795589360377346,
      "loss": 0.7167,
      "step": 1606
    },
    {
      "epoch": 0.24555907858043322,
      "grad_norm": 0.32118043303489685,
      "learning_rate": 0.0001779246212596862,
      "loss": 0.6725,
      "step": 1607
    },
    {
      "epoch": 0.24571188447874087,
      "grad_norm": 0.34118232131004333,
      "learning_rate": 0.00017789332950133367,
      "loss": 0.6812,
      "step": 1608
    },
    {
      "epoch": 0.24586469037704856,
      "grad_norm": 0.3112337589263916,
      "learning_rate": 0.00017786201833651198,
      "loss": 0.7925,
      "step": 1609
    },
    {
      "epoch": 0.24601749627535624,
      "grad_norm": 0.2794199287891388,
      "learning_rate": 0.000177830687773022,
      "loss": 0.5572,
      "step": 1610
    },
    {
      "epoch": 0.2461703021736639,
      "grad_norm": 0.2742474675178528,
      "learning_rate": 0.0001777993378186695,
      "loss": 0.8369,
      "step": 1611
    },
    {
      "epoch": 0.24632310807197158,
      "grad_norm": 0.3400551378726959,
      "learning_rate": 0.00017776796848126503,
      "loss": 0.6672,
      "step": 1612
    },
    {
      "epoch": 0.24647591397027926,
      "grad_norm": 0.26672646403312683,
      "learning_rate": 0.00017773657976862399,
      "loss": 0.6773,
      "step": 1613
    },
    {
      "epoch": 0.24662871986858692,
      "grad_norm": 0.4063914120197296,
      "learning_rate": 0.0001777051716885667,
      "loss": 0.8437,
      "step": 1614
    },
    {
      "epoch": 0.2467815257668946,
      "grad_norm": 0.30643579363822937,
      "learning_rate": 0.00017767374424891813,
      "loss": 0.6577,
      "step": 1615
    },
    {
      "epoch": 0.2469343316652023,
      "grad_norm": 0.308155357837677,
      "learning_rate": 0.0001776422974575082,
      "loss": 0.7437,
      "step": 1616
    },
    {
      "epoch": 0.24708713756350995,
      "grad_norm": 0.2763682007789612,
      "learning_rate": 0.0001776108313221716,
      "loss": 0.6708,
      "step": 1617
    },
    {
      "epoch": 0.24723994346181763,
      "grad_norm": 0.2899835705757141,
      "learning_rate": 0.00017757934585074784,
      "loss": 0.6588,
      "step": 1618
    },
    {
      "epoch": 0.2473927493601253,
      "grad_norm": 0.2819088101387024,
      "learning_rate": 0.0001775478410510813,
      "loss": 0.6812,
      "step": 1619
    },
    {
      "epoch": 0.24754555525843297,
      "grad_norm": 0.31118282675743103,
      "learning_rate": 0.00017751631693102113,
      "loss": 0.8102,
      "step": 1620
    },
    {
      "epoch": 0.24769836115674065,
      "grad_norm": 0.37787777185440063,
      "learning_rate": 0.0001774847734984213,
      "loss": 0.4826,
      "step": 1621
    },
    {
      "epoch": 0.24785116705504834,
      "grad_norm": 0.2848077416419983,
      "learning_rate": 0.00017745321076114055,
      "loss": 0.7106,
      "step": 1622
    },
    {
      "epoch": 0.248003972953356,
      "grad_norm": 0.3185364902019501,
      "learning_rate": 0.0001774216287270425,
      "loss": 0.9623,
      "step": 1623
    },
    {
      "epoch": 0.24815677885166368,
      "grad_norm": 0.2377936691045761,
      "learning_rate": 0.00017739002740399556,
      "loss": 0.5686,
      "step": 1624
    },
    {
      "epoch": 0.24830958474997136,
      "grad_norm": 0.2903329133987427,
      "learning_rate": 0.0001773584067998729,
      "loss": 0.5424,
      "step": 1625
    },
    {
      "epoch": 0.24846239064827902,
      "grad_norm": 0.4083361327648163,
      "learning_rate": 0.0001773267669225526,
      "loss": 0.5707,
      "step": 1626
    },
    {
      "epoch": 0.2486151965465867,
      "grad_norm": 0.2921728789806366,
      "learning_rate": 0.00017729510777991737,
      "loss": 0.6263,
      "step": 1627
    },
    {
      "epoch": 0.24876800244489436,
      "grad_norm": 0.6114667057991028,
      "learning_rate": 0.0001772634293798549,
      "loss": 1.0645,
      "step": 1628
    },
    {
      "epoch": 0.24892080834320204,
      "grad_norm": 0.3961344361305237,
      "learning_rate": 0.00017723173173025755,
      "loss": 0.869,
      "step": 1629
    },
    {
      "epoch": 0.24907361424150973,
      "grad_norm": 0.26961538195610046,
      "learning_rate": 0.00017720001483902256,
      "loss": 0.7127,
      "step": 1630
    },
    {
      "epoch": 0.24922642013981738,
      "grad_norm": 0.3177333474159241,
      "learning_rate": 0.00017716827871405187,
      "loss": 0.6924,
      "step": 1631
    },
    {
      "epoch": 0.24937922603812507,
      "grad_norm": 0.3858714997768402,
      "learning_rate": 0.00017713652336325236,
      "loss": 0.6336,
      "step": 1632
    },
    {
      "epoch": 0.24953203193643275,
      "grad_norm": 0.311939537525177,
      "learning_rate": 0.00017710474879453552,
      "loss": 0.8036,
      "step": 1633
    },
    {
      "epoch": 0.2496848378347404,
      "grad_norm": 0.3360339105129242,
      "learning_rate": 0.0001770729550158178,
      "loss": 0.4666,
      "step": 1634
    },
    {
      "epoch": 0.2498376437330481,
      "grad_norm": 0.31603994965553284,
      "learning_rate": 0.00017704114203502023,
      "loss": 0.7722,
      "step": 1635
    },
    {
      "epoch": 0.24999044963135578,
      "grad_norm": 0.26751232147216797,
      "learning_rate": 0.0001770093098600689,
      "loss": 0.5519,
      "step": 1636
    },
    {
      "epoch": 0.25014325552966343,
      "grad_norm": 0.3818928301334381,
      "learning_rate": 0.00017697745849889443,
      "loss": 0.5669,
      "step": 1637
    },
    {
      "epoch": 0.2502960614279711,
      "grad_norm": 0.2536742091178894,
      "learning_rate": 0.00017694558795943233,
      "loss": 0.6771,
      "step": 1638
    },
    {
      "epoch": 0.2504488673262788,
      "grad_norm": 0.30967119336128235,
      "learning_rate": 0.0001769136982496229,
      "loss": 0.7453,
      "step": 1639
    },
    {
      "epoch": 0.2506016732245865,
      "grad_norm": 0.4818248152732849,
      "learning_rate": 0.00017688178937741116,
      "loss": 0.8101,
      "step": 1640
    },
    {
      "epoch": 0.25075447912289417,
      "grad_norm": 0.3688648045063019,
      "learning_rate": 0.000176849861350747,
      "loss": 0.9537,
      "step": 1641
    },
    {
      "epoch": 0.2509072850212018,
      "grad_norm": 0.3641200363636017,
      "learning_rate": 0.00017681791417758496,
      "loss": 0.6488,
      "step": 1642
    },
    {
      "epoch": 0.2510600909195095,
      "grad_norm": 0.2852065861225128,
      "learning_rate": 0.00017678594786588444,
      "loss": 0.6796,
      "step": 1643
    },
    {
      "epoch": 0.25121289681781717,
      "grad_norm": 0.2645616829395294,
      "learning_rate": 0.00017675396242360956,
      "loss": 0.6531,
      "step": 1644
    },
    {
      "epoch": 0.25136570271612485,
      "grad_norm": 0.31781354546546936,
      "learning_rate": 0.00017672195785872923,
      "loss": 0.7932,
      "step": 1645
    },
    {
      "epoch": 0.25151850861443253,
      "grad_norm": 0.2881321907043457,
      "learning_rate": 0.0001766899341792171,
      "loss": 0.7011,
      "step": 1646
    },
    {
      "epoch": 0.2516713145127402,
      "grad_norm": 0.30181893706321716,
      "learning_rate": 0.00017665789139305167,
      "loss": 0.6204,
      "step": 1647
    },
    {
      "epoch": 0.25182412041104785,
      "grad_norm": 0.31074196100234985,
      "learning_rate": 0.00017662582950821607,
      "loss": 0.5312,
      "step": 1648
    },
    {
      "epoch": 0.25197692630935553,
      "grad_norm": 0.30659812688827515,
      "learning_rate": 0.00017659374853269824,
      "loss": 0.7559,
      "step": 1649
    },
    {
      "epoch": 0.2521297322076632,
      "grad_norm": 0.27685531973838806,
      "learning_rate": 0.00017656164847449092,
      "loss": 0.8348,
      "step": 1650
    },
    {
      "epoch": 0.2522825381059709,
      "grad_norm": 0.33594951033592224,
      "learning_rate": 0.00017652952934159156,
      "loss": 0.9363,
      "step": 1651
    },
    {
      "epoch": 0.2524353440042786,
      "grad_norm": 0.26607444882392883,
      "learning_rate": 0.0001764973911420024,
      "loss": 0.8577,
      "step": 1652
    },
    {
      "epoch": 0.25258814990258627,
      "grad_norm": 0.36697816848754883,
      "learning_rate": 0.00017646523388373036,
      "loss": 0.8523,
      "step": 1653
    },
    {
      "epoch": 0.2527409558008939,
      "grad_norm": 0.34142768383026123,
      "learning_rate": 0.00017643305757478715,
      "loss": 0.7041,
      "step": 1654
    },
    {
      "epoch": 0.2528937616992016,
      "grad_norm": 0.27803367376327515,
      "learning_rate": 0.00017640086222318925,
      "loss": 0.7229,
      "step": 1655
    },
    {
      "epoch": 0.25304656759750926,
      "grad_norm": 0.3529926836490631,
      "learning_rate": 0.00017636864783695787,
      "loss": 0.6857,
      "step": 1656
    },
    {
      "epoch": 0.25319937349581695,
      "grad_norm": 0.28934431076049805,
      "learning_rate": 0.0001763364144241189,
      "loss": 0.572,
      "step": 1657
    },
    {
      "epoch": 0.25335217939412463,
      "grad_norm": 0.25978824496269226,
      "learning_rate": 0.0001763041619927031,
      "loss": 0.6781,
      "step": 1658
    },
    {
      "epoch": 0.25350498529243226,
      "grad_norm": 0.25913646817207336,
      "learning_rate": 0.00017627189055074584,
      "loss": 0.6003,
      "step": 1659
    },
    {
      "epoch": 0.25365779119073995,
      "grad_norm": 0.26361286640167236,
      "learning_rate": 0.0001762396001062873,
      "loss": 0.6378,
      "step": 1660
    },
    {
      "epoch": 0.25381059708904763,
      "grad_norm": 0.3600101172924042,
      "learning_rate": 0.00017620729066737236,
      "loss": 0.8028,
      "step": 1661
    },
    {
      "epoch": 0.2539634029873553,
      "grad_norm": 0.33632373809814453,
      "learning_rate": 0.00017617496224205069,
      "loss": 0.548,
      "step": 1662
    },
    {
      "epoch": 0.254116208885663,
      "grad_norm": 0.2929910123348236,
      "learning_rate": 0.00017614261483837656,
      "loss": 0.7541,
      "step": 1663
    },
    {
      "epoch": 0.2542690147839707,
      "grad_norm": 0.2633499503135681,
      "learning_rate": 0.00017611024846440911,
      "loss": 0.6927,
      "step": 1664
    },
    {
      "epoch": 0.2544218206822783,
      "grad_norm": 0.3387669026851654,
      "learning_rate": 0.00017607786312821215,
      "loss": 0.6464,
      "step": 1665
    },
    {
      "epoch": 0.254574626580586,
      "grad_norm": 0.27477720379829407,
      "learning_rate": 0.0001760454588378542,
      "loss": 0.7899,
      "step": 1666
    },
    {
      "epoch": 0.2547274324788937,
      "grad_norm": 0.28007772564888,
      "learning_rate": 0.00017601303560140855,
      "loss": 0.7528,
      "step": 1667
    },
    {
      "epoch": 0.25488023837720136,
      "grad_norm": 0.2639395594596863,
      "learning_rate": 0.00017598059342695312,
      "loss": 0.6548,
      "step": 1668
    },
    {
      "epoch": 0.25503304427550905,
      "grad_norm": 0.26824504137039185,
      "learning_rate": 0.00017594813232257067,
      "loss": 0.6124,
      "step": 1669
    },
    {
      "epoch": 0.25518585017381673,
      "grad_norm": 0.3445259928703308,
      "learning_rate": 0.00017591565229634857,
      "loss": 0.6439,
      "step": 1670
    },
    {
      "epoch": 0.25533865607212436,
      "grad_norm": 0.2897654175758362,
      "learning_rate": 0.00017588315335637894,
      "loss": 0.7828,
      "step": 1671
    },
    {
      "epoch": 0.25549146197043204,
      "grad_norm": 0.24861669540405273,
      "learning_rate": 0.00017585063551075862,
      "loss": 0.6605,
      "step": 1672
    },
    {
      "epoch": 0.2556442678687397,
      "grad_norm": 0.31479543447494507,
      "learning_rate": 0.00017581809876758922,
      "loss": 0.7128,
      "step": 1673
    },
    {
      "epoch": 0.2557970737670474,
      "grad_norm": 0.35827958583831787,
      "learning_rate": 0.0001757855431349769,
      "loss": 0.8827,
      "step": 1674
    },
    {
      "epoch": 0.2559498796653551,
      "grad_norm": 0.3290880024433136,
      "learning_rate": 0.0001757529686210327,
      "loss": 0.5662,
      "step": 1675
    },
    {
      "epoch": 0.2561026855636628,
      "grad_norm": 0.3016948699951172,
      "learning_rate": 0.00017572037523387227,
      "loss": 0.6582,
      "step": 1676
    },
    {
      "epoch": 0.2562554914619704,
      "grad_norm": 0.3011932969093323,
      "learning_rate": 0.0001756877629816159,
      "loss": 0.7446,
      "step": 1677
    },
    {
      "epoch": 0.2564082973602781,
      "grad_norm": 0.340775728225708,
      "learning_rate": 0.00017565513187238878,
      "loss": 0.5849,
      "step": 1678
    },
    {
      "epoch": 0.2565611032585858,
      "grad_norm": 0.28078779578208923,
      "learning_rate": 0.00017562248191432063,
      "loss": 0.6052,
      "step": 1679
    },
    {
      "epoch": 0.25671390915689346,
      "grad_norm": 0.5157023072242737,
      "learning_rate": 0.00017558981311554587,
      "loss": 0.7927,
      "step": 1680
    },
    {
      "epoch": 0.25686671505520114,
      "grad_norm": 0.9208202958106995,
      "learning_rate": 0.00017555712548420372,
      "loss": 0.5478,
      "step": 1681
    },
    {
      "epoch": 0.25701952095350883,
      "grad_norm": 0.2912384569644928,
      "learning_rate": 0.00017552441902843796,
      "loss": 0.6392,
      "step": 1682
    },
    {
      "epoch": 0.25717232685181646,
      "grad_norm": 0.2768346071243286,
      "learning_rate": 0.0001754916937563972,
      "loss": 0.7224,
      "step": 1683
    },
    {
      "epoch": 0.25732513275012414,
      "grad_norm": 0.25909632444381714,
      "learning_rate": 0.00017545894967623462,
      "loss": 0.7825,
      "step": 1684
    },
    {
      "epoch": 0.2574779386484318,
      "grad_norm": 0.2799522578716278,
      "learning_rate": 0.00017542618679610816,
      "loss": 0.6992,
      "step": 1685
    },
    {
      "epoch": 0.2576307445467395,
      "grad_norm": 0.3406999409198761,
      "learning_rate": 0.0001753934051241804,
      "loss": 0.6755,
      "step": 1686
    },
    {
      "epoch": 0.2577835504450472,
      "grad_norm": 0.3167582154273987,
      "learning_rate": 0.00017536060466861864,
      "loss": 0.9995,
      "step": 1687
    },
    {
      "epoch": 0.2579363563433549,
      "grad_norm": 0.3207745850086212,
      "learning_rate": 0.00017532778543759482,
      "loss": 0.6792,
      "step": 1688
    },
    {
      "epoch": 0.2580891622416625,
      "grad_norm": 0.3233521282672882,
      "learning_rate": 0.00017529494743928555,
      "loss": 0.4878,
      "step": 1689
    },
    {
      "epoch": 0.2582419681399702,
      "grad_norm": 0.34627631306648254,
      "learning_rate": 0.00017526209068187217,
      "loss": 0.8386,
      "step": 1690
    },
    {
      "epoch": 0.2583947740382779,
      "grad_norm": 0.28958991169929504,
      "learning_rate": 0.00017522921517354071,
      "loss": 0.6777,
      "step": 1691
    },
    {
      "epoch": 0.25854757993658556,
      "grad_norm": 0.28479400277137756,
      "learning_rate": 0.00017519632092248175,
      "loss": 0.5887,
      "step": 1692
    },
    {
      "epoch": 0.25870038583489324,
      "grad_norm": 0.3165437579154968,
      "learning_rate": 0.00017516340793689066,
      "loss": 0.7553,
      "step": 1693
    },
    {
      "epoch": 0.2588531917332009,
      "grad_norm": 0.40525293350219727,
      "learning_rate": 0.0001751304762249674,
      "loss": 0.8909,
      "step": 1694
    },
    {
      "epoch": 0.25900599763150856,
      "grad_norm": 0.28751781582832336,
      "learning_rate": 0.00017509752579491667,
      "loss": 0.8133,
      "step": 1695
    },
    {
      "epoch": 0.25915880352981624,
      "grad_norm": 0.2711454927921295,
      "learning_rate": 0.00017506455665494775,
      "loss": 0.7187,
      "step": 1696
    },
    {
      "epoch": 0.2593116094281239,
      "grad_norm": 0.3209768533706665,
      "learning_rate": 0.0001750315688132747,
      "loss": 0.8423,
      "step": 1697
    },
    {
      "epoch": 0.2594644153264316,
      "grad_norm": 0.24135245382785797,
      "learning_rate": 0.0001749985622781161,
      "loss": 0.5551,
      "step": 1698
    },
    {
      "epoch": 0.2596172212247393,
      "grad_norm": 0.2836229205131531,
      "learning_rate": 0.00017496553705769526,
      "loss": 0.6805,
      "step": 1699
    },
    {
      "epoch": 0.259770027123047,
      "grad_norm": 0.3115346431732178,
      "learning_rate": 0.00017493249316024011,
      "loss": 0.7877,
      "step": 1700
    },
    {
      "epoch": 0.2599228330213546,
      "grad_norm": 0.25913530588150024,
      "learning_rate": 0.00017489943059398333,
      "loss": 0.7332,
      "step": 1701
    },
    {
      "epoch": 0.2600756389196623,
      "grad_norm": 0.2903793454170227,
      "learning_rate": 0.0001748663493671621,
      "loss": 0.7419,
      "step": 1702
    },
    {
      "epoch": 0.26022844481797,
      "grad_norm": 0.3538905382156372,
      "learning_rate": 0.0001748332494880184,
      "loss": 0.7564,
      "step": 1703
    },
    {
      "epoch": 0.26038125071627766,
      "grad_norm": 0.3246188163757324,
      "learning_rate": 0.00017480013096479876,
      "loss": 0.7791,
      "step": 1704
    },
    {
      "epoch": 0.26053405661458534,
      "grad_norm": 0.26643890142440796,
      "learning_rate": 0.00017476699380575438,
      "loss": 0.6845,
      "step": 1705
    },
    {
      "epoch": 0.260686862512893,
      "grad_norm": 0.34562361240386963,
      "learning_rate": 0.00017473383801914108,
      "loss": 0.676,
      "step": 1706
    },
    {
      "epoch": 0.26083966841120065,
      "grad_norm": 0.27726852893829346,
      "learning_rate": 0.0001747006636132194,
      "loss": 0.6042,
      "step": 1707
    },
    {
      "epoch": 0.26099247430950834,
      "grad_norm": 0.3062208890914917,
      "learning_rate": 0.00017466747059625444,
      "loss": 0.64,
      "step": 1708
    },
    {
      "epoch": 0.261145280207816,
      "grad_norm": 0.25582143664360046,
      "learning_rate": 0.00017463425897651594,
      "loss": 0.5985,
      "step": 1709
    },
    {
      "epoch": 0.2612980861061237,
      "grad_norm": 0.3339386284351349,
      "learning_rate": 0.00017460102876227832,
      "loss": 0.6921,
      "step": 1710
    },
    {
      "epoch": 0.2614508920044314,
      "grad_norm": 0.28748372197151184,
      "learning_rate": 0.00017456777996182062,
      "loss": 0.605,
      "step": 1711
    },
    {
      "epoch": 0.261603697902739,
      "grad_norm": 0.4000266194343567,
      "learning_rate": 0.00017453451258342645,
      "loss": 0.866,
      "step": 1712
    },
    {
      "epoch": 0.2617565038010467,
      "grad_norm": 0.33299750089645386,
      "learning_rate": 0.00017450122663538415,
      "loss": 0.7733,
      "step": 1713
    },
    {
      "epoch": 0.2619093096993544,
      "grad_norm": 0.3416946530342102,
      "learning_rate": 0.0001744679221259866,
      "loss": 0.8105,
      "step": 1714
    },
    {
      "epoch": 0.26206211559766207,
      "grad_norm": 0.2502969205379486,
      "learning_rate": 0.0001744345990635314,
      "loss": 0.6472,
      "step": 1715
    },
    {
      "epoch": 0.26221492149596975,
      "grad_norm": 0.2692801058292389,
      "learning_rate": 0.0001744012574563206,
      "loss": 0.7379,
      "step": 1716
    },
    {
      "epoch": 0.26236772739427744,
      "grad_norm": 0.30326759815216064,
      "learning_rate": 0.0001743678973126611,
      "loss": 0.8629,
      "step": 1717
    },
    {
      "epoch": 0.26252053329258507,
      "grad_norm": 0.2786160409450531,
      "learning_rate": 0.0001743345186408642,
      "loss": 0.6748,
      "step": 1718
    },
    {
      "epoch": 0.26267333919089275,
      "grad_norm": 0.28507113456726074,
      "learning_rate": 0.000174301121449246,
      "loss": 0.5543,
      "step": 1719
    },
    {
      "epoch": 0.26282614508920044,
      "grad_norm": 0.2629023492336273,
      "learning_rate": 0.0001742677057461271,
      "loss": 0.8118,
      "step": 1720
    },
    {
      "epoch": 0.2629789509875081,
      "grad_norm": 0.24799314141273499,
      "learning_rate": 0.0001742342715398327,
      "loss": 0.6217,
      "step": 1721
    },
    {
      "epoch": 0.2631317568858158,
      "grad_norm": 0.30429476499557495,
      "learning_rate": 0.0001742008188386927,
      "loss": 0.739,
      "step": 1722
    },
    {
      "epoch": 0.2632845627841235,
      "grad_norm": 0.30273282527923584,
      "learning_rate": 0.00017416734765104156,
      "loss": 0.8007,
      "step": 1723
    },
    {
      "epoch": 0.2634373686824311,
      "grad_norm": 0.321262925863266,
      "learning_rate": 0.0001741338579852183,
      "loss": 0.6496,
      "step": 1724
    },
    {
      "epoch": 0.2635901745807388,
      "grad_norm": 0.31347712874412537,
      "learning_rate": 0.00017410034984956666,
      "loss": 0.6371,
      "step": 1725
    },
    {
      "epoch": 0.2637429804790465,
      "grad_norm": 0.33219581842422485,
      "learning_rate": 0.00017406682325243485,
      "loss": 0.8095,
      "step": 1726
    },
    {
      "epoch": 0.26389578637735417,
      "grad_norm": 0.3433677852153778,
      "learning_rate": 0.00017403327820217577,
      "loss": 0.7147,
      "step": 1727
    },
    {
      "epoch": 0.26404859227566185,
      "grad_norm": 0.34055739641189575,
      "learning_rate": 0.00017399971470714686,
      "loss": 0.6552,
      "step": 1728
    },
    {
      "epoch": 0.26420139817396954,
      "grad_norm": 0.3190424144268036,
      "learning_rate": 0.00017396613277571022,
      "loss": 0.8839,
      "step": 1729
    },
    {
      "epoch": 0.26435420407227717,
      "grad_norm": 0.32356637716293335,
      "learning_rate": 0.00017393253241623245,
      "loss": 0.7138,
      "step": 1730
    },
    {
      "epoch": 0.26450700997058485,
      "grad_norm": 0.2742416262626648,
      "learning_rate": 0.0001738989136370849,
      "loss": 0.6513,
      "step": 1731
    },
    {
      "epoch": 0.26465981586889253,
      "grad_norm": 0.2900165319442749,
      "learning_rate": 0.0001738652764466433,
      "loss": 0.7172,
      "step": 1732
    },
    {
      "epoch": 0.2648126217672002,
      "grad_norm": 0.2783643901348114,
      "learning_rate": 0.00017383162085328816,
      "loss": 0.6468,
      "step": 1733
    },
    {
      "epoch": 0.2649654276655079,
      "grad_norm": 0.38063931465148926,
      "learning_rate": 0.0001737979468654044,
      "loss": 0.6689,
      "step": 1734
    },
    {
      "epoch": 0.2651182335638156,
      "grad_norm": 0.43439793586730957,
      "learning_rate": 0.00017376425449138166,
      "loss": 0.4789,
      "step": 1735
    },
    {
      "epoch": 0.2652710394621232,
      "grad_norm": 0.30460643768310547,
      "learning_rate": 0.00017373054373961413,
      "loss": 0.7675,
      "step": 1736
    },
    {
      "epoch": 0.2654238453604309,
      "grad_norm": 0.3618842661380768,
      "learning_rate": 0.00017369681461850052,
      "loss": 0.5867,
      "step": 1737
    },
    {
      "epoch": 0.2655766512587386,
      "grad_norm": 0.3465817868709564,
      "learning_rate": 0.00017366306713644417,
      "loss": 0.8111,
      "step": 1738
    },
    {
      "epoch": 0.26572945715704627,
      "grad_norm": 0.37939634919166565,
      "learning_rate": 0.00017362930130185303,
      "loss": 0.599,
      "step": 1739
    },
    {
      "epoch": 0.26588226305535395,
      "grad_norm": 0.25240159034729004,
      "learning_rate": 0.0001735955171231395,
      "loss": 0.6037,
      "step": 1740
    },
    {
      "epoch": 0.26603506895366164,
      "grad_norm": 0.25819000601768494,
      "learning_rate": 0.00017356171460872064,
      "loss": 0.6909,
      "step": 1741
    },
    {
      "epoch": 0.26618787485196926,
      "grad_norm": 0.29703691601753235,
      "learning_rate": 0.0001735278937670181,
      "loss": 0.7321,
      "step": 1742
    },
    {
      "epoch": 0.26634068075027695,
      "grad_norm": 0.4220583438873291,
      "learning_rate": 0.00017349405460645806,
      "loss": 0.6388,
      "step": 1743
    },
    {
      "epoch": 0.26649348664858463,
      "grad_norm": 0.2786288857460022,
      "learning_rate": 0.00017346019713547123,
      "loss": 0.748,
      "step": 1744
    },
    {
      "epoch": 0.2666462925468923,
      "grad_norm": 0.2728956639766693,
      "learning_rate": 0.00017342632136249292,
      "loss": 0.4844,
      "step": 1745
    },
    {
      "epoch": 0.2667990984452,
      "grad_norm": 0.2649093270301819,
      "learning_rate": 0.000173392427295963,
      "loss": 0.6031,
      "step": 1746
    },
    {
      "epoch": 0.2669519043435077,
      "grad_norm": 0.4376051723957062,
      "learning_rate": 0.0001733585149443259,
      "loss": 0.7994,
      "step": 1747
    },
    {
      "epoch": 0.2671047102418153,
      "grad_norm": 0.42373695969581604,
      "learning_rate": 0.00017332458431603057,
      "loss": 0.7156,
      "step": 1748
    },
    {
      "epoch": 0.267257516140123,
      "grad_norm": 0.33878302574157715,
      "learning_rate": 0.0001732906354195306,
      "loss": 0.6929,
      "step": 1749
    },
    {
      "epoch": 0.2674103220384307,
      "grad_norm": 0.28887563943862915,
      "learning_rate": 0.000173256668263284,
      "loss": 0.4979,
      "step": 1750
    },
    {
      "epoch": 0.26756312793673837,
      "grad_norm": 0.3251109719276428,
      "learning_rate": 0.00017322268285575344,
      "loss": 0.6312,
      "step": 1751
    },
    {
      "epoch": 0.26771593383504605,
      "grad_norm": 0.2713668942451477,
      "learning_rate": 0.00017318867920540615,
      "loss": 0.7334,
      "step": 1752
    },
    {
      "epoch": 0.26786873973335373,
      "grad_norm": 0.2358706146478653,
      "learning_rate": 0.00017315465732071372,
      "loss": 0.908,
      "step": 1753
    },
    {
      "epoch": 0.26802154563166136,
      "grad_norm": 0.35049954056739807,
      "learning_rate": 0.00017312061721015253,
      "loss": 0.8059,
      "step": 1754
    },
    {
      "epoch": 0.26817435152996905,
      "grad_norm": 0.26363444328308105,
      "learning_rate": 0.00017308655888220335,
      "loss": 0.6745,
      "step": 1755
    },
    {
      "epoch": 0.26832715742827673,
      "grad_norm": 0.2871282696723938,
      "learning_rate": 0.00017305248234535158,
      "loss": 0.7254,
      "step": 1756
    },
    {
      "epoch": 0.2684799633265844,
      "grad_norm": 0.3954513669013977,
      "learning_rate": 0.00017301838760808697,
      "loss": 0.7484,
      "step": 1757
    },
    {
      "epoch": 0.2686327692248921,
      "grad_norm": 0.28392788767814636,
      "learning_rate": 0.00017298427467890405,
      "loss": 0.8204,
      "step": 1758
    },
    {
      "epoch": 0.2687855751231998,
      "grad_norm": 0.2613278329372406,
      "learning_rate": 0.00017295014356630178,
      "loss": 0.8254,
      "step": 1759
    },
    {
      "epoch": 0.2689383810215074,
      "grad_norm": 0.2831525504589081,
      "learning_rate": 0.00017291599427878357,
      "loss": 0.4994,
      "step": 1760
    },
    {
      "epoch": 0.2690911869198151,
      "grad_norm": 0.36036214232444763,
      "learning_rate": 0.00017288182682485747,
      "loss": 0.8176,
      "step": 1761
    },
    {
      "epoch": 0.2692439928181228,
      "grad_norm": 0.31184542179107666,
      "learning_rate": 0.00017284764121303602,
      "loss": 0.7208,
      "step": 1762
    },
    {
      "epoch": 0.26939679871643046,
      "grad_norm": 0.3088816702365875,
      "learning_rate": 0.00017281343745183622,
      "loss": 0.6944,
      "step": 1763
    },
    {
      "epoch": 0.26954960461473815,
      "grad_norm": 0.2538401484489441,
      "learning_rate": 0.0001727792155497797,
      "loss": 0.7502,
      "step": 1764
    },
    {
      "epoch": 0.2697024105130458,
      "grad_norm": 0.3166246712207794,
      "learning_rate": 0.00017274497551539257,
      "loss": 0.7718,
      "step": 1765
    },
    {
      "epoch": 0.26985521641135346,
      "grad_norm": 0.2860322892665863,
      "learning_rate": 0.00017271071735720542,
      "loss": 0.6644,
      "step": 1766
    },
    {
      "epoch": 0.27000802230966114,
      "grad_norm": 0.2913316488265991,
      "learning_rate": 0.0001726764410837534,
      "loss": 0.7526,
      "step": 1767
    },
    {
      "epoch": 0.27016082820796883,
      "grad_norm": 0.29444432258605957,
      "learning_rate": 0.00017264214670357616,
      "loss": 0.5704,
      "step": 1768
    },
    {
      "epoch": 0.2703136341062765,
      "grad_norm": 0.3528589904308319,
      "learning_rate": 0.00017260783422521785,
      "loss": 0.6162,
      "step": 1769
    },
    {
      "epoch": 0.2704664400045842,
      "grad_norm": 0.2790892720222473,
      "learning_rate": 0.0001725735036572271,
      "loss": 0.7002,
      "step": 1770
    },
    {
      "epoch": 0.2706192459028918,
      "grad_norm": 0.29821377992630005,
      "learning_rate": 0.00017253915500815712,
      "loss": 0.6549,
      "step": 1771
    },
    {
      "epoch": 0.2707720518011995,
      "grad_norm": 0.32526329159736633,
      "learning_rate": 0.00017250478828656558,
      "loss": 0.7888,
      "step": 1772
    },
    {
      "epoch": 0.2709248576995072,
      "grad_norm": 0.3157137334346771,
      "learning_rate": 0.0001724704035010147,
      "loss": 0.6242,
      "step": 1773
    },
    {
      "epoch": 0.2710776635978149,
      "grad_norm": 0.27002689242362976,
      "learning_rate": 0.00017243600066007105,
      "loss": 0.6096,
      "step": 1774
    },
    {
      "epoch": 0.27123046949612256,
      "grad_norm": 0.32272231578826904,
      "learning_rate": 0.00017240157977230593,
      "loss": 0.6981,
      "step": 1775
    },
    {
      "epoch": 0.27138327539443025,
      "grad_norm": 0.3192976713180542,
      "learning_rate": 0.00017236714084629498,
      "loss": 0.7044,
      "step": 1776
    },
    {
      "epoch": 0.2715360812927379,
      "grad_norm": 0.30380040407180786,
      "learning_rate": 0.0001723326838906183,
      "loss": 0.9246,
      "step": 1777
    },
    {
      "epoch": 0.27168888719104556,
      "grad_norm": 0.33051881194114685,
      "learning_rate": 0.00017229820891386064,
      "loss": 0.7069,
      "step": 1778
    },
    {
      "epoch": 0.27184169308935324,
      "grad_norm": 0.33114558458328247,
      "learning_rate": 0.00017226371592461113,
      "loss": 0.7682,
      "step": 1779
    },
    {
      "epoch": 0.2719944989876609,
      "grad_norm": 0.3122152090072632,
      "learning_rate": 0.00017222920493146338,
      "loss": 0.6132,
      "step": 1780
    },
    {
      "epoch": 0.2721473048859686,
      "grad_norm": 0.2902887761592865,
      "learning_rate": 0.00017219467594301553,
      "loss": 0.5294,
      "step": 1781
    },
    {
      "epoch": 0.2723001107842763,
      "grad_norm": 0.3151678740978241,
      "learning_rate": 0.0001721601289678702,
      "loss": 0.5898,
      "step": 1782
    },
    {
      "epoch": 0.2724529166825839,
      "grad_norm": 0.27645257115364075,
      "learning_rate": 0.00017212556401463447,
      "loss": 0.5349,
      "step": 1783
    },
    {
      "epoch": 0.2726057225808916,
      "grad_norm": 0.6515416502952576,
      "learning_rate": 0.00017209098109191988,
      "loss": 0.6182,
      "step": 1784
    },
    {
      "epoch": 0.2727585284791993,
      "grad_norm": 0.28953275084495544,
      "learning_rate": 0.0001720563802083425,
      "loss": 0.7238,
      "step": 1785
    },
    {
      "epoch": 0.272911334377507,
      "grad_norm": 0.3147642910480499,
      "learning_rate": 0.00017202176137252287,
      "loss": 0.8161,
      "step": 1786
    },
    {
      "epoch": 0.27306414027581466,
      "grad_norm": 0.2654118239879608,
      "learning_rate": 0.00017198712459308598,
      "loss": 0.7172,
      "step": 1787
    },
    {
      "epoch": 0.27321694617412234,
      "grad_norm": 0.2835211157798767,
      "learning_rate": 0.00017195246987866124,
      "loss": 0.6829,
      "step": 1788
    },
    {
      "epoch": 0.27336975207243,
      "grad_norm": 0.2858033776283264,
      "learning_rate": 0.00017191779723788262,
      "loss": 0.7478,
      "step": 1789
    },
    {
      "epoch": 0.27352255797073766,
      "grad_norm": 0.3065092861652374,
      "learning_rate": 0.00017188310667938853,
      "loss": 0.663,
      "step": 1790
    },
    {
      "epoch": 0.27367536386904534,
      "grad_norm": 0.2754859924316406,
      "learning_rate": 0.0001718483982118218,
      "loss": 0.6342,
      "step": 1791
    },
    {
      "epoch": 0.273828169767353,
      "grad_norm": 0.4353344440460205,
      "learning_rate": 0.00017181367184382977,
      "loss": 0.8865,
      "step": 1792
    },
    {
      "epoch": 0.2739809756656607,
      "grad_norm": 0.27528804540634155,
      "learning_rate": 0.00017177892758406425,
      "loss": 0.6648,
      "step": 1793
    },
    {
      "epoch": 0.2741337815639684,
      "grad_norm": 0.28295937180519104,
      "learning_rate": 0.0001717441654411814,
      "loss": 0.63,
      "step": 1794
    },
    {
      "epoch": 0.274286587462276,
      "grad_norm": 0.2904326915740967,
      "learning_rate": 0.00017170938542384202,
      "loss": 0.6147,
      "step": 1795
    },
    {
      "epoch": 0.2744393933605837,
      "grad_norm": 0.29448312520980835,
      "learning_rate": 0.00017167458754071118,
      "loss": 0.6123,
      "step": 1796
    },
    {
      "epoch": 0.2745921992588914,
      "grad_norm": 0.28427526354789734,
      "learning_rate": 0.00017163977180045855,
      "loss": 0.6606,
      "step": 1797
    },
    {
      "epoch": 0.2747450051571991,
      "grad_norm": 0.2858867943286896,
      "learning_rate": 0.00017160493821175807,
      "loss": 0.7894,
      "step": 1798
    },
    {
      "epoch": 0.27489781105550676,
      "grad_norm": 0.29473498463630676,
      "learning_rate": 0.00017157008678328833,
      "loss": 0.7398,
      "step": 1799
    },
    {
      "epoch": 0.27505061695381444,
      "grad_norm": 0.2681381106376648,
      "learning_rate": 0.00017153521752373227,
      "loss": 0.76,
      "step": 1800
    },
    {
      "epoch": 0.27520342285212207,
      "grad_norm": 0.30416882038116455,
      "learning_rate": 0.00017150033044177723,
      "loss": 0.8435,
      "step": 1801
    },
    {
      "epoch": 0.27535622875042975,
      "grad_norm": 0.2652147710323334,
      "learning_rate": 0.00017146542554611504,
      "loss": 0.7317,
      "step": 1802
    },
    {
      "epoch": 0.27550903464873744,
      "grad_norm": 0.3508474826812744,
      "learning_rate": 0.00017143050284544197,
      "loss": 0.8121,
      "step": 1803
    },
    {
      "epoch": 0.2756618405470451,
      "grad_norm": 0.3159068524837494,
      "learning_rate": 0.00017139556234845876,
      "loss": 0.6164,
      "step": 1804
    },
    {
      "epoch": 0.2758146464453528,
      "grad_norm": 0.311353862285614,
      "learning_rate": 0.00017136060406387044,
      "loss": 0.6027,
      "step": 1805
    },
    {
      "epoch": 0.2759674523436605,
      "grad_norm": 0.282478004693985,
      "learning_rate": 0.0001713256280003867,
      "loss": 0.8524,
      "step": 1806
    },
    {
      "epoch": 0.2761202582419681,
      "grad_norm": 0.30795755982398987,
      "learning_rate": 0.00017129063416672144,
      "loss": 0.8327,
      "step": 1807
    },
    {
      "epoch": 0.2762730641402758,
      "grad_norm": 0.33893677592277527,
      "learning_rate": 0.00017125562257159311,
      "loss": 0.7226,
      "step": 1808
    },
    {
      "epoch": 0.2764258700385835,
      "grad_norm": 0.3511805236339569,
      "learning_rate": 0.00017122059322372457,
      "loss": 0.6256,
      "step": 1809
    },
    {
      "epoch": 0.27657867593689117,
      "grad_norm": 0.31907960772514343,
      "learning_rate": 0.00017118554613184303,
      "loss": 0.8154,
      "step": 1810
    },
    {
      "epoch": 0.27673148183519886,
      "grad_norm": 0.301350861787796,
      "learning_rate": 0.00017115048130468026,
      "loss": 0.7192,
      "step": 1811
    },
    {
      "epoch": 0.27688428773350654,
      "grad_norm": 0.28029438853263855,
      "learning_rate": 0.0001711153987509723,
      "loss": 0.7313,
      "step": 1812
    },
    {
      "epoch": 0.27703709363181417,
      "grad_norm": 0.28119203448295593,
      "learning_rate": 0.00017108029847945973,
      "loss": 0.7761,
      "step": 1813
    },
    {
      "epoch": 0.27718989953012185,
      "grad_norm": 0.27862101793289185,
      "learning_rate": 0.00017104518049888742,
      "loss": 0.8314,
      "step": 1814
    },
    {
      "epoch": 0.27734270542842954,
      "grad_norm": 0.3129073977470398,
      "learning_rate": 0.00017101004481800478,
      "loss": 0.7194,
      "step": 1815
    },
    {
      "epoch": 0.2774955113267372,
      "grad_norm": 0.3475363552570343,
      "learning_rate": 0.00017097489144556557,
      "loss": 0.6641,
      "step": 1816
    },
    {
      "epoch": 0.2776483172250449,
      "grad_norm": 0.3343164324760437,
      "learning_rate": 0.0001709397203903279,
      "loss": 0.5883,
      "step": 1817
    },
    {
      "epoch": 0.2778011231233526,
      "grad_norm": 0.31171104311943054,
      "learning_rate": 0.0001709045316610544,
      "loss": 0.7454,
      "step": 1818
    },
    {
      "epoch": 0.2779539290216602,
      "grad_norm": 0.27940425276756287,
      "learning_rate": 0.00017086932526651203,
      "loss": 0.5857,
      "step": 1819
    },
    {
      "epoch": 0.2781067349199679,
      "grad_norm": 0.3223339021205902,
      "learning_rate": 0.00017083410121547217,
      "loss": 0.6367,
      "step": 1820
    },
    {
      "epoch": 0.2782595408182756,
      "grad_norm": 0.2605260908603668,
      "learning_rate": 0.00017079885951671057,
      "loss": 0.4917,
      "step": 1821
    },
    {
      "epoch": 0.27841234671658327,
      "grad_norm": 0.31542614102363586,
      "learning_rate": 0.00017076360017900742,
      "loss": 0.8394,
      "step": 1822
    },
    {
      "epoch": 0.27856515261489095,
      "grad_norm": 0.30797964334487915,
      "learning_rate": 0.0001707283232111473,
      "loss": 0.864,
      "step": 1823
    },
    {
      "epoch": 0.2787179585131986,
      "grad_norm": 0.3249169588088989,
      "learning_rate": 0.00017069302862191918,
      "loss": 0.8062,
      "step": 1824
    },
    {
      "epoch": 0.27887076441150627,
      "grad_norm": 0.2909657657146454,
      "learning_rate": 0.00017065771642011638,
      "loss": 0.6884,
      "step": 1825
    },
    {
      "epoch": 0.27902357030981395,
      "grad_norm": 0.3967931568622589,
      "learning_rate": 0.00017062238661453666,
      "loss": 0.9421,
      "step": 1826
    },
    {
      "epoch": 0.27917637620812163,
      "grad_norm": 0.26568347215652466,
      "learning_rate": 0.00017058703921398212,
      "loss": 0.7233,
      "step": 1827
    },
    {
      "epoch": 0.2793291821064293,
      "grad_norm": 0.29785382747650146,
      "learning_rate": 0.0001705516742272593,
      "loss": 0.7348,
      "step": 1828
    },
    {
      "epoch": 0.279481988004737,
      "grad_norm": 0.2735860347747803,
      "learning_rate": 0.00017051629166317907,
      "loss": 0.7623,
      "step": 1829
    },
    {
      "epoch": 0.27963479390304463,
      "grad_norm": 0.2826923131942749,
      "learning_rate": 0.0001704808915305567,
      "loss": 0.9977,
      "step": 1830
    },
    {
      "epoch": 0.2797875998013523,
      "grad_norm": 0.3087044656276703,
      "learning_rate": 0.00017044547383821183,
      "loss": 0.7793,
      "step": 1831
    },
    {
      "epoch": 0.27994040569966,
      "grad_norm": 0.2505679130554199,
      "learning_rate": 0.00017041003859496852,
      "loss": 0.6777,
      "step": 1832
    },
    {
      "epoch": 0.2800932115979677,
      "grad_norm": 0.34263527393341064,
      "learning_rate": 0.0001703745858096551,
      "loss": 0.7956,
      "step": 1833
    },
    {
      "epoch": 0.28024601749627537,
      "grad_norm": 0.35427922010421753,
      "learning_rate": 0.0001703391154911044,
      "loss": 0.7152,
      "step": 1834
    },
    {
      "epoch": 0.28039882339458305,
      "grad_norm": 0.37862199544906616,
      "learning_rate": 0.00017030362764815346,
      "loss": 0.8365,
      "step": 1835
    },
    {
      "epoch": 0.2805516292928907,
      "grad_norm": 0.3023863732814789,
      "learning_rate": 0.00017026812228964388,
      "loss": 0.6895,
      "step": 1836
    },
    {
      "epoch": 0.28070443519119836,
      "grad_norm": 0.2860608994960785,
      "learning_rate": 0.0001702325994244215,
      "loss": 0.6517,
      "step": 1837
    },
    {
      "epoch": 0.28085724108950605,
      "grad_norm": 0.36048266291618347,
      "learning_rate": 0.00017019705906133647,
      "loss": 0.722,
      "step": 1838
    },
    {
      "epoch": 0.28101004698781373,
      "grad_norm": 0.28945350646972656,
      "learning_rate": 0.0001701615012092435,
      "loss": 0.6221,
      "step": 1839
    },
    {
      "epoch": 0.2811628528861214,
      "grad_norm": 0.33494412899017334,
      "learning_rate": 0.00017012592587700137,
      "loss": 0.6345,
      "step": 1840
    },
    {
      "epoch": 0.2813156587844291,
      "grad_norm": 0.28341996669769287,
      "learning_rate": 0.00017009033307347353,
      "loss": 0.6094,
      "step": 1841
    },
    {
      "epoch": 0.28146846468273673,
      "grad_norm": 0.26581546664237976,
      "learning_rate": 0.00017005472280752753,
      "loss": 0.694,
      "step": 1842
    },
    {
      "epoch": 0.2816212705810444,
      "grad_norm": 0.44395822286605835,
      "learning_rate": 0.00017001909508803539,
      "loss": 0.87,
      "step": 1843
    },
    {
      "epoch": 0.2817740764793521,
      "grad_norm": 0.28351446986198425,
      "learning_rate": 0.00016998344992387348,
      "loss": 0.6607,
      "step": 1844
    },
    {
      "epoch": 0.2819268823776598,
      "grad_norm": 0.25666123628616333,
      "learning_rate": 0.0001699477873239225,
      "loss": 0.5578,
      "step": 1845
    },
    {
      "epoch": 0.28207968827596747,
      "grad_norm": 0.2706250548362732,
      "learning_rate": 0.00016991210729706743,
      "loss": 0.8191,
      "step": 1846
    },
    {
      "epoch": 0.28223249417427515,
      "grad_norm": 0.31308892369270325,
      "learning_rate": 0.0001698764098521977,
      "loss": 0.6986,
      "step": 1847
    },
    {
      "epoch": 0.2823853000725828,
      "grad_norm": 0.3809071183204651,
      "learning_rate": 0.00016984069499820703,
      "loss": 0.7085,
      "step": 1848
    },
    {
      "epoch": 0.28253810597089046,
      "grad_norm": 0.344176322221756,
      "learning_rate": 0.00016980496274399343,
      "loss": 0.8155,
      "step": 1849
    },
    {
      "epoch": 0.28269091186919815,
      "grad_norm": 0.2814493179321289,
      "learning_rate": 0.00016976921309845935,
      "loss": 0.7716,
      "step": 1850
    },
    {
      "epoch": 0.28284371776750583,
      "grad_norm": 0.2829038202762604,
      "learning_rate": 0.00016973344607051146,
      "loss": 0.6583,
      "step": 1851
    },
    {
      "epoch": 0.2829965236658135,
      "grad_norm": 0.33343979716300964,
      "learning_rate": 0.00016969766166906086,
      "loss": 0.5937,
      "step": 1852
    },
    {
      "epoch": 0.2831493295641212,
      "grad_norm": 0.30288052558898926,
      "learning_rate": 0.00016966185990302293,
      "loss": 0.6314,
      "step": 1853
    },
    {
      "epoch": 0.28330213546242883,
      "grad_norm": 0.31784963607788086,
      "learning_rate": 0.00016962604078131732,
      "loss": 0.7495,
      "step": 1854
    },
    {
      "epoch": 0.2834549413607365,
      "grad_norm": 0.34505629539489746,
      "learning_rate": 0.00016959020431286815,
      "loss": 0.5472,
      "step": 1855
    },
    {
      "epoch": 0.2836077472590442,
      "grad_norm": 0.33716028928756714,
      "learning_rate": 0.0001695543505066037,
      "loss": 0.6097,
      "step": 1856
    },
    {
      "epoch": 0.2837605531573519,
      "grad_norm": 0.25437086820602417,
      "learning_rate": 0.0001695184793714567,
      "loss": 0.8513,
      "step": 1857
    },
    {
      "epoch": 0.28391335905565956,
      "grad_norm": 0.3078169524669647,
      "learning_rate": 0.00016948259091636411,
      "loss": 0.6392,
      "step": 1858
    },
    {
      "epoch": 0.28406616495396725,
      "grad_norm": 0.3437117338180542,
      "learning_rate": 0.00016944668515026724,
      "loss": 0.6377,
      "step": 1859
    },
    {
      "epoch": 0.2842189708522749,
      "grad_norm": 0.34416788816452026,
      "learning_rate": 0.00016941076208211176,
      "loss": 0.7733,
      "step": 1860
    },
    {
      "epoch": 0.28437177675058256,
      "grad_norm": 0.24578352272510529,
      "learning_rate": 0.0001693748217208475,
      "loss": 0.6101,
      "step": 1861
    },
    {
      "epoch": 0.28452458264889025,
      "grad_norm": 0.2477305680513382,
      "learning_rate": 0.00016933886407542877,
      "loss": 0.6225,
      "step": 1862
    },
    {
      "epoch": 0.28467738854719793,
      "grad_norm": 0.31731775403022766,
      "learning_rate": 0.00016930288915481412,
      "loss": 0.6381,
      "step": 1863
    },
    {
      "epoch": 0.2848301944455056,
      "grad_norm": 0.28951868414878845,
      "learning_rate": 0.00016926689696796638,
      "loss": 0.6691,
      "step": 1864
    },
    {
      "epoch": 0.2849830003438133,
      "grad_norm": 0.25854676961898804,
      "learning_rate": 0.0001692308875238527,
      "loss": 0.6969,
      "step": 1865
    },
    {
      "epoch": 0.2851358062421209,
      "grad_norm": 0.287022203207016,
      "learning_rate": 0.00016919486083144455,
      "loss": 0.5745,
      "step": 1866
    },
    {
      "epoch": 0.2852886121404286,
      "grad_norm": 0.2648172080516815,
      "learning_rate": 0.00016915881689971764,
      "loss": 0.6962,
      "step": 1867
    },
    {
      "epoch": 0.2854414180387363,
      "grad_norm": 0.3037422001361847,
      "learning_rate": 0.00016912275573765205,
      "loss": 0.5824,
      "step": 1868
    },
    {
      "epoch": 0.285594223937044,
      "grad_norm": 0.27793166041374207,
      "learning_rate": 0.00016908667735423207,
      "loss": 0.5969,
      "step": 1869
    },
    {
      "epoch": 0.28574702983535166,
      "grad_norm": 0.28627026081085205,
      "learning_rate": 0.0001690505817584464,
      "loss": 0.6728,
      "step": 1870
    },
    {
      "epoch": 0.28589983573365935,
      "grad_norm": 0.26689401268959045,
      "learning_rate": 0.0001690144689592879,
      "loss": 0.5442,
      "step": 1871
    },
    {
      "epoch": 0.286052641631967,
      "grad_norm": 0.24917180836200714,
      "learning_rate": 0.00016897833896575376,
      "loss": 0.6467,
      "step": 1872
    },
    {
      "epoch": 0.28620544753027466,
      "grad_norm": 0.5315676927566528,
      "learning_rate": 0.0001689421917868455,
      "loss": 0.7294,
      "step": 1873
    },
    {
      "epoch": 0.28635825342858234,
      "grad_norm": 0.27206841111183167,
      "learning_rate": 0.00016890602743156887,
      "loss": 0.7553,
      "step": 1874
    },
    {
      "epoch": 0.28651105932689,
      "grad_norm": 0.3734266757965088,
      "learning_rate": 0.00016886984590893395,
      "loss": 0.7681,
      "step": 1875
    },
    {
      "epoch": 0.2866638652251977,
      "grad_norm": 0.32891905307769775,
      "learning_rate": 0.000168833647227955,
      "loss": 0.5659,
      "step": 1876
    },
    {
      "epoch": 0.28681667112350534,
      "grad_norm": 0.2967846989631653,
      "learning_rate": 0.00016879743139765066,
      "loss": 0.8604,
      "step": 1877
    },
    {
      "epoch": 0.286969477021813,
      "grad_norm": 0.31898820400238037,
      "learning_rate": 0.00016876119842704378,
      "loss": 0.6555,
      "step": 1878
    },
    {
      "epoch": 0.2871222829201207,
      "grad_norm": 0.2863745093345642,
      "learning_rate": 0.00016872494832516151,
      "loss": 0.5857,
      "step": 1879
    },
    {
      "epoch": 0.2872750888184284,
      "grad_norm": 0.27155035734176636,
      "learning_rate": 0.00016868868110103528,
      "loss": 0.7242,
      "step": 1880
    },
    {
      "epoch": 0.2874278947167361,
      "grad_norm": 0.35988694429397583,
      "learning_rate": 0.00016865239676370074,
      "loss": 0.5611,
      "step": 1881
    },
    {
      "epoch": 0.28758070061504376,
      "grad_norm": 0.28384700417518616,
      "learning_rate": 0.00016861609532219782,
      "loss": 0.7728,
      "step": 1882
    },
    {
      "epoch": 0.2877335065133514,
      "grad_norm": 0.2415805160999298,
      "learning_rate": 0.00016857977678557073,
      "loss": 0.707,
      "step": 1883
    },
    {
      "epoch": 0.2878863124116591,
      "grad_norm": 0.3154737949371338,
      "learning_rate": 0.0001685434411628679,
      "loss": 0.667,
      "step": 1884
    },
    {
      "epoch": 0.28803911830996676,
      "grad_norm": 0.6415541768074036,
      "learning_rate": 0.00016850708846314214,
      "loss": 0.8504,
      "step": 1885
    },
    {
      "epoch": 0.28819192420827444,
      "grad_norm": 0.28032657504081726,
      "learning_rate": 0.0001684707186954503,
      "loss": 0.7435,
      "step": 1886
    },
    {
      "epoch": 0.2883447301065821,
      "grad_norm": 0.355499804019928,
      "learning_rate": 0.0001684343318688537,
      "loss": 0.6103,
      "step": 1887
    },
    {
      "epoch": 0.2884975360048898,
      "grad_norm": 0.27321726083755493,
      "learning_rate": 0.00016839792799241773,
      "loss": 0.6049,
      "step": 1888
    },
    {
      "epoch": 0.28865034190319744,
      "grad_norm": 0.3146701157093048,
      "learning_rate": 0.00016836150707521218,
      "loss": 0.6746,
      "step": 1889
    },
    {
      "epoch": 0.2888031478015051,
      "grad_norm": 0.28722837567329407,
      "learning_rate": 0.00016832506912631097,
      "loss": 0.531,
      "step": 1890
    },
    {
      "epoch": 0.2889559536998128,
      "grad_norm": 0.24441641569137573,
      "learning_rate": 0.00016828861415479234,
      "loss": 0.5921,
      "step": 1891
    },
    {
      "epoch": 0.2891087595981205,
      "grad_norm": 0.35211068391799927,
      "learning_rate": 0.00016825214216973874,
      "loss": 0.6858,
      "step": 1892
    },
    {
      "epoch": 0.2892615654964282,
      "grad_norm": 0.3029313385486603,
      "learning_rate": 0.00016821565318023677,
      "loss": 0.8322,
      "step": 1893
    },
    {
      "epoch": 0.28941437139473586,
      "grad_norm": 0.5261650681495667,
      "learning_rate": 0.00016817914719537749,
      "loss": 0.6593,
      "step": 1894
    },
    {
      "epoch": 0.2895671772930435,
      "grad_norm": 0.25222522020339966,
      "learning_rate": 0.00016814262422425597,
      "loss": 0.6869,
      "step": 1895
    },
    {
      "epoch": 0.28971998319135117,
      "grad_norm": 0.28435593843460083,
      "learning_rate": 0.00016810608427597162,
      "loss": 0.7868,
      "step": 1896
    },
    {
      "epoch": 0.28987278908965886,
      "grad_norm": 0.3061954975128174,
      "learning_rate": 0.0001680695273596281,
      "loss": 0.5183,
      "step": 1897
    },
    {
      "epoch": 0.29002559498796654,
      "grad_norm": 0.2951694130897522,
      "learning_rate": 0.00016803295348433324,
      "loss": 0.8351,
      "step": 1898
    },
    {
      "epoch": 0.2901784008862742,
      "grad_norm": 0.2741797864437103,
      "learning_rate": 0.00016799636265919912,
      "loss": 0.5841,
      "step": 1899
    },
    {
      "epoch": 0.2903312067845819,
      "grad_norm": 0.31489941477775574,
      "learning_rate": 0.00016795975489334195,
      "loss": 0.4929,
      "step": 1900
    },
    {
      "epoch": 0.29048401268288954,
      "grad_norm": 0.3178277313709259,
      "learning_rate": 0.0001679231301958824,
      "loss": 0.9264,
      "step": 1901
    },
    {
      "epoch": 0.2906368185811972,
      "grad_norm": 0.3057640790939331,
      "learning_rate": 0.00016788648857594507,
      "loss": 0.76,
      "step": 1902
    },
    {
      "epoch": 0.2907896244795049,
      "grad_norm": 0.24972616136074066,
      "learning_rate": 0.00016784983004265898,
      "loss": 0.7661,
      "step": 1903
    },
    {
      "epoch": 0.2909424303778126,
      "grad_norm": 0.4688352644443512,
      "learning_rate": 0.00016781315460515726,
      "loss": 0.5192,
      "step": 1904
    },
    {
      "epoch": 0.2910952362761203,
      "grad_norm": 0.2574828863143921,
      "learning_rate": 0.00016777646227257736,
      "loss": 0.6807,
      "step": 1905
    },
    {
      "epoch": 0.29124804217442796,
      "grad_norm": 0.2910616993904114,
      "learning_rate": 0.0001677397530540608,
      "loss": 0.6761,
      "step": 1906
    },
    {
      "epoch": 0.2914008480727356,
      "grad_norm": 0.3315010964870453,
      "learning_rate": 0.00016770302695875335,
      "loss": 0.6879,
      "step": 1907
    },
    {
      "epoch": 0.29155365397104327,
      "grad_norm": 0.2516402304172516,
      "learning_rate": 0.0001676662839958051,
      "loss": 0.6013,
      "step": 1908
    },
    {
      "epoch": 0.29170645986935095,
      "grad_norm": 0.30346551537513733,
      "learning_rate": 0.00016762952417437017,
      "loss": 0.5684,
      "step": 1909
    },
    {
      "epoch": 0.29185926576765864,
      "grad_norm": 0.3043062686920166,
      "learning_rate": 0.00016759274750360702,
      "loss": 0.7597,
      "step": 1910
    },
    {
      "epoch": 0.2920120716659663,
      "grad_norm": 0.34094980359077454,
      "learning_rate": 0.00016755595399267818,
      "loss": 0.6636,
      "step": 1911
    },
    {
      "epoch": 0.292164877564274,
      "grad_norm": 0.4071411192417145,
      "learning_rate": 0.0001675191436507505,
      "loss": 0.7575,
      "step": 1912
    },
    {
      "epoch": 0.29231768346258163,
      "grad_norm": 0.4260135591030121,
      "learning_rate": 0.00016748231648699497,
      "loss": 0.6146,
      "step": 1913
    },
    {
      "epoch": 0.2924704893608893,
      "grad_norm": 0.3323768377304077,
      "learning_rate": 0.00016744547251058674,
      "loss": 0.8563,
      "step": 1914
    },
    {
      "epoch": 0.292623295259197,
      "grad_norm": 0.27061089873313904,
      "learning_rate": 0.0001674086117307052,
      "loss": 0.7546,
      "step": 1915
    },
    {
      "epoch": 0.2927761011575047,
      "grad_norm": 0.2773573696613312,
      "learning_rate": 0.00016737173415653386,
      "loss": 0.6676,
      "step": 1916
    },
    {
      "epoch": 0.29292890705581237,
      "grad_norm": 0.34023284912109375,
      "learning_rate": 0.0001673348397972605,
      "loss": 0.6807,
      "step": 1917
    },
    {
      "epoch": 0.29308171295412005,
      "grad_norm": 0.2626526951789856,
      "learning_rate": 0.00016729792866207704,
      "loss": 0.6535,
      "step": 1918
    },
    {
      "epoch": 0.2932345188524277,
      "grad_norm": 0.27187174558639526,
      "learning_rate": 0.00016726100076017955,
      "loss": 0.6563,
      "step": 1919
    },
    {
      "epoch": 0.29338732475073537,
      "grad_norm": 0.26734450459480286,
      "learning_rate": 0.00016722405610076834,
      "loss": 0.6657,
      "step": 1920
    },
    {
      "epoch": 0.29354013064904305,
      "grad_norm": 0.26832592487335205,
      "learning_rate": 0.00016718709469304787,
      "loss": 0.6849,
      "step": 1921
    },
    {
      "epoch": 0.29369293654735074,
      "grad_norm": 0.32193028926849365,
      "learning_rate": 0.00016715011654622671,
      "loss": 0.8245,
      "step": 1922
    },
    {
      "epoch": 0.2938457424456584,
      "grad_norm": 0.28302088379859924,
      "learning_rate": 0.00016711312166951768,
      "loss": 0.6771,
      "step": 1923
    },
    {
      "epoch": 0.2939985483439661,
      "grad_norm": 0.33167263865470886,
      "learning_rate": 0.0001670761100721378,
      "loss": 0.7652,
      "step": 1924
    },
    {
      "epoch": 0.29415135424227373,
      "grad_norm": 0.2596791982650757,
      "learning_rate": 0.0001670390817633081,
      "loss": 0.9119,
      "step": 1925
    },
    {
      "epoch": 0.2943041601405814,
      "grad_norm": 0.34436488151550293,
      "learning_rate": 0.00016700203675225393,
      "loss": 0.4812,
      "step": 1926
    },
    {
      "epoch": 0.2944569660388891,
      "grad_norm": 0.23007529973983765,
      "learning_rate": 0.00016696497504820474,
      "loss": 0.7726,
      "step": 1927
    },
    {
      "epoch": 0.2946097719371968,
      "grad_norm": 0.3009369969367981,
      "learning_rate": 0.00016692789666039416,
      "loss": 0.8195,
      "step": 1928
    },
    {
      "epoch": 0.29476257783550447,
      "grad_norm": 0.36392152309417725,
      "learning_rate": 0.0001668908015980599,
      "loss": 0.7136,
      "step": 1929
    },
    {
      "epoch": 0.2949153837338121,
      "grad_norm": 0.27673837542533875,
      "learning_rate": 0.00016685368987044393,
      "loss": 0.8112,
      "step": 1930
    },
    {
      "epoch": 0.2950681896321198,
      "grad_norm": 0.22855332493782043,
      "learning_rate": 0.00016681656148679233,
      "loss": 0.569,
      "step": 1931
    },
    {
      "epoch": 0.29522099553042747,
      "grad_norm": 0.27440088987350464,
      "learning_rate": 0.00016677941645635528,
      "loss": 0.6439,
      "step": 1932
    },
    {
      "epoch": 0.29537380142873515,
      "grad_norm": 0.27212488651275635,
      "learning_rate": 0.00016674225478838724,
      "loss": 0.6632,
      "step": 1933
    },
    {
      "epoch": 0.29552660732704283,
      "grad_norm": 0.35999348759651184,
      "learning_rate": 0.00016670507649214658,
      "loss": 0.7135,
      "step": 1934
    },
    {
      "epoch": 0.2956794132253505,
      "grad_norm": 0.3128264248371124,
      "learning_rate": 0.00016666788157689615,
      "loss": 0.7247,
      "step": 1935
    },
    {
      "epoch": 0.29583221912365815,
      "grad_norm": 0.4011903405189514,
      "learning_rate": 0.00016663067005190255,
      "loss": 0.6513,
      "step": 1936
    },
    {
      "epoch": 0.29598502502196583,
      "grad_norm": 0.3012496531009674,
      "learning_rate": 0.00016659344192643691,
      "loss": 0.9517,
      "step": 1937
    },
    {
      "epoch": 0.2961378309202735,
      "grad_norm": 0.30148524045944214,
      "learning_rate": 0.00016655619720977417,
      "loss": 0.7281,
      "step": 1938
    },
    {
      "epoch": 0.2962906368185812,
      "grad_norm": 0.2771994471549988,
      "learning_rate": 0.00016651893591119362,
      "loss": 0.5911,
      "step": 1939
    },
    {
      "epoch": 0.2964434427168889,
      "grad_norm": 0.3425995111465454,
      "learning_rate": 0.00016648165803997853,
      "loss": 0.6915,
      "step": 1940
    },
    {
      "epoch": 0.29659624861519657,
      "grad_norm": 0.2747991681098938,
      "learning_rate": 0.00016644436360541639,
      "loss": 0.8483,
      "step": 1941
    },
    {
      "epoch": 0.2967490545135042,
      "grad_norm": 0.2685058116912842,
      "learning_rate": 0.00016640705261679887,
      "loss": 0.7119,
      "step": 1942
    },
    {
      "epoch": 0.2969018604118119,
      "grad_norm": 0.4401903748512268,
      "learning_rate": 0.00016636972508342156,
      "loss": 0.6441,
      "step": 1943
    },
    {
      "epoch": 0.29705466631011956,
      "grad_norm": 0.3246113657951355,
      "learning_rate": 0.0001663323810145844,
      "loss": 0.6992,
      "step": 1944
    },
    {
      "epoch": 0.29720747220842725,
      "grad_norm": 0.2685340642929077,
      "learning_rate": 0.00016629502041959132,
      "loss": 0.6728,
      "step": 1945
    },
    {
      "epoch": 0.29736027810673493,
      "grad_norm": 0.3675488233566284,
      "learning_rate": 0.0001662576433077504,
      "loss": 0.814,
      "step": 1946
    },
    {
      "epoch": 0.2975130840050426,
      "grad_norm": 0.3309258818626404,
      "learning_rate": 0.0001662202496883738,
      "loss": 0.7315,
      "step": 1947
    },
    {
      "epoch": 0.29766588990335024,
      "grad_norm": 0.308794766664505,
      "learning_rate": 0.0001661828395707779,
      "loss": 0.7723,
      "step": 1948
    },
    {
      "epoch": 0.29781869580165793,
      "grad_norm": 0.3159720301628113,
      "learning_rate": 0.00016614541296428308,
      "loss": 0.742,
      "step": 1949
    },
    {
      "epoch": 0.2979715016999656,
      "grad_norm": 0.29673030972480774,
      "learning_rate": 0.0001661079698782138,
      "loss": 0.8073,
      "step": 1950
    },
    {
      "epoch": 0.2981243075982733,
      "grad_norm": 0.37807202339172363,
      "learning_rate": 0.00016607051032189882,
      "loss": 0.6621,
      "step": 1951
    },
    {
      "epoch": 0.298277113496581,
      "grad_norm": 0.31420621275901794,
      "learning_rate": 0.00016603303430467076,
      "loss": 0.6614,
      "step": 1952
    },
    {
      "epoch": 0.29842991939488867,
      "grad_norm": 0.2999773621559143,
      "learning_rate": 0.0001659955418358665,
      "loss": 0.668,
      "step": 1953
    },
    {
      "epoch": 0.2985827252931963,
      "grad_norm": 0.28562691807746887,
      "learning_rate": 0.00016595803292482702,
      "loss": 0.6569,
      "step": 1954
    },
    {
      "epoch": 0.298735531191504,
      "grad_norm": 0.3034481406211853,
      "learning_rate": 0.00016592050758089727,
      "loss": 0.7714,
      "step": 1955
    },
    {
      "epoch": 0.29888833708981166,
      "grad_norm": 0.2904307246208191,
      "learning_rate": 0.00016588296581342645,
      "loss": 0.67,
      "step": 1956
    },
    {
      "epoch": 0.29904114298811935,
      "grad_norm": 0.28086069226264954,
      "learning_rate": 0.0001658454076317677,
      "loss": 0.6636,
      "step": 1957
    },
    {
      "epoch": 0.29919394888642703,
      "grad_norm": 0.46149349212646484,
      "learning_rate": 0.00016580783304527837,
      "loss": 0.672,
      "step": 1958
    },
    {
      "epoch": 0.2993467547847347,
      "grad_norm": 0.29076477885246277,
      "learning_rate": 0.0001657702420633198,
      "loss": 0.653,
      "step": 1959
    },
    {
      "epoch": 0.29949956068304234,
      "grad_norm": 0.3762834370136261,
      "learning_rate": 0.00016573263469525754,
      "loss": 0.6253,
      "step": 1960
    },
    {
      "epoch": 0.29965236658135,
      "grad_norm": 0.36436668038368225,
      "learning_rate": 0.00016569501095046115,
      "loss": 0.6948,
      "step": 1961
    },
    {
      "epoch": 0.2998051724796577,
      "grad_norm": 0.2805554270744324,
      "learning_rate": 0.00016565737083830423,
      "loss": 0.6422,
      "step": 1962
    },
    {
      "epoch": 0.2999579783779654,
      "grad_norm": 0.3336206376552582,
      "learning_rate": 0.0001656197143681645,
      "loss": 0.7162,
      "step": 1963
    },
    {
      "epoch": 0.3001107842762731,
      "grad_norm": 0.2919718027114868,
      "learning_rate": 0.00016558204154942376,
      "loss": 0.6768,
      "step": 1964
    },
    {
      "epoch": 0.30026359017458076,
      "grad_norm": 0.282857209444046,
      "learning_rate": 0.0001655443523914679,
      "loss": 0.6641,
      "step": 1965
    },
    {
      "epoch": 0.3004163960728884,
      "grad_norm": 0.301281601190567,
      "learning_rate": 0.0001655066469036868,
      "loss": 0.6862,
      "step": 1966
    },
    {
      "epoch": 0.3005692019711961,
      "grad_norm": 0.2959330081939697,
      "learning_rate": 0.00016546892509547453,
      "loss": 0.7397,
      "step": 1967
    },
    {
      "epoch": 0.30072200786950376,
      "grad_norm": 0.34630370140075684,
      "learning_rate": 0.0001654311869762291,
      "loss": 0.6867,
      "step": 1968
    },
    {
      "epoch": 0.30087481376781144,
      "grad_norm": 0.30559927225112915,
      "learning_rate": 0.00016539343255535274,
      "loss": 0.7231,
      "step": 1969
    },
    {
      "epoch": 0.30102761966611913,
      "grad_norm": 0.4635114073753357,
      "learning_rate": 0.00016535566184225155,
      "loss": 0.6228,
      "step": 1970
    },
    {
      "epoch": 0.3011804255644268,
      "grad_norm": 0.2548908591270447,
      "learning_rate": 0.0001653178748463358,
      "loss": 0.7538,
      "step": 1971
    },
    {
      "epoch": 0.30133323146273444,
      "grad_norm": 0.29736220836639404,
      "learning_rate": 0.00016528007157701988,
      "loss": 0.6011,
      "step": 1972
    },
    {
      "epoch": 0.3014860373610421,
      "grad_norm": 0.2700873911380768,
      "learning_rate": 0.0001652422520437221,
      "loss": 0.7904,
      "step": 1973
    },
    {
      "epoch": 0.3016388432593498,
      "grad_norm": 0.3336293399333954,
      "learning_rate": 0.00016520441625586486,
      "loss": 0.5836,
      "step": 1974
    },
    {
      "epoch": 0.3017916491576575,
      "grad_norm": 0.37030869722366333,
      "learning_rate": 0.00016516656422287462,
      "loss": 0.5687,
      "step": 1975
    },
    {
      "epoch": 0.3019444550559652,
      "grad_norm": 0.34676826000213623,
      "learning_rate": 0.00016512869595418196,
      "loss": 0.6698,
      "step": 1976
    },
    {
      "epoch": 0.30209726095427286,
      "grad_norm": 0.333841472864151,
      "learning_rate": 0.00016509081145922144,
      "loss": 0.8103,
      "step": 1977
    },
    {
      "epoch": 0.3022500668525805,
      "grad_norm": 0.3339821696281433,
      "learning_rate": 0.00016505291074743158,
      "loss": 0.8072,
      "step": 1978
    },
    {
      "epoch": 0.3024028727508882,
      "grad_norm": 0.29102015495300293,
      "learning_rate": 0.00016501499382825513,
      "loss": 0.6759,
      "step": 1979
    },
    {
      "epoch": 0.30255567864919586,
      "grad_norm": 0.3134000301361084,
      "learning_rate": 0.00016497706071113866,
      "loss": 0.7879,
      "step": 1980
    },
    {
      "epoch": 0.30270848454750354,
      "grad_norm": 0.2835538983345032,
      "learning_rate": 0.00016493911140553298,
      "loss": 0.8545,
      "step": 1981
    },
    {
      "epoch": 0.3028612904458112,
      "grad_norm": 0.30128997564315796,
      "learning_rate": 0.0001649011459208928,
      "loss": 0.8597,
      "step": 1982
    },
    {
      "epoch": 0.3030140963441189,
      "grad_norm": 0.281778484582901,
      "learning_rate": 0.0001648631642666769,
      "loss": 0.9106,
      "step": 1983
    },
    {
      "epoch": 0.30316690224242654,
      "grad_norm": 0.351546049118042,
      "learning_rate": 0.00016482516645234814,
      "loss": 0.635,
      "step": 1984
    },
    {
      "epoch": 0.3033197081407342,
      "grad_norm": 0.2829291522502899,
      "learning_rate": 0.0001647871524873733,
      "loss": 0.8733,
      "step": 1985
    },
    {
      "epoch": 0.3034725140390419,
      "grad_norm": 0.26995211839675903,
      "learning_rate": 0.00016474912238122324,
      "loss": 0.7474,
      "step": 1986
    },
    {
      "epoch": 0.3036253199373496,
      "grad_norm": 0.2779309153556824,
      "learning_rate": 0.00016471107614337286,
      "loss": 0.7124,
      "step": 1987
    },
    {
      "epoch": 0.3037781258356573,
      "grad_norm": 0.31373247504234314,
      "learning_rate": 0.00016467301378330108,
      "loss": 0.6688,
      "step": 1988
    },
    {
      "epoch": 0.3039309317339649,
      "grad_norm": 0.33604127168655396,
      "learning_rate": 0.00016463493531049077,
      "loss": 0.7257,
      "step": 1989
    },
    {
      "epoch": 0.3040837376322726,
      "grad_norm": 0.32262903451919556,
      "learning_rate": 0.0001645968407344289,
      "loss": 0.8301,
      "step": 1990
    },
    {
      "epoch": 0.3042365435305803,
      "grad_norm": 1.0630546808242798,
      "learning_rate": 0.0001645587300646064,
      "loss": 0.7924,
      "step": 1991
    },
    {
      "epoch": 0.30438934942888796,
      "grad_norm": 0.28364071249961853,
      "learning_rate": 0.00016452060331051822,
      "loss": 0.5656,
      "step": 1992
    },
    {
      "epoch": 0.30454215532719564,
      "grad_norm": 0.3063963055610657,
      "learning_rate": 0.00016448246048166335,
      "loss": 0.7863,
      "step": 1993
    },
    {
      "epoch": 0.3046949612255033,
      "grad_norm": 0.3313276171684265,
      "learning_rate": 0.0001644443015875447,
      "loss": 0.6872,
      "step": 1994
    },
    {
      "epoch": 0.30484776712381095,
      "grad_norm": 0.30340656638145447,
      "learning_rate": 0.0001644061266376693,
      "loss": 0.6541,
      "step": 1995
    },
    {
      "epoch": 0.30500057302211864,
      "grad_norm": 0.31530138850212097,
      "learning_rate": 0.00016436793564154808,
      "loss": 0.6445,
      "step": 1996
    },
    {
      "epoch": 0.3051533789204263,
      "grad_norm": 0.2796996533870697,
      "learning_rate": 0.00016432972860869603,
      "loss": 0.7765,
      "step": 1997
    },
    {
      "epoch": 0.305306184818734,
      "grad_norm": 0.28395867347717285,
      "learning_rate": 0.0001642915055486321,
      "loss": 0.8339,
      "step": 1998
    },
    {
      "epoch": 0.3054589907170417,
      "grad_norm": 0.32714176177978516,
      "learning_rate": 0.0001642532664708792,
      "loss": 0.7457,
      "step": 1999
    },
    {
      "epoch": 0.3056117966153494,
      "grad_norm": 0.2937332093715668,
      "learning_rate": 0.00016421501138496431,
      "loss": 0.6448,
      "step": 2000
    },
    {
      "epoch": 0.305764602513657,
      "grad_norm": 0.3177519142627716,
      "learning_rate": 0.00016417674030041841,
      "loss": 0.5321,
      "step": 2001
    },
    {
      "epoch": 0.3059174084119647,
      "grad_norm": 0.3196076452732086,
      "learning_rate": 0.00016413845322677637,
      "loss": 0.6613,
      "step": 2002
    },
    {
      "epoch": 0.30607021431027237,
      "grad_norm": 0.34329482913017273,
      "learning_rate": 0.00016410015017357708,
      "loss": 0.6171,
      "step": 2003
    },
    {
      "epoch": 0.30622302020858005,
      "grad_norm": 0.33029940724372864,
      "learning_rate": 0.0001640618311503635,
      "loss": 0.5726,
      "step": 2004
    },
    {
      "epoch": 0.30637582610688774,
      "grad_norm": 0.2705060839653015,
      "learning_rate": 0.0001640234961666824,
      "loss": 0.8022,
      "step": 2005
    },
    {
      "epoch": 0.3065286320051954,
      "grad_norm": 0.28415077924728394,
      "learning_rate": 0.00016398514523208467,
      "loss": 0.7025,
      "step": 2006
    },
    {
      "epoch": 0.30668143790350305,
      "grad_norm": 0.2916298508644104,
      "learning_rate": 0.0001639467783561251,
      "loss": 0.7154,
      "step": 2007
    },
    {
      "epoch": 0.30683424380181074,
      "grad_norm": 0.2760631740093231,
      "learning_rate": 0.0001639083955483625,
      "loss": 0.677,
      "step": 2008
    },
    {
      "epoch": 0.3069870497001184,
      "grad_norm": 0.2400038093328476,
      "learning_rate": 0.00016386999681835963,
      "loss": 0.7028,
      "step": 2009
    },
    {
      "epoch": 0.3071398555984261,
      "grad_norm": 0.30769845843315125,
      "learning_rate": 0.00016383158217568315,
      "loss": 0.6044,
      "step": 2010
    },
    {
      "epoch": 0.3072926614967338,
      "grad_norm": 0.23859497904777527,
      "learning_rate": 0.00016379315162990378,
      "loss": 0.4944,
      "step": 2011
    },
    {
      "epoch": 0.30744546739504147,
      "grad_norm": 0.30362075567245483,
      "learning_rate": 0.00016375470519059624,
      "loss": 0.8197,
      "step": 2012
    },
    {
      "epoch": 0.3075982732933491,
      "grad_norm": 0.29339346289634705,
      "learning_rate": 0.000163716242867339,
      "loss": 0.768,
      "step": 2013
    },
    {
      "epoch": 0.3077510791916568,
      "grad_norm": 0.24972614645957947,
      "learning_rate": 0.00016367776466971477,
      "loss": 0.7026,
      "step": 2014
    },
    {
      "epoch": 0.30790388508996447,
      "grad_norm": 0.306267648935318,
      "learning_rate": 0.00016363927060730995,
      "loss": 0.6663,
      "step": 2015
    },
    {
      "epoch": 0.30805669098827215,
      "grad_norm": 0.2774108350276947,
      "learning_rate": 0.0001636007606897151,
      "loss": 0.6631,
      "step": 2016
    },
    {
      "epoch": 0.30820949688657984,
      "grad_norm": 0.2911866307258606,
      "learning_rate": 0.0001635622349265246,
      "loss": 0.7182,
      "step": 2017
    },
    {
      "epoch": 0.3083623027848875,
      "grad_norm": 0.2774654030799866,
      "learning_rate": 0.00016352369332733679,
      "loss": 0.646,
      "step": 2018
    },
    {
      "epoch": 0.30851510868319515,
      "grad_norm": 0.28200235962867737,
      "learning_rate": 0.00016348513590175404,
      "loss": 0.7008,
      "step": 2019
    },
    {
      "epoch": 0.30866791458150283,
      "grad_norm": 0.2759782075881958,
      "learning_rate": 0.00016344656265938258,
      "loss": 0.8021,
      "step": 2020
    },
    {
      "epoch": 0.3088207204798105,
      "grad_norm": 0.24658828973770142,
      "learning_rate": 0.0001634079736098326,
      "loss": 0.6682,
      "step": 2021
    },
    {
      "epoch": 0.3089735263781182,
      "grad_norm": 0.2983681857585907,
      "learning_rate": 0.00016336936876271832,
      "loss": 0.7827,
      "step": 2022
    },
    {
      "epoch": 0.3091263322764259,
      "grad_norm": 0.3705412447452545,
      "learning_rate": 0.00016333074812765772,
      "loss": 0.9868,
      "step": 2023
    },
    {
      "epoch": 0.30927913817473357,
      "grad_norm": 0.2668742835521698,
      "learning_rate": 0.0001632921117142728,
      "loss": 0.8599,
      "step": 2024
    },
    {
      "epoch": 0.3094319440730412,
      "grad_norm": 0.30914178490638733,
      "learning_rate": 0.0001632534595321896,
      "loss": 0.9663,
      "step": 2025
    },
    {
      "epoch": 0.3095847499713489,
      "grad_norm": 0.27188578248023987,
      "learning_rate": 0.00016321479159103788,
      "loss": 0.6205,
      "step": 2026
    },
    {
      "epoch": 0.30973755586965657,
      "grad_norm": 0.26725485920906067,
      "learning_rate": 0.0001631761079004515,
      "loss": 0.6375,
      "step": 2027
    },
    {
      "epoch": 0.30989036176796425,
      "grad_norm": 0.3215772807598114,
      "learning_rate": 0.00016313740847006812,
      "loss": 0.8451,
      "step": 2028
    },
    {
      "epoch": 0.31004316766627193,
      "grad_norm": 0.3371334373950958,
      "learning_rate": 0.00016309869330952945,
      "loss": 0.6311,
      "step": 2029
    },
    {
      "epoch": 0.3101959735645796,
      "grad_norm": 0.29730215668678284,
      "learning_rate": 0.00016305996242848097,
      "loss": 0.7364,
      "step": 2030
    },
    {
      "epoch": 0.31034877946288725,
      "grad_norm": 0.32004204392433167,
      "learning_rate": 0.0001630212158365722,
      "loss": 0.7113,
      "step": 2031
    },
    {
      "epoch": 0.31050158536119493,
      "grad_norm": 0.29394999146461487,
      "learning_rate": 0.00016298245354345655,
      "loss": 0.5658,
      "step": 2032
    },
    {
      "epoch": 0.3106543912595026,
      "grad_norm": 0.3030238747596741,
      "learning_rate": 0.00016294367555879126,
      "loss": 0.7351,
      "step": 2033
    },
    {
      "epoch": 0.3108071971578103,
      "grad_norm": 0.2705308794975281,
      "learning_rate": 0.00016290488189223758,
      "loss": 0.6108,
      "step": 2034
    },
    {
      "epoch": 0.310960003056118,
      "grad_norm": 0.31228870153427124,
      "learning_rate": 0.00016286607255346062,
      "loss": 0.7637,
      "step": 2035
    },
    {
      "epoch": 0.31111280895442567,
      "grad_norm": 0.2760096490383148,
      "learning_rate": 0.0001628272475521294,
      "loss": 0.6447,
      "step": 2036
    },
    {
      "epoch": 0.3112656148527333,
      "grad_norm": 0.2895592451095581,
      "learning_rate": 0.0001627884068979168,
      "loss": 0.6692,
      "step": 2037
    },
    {
      "epoch": 0.311418420751041,
      "grad_norm": 0.2655385434627533,
      "learning_rate": 0.00016274955060049972,
      "loss": 0.6578,
      "step": 2038
    },
    {
      "epoch": 0.31157122664934866,
      "grad_norm": 0.30148744583129883,
      "learning_rate": 0.00016271067866955883,
      "loss": 0.5564,
      "step": 2039
    },
    {
      "epoch": 0.31172403254765635,
      "grad_norm": 0.2806140184402466,
      "learning_rate": 0.00016267179111477878,
      "loss": 0.7039,
      "step": 2040
    },
    {
      "epoch": 0.31187683844596403,
      "grad_norm": 0.5120315551757812,
      "learning_rate": 0.00016263288794584805,
      "loss": 0.6463,
      "step": 2041
    },
    {
      "epoch": 0.31202964434427166,
      "grad_norm": 0.30157095193862915,
      "learning_rate": 0.00016259396917245902,
      "loss": 0.782,
      "step": 2042
    },
    {
      "epoch": 0.31218245024257935,
      "grad_norm": 0.6643047332763672,
      "learning_rate": 0.00016255503480430803,
      "loss": 0.7354,
      "step": 2043
    },
    {
      "epoch": 0.31233525614088703,
      "grad_norm": 0.33008846640586853,
      "learning_rate": 0.0001625160848510952,
      "loss": 0.7089,
      "step": 2044
    },
    {
      "epoch": 0.3124880620391947,
      "grad_norm": 0.3063755929470062,
      "learning_rate": 0.0001624771193225246,
      "loss": 0.8467,
      "step": 2045
    },
    {
      "epoch": 0.3126408679375024,
      "grad_norm": 0.33746209740638733,
      "learning_rate": 0.00016243813822830417,
      "loss": 0.7556,
      "step": 2046
    },
    {
      "epoch": 0.3127936738358101,
      "grad_norm": 0.28747060894966125,
      "learning_rate": 0.00016239914157814572,
      "loss": 0.8213,
      "step": 2047
    },
    {
      "epoch": 0.3129464797341177,
      "grad_norm": 0.292519748210907,
      "learning_rate": 0.00016236012938176497,
      "loss": 0.7229,
      "step": 2048
    },
    {
      "epoch": 0.3130992856324254,
      "grad_norm": 0.3621499836444855,
      "learning_rate": 0.00016232110164888142,
      "loss": 0.6529,
      "step": 2049
    },
    {
      "epoch": 0.3132520915307331,
      "grad_norm": 0.31153249740600586,
      "learning_rate": 0.00016228205838921854,
      "loss": 0.9509,
      "step": 2050
    },
    {
      "epoch": 0.31340489742904076,
      "grad_norm": 0.2779485583305359,
      "learning_rate": 0.00016224299961250363,
      "loss": 0.8127,
      "step": 2051
    },
    {
      "epoch": 0.31355770332734845,
      "grad_norm": 0.3095969259738922,
      "learning_rate": 0.00016220392532846785,
      "loss": 0.5948,
      "step": 2052
    },
    {
      "epoch": 0.31371050922565613,
      "grad_norm": 0.2988138496875763,
      "learning_rate": 0.00016216483554684622,
      "loss": 0.6803,
      "step": 2053
    },
    {
      "epoch": 0.31386331512396376,
      "grad_norm": 0.3316000699996948,
      "learning_rate": 0.00016212573027737763,
      "loss": 0.7689,
      "step": 2054
    },
    {
      "epoch": 0.31401612102227144,
      "grad_norm": 0.32596075534820557,
      "learning_rate": 0.00016208660952980486,
      "loss": 0.7207,
      "step": 2055
    },
    {
      "epoch": 0.31416892692057913,
      "grad_norm": 0.29113471508026123,
      "learning_rate": 0.0001620474733138745,
      "loss": 0.7664,
      "step": 2056
    },
    {
      "epoch": 0.3143217328188868,
      "grad_norm": 0.3138737976551056,
      "learning_rate": 0.000162008321639337,
      "loss": 0.6088,
      "step": 2057
    },
    {
      "epoch": 0.3144745387171945,
      "grad_norm": 0.28373363614082336,
      "learning_rate": 0.00016196915451594665,
      "loss": 0.7374,
      "step": 2058
    },
    {
      "epoch": 0.3146273446155022,
      "grad_norm": 0.29363298416137695,
      "learning_rate": 0.00016192997195346167,
      "loss": 0.8168,
      "step": 2059
    },
    {
      "epoch": 0.3147801505138098,
      "grad_norm": 2.7054712772369385,
      "learning_rate": 0.000161890773961644,
      "loss": 0.6765,
      "step": 2060
    },
    {
      "epoch": 0.3149329564121175,
      "grad_norm": 0.29709509015083313,
      "learning_rate": 0.00016185156055025955,
      "loss": 0.6439,
      "step": 2061
    },
    {
      "epoch": 0.3150857623104252,
      "grad_norm": 0.25600048899650574,
      "learning_rate": 0.00016181233172907797,
      "loss": 0.6808,
      "step": 2062
    },
    {
      "epoch": 0.31523856820873286,
      "grad_norm": 0.5635945796966553,
      "learning_rate": 0.0001617730875078728,
      "loss": 0.7865,
      "step": 2063
    },
    {
      "epoch": 0.31539137410704055,
      "grad_norm": 0.6080973744392395,
      "learning_rate": 0.00016173382789642145,
      "loss": 0.7357,
      "step": 2064
    },
    {
      "epoch": 0.31554418000534823,
      "grad_norm": 0.24305948615074158,
      "learning_rate": 0.00016169455290450507,
      "loss": 0.5111,
      "step": 2065
    },
    {
      "epoch": 0.31569698590365586,
      "grad_norm": 0.3057420551776886,
      "learning_rate": 0.00016165526254190873,
      "loss": 0.758,
      "step": 2066
    },
    {
      "epoch": 0.31584979180196354,
      "grad_norm": 1.9893947839736938,
      "learning_rate": 0.00016161595681842125,
      "loss": 0.708,
      "step": 2067
    },
    {
      "epoch": 0.3160025977002712,
      "grad_norm": 0.29663994908332825,
      "learning_rate": 0.0001615766357438354,
      "loss": 0.6464,
      "step": 2068
    },
    {
      "epoch": 0.3161554035985789,
      "grad_norm": 0.3185891807079315,
      "learning_rate": 0.00016153729932794756,
      "loss": 0.8377,
      "step": 2069
    },
    {
      "epoch": 0.3163082094968866,
      "grad_norm": 0.3387928307056427,
      "learning_rate": 0.0001614979475805582,
      "loss": 0.6747,
      "step": 2070
    },
    {
      "epoch": 0.3164610153951943,
      "grad_norm": 0.40630900859832764,
      "learning_rate": 0.00016145858051147145,
      "loss": 0.6742,
      "step": 2071
    },
    {
      "epoch": 0.3166138212935019,
      "grad_norm": 0.2950742840766907,
      "learning_rate": 0.0001614191981304952,
      "loss": 0.6839,
      "step": 2072
    },
    {
      "epoch": 0.3167666271918096,
      "grad_norm": 0.3646473288536072,
      "learning_rate": 0.00016137980044744136,
      "loss": 0.8953,
      "step": 2073
    },
    {
      "epoch": 0.3169194330901173,
      "grad_norm": 0.647003710269928,
      "learning_rate": 0.00016134038747212545,
      "loss": 0.5832,
      "step": 2074
    },
    {
      "epoch": 0.31707223898842496,
      "grad_norm": 0.44197747111320496,
      "learning_rate": 0.00016130095921436692,
      "loss": 0.8293,
      "step": 2075
    },
    {
      "epoch": 0.31722504488673264,
      "grad_norm": 0.33136236667633057,
      "learning_rate": 0.00016126151568398897,
      "loss": 0.7455,
      "step": 2076
    },
    {
      "epoch": 0.3173778507850403,
      "grad_norm": 0.2798633277416229,
      "learning_rate": 0.00016122205689081864,
      "loss": 0.6635,
      "step": 2077
    },
    {
      "epoch": 0.31753065668334796,
      "grad_norm": 0.34174054861068726,
      "learning_rate": 0.00016118258284468671,
      "loss": 0.6709,
      "step": 2078
    },
    {
      "epoch": 0.31768346258165564,
      "grad_norm": 0.31651896238327026,
      "learning_rate": 0.0001611430935554279,
      "loss": 0.8362,
      "step": 2079
    },
    {
      "epoch": 0.3178362684799633,
      "grad_norm": 0.3442460000514984,
      "learning_rate": 0.00016110358903288056,
      "loss": 0.9762,
      "step": 2080
    },
    {
      "epoch": 0.317989074378271,
      "grad_norm": 0.270297646522522,
      "learning_rate": 0.00016106406928688693,
      "loss": 0.5487,
      "step": 2081
    },
    {
      "epoch": 0.3181418802765787,
      "grad_norm": 0.312498539686203,
      "learning_rate": 0.000161024534327293,
      "loss": 0.7125,
      "step": 2082
    },
    {
      "epoch": 0.3182946861748864,
      "grad_norm": 0.27466461062431335,
      "learning_rate": 0.00016098498416394864,
      "loss": 0.7155,
      "step": 2083
    },
    {
      "epoch": 0.318447492073194,
      "grad_norm": 0.3596421480178833,
      "learning_rate": 0.0001609454188067074,
      "loss": 0.6314,
      "step": 2084
    },
    {
      "epoch": 0.3186002979715017,
      "grad_norm": 0.36655640602111816,
      "learning_rate": 0.0001609058382654266,
      "loss": 0.6903,
      "step": 2085
    },
    {
      "epoch": 0.3187531038698094,
      "grad_norm": 0.37121638655662537,
      "learning_rate": 0.00016086624254996748,
      "loss": 0.6563,
      "step": 2086
    },
    {
      "epoch": 0.31890590976811706,
      "grad_norm": 0.2979934811592102,
      "learning_rate": 0.000160826631670195,
      "loss": 0.5967,
      "step": 2087
    },
    {
      "epoch": 0.31905871566642474,
      "grad_norm": 0.2676079273223877,
      "learning_rate": 0.00016078700563597776,
      "loss": 0.4784,
      "step": 2088
    },
    {
      "epoch": 0.3192115215647324,
      "grad_norm": 0.2784518897533417,
      "learning_rate": 0.0001607473644571884,
      "loss": 0.654,
      "step": 2089
    },
    {
      "epoch": 0.31936432746304005,
      "grad_norm": 0.3202001750469208,
      "learning_rate": 0.00016070770814370305,
      "loss": 0.7928,
      "step": 2090
    },
    {
      "epoch": 0.31951713336134774,
      "grad_norm": 0.39485278725624084,
      "learning_rate": 0.00016066803670540183,
      "loss": 0.6701,
      "step": 2091
    },
    {
      "epoch": 0.3196699392596554,
      "grad_norm": 0.37572166323661804,
      "learning_rate": 0.00016062835015216855,
      "loss": 0.7101,
      "step": 2092
    },
    {
      "epoch": 0.3198227451579631,
      "grad_norm": 0.6303053498268127,
      "learning_rate": 0.00016058864849389075,
      "loss": 0.8098,
      "step": 2093
    },
    {
      "epoch": 0.3199755510562708,
      "grad_norm": 0.3596165180206299,
      "learning_rate": 0.00016054893174045974,
      "loss": 0.6311,
      "step": 2094
    },
    {
      "epoch": 0.3201283569545784,
      "grad_norm": 0.2687673270702362,
      "learning_rate": 0.00016050919990177068,
      "loss": 0.626,
      "step": 2095
    },
    {
      "epoch": 0.3202811628528861,
      "grad_norm": 0.25072038173675537,
      "learning_rate": 0.0001604694529877224,
      "loss": 0.768,
      "step": 2096
    },
    {
      "epoch": 0.3204339687511938,
      "grad_norm": 0.2828698754310608,
      "learning_rate": 0.0001604296910082175,
      "loss": 0.6626,
      "step": 2097
    },
    {
      "epoch": 0.32058677464950147,
      "grad_norm": 0.4138115346431732,
      "learning_rate": 0.00016038991397316233,
      "loss": 0.8001,
      "step": 2098
    },
    {
      "epoch": 0.32073958054780916,
      "grad_norm": 0.29085302352905273,
      "learning_rate": 0.000160350121892467,
      "loss": 0.7274,
      "step": 2099
    },
    {
      "epoch": 0.32089238644611684,
      "grad_norm": 0.2617502808570862,
      "learning_rate": 0.00016031031477604547,
      "loss": 0.6377,
      "step": 2100
    },
    {
      "epoch": 0.32104519234442447,
      "grad_norm": 0.3535154461860657,
      "learning_rate": 0.0001602704926338152,
      "loss": 0.9398,
      "step": 2101
    },
    {
      "epoch": 0.32119799824273215,
      "grad_norm": 0.3721776306629181,
      "learning_rate": 0.00016023065547569765,
      "loss": 0.8525,
      "step": 2102
    },
    {
      "epoch": 0.32135080414103984,
      "grad_norm": 0.27641820907592773,
      "learning_rate": 0.00016019080331161788,
      "loss": 0.8148,
      "step": 2103
    },
    {
      "epoch": 0.3215036100393475,
      "grad_norm": 0.3367394506931305,
      "learning_rate": 0.00016015093615150472,
      "loss": 0.7703,
      "step": 2104
    },
    {
      "epoch": 0.3216564159376552,
      "grad_norm": 0.3287603557109833,
      "learning_rate": 0.00016011105400529072,
      "loss": 0.7462,
      "step": 2105
    },
    {
      "epoch": 0.3218092218359629,
      "grad_norm": 0.31794461607933044,
      "learning_rate": 0.0001600711568829122,
      "loss": 0.6779,
      "step": 2106
    },
    {
      "epoch": 0.3219620277342705,
      "grad_norm": 0.2856120765209198,
      "learning_rate": 0.0001600312447943092,
      "loss": 0.5557,
      "step": 2107
    },
    {
      "epoch": 0.3221148336325782,
      "grad_norm": 0.34538280963897705,
      "learning_rate": 0.00015999131774942552,
      "loss": 0.746,
      "step": 2108
    },
    {
      "epoch": 0.3222676395308859,
      "grad_norm": 0.30335336923599243,
      "learning_rate": 0.00015995137575820857,
      "loss": 0.8004,
      "step": 2109
    },
    {
      "epoch": 0.32242044542919357,
      "grad_norm": 0.31408512592315674,
      "learning_rate": 0.0001599114188306096,
      "loss": 0.7996,
      "step": 2110
    },
    {
      "epoch": 0.32257325132750125,
      "grad_norm": 0.40797099471092224,
      "learning_rate": 0.00015987144697658353,
      "loss": 0.599,
      "step": 2111
    },
    {
      "epoch": 0.32272605722580894,
      "grad_norm": 0.29327741265296936,
      "learning_rate": 0.00015983146020608904,
      "loss": 0.5498,
      "step": 2112
    },
    {
      "epoch": 0.32287886312411657,
      "grad_norm": 0.31773462891578674,
      "learning_rate": 0.00015979145852908845,
      "loss": 0.6583,
      "step": 2113
    },
    {
      "epoch": 0.32303166902242425,
      "grad_norm": 0.2868436574935913,
      "learning_rate": 0.00015975144195554786,
      "loss": 0.5934,
      "step": 2114
    },
    {
      "epoch": 0.32318447492073193,
      "grad_norm": 0.25718802213668823,
      "learning_rate": 0.0001597114104954371,
      "loss": 0.7702,
      "step": 2115
    },
    {
      "epoch": 0.3233372808190396,
      "grad_norm": 0.3285646140575409,
      "learning_rate": 0.00015967136415872968,
      "loss": 0.6344,
      "step": 2116
    },
    {
      "epoch": 0.3234900867173473,
      "grad_norm": 0.342434823513031,
      "learning_rate": 0.00015963130295540274,
      "loss": 0.6717,
      "step": 2117
    },
    {
      "epoch": 0.323642892615655,
      "grad_norm": 0.31285926699638367,
      "learning_rate": 0.00015959122689543725,
      "loss": 0.8469,
      "step": 2118
    },
    {
      "epoch": 0.3237956985139626,
      "grad_norm": 0.3020860552787781,
      "learning_rate": 0.00015955113598881777,
      "loss": 0.5288,
      "step": 2119
    },
    {
      "epoch": 0.3239485044122703,
      "grad_norm": 0.28416410088539124,
      "learning_rate": 0.00015951103024553268,
      "loss": 0.6605,
      "step": 2120
    },
    {
      "epoch": 0.324101310310578,
      "grad_norm": 0.46280670166015625,
      "learning_rate": 0.00015947090967557393,
      "loss": 0.6801,
      "step": 2121
    },
    {
      "epoch": 0.32425411620888567,
      "grad_norm": 0.3016008138656616,
      "learning_rate": 0.00015943077428893726,
      "loss": 0.758,
      "step": 2122
    },
    {
      "epoch": 0.32440692210719335,
      "grad_norm": 0.33130350708961487,
      "learning_rate": 0.00015939062409562203,
      "loss": 0.5521,
      "step": 2123
    },
    {
      "epoch": 0.32455972800550104,
      "grad_norm": 0.2970220744609833,
      "learning_rate": 0.00015935045910563136,
      "loss": 0.8987,
      "step": 2124
    },
    {
      "epoch": 0.32471253390380866,
      "grad_norm": 0.2839277386665344,
      "learning_rate": 0.000159310279328972,
      "loss": 0.647,
      "step": 2125
    },
    {
      "epoch": 0.32486533980211635,
      "grad_norm": 0.7329890131950378,
      "learning_rate": 0.00015927008477565444,
      "loss": 0.7763,
      "step": 2126
    },
    {
      "epoch": 0.32501814570042403,
      "grad_norm": 0.4290359318256378,
      "learning_rate": 0.00015922987545569274,
      "loss": 0.7703,
      "step": 2127
    },
    {
      "epoch": 0.3251709515987317,
      "grad_norm": 0.29252350330352783,
      "learning_rate": 0.0001591896513791048,
      "loss": 0.823,
      "step": 2128
    },
    {
      "epoch": 0.3253237574970394,
      "grad_norm": 0.8785410523414612,
      "learning_rate": 0.00015914941255591204,
      "loss": 0.7813,
      "step": 2129
    },
    {
      "epoch": 0.3254765633953471,
      "grad_norm": 0.29600057005882263,
      "learning_rate": 0.00015910915899613968,
      "loss": 0.7444,
      "step": 2130
    },
    {
      "epoch": 0.3256293692936547,
      "grad_norm": 0.3276137709617615,
      "learning_rate": 0.0001590688907098165,
      "loss": 0.6706,
      "step": 2131
    },
    {
      "epoch": 0.3257821751919624,
      "grad_norm": 0.31205666065216064,
      "learning_rate": 0.00015902860770697507,
      "loss": 0.6286,
      "step": 2132
    },
    {
      "epoch": 0.3259349810902701,
      "grad_norm": 0.27540236711502075,
      "learning_rate": 0.0001589883099976515,
      "loss": 0.771,
      "step": 2133
    },
    {
      "epoch": 0.32608778698857777,
      "grad_norm": 0.2716180086135864,
      "learning_rate": 0.00015894799759188572,
      "loss": 0.7021,
      "step": 2134
    },
    {
      "epoch": 0.32624059288688545,
      "grad_norm": 0.25274068117141724,
      "learning_rate": 0.00015890767049972114,
      "loss": 0.6938,
      "step": 2135
    },
    {
      "epoch": 0.32639339878519313,
      "grad_norm": 0.3524169921875,
      "learning_rate": 0.0001588673287312049,
      "loss": 0.7538,
      "step": 2136
    },
    {
      "epoch": 0.32654620468350076,
      "grad_norm": 0.384371817111969,
      "learning_rate": 0.00015882697229638787,
      "loss": 0.5032,
      "step": 2137
    },
    {
      "epoch": 0.32669901058180845,
      "grad_norm": 0.4573408365249634,
      "learning_rate": 0.00015878660120532452,
      "loss": 0.7039,
      "step": 2138
    },
    {
      "epoch": 0.32685181648011613,
      "grad_norm": 0.30394843220710754,
      "learning_rate": 0.0001587462154680729,
      "loss": 0.8154,
      "step": 2139
    },
    {
      "epoch": 0.3270046223784238,
      "grad_norm": 0.2765500247478485,
      "learning_rate": 0.00015870581509469487,
      "loss": 0.7432,
      "step": 2140
    },
    {
      "epoch": 0.3271574282767315,
      "grad_norm": 0.29486072063446045,
      "learning_rate": 0.0001586654000952558,
      "loss": 0.6089,
      "step": 2141
    },
    {
      "epoch": 0.3273102341750392,
      "grad_norm": 0.25128594040870667,
      "learning_rate": 0.00015862497047982473,
      "loss": 0.6048,
      "step": 2142
    },
    {
      "epoch": 0.3274630400733468,
      "grad_norm": 0.3318636417388916,
      "learning_rate": 0.0001585845262584744,
      "loss": 0.8185,
      "step": 2143
    },
    {
      "epoch": 0.3276158459716545,
      "grad_norm": 0.3293468654155731,
      "learning_rate": 0.00015854406744128112,
      "loss": 0.7598,
      "step": 2144
    },
    {
      "epoch": 0.3277686518699622,
      "grad_norm": 0.312021404504776,
      "learning_rate": 0.00015850359403832485,
      "loss": 0.6752,
      "step": 2145
    },
    {
      "epoch": 0.32792145776826986,
      "grad_norm": 0.46644726395606995,
      "learning_rate": 0.00015846310605968923,
      "loss": 0.7358,
      "step": 2146
    },
    {
      "epoch": 0.32807426366657755,
      "grad_norm": 0.3361137807369232,
      "learning_rate": 0.0001584226035154615,
      "loss": 0.7287,
      "step": 2147
    },
    {
      "epoch": 0.32822706956488523,
      "grad_norm": 0.30001696944236755,
      "learning_rate": 0.00015838208641573252,
      "loss": 0.9108,
      "step": 2148
    },
    {
      "epoch": 0.32837987546319286,
      "grad_norm": 0.2829294502735138,
      "learning_rate": 0.00015834155477059672,
      "loss": 0.6461,
      "step": 2149
    },
    {
      "epoch": 0.32853268136150054,
      "grad_norm": 0.283859521150589,
      "learning_rate": 0.00015830100859015237,
      "loss": 0.8114,
      "step": 2150
    },
    {
      "epoch": 0.32868548725980823,
      "grad_norm": 0.2840181291103363,
      "learning_rate": 0.0001582604478845011,
      "loss": 0.7424,
      "step": 2151
    },
    {
      "epoch": 0.3288382931581159,
      "grad_norm": 0.2813766896724701,
      "learning_rate": 0.00015821987266374828,
      "loss": 0.7707,
      "step": 2152
    },
    {
      "epoch": 0.3289910990564236,
      "grad_norm": 0.3396928608417511,
      "learning_rate": 0.00015817928293800288,
      "loss": 0.8722,
      "step": 2153
    },
    {
      "epoch": 0.3291439049547312,
      "grad_norm": 0.2577609717845917,
      "learning_rate": 0.00015813867871737752,
      "loss": 0.662,
      "step": 2154
    },
    {
      "epoch": 0.3292967108530389,
      "grad_norm": 0.28058573603630066,
      "learning_rate": 0.0001580980600119884,
      "loss": 0.6872,
      "step": 2155
    },
    {
      "epoch": 0.3294495167513466,
      "grad_norm": 0.2761460244655609,
      "learning_rate": 0.00015805742683195527,
      "loss": 0.7247,
      "step": 2156
    },
    {
      "epoch": 0.3296023226496543,
      "grad_norm": 0.8709086179733276,
      "learning_rate": 0.00015801677918740167,
      "loss": 0.5721,
      "step": 2157
    },
    {
      "epoch": 0.32975512854796196,
      "grad_norm": 0.2942737340927124,
      "learning_rate": 0.00015797611708845449,
      "loss": 0.5378,
      "step": 2158
    },
    {
      "epoch": 0.32990793444626965,
      "grad_norm": 0.33184701204299927,
      "learning_rate": 0.0001579354405452444,
      "loss": 0.8383,
      "step": 2159
    },
    {
      "epoch": 0.3300607403445773,
      "grad_norm": 0.31365641951560974,
      "learning_rate": 0.00015789474956790563,
      "loss": 0.6231,
      "step": 2160
    },
    {
      "epoch": 0.33021354624288496,
      "grad_norm": 0.3012298047542572,
      "learning_rate": 0.00015785404416657602,
      "loss": 0.76,
      "step": 2161
    },
    {
      "epoch": 0.33036635214119264,
      "grad_norm": 0.38045307993888855,
      "learning_rate": 0.00015781332435139693,
      "loss": 0.9937,
      "step": 2162
    },
    {
      "epoch": 0.3305191580395003,
      "grad_norm": 0.3367868661880493,
      "learning_rate": 0.00015777259013251334,
      "loss": 0.8202,
      "step": 2163
    },
    {
      "epoch": 0.330671963937808,
      "grad_norm": 0.2767188847064972,
      "learning_rate": 0.00015773184152007393,
      "loss": 0.7562,
      "step": 2164
    },
    {
      "epoch": 0.3308247698361157,
      "grad_norm": 0.296550452709198,
      "learning_rate": 0.0001576910785242308,
      "loss": 0.8002,
      "step": 2165
    },
    {
      "epoch": 0.3309775757344233,
      "grad_norm": 0.288141667842865,
      "learning_rate": 0.0001576503011551397,
      "loss": 0.5228,
      "step": 2166
    },
    {
      "epoch": 0.331130381632731,
      "grad_norm": 0.3159697651863098,
      "learning_rate": 0.00015760950942296002,
      "loss": 0.6596,
      "step": 2167
    },
    {
      "epoch": 0.3312831875310387,
      "grad_norm": 0.42363399267196655,
      "learning_rate": 0.00015756870333785464,
      "loss": 0.9706,
      "step": 2168
    },
    {
      "epoch": 0.3314359934293464,
      "grad_norm": 0.37459617853164673,
      "learning_rate": 0.00015752788290999013,
      "loss": 0.648,
      "step": 2169
    },
    {
      "epoch": 0.33158879932765406,
      "grad_norm": 0.30844661593437195,
      "learning_rate": 0.00015748704814953643,
      "loss": 0.7611,
      "step": 2170
    },
    {
      "epoch": 0.33174160522596174,
      "grad_norm": 0.2618614733219147,
      "learning_rate": 0.00015744619906666725,
      "loss": 0.6118,
      "step": 2171
    },
    {
      "epoch": 0.3318944111242694,
      "grad_norm": 0.6982774138450623,
      "learning_rate": 0.0001574053356715598,
      "loss": 0.6533,
      "step": 2172
    },
    {
      "epoch": 0.33204721702257706,
      "grad_norm": 0.29015034437179565,
      "learning_rate": 0.00015736445797439488,
      "loss": 0.6744,
      "step": 2173
    },
    {
      "epoch": 0.33220002292088474,
      "grad_norm": 0.38911595940589905,
      "learning_rate": 0.00015732356598535676,
      "loss": 0.6925,
      "step": 2174
    },
    {
      "epoch": 0.3323528288191924,
      "grad_norm": 0.4622102677822113,
      "learning_rate": 0.00015728265971463333,
      "loss": 0.7888,
      "step": 2175
    },
    {
      "epoch": 0.3325056347175001,
      "grad_norm": 0.27185773849487305,
      "learning_rate": 0.00015724173917241614,
      "loss": 0.5808,
      "step": 2176
    },
    {
      "epoch": 0.3326584406158078,
      "grad_norm": 0.3064304292201996,
      "learning_rate": 0.00015720080436890007,
      "loss": 0.8677,
      "step": 2177
    },
    {
      "epoch": 0.3328112465141154,
      "grad_norm": 0.2787809669971466,
      "learning_rate": 0.00015715985531428379,
      "loss": 0.7143,
      "step": 2178
    },
    {
      "epoch": 0.3329640524124231,
      "grad_norm": 0.48594728112220764,
      "learning_rate": 0.00015711889201876935,
      "loss": 0.773,
      "step": 2179
    },
    {
      "epoch": 0.3331168583107308,
      "grad_norm": 0.3239424228668213,
      "learning_rate": 0.00015707791449256247,
      "loss": 0.5903,
      "step": 2180
    },
    {
      "epoch": 0.3332696642090385,
      "grad_norm": 0.26795390248298645,
      "learning_rate": 0.0001570369227458723,
      "loss": 0.7426,
      "step": 2181
    },
    {
      "epoch": 0.33342247010734616,
      "grad_norm": 0.34255295991897583,
      "learning_rate": 0.0001569959167889116,
      "loss": 0.7992,
      "step": 2182
    },
    {
      "epoch": 0.33342247010734616,
      "eval_loss": 0.7136940956115723,
      "eval_runtime": 1441.2558,
      "eval_samples_per_second": 7.738,
      "eval_steps_per_second": 3.869,
      "step": 2182
    },
    {
      "epoch": 0.33357527600565384,
      "grad_norm": 0.2938944697380066,
      "learning_rate": 0.00015695489663189666,
      "loss": 0.6712,
      "step": 2183
    },
    {
      "epoch": 0.33372808190396147,
      "grad_norm": 0.28934624791145325,
      "learning_rate": 0.00015691386228504733,
      "loss": 0.797,
      "step": 2184
    },
    {
      "epoch": 0.33388088780226916,
      "grad_norm": 0.2854679226875305,
      "learning_rate": 0.00015687281375858695,
      "loss": 0.6246,
      "step": 2185
    },
    {
      "epoch": 0.33403369370057684,
      "grad_norm": 0.3314021825790405,
      "learning_rate": 0.00015683175106274242,
      "loss": 0.5735,
      "step": 2186
    },
    {
      "epoch": 0.3341864995988845,
      "grad_norm": 0.2750674784183502,
      "learning_rate": 0.00015679067420774423,
      "loss": 0.6508,
      "step": 2187
    },
    {
      "epoch": 0.3343393054971922,
      "grad_norm": 0.3193671405315399,
      "learning_rate": 0.00015674958320382624,
      "loss": 0.5197,
      "step": 2188
    },
    {
      "epoch": 0.3344921113954999,
      "grad_norm": 0.3214784264564514,
      "learning_rate": 0.00015670847806122597,
      "loss": 0.5785,
      "step": 2189
    },
    {
      "epoch": 0.3346449172938075,
      "grad_norm": 0.27920016646385193,
      "learning_rate": 0.0001566673587901844,
      "loss": 0.5692,
      "step": 2190
    },
    {
      "epoch": 0.3347977231921152,
      "grad_norm": 0.43938395380973816,
      "learning_rate": 0.00015662622540094608,
      "loss": 0.7549,
      "step": 2191
    },
    {
      "epoch": 0.3349505290904229,
      "grad_norm": 0.30526235699653625,
      "learning_rate": 0.00015658507790375904,
      "loss": 0.94,
      "step": 2192
    },
    {
      "epoch": 0.3351033349887306,
      "grad_norm": 0.33049049973487854,
      "learning_rate": 0.0001565439163088748,
      "loss": 0.7732,
      "step": 2193
    },
    {
      "epoch": 0.33525614088703826,
      "grad_norm": 0.29554682970046997,
      "learning_rate": 0.00015650274062654847,
      "loss": 0.6675,
      "step": 2194
    },
    {
      "epoch": 0.33540894678534594,
      "grad_norm": 0.2941046357154846,
      "learning_rate": 0.0001564615508670386,
      "loss": 0.7829,
      "step": 2195
    },
    {
      "epoch": 0.33556175268365357,
      "grad_norm": 0.3211367726325989,
      "learning_rate": 0.00015642034704060732,
      "loss": 0.5786,
      "step": 2196
    },
    {
      "epoch": 0.33571455858196125,
      "grad_norm": 0.27026689052581787,
      "learning_rate": 0.00015637912915752016,
      "loss": 0.6511,
      "step": 2197
    },
    {
      "epoch": 0.33586736448026894,
      "grad_norm": 0.31031954288482666,
      "learning_rate": 0.00015633789722804622,
      "loss": 0.7701,
      "step": 2198
    },
    {
      "epoch": 0.3360201703785766,
      "grad_norm": 0.342227578163147,
      "learning_rate": 0.00015629665126245813,
      "loss": 0.6661,
      "step": 2199
    },
    {
      "epoch": 0.3361729762768843,
      "grad_norm": 0.3071631193161011,
      "learning_rate": 0.0001562553912710319,
      "loss": 0.6731,
      "step": 2200
    },
    {
      "epoch": 0.336325782175192,
      "grad_norm": 0.26992887258529663,
      "learning_rate": 0.00015621411726404717,
      "loss": 0.7173,
      "step": 2201
    },
    {
      "epoch": 0.3364785880734996,
      "grad_norm": 0.3526805639266968,
      "learning_rate": 0.00015617282925178705,
      "loss": 0.6753,
      "step": 2202
    },
    {
      "epoch": 0.3366313939718073,
      "grad_norm": 0.30212274193763733,
      "learning_rate": 0.00015613152724453799,
      "loss": 0.715,
      "step": 2203
    },
    {
      "epoch": 0.336784199870115,
      "grad_norm": 0.34919580817222595,
      "learning_rate": 0.0001560902112525901,
      "loss": 0.6164,
      "step": 2204
    },
    {
      "epoch": 0.33693700576842267,
      "grad_norm": 0.2764431834220886,
      "learning_rate": 0.00015604888128623693,
      "loss": 0.6118,
      "step": 2205
    },
    {
      "epoch": 0.33708981166673035,
      "grad_norm": 0.3970886766910553,
      "learning_rate": 0.0001560075373557755,
      "loss": 0.6037,
      "step": 2206
    },
    {
      "epoch": 0.337242617565038,
      "grad_norm": 0.29963481426239014,
      "learning_rate": 0.00015596617947150624,
      "loss": 0.5707,
      "step": 2207
    },
    {
      "epoch": 0.33739542346334567,
      "grad_norm": 0.3079460561275482,
      "learning_rate": 0.0001559248076437332,
      "loss": 0.8306,
      "step": 2208
    },
    {
      "epoch": 0.33754822936165335,
      "grad_norm": 0.3257281482219696,
      "learning_rate": 0.00015588342188276375,
      "loss": 0.5394,
      "step": 2209
    },
    {
      "epoch": 0.33770103525996104,
      "grad_norm": 0.4615156650543213,
      "learning_rate": 0.00015584202219890884,
      "loss": 0.7179,
      "step": 2210
    },
    {
      "epoch": 0.3378538411582687,
      "grad_norm": 0.33638259768486023,
      "learning_rate": 0.00015580060860248286,
      "loss": 0.6865,
      "step": 2211
    },
    {
      "epoch": 0.3380066470565764,
      "grad_norm": 0.3506909906864166,
      "learning_rate": 0.00015575918110380364,
      "loss": 0.6989,
      "step": 2212
    },
    {
      "epoch": 0.33815945295488403,
      "grad_norm": 0.3745541572570801,
      "learning_rate": 0.00015571773971319251,
      "loss": 0.8131,
      "step": 2213
    },
    {
      "epoch": 0.3383122588531917,
      "grad_norm": 0.31607136130332947,
      "learning_rate": 0.0001556762844409742,
      "loss": 0.8365,
      "step": 2214
    },
    {
      "epoch": 0.3384650647514994,
      "grad_norm": 0.33056318759918213,
      "learning_rate": 0.00015563481529747705,
      "loss": 0.5826,
      "step": 2215
    },
    {
      "epoch": 0.3386178706498071,
      "grad_norm": 0.3306300938129425,
      "learning_rate": 0.00015559333229303262,
      "loss": 0.7303,
      "step": 2216
    },
    {
      "epoch": 0.33877067654811477,
      "grad_norm": 0.24888025224208832,
      "learning_rate": 0.00015555183543797618,
      "loss": 0.5677,
      "step": 2217
    },
    {
      "epoch": 0.33892348244642245,
      "grad_norm": 0.3338901400566101,
      "learning_rate": 0.0001555103247426462,
      "loss": 0.6068,
      "step": 2218
    },
    {
      "epoch": 0.3390762883447301,
      "grad_norm": 0.26496437191963196,
      "learning_rate": 0.00015546880021738478,
      "loss": 0.6084,
      "step": 2219
    },
    {
      "epoch": 0.33922909424303777,
      "grad_norm": 0.3822322189807892,
      "learning_rate": 0.00015542726187253744,
      "loss": 0.7601,
      "step": 2220
    },
    {
      "epoch": 0.33938190014134545,
      "grad_norm": 0.3385266661643982,
      "learning_rate": 0.00015538570971845305,
      "loss": 0.5632,
      "step": 2221
    },
    {
      "epoch": 0.33953470603965313,
      "grad_norm": 0.2914586365222931,
      "learning_rate": 0.00015534414376548402,
      "loss": 0.7443,
      "step": 2222
    },
    {
      "epoch": 0.3396875119379608,
      "grad_norm": 0.2860872745513916,
      "learning_rate": 0.0001553025640239861,
      "loss": 0.6005,
      "step": 2223
    },
    {
      "epoch": 0.3398403178362685,
      "grad_norm": 0.2960110604763031,
      "learning_rate": 0.00015526097050431865,
      "loss": 0.7422,
      "step": 2224
    },
    {
      "epoch": 0.33999312373457613,
      "grad_norm": 0.2951801121234894,
      "learning_rate": 0.0001552193632168442,
      "loss": 0.7805,
      "step": 2225
    },
    {
      "epoch": 0.3401459296328838,
      "grad_norm": 0.5373976230621338,
      "learning_rate": 0.00015517774217192897,
      "loss": 0.7439,
      "step": 2226
    },
    {
      "epoch": 0.3402987355311915,
      "grad_norm": 0.296344131231308,
      "learning_rate": 0.00015513610737994245,
      "loss": 0.6432,
      "step": 2227
    },
    {
      "epoch": 0.3404515414294992,
      "grad_norm": 0.26670217514038086,
      "learning_rate": 0.0001550944588512576,
      "loss": 0.6878,
      "step": 2228
    },
    {
      "epoch": 0.34060434732780687,
      "grad_norm": 0.3236304223537445,
      "learning_rate": 0.0001550527965962508,
      "loss": 0.5546,
      "step": 2229
    },
    {
      "epoch": 0.34075715322611455,
      "grad_norm": 0.3119784891605377,
      "learning_rate": 0.00015501112062530186,
      "loss": 0.6956,
      "step": 2230
    },
    {
      "epoch": 0.3409099591244222,
      "grad_norm": 0.47150805592536926,
      "learning_rate": 0.00015496943094879398,
      "loss": 0.785,
      "step": 2231
    },
    {
      "epoch": 0.34106276502272986,
      "grad_norm": 0.6498871445655823,
      "learning_rate": 0.0001549277275771138,
      "loss": 0.6983,
      "step": 2232
    },
    {
      "epoch": 0.34121557092103755,
      "grad_norm": 0.33664408326148987,
      "learning_rate": 0.0001548860105206514,
      "loss": 0.5466,
      "step": 2233
    },
    {
      "epoch": 0.34136837681934523,
      "grad_norm": 0.2958558201789856,
      "learning_rate": 0.00015484427978980017,
      "loss": 0.804,
      "step": 2234
    },
    {
      "epoch": 0.3415211827176529,
      "grad_norm": 0.2821539044380188,
      "learning_rate": 0.00015480253539495707,
      "loss": 0.6465,
      "step": 2235
    },
    {
      "epoch": 0.3416739886159606,
      "grad_norm": 0.30043548345565796,
      "learning_rate": 0.00015476077734652224,
      "loss": 0.6388,
      "step": 2236
    },
    {
      "epoch": 0.34182679451426823,
      "grad_norm": 0.3065933287143707,
      "learning_rate": 0.0001547190056548994,
      "loss": 0.6553,
      "step": 2237
    },
    {
      "epoch": 0.3419796004125759,
      "grad_norm": 0.29310041666030884,
      "learning_rate": 0.00015467722033049567,
      "loss": 0.7219,
      "step": 2238
    },
    {
      "epoch": 0.3421324063108836,
      "grad_norm": 0.3400419354438782,
      "learning_rate": 0.00015463542138372148,
      "loss": 0.7735,
      "step": 2239
    },
    {
      "epoch": 0.3422852122091913,
      "grad_norm": 0.33613109588623047,
      "learning_rate": 0.00015459360882499063,
      "loss": 0.7178,
      "step": 2240
    },
    {
      "epoch": 0.34243801810749896,
      "grad_norm": 0.26561689376831055,
      "learning_rate": 0.00015455178266472045,
      "loss": 0.4622,
      "step": 2241
    },
    {
      "epoch": 0.34259082400580665,
      "grad_norm": 0.3775576055049896,
      "learning_rate": 0.00015450994291333153,
      "loss": 0.7419,
      "step": 2242
    },
    {
      "epoch": 0.3427436299041143,
      "grad_norm": 3.781869649887085,
      "learning_rate": 0.00015446808958124785,
      "loss": 0.9276,
      "step": 2243
    },
    {
      "epoch": 0.34289643580242196,
      "grad_norm": 0.389053612947464,
      "learning_rate": 0.00015442622267889693,
      "loss": 0.8774,
      "step": 2244
    },
    {
      "epoch": 0.34304924170072965,
      "grad_norm": 0.2652193307876587,
      "learning_rate": 0.0001543843422167095,
      "loss": 0.737,
      "step": 2245
    },
    {
      "epoch": 0.34320204759903733,
      "grad_norm": 0.3126509487628937,
      "learning_rate": 0.00015434244820511966,
      "loss": 0.683,
      "step": 2246
    },
    {
      "epoch": 0.343354853497345,
      "grad_norm": 0.30898094177246094,
      "learning_rate": 0.00015430054065456507,
      "loss": 0.7826,
      "step": 2247
    },
    {
      "epoch": 0.3435076593956527,
      "grad_norm": 0.2741771340370178,
      "learning_rate": 0.00015425861957548656,
      "loss": 0.7594,
      "step": 2248
    },
    {
      "epoch": 0.3436604652939603,
      "grad_norm": 0.3694680333137512,
      "learning_rate": 0.00015421668497832847,
      "loss": 0.6474,
      "step": 2249
    },
    {
      "epoch": 0.343813271192268,
      "grad_norm": 0.36894744634628296,
      "learning_rate": 0.0001541747368735384,
      "loss": 0.6786,
      "step": 2250
    },
    {
      "epoch": 0.3439660770905757,
      "grad_norm": 0.3785475790500641,
      "learning_rate": 0.00015413277527156742,
      "loss": 0.4514,
      "step": 2251
    },
    {
      "epoch": 0.3441188829888834,
      "grad_norm": 0.3092028498649597,
      "learning_rate": 0.00015409080018286987,
      "loss": 0.7509,
      "step": 2252
    },
    {
      "epoch": 0.34427168888719106,
      "grad_norm": 0.31305885314941406,
      "learning_rate": 0.00015404881161790353,
      "loss": 0.6581,
      "step": 2253
    },
    {
      "epoch": 0.34442449478549875,
      "grad_norm": 0.2979021668434143,
      "learning_rate": 0.00015400680958712942,
      "loss": 0.5952,
      "step": 2254
    },
    {
      "epoch": 0.3445773006838064,
      "grad_norm": 0.3245038390159607,
      "learning_rate": 0.00015396479410101208,
      "loss": 0.6446,
      "step": 2255
    },
    {
      "epoch": 0.34473010658211406,
      "grad_norm": 0.35698649287223816,
      "learning_rate": 0.0001539227651700193,
      "loss": 0.8561,
      "step": 2256
    },
    {
      "epoch": 0.34488291248042174,
      "grad_norm": 0.25988495349884033,
      "learning_rate": 0.00015388072280462218,
      "loss": 0.537,
      "step": 2257
    },
    {
      "epoch": 0.34503571837872943,
      "grad_norm": 0.2652510702610016,
      "learning_rate": 0.0001538386670152953,
      "loss": 0.6016,
      "step": 2258
    },
    {
      "epoch": 0.3451885242770371,
      "grad_norm": 0.38364800810813904,
      "learning_rate": 0.00015379659781251644,
      "loss": 0.601,
      "step": 2259
    },
    {
      "epoch": 0.34534133017534474,
      "grad_norm": 0.29123881459236145,
      "learning_rate": 0.00015375451520676685,
      "loss": 0.6864,
      "step": 2260
    },
    {
      "epoch": 0.3454941360736524,
      "grad_norm": 0.37606048583984375,
      "learning_rate": 0.000153712419208531,
      "loss": 0.7216,
      "step": 2261
    },
    {
      "epoch": 0.3456469419719601,
      "grad_norm": 0.30718401074409485,
      "learning_rate": 0.00015367030982829676,
      "loss": 0.7234,
      "step": 2262
    },
    {
      "epoch": 0.3457997478702678,
      "grad_norm": 0.34343576431274414,
      "learning_rate": 0.00015362818707655536,
      "loss": 0.7448,
      "step": 2263
    },
    {
      "epoch": 0.3459525537685755,
      "grad_norm": 0.30725371837615967,
      "learning_rate": 0.0001535860509638013,
      "loss": 0.7892,
      "step": 2264
    },
    {
      "epoch": 0.34610535966688316,
      "grad_norm": 0.28746816515922546,
      "learning_rate": 0.00015354390150053253,
      "loss": 0.6234,
      "step": 2265
    },
    {
      "epoch": 0.3462581655651908,
      "grad_norm": 0.35895246267318726,
      "learning_rate": 0.0001535017386972501,
      "loss": 0.7443,
      "step": 2266
    },
    {
      "epoch": 0.3464109714634985,
      "grad_norm": 0.2841184139251709,
      "learning_rate": 0.00015345956256445858,
      "loss": 0.6936,
      "step": 2267
    },
    {
      "epoch": 0.34656377736180616,
      "grad_norm": 0.2917341887950897,
      "learning_rate": 0.00015341737311266583,
      "loss": 0.7372,
      "step": 2268
    },
    {
      "epoch": 0.34671658326011384,
      "grad_norm": 0.3071459233760834,
      "learning_rate": 0.00015337517035238294,
      "loss": 0.6283,
      "step": 2269
    },
    {
      "epoch": 0.3468693891584215,
      "grad_norm": 0.2792901396751404,
      "learning_rate": 0.0001533329542941244,
      "loss": 0.5536,
      "step": 2270
    },
    {
      "epoch": 0.3470221950567292,
      "grad_norm": 0.2752489447593689,
      "learning_rate": 0.00015329072494840804,
      "loss": 0.7074,
      "step": 2271
    },
    {
      "epoch": 0.34717500095503684,
      "grad_norm": 0.28680381178855896,
      "learning_rate": 0.00015324848232575484,
      "loss": 0.7837,
      "step": 2272
    },
    {
      "epoch": 0.3473278068533445,
      "grad_norm": 0.31360378861427307,
      "learning_rate": 0.00015320622643668927,
      "loss": 0.7676,
      "step": 2273
    },
    {
      "epoch": 0.3474806127516522,
      "grad_norm": 0.29546040296554565,
      "learning_rate": 0.00015316395729173899,
      "loss": 0.606,
      "step": 2274
    },
    {
      "epoch": 0.3476334186499599,
      "grad_norm": 1.059844732284546,
      "learning_rate": 0.00015312167490143502,
      "loss": 0.5151,
      "step": 2275
    },
    {
      "epoch": 0.3477862245482676,
      "grad_norm": 0.29025423526763916,
      "learning_rate": 0.0001530793792763117,
      "loss": 0.5859,
      "step": 2276
    },
    {
      "epoch": 0.34793903044657526,
      "grad_norm": 0.33331283926963806,
      "learning_rate": 0.0001530370704269066,
      "loss": 0.6959,
      "step": 2277
    },
    {
      "epoch": 0.3480918363448829,
      "grad_norm": 0.29462912678718567,
      "learning_rate": 0.00015299474836376055,
      "loss": 0.643,
      "step": 2278
    },
    {
      "epoch": 0.34824464224319057,
      "grad_norm": 0.28086116909980774,
      "learning_rate": 0.00015295241309741783,
      "loss": 0.6262,
      "step": 2279
    },
    {
      "epoch": 0.34839744814149826,
      "grad_norm": 0.3096199333667755,
      "learning_rate": 0.00015291006463842588,
      "loss": 0.7098,
      "step": 2280
    },
    {
      "epoch": 0.34855025403980594,
      "grad_norm": 0.29386383295059204,
      "learning_rate": 0.00015286770299733547,
      "loss": 0.5968,
      "step": 2281
    },
    {
      "epoch": 0.3487030599381136,
      "grad_norm": 0.27785131335258484,
      "learning_rate": 0.00015282532818470065,
      "loss": 0.6851,
      "step": 2282
    },
    {
      "epoch": 0.3488558658364213,
      "grad_norm": 0.2330974042415619,
      "learning_rate": 0.0001527829402110787,
      "loss": 0.6592,
      "step": 2283
    },
    {
      "epoch": 0.34900867173472894,
      "grad_norm": 0.291621595621109,
      "learning_rate": 0.00015274053908703034,
      "loss": 0.7363,
      "step": 2284
    },
    {
      "epoch": 0.3491614776330366,
      "grad_norm": 0.29270750284194946,
      "learning_rate": 0.0001526981248231193,
      "loss": 0.5755,
      "step": 2285
    },
    {
      "epoch": 0.3493142835313443,
      "grad_norm": 0.31941109895706177,
      "learning_rate": 0.00015265569742991292,
      "loss": 0.5933,
      "step": 2286
    },
    {
      "epoch": 0.349467089429652,
      "grad_norm": 0.3711247146129608,
      "learning_rate": 0.00015261325691798145,
      "loss": 0.833,
      "step": 2287
    },
    {
      "epoch": 0.3496198953279597,
      "grad_norm": 0.28365087509155273,
      "learning_rate": 0.0001525708032978987,
      "loss": 0.8233,
      "step": 2288
    },
    {
      "epoch": 0.34977270122626736,
      "grad_norm": 0.33713802695274353,
      "learning_rate": 0.00015252833658024157,
      "loss": 0.698,
      "step": 2289
    },
    {
      "epoch": 0.349925507124575,
      "grad_norm": 0.3051641583442688,
      "learning_rate": 0.00015248585677559034,
      "loss": 0.6146,
      "step": 2290
    },
    {
      "epoch": 0.35007831302288267,
      "grad_norm": 0.29534676671028137,
      "learning_rate": 0.0001524433638945285,
      "loss": 0.6897,
      "step": 2291
    },
    {
      "epoch": 0.35023111892119035,
      "grad_norm": 0.27716103196144104,
      "learning_rate": 0.00015240085794764272,
      "loss": 0.7955,
      "step": 2292
    },
    {
      "epoch": 0.35038392481949804,
      "grad_norm": 0.295163631439209,
      "learning_rate": 0.00015235833894552308,
      "loss": 0.6941,
      "step": 2293
    },
    {
      "epoch": 0.3505367307178057,
      "grad_norm": 0.35691267251968384,
      "learning_rate": 0.00015231580689876277,
      "loss": 0.6965,
      "step": 2294
    },
    {
      "epoch": 0.3506895366161134,
      "grad_norm": 0.3154979348182678,
      "learning_rate": 0.00015227326181795837,
      "loss": 0.7597,
      "step": 2295
    },
    {
      "epoch": 0.35084234251442104,
      "grad_norm": 0.2859799563884735,
      "learning_rate": 0.00015223070371370954,
      "loss": 0.6982,
      "step": 2296
    },
    {
      "epoch": 0.3509951484127287,
      "grad_norm": 0.35966408252716064,
      "learning_rate": 0.00015218813259661933,
      "loss": 0.8101,
      "step": 2297
    },
    {
      "epoch": 0.3511479543110364,
      "grad_norm": 0.6553919315338135,
      "learning_rate": 0.00015214554847729395,
      "loss": 0.7671,
      "step": 2298
    },
    {
      "epoch": 0.3513007602093441,
      "grad_norm": 0.38289788365364075,
      "learning_rate": 0.00015210295136634293,
      "loss": 0.5688,
      "step": 2299
    },
    {
      "epoch": 0.35145356610765177,
      "grad_norm": 0.4104590117931366,
      "learning_rate": 0.0001520603412743789,
      "loss": 0.5505,
      "step": 2300
    },
    {
      "epoch": 0.35160637200595946,
      "grad_norm": 0.3637326955795288,
      "learning_rate": 0.00015201771821201789,
      "loss": 0.6424,
      "step": 2301
    },
    {
      "epoch": 0.3517591779042671,
      "grad_norm": 0.29642170667648315,
      "learning_rate": 0.000151975082189879,
      "loss": 0.792,
      "step": 2302
    },
    {
      "epoch": 0.35191198380257477,
      "grad_norm": 0.3389260172843933,
      "learning_rate": 0.00015193243321858467,
      "loss": 0.7985,
      "step": 2303
    },
    {
      "epoch": 0.35206478970088245,
      "grad_norm": 0.41423532366752625,
      "learning_rate": 0.00015188977130876056,
      "loss": 0.682,
      "step": 2304
    },
    {
      "epoch": 0.35221759559919014,
      "grad_norm": 0.2945079207420349,
      "learning_rate": 0.0001518470964710355,
      "loss": 0.8196,
      "step": 2305
    },
    {
      "epoch": 0.3523704014974978,
      "grad_norm": 0.2931058704853058,
      "learning_rate": 0.00015180440871604155,
      "loss": 0.8806,
      "step": 2306
    },
    {
      "epoch": 0.3525232073958055,
      "grad_norm": 0.2553795874118805,
      "learning_rate": 0.00015176170805441408,
      "loss": 0.7261,
      "step": 2307
    },
    {
      "epoch": 0.35267601329411313,
      "grad_norm": 0.35140493512153625,
      "learning_rate": 0.0001517189944967915,
      "loss": 0.6785,
      "step": 2308
    },
    {
      "epoch": 0.3528288191924208,
      "grad_norm": 0.2723594009876251,
      "learning_rate": 0.0001516762680538156,
      "loss": 0.7115,
      "step": 2309
    },
    {
      "epoch": 0.3529816250907285,
      "grad_norm": 0.4120732247829437,
      "learning_rate": 0.00015163352873613127,
      "loss": 0.5396,
      "step": 2310
    },
    {
      "epoch": 0.3531344309890362,
      "grad_norm": 0.30499234795570374,
      "learning_rate": 0.00015159077655438674,
      "loss": 0.787,
      "step": 2311
    },
    {
      "epoch": 0.35328723688734387,
      "grad_norm": 0.3186348080635071,
      "learning_rate": 0.00015154801151923323,
      "loss": 0.6939,
      "step": 2312
    },
    {
      "epoch": 0.35344004278565155,
      "grad_norm": 0.44435304403305054,
      "learning_rate": 0.0001515052336413254,
      "loss": 0.8076,
      "step": 2313
    },
    {
      "epoch": 0.3535928486839592,
      "grad_norm": 0.26792144775390625,
      "learning_rate": 0.00015146244293132096,
      "loss": 0.5881,
      "step": 2314
    },
    {
      "epoch": 0.35374565458226687,
      "grad_norm": 0.2927224636077881,
      "learning_rate": 0.00015141963939988083,
      "loss": 0.6064,
      "step": 2315
    },
    {
      "epoch": 0.35389846048057455,
      "grad_norm": 0.29608336091041565,
      "learning_rate": 0.0001513768230576692,
      "loss": 0.7699,
      "step": 2316
    },
    {
      "epoch": 0.35405126637888223,
      "grad_norm": 0.30591922998428345,
      "learning_rate": 0.0001513339939153533,
      "loss": 0.6701,
      "step": 2317
    },
    {
      "epoch": 0.3542040722771899,
      "grad_norm": 0.26143383979797363,
      "learning_rate": 0.0001512911519836038,
      "loss": 0.5759,
      "step": 2318
    },
    {
      "epoch": 0.35435687817549755,
      "grad_norm": 0.34693998098373413,
      "learning_rate": 0.0001512482972730943,
      "loss": 0.6425,
      "step": 2319
    },
    {
      "epoch": 0.35450968407380523,
      "grad_norm": 0.2774498462677002,
      "learning_rate": 0.00015120542979450173,
      "loss": 0.7096,
      "step": 2320
    },
    {
      "epoch": 0.3546624899721129,
      "grad_norm": 0.9198269844055176,
      "learning_rate": 0.0001511625495585062,
      "loss": 0.9403,
      "step": 2321
    },
    {
      "epoch": 0.3548152958704206,
      "grad_norm": 0.30706116557121277,
      "learning_rate": 0.00015111965657579085,
      "loss": 0.6938,
      "step": 2322
    },
    {
      "epoch": 0.3549681017687283,
      "grad_norm": 0.3365491032600403,
      "learning_rate": 0.00015107675085704222,
      "loss": 0.5908,
      "step": 2323
    },
    {
      "epoch": 0.35512090766703597,
      "grad_norm": 0.2673099637031555,
      "learning_rate": 0.00015103383241294984,
      "loss": 0.7071,
      "step": 2324
    },
    {
      "epoch": 0.3552737135653436,
      "grad_norm": 0.2802966833114624,
      "learning_rate": 0.0001509909012542065,
      "loss": 0.7405,
      "step": 2325
    },
    {
      "epoch": 0.3554265194636513,
      "grad_norm": 0.2657721936702728,
      "learning_rate": 0.0001509479573915082,
      "loss": 0.6928,
      "step": 2326
    },
    {
      "epoch": 0.35557932536195896,
      "grad_norm": 0.31786054372787476,
      "learning_rate": 0.00015090500083555394,
      "loss": 0.735,
      "step": 2327
    },
    {
      "epoch": 0.35573213126026665,
      "grad_norm": 0.31089332699775696,
      "learning_rate": 0.000150862031597046,
      "loss": 0.683,
      "step": 2328
    },
    {
      "epoch": 0.35588493715857433,
      "grad_norm": 0.310997873544693,
      "learning_rate": 0.0001508190496866899,
      "loss": 0.7928,
      "step": 2329
    },
    {
      "epoch": 0.356037743056882,
      "grad_norm": 0.39357268810272217,
      "learning_rate": 0.00015077605511519415,
      "loss": 0.8346,
      "step": 2330
    },
    {
      "epoch": 0.35619054895518965,
      "grad_norm": 0.3538849353790283,
      "learning_rate": 0.00015073304789327044,
      "loss": 0.801,
      "step": 2331
    },
    {
      "epoch": 0.35634335485349733,
      "grad_norm": 0.25808286666870117,
      "learning_rate": 0.00015069002803163377,
      "loss": 0.7358,
      "step": 2332
    },
    {
      "epoch": 0.356496160751805,
      "grad_norm": 0.27462631464004517,
      "learning_rate": 0.0001506469955410021,
      "loss": 0.6066,
      "step": 2333
    },
    {
      "epoch": 0.3566489666501127,
      "grad_norm": 0.2881491482257843,
      "learning_rate": 0.00015060395043209663,
      "loss": 0.8394,
      "step": 2334
    },
    {
      "epoch": 0.3568017725484204,
      "grad_norm": 0.2899307906627655,
      "learning_rate": 0.0001505608927156417,
      "loss": 0.5998,
      "step": 2335
    },
    {
      "epoch": 0.35695457844672807,
      "grad_norm": 0.3605771064758301,
      "learning_rate": 0.00015051782240236476,
      "loss": 0.6971,
      "step": 2336
    },
    {
      "epoch": 0.3571073843450357,
      "grad_norm": 0.27477413415908813,
      "learning_rate": 0.00015047473950299643,
      "loss": 0.7071,
      "step": 2337
    },
    {
      "epoch": 0.3572601902433434,
      "grad_norm": 0.2961339056491852,
      "learning_rate": 0.00015043164402827043,
      "loss": 0.7441,
      "step": 2338
    },
    {
      "epoch": 0.35741299614165106,
      "grad_norm": 0.30659833550453186,
      "learning_rate": 0.0001503885359889237,
      "loss": 0.7664,
      "step": 2339
    },
    {
      "epoch": 0.35756580203995875,
      "grad_norm": 0.2779198884963989,
      "learning_rate": 0.00015034541539569616,
      "loss": 0.7272,
      "step": 2340
    },
    {
      "epoch": 0.35771860793826643,
      "grad_norm": 0.3521401882171631,
      "learning_rate": 0.00015030228225933106,
      "loss": 0.6322,
      "step": 2341
    },
    {
      "epoch": 0.3578714138365741,
      "grad_norm": 0.39226970076560974,
      "learning_rate": 0.0001502591365905745,
      "loss": 0.6431,
      "step": 2342
    },
    {
      "epoch": 0.35802421973488174,
      "grad_norm": 0.2492583841085434,
      "learning_rate": 0.000150215978400176,
      "loss": 0.6294,
      "step": 2343
    },
    {
      "epoch": 0.3581770256331894,
      "grad_norm": 0.2733481526374817,
      "learning_rate": 0.00015017280769888793,
      "loss": 0.5777,
      "step": 2344
    },
    {
      "epoch": 0.3583298315314971,
      "grad_norm": 0.2837771773338318,
      "learning_rate": 0.00015012962449746607,
      "loss": 0.5669,
      "step": 2345
    },
    {
      "epoch": 0.3584826374298048,
      "grad_norm": 0.2990538477897644,
      "learning_rate": 0.00015008642880666903,
      "loss": 0.7183,
      "step": 2346
    },
    {
      "epoch": 0.3586354433281125,
      "grad_norm": 0.39534905552864075,
      "learning_rate": 0.00015004322063725872,
      "loss": 0.6699,
      "step": 2347
    },
    {
      "epoch": 0.35878824922642016,
      "grad_norm": 0.2837047874927521,
      "learning_rate": 0.00015000000000000001,
      "loss": 0.7628,
      "step": 2348
    },
    {
      "epoch": 0.3589410551247278,
      "grad_norm": 0.3078756630420685,
      "learning_rate": 0.00014995676690566105,
      "loss": 0.6729,
      "step": 2349
    },
    {
      "epoch": 0.3590938610230355,
      "grad_norm": 0.31207966804504395,
      "learning_rate": 0.00014991352136501296,
      "loss": 0.6307,
      "step": 2350
    },
    {
      "epoch": 0.35924666692134316,
      "grad_norm": 0.29956740140914917,
      "learning_rate": 0.00014987026338882998,
      "loss": 0.6225,
      "step": 2351
    },
    {
      "epoch": 0.35939947281965084,
      "grad_norm": 0.23339635133743286,
      "learning_rate": 0.00014982699298788954,
      "loss": 0.6805,
      "step": 2352
    },
    {
      "epoch": 0.35955227871795853,
      "grad_norm": 0.35019242763519287,
      "learning_rate": 0.000149783710172972,
      "loss": 0.772,
      "step": 2353
    },
    {
      "epoch": 0.3597050846162662,
      "grad_norm": 0.4311259388923645,
      "learning_rate": 0.00014974041495486104,
      "loss": 0.4946,
      "step": 2354
    },
    {
      "epoch": 0.35985789051457384,
      "grad_norm": 0.27752676606178284,
      "learning_rate": 0.0001496971073443432,
      "loss": 0.7505,
      "step": 2355
    },
    {
      "epoch": 0.3600106964128815,
      "grad_norm": 0.34155577421188354,
      "learning_rate": 0.00014965378735220822,
      "loss": 0.7861,
      "step": 2356
    },
    {
      "epoch": 0.3601635023111892,
      "grad_norm": 0.2626522481441498,
      "learning_rate": 0.00014961045498924894,
      "loss": 0.712,
      "step": 2357
    },
    {
      "epoch": 0.3603163082094969,
      "grad_norm": 0.2956133186817169,
      "learning_rate": 0.00014956711026626124,
      "loss": 0.6818,
      "step": 2358
    },
    {
      "epoch": 0.3604691141078046,
      "grad_norm": 0.29100513458251953,
      "learning_rate": 0.0001495237531940441,
      "loss": 0.6162,
      "step": 2359
    },
    {
      "epoch": 0.36062192000611226,
      "grad_norm": 0.3451087474822998,
      "learning_rate": 0.00014948038378339955,
      "loss": 0.8069,
      "step": 2360
    },
    {
      "epoch": 0.3607747259044199,
      "grad_norm": 0.2580629885196686,
      "learning_rate": 0.00014943700204513274,
      "loss": 0.5483,
      "step": 2361
    },
    {
      "epoch": 0.3609275318027276,
      "grad_norm": 0.2776690125465393,
      "learning_rate": 0.00014939360799005183,
      "loss": 0.7614,
      "step": 2362
    },
    {
      "epoch": 0.36108033770103526,
      "grad_norm": 0.28003740310668945,
      "learning_rate": 0.00014935020162896816,
      "loss": 0.5608,
      "step": 2363
    },
    {
      "epoch": 0.36123314359934294,
      "grad_norm": 0.28120556473731995,
      "learning_rate": 0.000149306782972696,
      "loss": 0.5789,
      "step": 2364
    },
    {
      "epoch": 0.3613859494976506,
      "grad_norm": 0.32332703471183777,
      "learning_rate": 0.00014926335203205272,
      "loss": 0.5761,
      "step": 2365
    },
    {
      "epoch": 0.3615387553959583,
      "grad_norm": 0.2898085117340088,
      "learning_rate": 0.00014921990881785886,
      "loss": 0.7513,
      "step": 2366
    },
    {
      "epoch": 0.36169156129426594,
      "grad_norm": 0.2950339913368225,
      "learning_rate": 0.00014917645334093784,
      "loss": 0.6948,
      "step": 2367
    },
    {
      "epoch": 0.3618443671925736,
      "grad_norm": 0.34204477071762085,
      "learning_rate": 0.0001491329856121163,
      "loss": 0.624,
      "step": 2368
    },
    {
      "epoch": 0.3619971730908813,
      "grad_norm": 0.3127589523792267,
      "learning_rate": 0.00014908950564222382,
      "loss": 0.7177,
      "step": 2369
    },
    {
      "epoch": 0.362149978989189,
      "grad_norm": 0.28709182143211365,
      "learning_rate": 0.00014904601344209307,
      "loss": 0.5862,
      "step": 2370
    },
    {
      "epoch": 0.3623027848874967,
      "grad_norm": 0.25311172008514404,
      "learning_rate": 0.00014900250902255977,
      "loss": 0.8151,
      "step": 2371
    },
    {
      "epoch": 0.3624555907858043,
      "grad_norm": 0.3411361277103424,
      "learning_rate": 0.0001489589923944627,
      "loss": 0.7777,
      "step": 2372
    },
    {
      "epoch": 0.362608396684112,
      "grad_norm": 0.28155237436294556,
      "learning_rate": 0.00014891546356864363,
      "loss": 0.6464,
      "step": 2373
    },
    {
      "epoch": 0.3627612025824197,
      "grad_norm": 0.28000929951667786,
      "learning_rate": 0.00014887192255594745,
      "loss": 0.662,
      "step": 2374
    },
    {
      "epoch": 0.36291400848072736,
      "grad_norm": 0.27866485714912415,
      "learning_rate": 0.00014882836936722197,
      "loss": 0.6344,
      "step": 2375
    },
    {
      "epoch": 0.36306681437903504,
      "grad_norm": 0.3239542543888092,
      "learning_rate": 0.00014878480401331817,
      "loss": 0.8088,
      "step": 2376
    },
    {
      "epoch": 0.3632196202773427,
      "grad_norm": 0.3022734820842743,
      "learning_rate": 0.00014874122650508994,
      "loss": 0.6214,
      "step": 2377
    },
    {
      "epoch": 0.36337242617565035,
      "grad_norm": 2.2979094982147217,
      "learning_rate": 0.00014869763685339434,
      "loss": 0.6594,
      "step": 2378
    },
    {
      "epoch": 0.36352523207395804,
      "grad_norm": 0.29689502716064453,
      "learning_rate": 0.0001486540350690912,
      "loss": 0.5897,
      "step": 2379
    },
    {
      "epoch": 0.3636780379722657,
      "grad_norm": 0.32437190413475037,
      "learning_rate": 0.00014861042116304368,
      "loss": 0.819,
      "step": 2380
    },
    {
      "epoch": 0.3638308438705734,
      "grad_norm": 0.3406039774417877,
      "learning_rate": 0.00014856679514611777,
      "loss": 0.8232,
      "step": 2381
    },
    {
      "epoch": 0.3639836497688811,
      "grad_norm": 0.7940521836280823,
      "learning_rate": 0.00014852315702918256,
      "loss": 0.7804,
      "step": 2382
    },
    {
      "epoch": 0.3641364556671888,
      "grad_norm": 0.2833361029624939,
      "learning_rate": 0.00014847950682311004,
      "loss": 0.6998,
      "step": 2383
    },
    {
      "epoch": 0.3642892615654964,
      "grad_norm": 0.36385026574134827,
      "learning_rate": 0.00014843584453877538,
      "loss": 0.7231,
      "step": 2384
    },
    {
      "epoch": 0.3644420674638041,
      "grad_norm": 0.33126354217529297,
      "learning_rate": 0.00014839217018705662,
      "loss": 0.7127,
      "step": 2385
    },
    {
      "epoch": 0.36459487336211177,
      "grad_norm": 0.2855713665485382,
      "learning_rate": 0.0001483484837788349,
      "loss": 0.7587,
      "step": 2386
    },
    {
      "epoch": 0.36474767926041946,
      "grad_norm": 0.2814899682998657,
      "learning_rate": 0.00014830478532499428,
      "loss": 0.6957,
      "step": 2387
    },
    {
      "epoch": 0.36490048515872714,
      "grad_norm": 0.6373705267906189,
      "learning_rate": 0.00014826107483642185,
      "loss": 0.6314,
      "step": 2388
    },
    {
      "epoch": 0.3650532910570348,
      "grad_norm": 0.2719639837741852,
      "learning_rate": 0.00014821735232400777,
      "loss": 0.6713,
      "step": 2389
    },
    {
      "epoch": 0.36520609695534245,
      "grad_norm": 0.2806015610694885,
      "learning_rate": 0.00014817361779864507,
      "loss": 0.742,
      "step": 2390
    },
    {
      "epoch": 0.36535890285365014,
      "grad_norm": 0.3191283345222473,
      "learning_rate": 0.00014812987127122993,
      "loss": 0.6505,
      "step": 2391
    },
    {
      "epoch": 0.3655117087519578,
      "grad_norm": 0.2744157612323761,
      "learning_rate": 0.00014808611275266134,
      "loss": 0.465,
      "step": 2392
    },
    {
      "epoch": 0.3656645146502655,
      "grad_norm": 0.33585116267204285,
      "learning_rate": 0.00014804234225384143,
      "loss": 0.6132,
      "step": 2393
    },
    {
      "epoch": 0.3658173205485732,
      "grad_norm": 0.26743748784065247,
      "learning_rate": 0.0001479985597856752,
      "loss": 0.7128,
      "step": 2394
    },
    {
      "epoch": 0.3659701264468809,
      "grad_norm": 0.2847437262535095,
      "learning_rate": 0.00014795476535907074,
      "loss": 0.7707,
      "step": 2395
    },
    {
      "epoch": 0.3661229323451885,
      "grad_norm": 0.2703080177307129,
      "learning_rate": 0.000147910958984939,
      "loss": 0.713,
      "step": 2396
    },
    {
      "epoch": 0.3662757382434962,
      "grad_norm": 0.3985111713409424,
      "learning_rate": 0.000147867140674194,
      "loss": 0.5579,
      "step": 2397
    },
    {
      "epoch": 0.36642854414180387,
      "grad_norm": 0.3106270730495453,
      "learning_rate": 0.00014782331043775276,
      "loss": 0.6585,
      "step": 2398
    },
    {
      "epoch": 0.36658135004011155,
      "grad_norm": 0.3780193030834198,
      "learning_rate": 0.00014777946828653513,
      "loss": 0.733,
      "step": 2399
    },
    {
      "epoch": 0.36673415593841924,
      "grad_norm": 0.3120858371257782,
      "learning_rate": 0.00014773561423146408,
      "loss": 0.7741,
      "step": 2400
    },
    {
      "epoch": 0.3668869618367269,
      "grad_norm": 0.27893051505088806,
      "learning_rate": 0.00014769174828346542,
      "loss": 0.7162,
      "step": 2401
    },
    {
      "epoch": 0.36703976773503455,
      "grad_norm": 0.29121726751327515,
      "learning_rate": 0.00014764787045346803,
      "loss": 0.5927,
      "step": 2402
    },
    {
      "epoch": 0.36719257363334223,
      "grad_norm": 0.28169146180152893,
      "learning_rate": 0.00014760398075240366,
      "loss": 0.682,
      "step": 2403
    },
    {
      "epoch": 0.3673453795316499,
      "grad_norm": 0.3464924693107605,
      "learning_rate": 0.0001475600791912071,
      "loss": 0.5718,
      "step": 2404
    },
    {
      "epoch": 0.3674981854299576,
      "grad_norm": 0.25900718569755554,
      "learning_rate": 0.00014751616578081604,
      "loss": 0.6681,
      "step": 2405
    },
    {
      "epoch": 0.3676509913282653,
      "grad_norm": 0.29786524176597595,
      "learning_rate": 0.0001474722405321711,
      "loss": 0.8521,
      "step": 2406
    },
    {
      "epoch": 0.36780379722657297,
      "grad_norm": 0.36379602551460266,
      "learning_rate": 0.00014742830345621598,
      "loss": 0.6777,
      "step": 2407
    },
    {
      "epoch": 0.3679566031248806,
      "grad_norm": 0.30311641097068787,
      "learning_rate": 0.00014738435456389717,
      "loss": 0.747,
      "step": 2408
    },
    {
      "epoch": 0.3681094090231883,
      "grad_norm": 0.2607172429561615,
      "learning_rate": 0.00014734039386616417,
      "loss": 0.5609,
      "step": 2409
    },
    {
      "epoch": 0.36826221492149597,
      "grad_norm": 0.5097734332084656,
      "learning_rate": 0.00014729642137396943,
      "loss": 0.8335,
      "step": 2410
    },
    {
      "epoch": 0.36841502081980365,
      "grad_norm": 0.32967936992645264,
      "learning_rate": 0.00014725243709826828,
      "loss": 0.6682,
      "step": 2411
    },
    {
      "epoch": 0.36856782671811134,
      "grad_norm": 0.2863999605178833,
      "learning_rate": 0.00014720844105001912,
      "loss": 0.7139,
      "step": 2412
    },
    {
      "epoch": 0.368720632616419,
      "grad_norm": 0.2852937579154968,
      "learning_rate": 0.00014716443324018315,
      "loss": 0.605,
      "step": 2413
    },
    {
      "epoch": 0.36887343851472665,
      "grad_norm": 0.40641558170318604,
      "learning_rate": 0.00014712041367972452,
      "loss": 0.686,
      "step": 2414
    },
    {
      "epoch": 0.36902624441303433,
      "grad_norm": 0.25617754459381104,
      "learning_rate": 0.00014707638237961037,
      "loss": 0.7407,
      "step": 2415
    },
    {
      "epoch": 0.369179050311342,
      "grad_norm": 0.3151395320892334,
      "learning_rate": 0.00014703233935081073,
      "loss": 0.6683,
      "step": 2416
    },
    {
      "epoch": 0.3693318562096497,
      "grad_norm": 0.2913879454135895,
      "learning_rate": 0.00014698828460429854,
      "loss": 0.7352,
      "step": 2417
    },
    {
      "epoch": 0.3694846621079574,
      "grad_norm": 0.2934713363647461,
      "learning_rate": 0.0001469442181510497,
      "loss": 0.7851,
      "step": 2418
    },
    {
      "epoch": 0.36963746800626507,
      "grad_norm": 0.3047449290752411,
      "learning_rate": 0.00014690014000204294,
      "loss": 0.6604,
      "step": 2419
    },
    {
      "epoch": 0.3697902739045727,
      "grad_norm": 0.2977202832698822,
      "learning_rate": 0.00014685605016825996,
      "loss": 0.7277,
      "step": 2420
    },
    {
      "epoch": 0.3699430798028804,
      "grad_norm": 0.2821477949619293,
      "learning_rate": 0.00014681194866068544,
      "loss": 0.7307,
      "step": 2421
    },
    {
      "epoch": 0.37009588570118807,
      "grad_norm": 0.28574398159980774,
      "learning_rate": 0.00014676783549030686,
      "loss": 0.6274,
      "step": 2422
    },
    {
      "epoch": 0.37024869159949575,
      "grad_norm": 0.30137869715690613,
      "learning_rate": 0.00014672371066811463,
      "loss": 0.6889,
      "step": 2423
    },
    {
      "epoch": 0.37040149749780343,
      "grad_norm": 0.3153139054775238,
      "learning_rate": 0.00014667957420510215,
      "loss": 0.6823,
      "step": 2424
    },
    {
      "epoch": 0.37055430339611106,
      "grad_norm": 0.32339897751808167,
      "learning_rate": 0.00014663542611226553,
      "loss": 0.7572,
      "step": 2425
    },
    {
      "epoch": 0.37070710929441875,
      "grad_norm": 0.2944089472293854,
      "learning_rate": 0.000146591266400604,
      "loss": 0.7334,
      "step": 2426
    },
    {
      "epoch": 0.37085991519272643,
      "grad_norm": 0.4568473994731903,
      "learning_rate": 0.0001465470950811195,
      "loss": 0.8559,
      "step": 2427
    },
    {
      "epoch": 0.3710127210910341,
      "grad_norm": 0.2831132411956787,
      "learning_rate": 0.00014650291216481706,
      "loss": 0.7136,
      "step": 2428
    },
    {
      "epoch": 0.3711655269893418,
      "grad_norm": 0.30619436502456665,
      "learning_rate": 0.00014645871766270436,
      "loss": 0.7136,
      "step": 2429
    },
    {
      "epoch": 0.3713183328876495,
      "grad_norm": 0.27592119574546814,
      "learning_rate": 0.00014641451158579216,
      "loss": 0.683,
      "step": 2430
    },
    {
      "epoch": 0.3714711387859571,
      "grad_norm": 0.27662381529808044,
      "learning_rate": 0.000146370293945094,
      "loss": 0.5909,
      "step": 2431
    },
    {
      "epoch": 0.3716239446842648,
      "grad_norm": 0.27695780992507935,
      "learning_rate": 0.00014632606475162635,
      "loss": 0.5979,
      "step": 2432
    },
    {
      "epoch": 0.3717767505825725,
      "grad_norm": 0.2685675024986267,
      "learning_rate": 0.00014628182401640858,
      "loss": 0.7144,
      "step": 2433
    },
    {
      "epoch": 0.37192955648088016,
      "grad_norm": 0.326612263917923,
      "learning_rate": 0.0001462375717504628,
      "loss": 0.7619,
      "step": 2434
    },
    {
      "epoch": 0.37208236237918785,
      "grad_norm": 0.2743641436100006,
      "learning_rate": 0.0001461933079648142,
      "loss": 0.4816,
      "step": 2435
    },
    {
      "epoch": 0.37223516827749553,
      "grad_norm": 0.2942219376564026,
      "learning_rate": 0.0001461490326704906,
      "loss": 0.8433,
      "step": 2436
    },
    {
      "epoch": 0.37238797417580316,
      "grad_norm": 0.25034305453300476,
      "learning_rate": 0.00014610474587852296,
      "loss": 0.6961,
      "step": 2437
    },
    {
      "epoch": 0.37254078007411084,
      "grad_norm": 0.2891073524951935,
      "learning_rate": 0.0001460604475999449,
      "loss": 0.5937,
      "step": 2438
    },
    {
      "epoch": 0.37269358597241853,
      "grad_norm": 0.2591763138771057,
      "learning_rate": 0.00014601613784579295,
      "loss": 0.6111,
      "step": 2439
    },
    {
      "epoch": 0.3728463918707262,
      "grad_norm": 0.3589370846748352,
      "learning_rate": 0.00014597181662710652,
      "loss": 0.7989,
      "step": 2440
    },
    {
      "epoch": 0.3729991977690339,
      "grad_norm": 0.257616251707077,
      "learning_rate": 0.00014592748395492788,
      "loss": 0.7384,
      "step": 2441
    },
    {
      "epoch": 0.3731520036673416,
      "grad_norm": 0.366580605506897,
      "learning_rate": 0.00014588313984030212,
      "loss": 0.6911,
      "step": 2442
    },
    {
      "epoch": 0.3733048095656492,
      "grad_norm": 0.3071226477622986,
      "learning_rate": 0.00014583878429427725,
      "loss": 0.5344,
      "step": 2443
    },
    {
      "epoch": 0.3734576154639569,
      "grad_norm": 0.36921221017837524,
      "learning_rate": 0.00014579441732790404,
      "loss": 0.5783,
      "step": 2444
    },
    {
      "epoch": 0.3736104213622646,
      "grad_norm": 0.30017930269241333,
      "learning_rate": 0.00014575003895223615,
      "loss": 0.8363,
      "step": 2445
    },
    {
      "epoch": 0.37376322726057226,
      "grad_norm": 0.3353256583213806,
      "learning_rate": 0.0001457056491783301,
      "loss": 0.5413,
      "step": 2446
    },
    {
      "epoch": 0.37391603315887995,
      "grad_norm": 0.28771746158599854,
      "learning_rate": 0.00014566124801724522,
      "loss": 0.7268,
      "step": 2447
    },
    {
      "epoch": 0.37406883905718763,
      "grad_norm": 0.2777288854122162,
      "learning_rate": 0.00014561683548004373,
      "loss": 0.8383,
      "step": 2448
    },
    {
      "epoch": 0.37422164495549526,
      "grad_norm": 0.35012948513031006,
      "learning_rate": 0.00014557241157779055,
      "loss": 0.7641,
      "step": 2449
    },
    {
      "epoch": 0.37437445085380294,
      "grad_norm": 0.312569797039032,
      "learning_rate": 0.0001455279763215536,
      "loss": 0.6564,
      "step": 2450
    },
    {
      "epoch": 0.3745272567521106,
      "grad_norm": 0.2877102494239807,
      "learning_rate": 0.00014548352972240354,
      "loss": 0.6939,
      "step": 2451
    },
    {
      "epoch": 0.3746800626504183,
      "grad_norm": 0.5239971876144409,
      "learning_rate": 0.0001454390717914138,
      "loss": 0.6307,
      "step": 2452
    },
    {
      "epoch": 0.374832868548726,
      "grad_norm": 0.3368930518627167,
      "learning_rate": 0.00014539460253966077,
      "loss": 0.6324,
      "step": 2453
    },
    {
      "epoch": 0.3749856744470337,
      "grad_norm": 0.30349984765052795,
      "learning_rate": 0.00014535012197822357,
      "loss": 0.7975,
      "step": 2454
    },
    {
      "epoch": 0.3751384803453413,
      "grad_norm": 0.2840270400047302,
      "learning_rate": 0.00014530563011818417,
      "loss": 0.5472,
      "step": 2455
    },
    {
      "epoch": 0.375291286243649,
      "grad_norm": 0.28692367672920227,
      "learning_rate": 0.00014526112697062733,
      "loss": 0.8516,
      "step": 2456
    },
    {
      "epoch": 0.3754440921419567,
      "grad_norm": 0.29820212721824646,
      "learning_rate": 0.00014521661254664062,
      "loss": 0.5865,
      "step": 2457
    },
    {
      "epoch": 0.37559689804026436,
      "grad_norm": 0.2936681807041168,
      "learning_rate": 0.00014517208685731447,
      "loss": 0.6314,
      "step": 2458
    },
    {
      "epoch": 0.37574970393857204,
      "grad_norm": 0.3220421075820923,
      "learning_rate": 0.00014512754991374206,
      "loss": 0.7181,
      "step": 2459
    },
    {
      "epoch": 0.37590250983687973,
      "grad_norm": 0.29429811239242554,
      "learning_rate": 0.0001450830017270194,
      "loss": 0.6019,
      "step": 2460
    },
    {
      "epoch": 0.37605531573518736,
      "grad_norm": 0.49563896656036377,
      "learning_rate": 0.0001450384423082453,
      "loss": 0.8089,
      "step": 2461
    },
    {
      "epoch": 0.37620812163349504,
      "grad_norm": 0.4126056134700775,
      "learning_rate": 0.00014499387166852135,
      "loss": 0.7697,
      "step": 2462
    },
    {
      "epoch": 0.3763609275318027,
      "grad_norm": 0.3450013995170593,
      "learning_rate": 0.00014494928981895197,
      "loss": 0.7991,
      "step": 2463
    },
    {
      "epoch": 0.3765137334301104,
      "grad_norm": 0.3362366557121277,
      "learning_rate": 0.00014490469677064436,
      "loss": 0.9246,
      "step": 2464
    },
    {
      "epoch": 0.3766665393284181,
      "grad_norm": 0.31218796968460083,
      "learning_rate": 0.00014486009253470846,
      "loss": 0.8765,
      "step": 2465
    },
    {
      "epoch": 0.3768193452267258,
      "grad_norm": 0.3747103810310364,
      "learning_rate": 0.0001448154771222571,
      "loss": 0.7145,
      "step": 2466
    },
    {
      "epoch": 0.3769721511250334,
      "grad_norm": 0.348871648311615,
      "learning_rate": 0.0001447708505444058,
      "loss": 0.7798,
      "step": 2467
    },
    {
      "epoch": 0.3771249570233411,
      "grad_norm": 0.35312315821647644,
      "learning_rate": 0.00014472621281227293,
      "loss": 0.5461,
      "step": 2468
    },
    {
      "epoch": 0.3772777629216488,
      "grad_norm": 0.3236096203327179,
      "learning_rate": 0.00014468156393697954,
      "loss": 0.7983,
      "step": 2469
    },
    {
      "epoch": 0.37743056881995646,
      "grad_norm": 0.23714995384216309,
      "learning_rate": 0.00014463690392964957,
      "loss": 0.5793,
      "step": 2470
    },
    {
      "epoch": 0.37758337471826414,
      "grad_norm": 0.38550207018852234,
      "learning_rate": 0.0001445922328014097,
      "loss": 0.695,
      "step": 2471
    },
    {
      "epoch": 0.3777361806165718,
      "grad_norm": 0.2918228805065155,
      "learning_rate": 0.00014454755056338934,
      "loss": 0.7962,
      "step": 2472
    },
    {
      "epoch": 0.37788898651487945,
      "grad_norm": 0.2856360673904419,
      "learning_rate": 0.00014450285722672067,
      "loss": 0.7473,
      "step": 2473
    },
    {
      "epoch": 0.37804179241318714,
      "grad_norm": 0.33044031262397766,
      "learning_rate": 0.00014445815280253875,
      "loss": 0.5781,
      "step": 2474
    },
    {
      "epoch": 0.3781945983114948,
      "grad_norm": 0.4528699815273285,
      "learning_rate": 0.00014441343730198117,
      "loss": 0.9506,
      "step": 2475
    },
    {
      "epoch": 0.3783474042098025,
      "grad_norm": 0.4538821578025818,
      "learning_rate": 0.0001443687107361886,
      "loss": 0.7509,
      "step": 2476
    },
    {
      "epoch": 0.3785002101081102,
      "grad_norm": 0.5137097239494324,
      "learning_rate": 0.0001443239731163041,
      "loss": 0.7929,
      "step": 2477
    },
    {
      "epoch": 0.3786530160064179,
      "grad_norm": 0.29435819387435913,
      "learning_rate": 0.0001442792244534738,
      "loss": 0.7049,
      "step": 2478
    },
    {
      "epoch": 0.3788058219047255,
      "grad_norm": 0.30987152457237244,
      "learning_rate": 0.00014423446475884643,
      "loss": 0.7649,
      "step": 2479
    },
    {
      "epoch": 0.3789586278030332,
      "grad_norm": 0.3604254424571991,
      "learning_rate": 0.00014418969404357345,
      "loss": 0.6638,
      "step": 2480
    },
    {
      "epoch": 0.37911143370134087,
      "grad_norm": 0.32214394211769104,
      "learning_rate": 0.00014414491231880917,
      "loss": 0.6358,
      "step": 2481
    },
    {
      "epoch": 0.37926423959964856,
      "grad_norm": 0.24983897805213928,
      "learning_rate": 0.00014410011959571054,
      "loss": 0.7039,
      "step": 2482
    },
    {
      "epoch": 0.37941704549795624,
      "grad_norm": 0.31551504135131836,
      "learning_rate": 0.00014405531588543733,
      "loss": 0.7776,
      "step": 2483
    },
    {
      "epoch": 0.37956985139626387,
      "grad_norm": 0.3642079830169678,
      "learning_rate": 0.00014401050119915192,
      "loss": 0.7002,
      "step": 2484
    },
    {
      "epoch": 0.37972265729457155,
      "grad_norm": 0.29362720251083374,
      "learning_rate": 0.00014396567554801962,
      "loss": 0.7925,
      "step": 2485
    },
    {
      "epoch": 0.37987546319287924,
      "grad_norm": 0.3027987480163574,
      "learning_rate": 0.00014392083894320827,
      "loss": 0.915,
      "step": 2486
    },
    {
      "epoch": 0.3800282690911869,
      "grad_norm": 0.38472673296928406,
      "learning_rate": 0.0001438759913958886,
      "loss": 0.8979,
      "step": 2487
    },
    {
      "epoch": 0.3801810749894946,
      "grad_norm": 0.27983352541923523,
      "learning_rate": 0.000143831132917234,
      "loss": 0.7819,
      "step": 2488
    },
    {
      "epoch": 0.3803338808878023,
      "grad_norm": 0.3205126225948334,
      "learning_rate": 0.00014378626351842054,
      "loss": 0.6158,
      "step": 2489
    },
    {
      "epoch": 0.3804866867861099,
      "grad_norm": 0.30116376280784607,
      "learning_rate": 0.0001437413832106271,
      "loss": 0.7862,
      "step": 2490
    },
    {
      "epoch": 0.3806394926844176,
      "grad_norm": 0.3567577004432678,
      "learning_rate": 0.00014369649200503517,
      "loss": 0.6213,
      "step": 2491
    },
    {
      "epoch": 0.3807922985827253,
      "grad_norm": 0.2745025157928467,
      "learning_rate": 0.00014365158991282907,
      "loss": 0.8277,
      "step": 2492
    },
    {
      "epoch": 0.38094510448103297,
      "grad_norm": 0.2893485128879547,
      "learning_rate": 0.00014360667694519576,
      "loss": 0.5813,
      "step": 2493
    },
    {
      "epoch": 0.38109791037934065,
      "grad_norm": 0.3255918323993683,
      "learning_rate": 0.00014356175311332496,
      "loss": 0.853,
      "step": 2494
    },
    {
      "epoch": 0.38125071627764834,
      "grad_norm": 0.3026112914085388,
      "learning_rate": 0.00014351681842840903,
      "loss": 0.5956,
      "step": 2495
    },
    {
      "epoch": 0.38140352217595597,
      "grad_norm": 0.3224642872810364,
      "learning_rate": 0.00014347187290164308,
      "loss": 1.1075,
      "step": 2496
    },
    {
      "epoch": 0.38155632807426365,
      "grad_norm": 0.4730568528175354,
      "learning_rate": 0.00014342691654422492,
      "loss": 0.8043,
      "step": 2497
    },
    {
      "epoch": 0.38170913397257134,
      "grad_norm": 0.2610538601875305,
      "learning_rate": 0.000143381949367355,
      "loss": 0.7726,
      "step": 2498
    },
    {
      "epoch": 0.381861939870879,
      "grad_norm": 0.27993085980415344,
      "learning_rate": 0.0001433369713822366,
      "loss": 0.7121,
      "step": 2499
    },
    {
      "epoch": 0.3820147457691867,
      "grad_norm": 0.3264187276363373,
      "learning_rate": 0.00014329198260007553,
      "loss": 0.6973,
      "step": 2500
    },
    {
      "epoch": 0.3821675516674944,
      "grad_norm": 0.3121355473995209,
      "learning_rate": 0.00014324698303208038,
      "loss": 0.6586,
      "step": 2501
    },
    {
      "epoch": 0.382320357565802,
      "grad_norm": 0.2439948469400406,
      "learning_rate": 0.0001432019726894625,
      "loss": 0.4669,
      "step": 2502
    },
    {
      "epoch": 0.3824731634641097,
      "grad_norm": 0.32409751415252686,
      "learning_rate": 0.00014315695158343572,
      "loss": 0.8436,
      "step": 2503
    },
    {
      "epoch": 0.3826259693624174,
      "grad_norm": 0.3331731855869293,
      "learning_rate": 0.00014311191972521674,
      "loss": 0.6083,
      "step": 2504
    },
    {
      "epoch": 0.38277877526072507,
      "grad_norm": 0.2964318096637726,
      "learning_rate": 0.00014306687712602485,
      "loss": 0.7832,
      "step": 2505
    },
    {
      "epoch": 0.38293158115903275,
      "grad_norm": 0.36654728651046753,
      "learning_rate": 0.00014302182379708205,
      "loss": 0.686,
      "step": 2506
    },
    {
      "epoch": 0.38308438705734044,
      "grad_norm": 0.46433189511299133,
      "learning_rate": 0.00014297675974961295,
      "loss": 0.7919,
      "step": 2507
    },
    {
      "epoch": 0.38323719295564806,
      "grad_norm": 0.29593682289123535,
      "learning_rate": 0.00014293168499484495,
      "loss": 0.7321,
      "step": 2508
    },
    {
      "epoch": 0.38338999885395575,
      "grad_norm": 0.27927494049072266,
      "learning_rate": 0.000142886599544008,
      "loss": 0.6574,
      "step": 2509
    },
    {
      "epoch": 0.38354280475226343,
      "grad_norm": 0.2708612382411957,
      "learning_rate": 0.00014284150340833476,
      "loss": 0.6859,
      "step": 2510
    },
    {
      "epoch": 0.3836956106505711,
      "grad_norm": 0.29946985840797424,
      "learning_rate": 0.00014279639659906058,
      "loss": 0.7239,
      "step": 2511
    },
    {
      "epoch": 0.3838484165488788,
      "grad_norm": 0.35676175355911255,
      "learning_rate": 0.00014275127912742345,
      "loss": 0.7373,
      "step": 2512
    },
    {
      "epoch": 0.3840012224471865,
      "grad_norm": 0.35043084621429443,
      "learning_rate": 0.00014270615100466397,
      "loss": 0.6002,
      "step": 2513
    },
    {
      "epoch": 0.3841540283454941,
      "grad_norm": 0.28143876791000366,
      "learning_rate": 0.00014266101224202546,
      "loss": 0.8477,
      "step": 2514
    },
    {
      "epoch": 0.3843068342438018,
      "grad_norm": 0.2760816514492035,
      "learning_rate": 0.00014261586285075386,
      "loss": 0.719,
      "step": 2515
    },
    {
      "epoch": 0.3844596401421095,
      "grad_norm": 0.27988770604133606,
      "learning_rate": 0.00014257070284209774,
      "loss": 0.6344,
      "step": 2516
    },
    {
      "epoch": 0.38461244604041717,
      "grad_norm": 0.25354310870170593,
      "learning_rate": 0.00014252553222730838,
      "loss": 0.7014,
      "step": 2517
    },
    {
      "epoch": 0.38476525193872485,
      "grad_norm": 0.2850781977176666,
      "learning_rate": 0.00014248035101763963,
      "loss": 0.604,
      "step": 2518
    },
    {
      "epoch": 0.38491805783703253,
      "grad_norm": 0.332959920167923,
      "learning_rate": 0.000142435159224348,
      "loss": 0.7286,
      "step": 2519
    },
    {
      "epoch": 0.38507086373534016,
      "grad_norm": 0.29361769556999207,
      "learning_rate": 0.00014238995685869268,
      "loss": 0.7916,
      "step": 2520
    },
    {
      "epoch": 0.38522366963364785,
      "grad_norm": 0.2901209592819214,
      "learning_rate": 0.00014234474393193543,
      "loss": 0.6919,
      "step": 2521
    },
    {
      "epoch": 0.38537647553195553,
      "grad_norm": 0.2989867329597473,
      "learning_rate": 0.0001422995204553407,
      "loss": 0.858,
      "step": 2522
    },
    {
      "epoch": 0.3855292814302632,
      "grad_norm": 0.2621612250804901,
      "learning_rate": 0.00014225428644017548,
      "loss": 0.7048,
      "step": 2523
    },
    {
      "epoch": 0.3856820873285709,
      "grad_norm": 0.35946473479270935,
      "learning_rate": 0.00014220904189770952,
      "loss": 0.8626,
      "step": 2524
    },
    {
      "epoch": 0.3858348932268786,
      "grad_norm": 0.31629034876823425,
      "learning_rate": 0.00014216378683921504,
      "loss": 0.758,
      "step": 2525
    },
    {
      "epoch": 0.3859876991251862,
      "grad_norm": 0.32585909962654114,
      "learning_rate": 0.00014211852127596705,
      "loss": 0.6084,
      "step": 2526
    },
    {
      "epoch": 0.3861405050234939,
      "grad_norm": 0.32691988348960876,
      "learning_rate": 0.00014207324521924304,
      "loss": 0.7124,
      "step": 2527
    },
    {
      "epoch": 0.3862933109218016,
      "grad_norm": 0.36711400747299194,
      "learning_rate": 0.00014202795868032312,
      "loss": 0.6328,
      "step": 2528
    },
    {
      "epoch": 0.38644611682010926,
      "grad_norm": 0.29490792751312256,
      "learning_rate": 0.00014198266167049012,
      "loss": 0.8087,
      "step": 2529
    },
    {
      "epoch": 0.38659892271841695,
      "grad_norm": 0.3001713752746582,
      "learning_rate": 0.00014193735420102934,
      "loss": 0.579,
      "step": 2530
    },
    {
      "epoch": 0.38675172861672463,
      "grad_norm": 0.2902267575263977,
      "learning_rate": 0.00014189203628322885,
      "loss": 0.6978,
      "step": 2531
    },
    {
      "epoch": 0.38690453451503226,
      "grad_norm": 0.34467917680740356,
      "learning_rate": 0.0001418467079283791,
      "loss": 0.7732,
      "step": 2532
    },
    {
      "epoch": 0.38705734041333995,
      "grad_norm": 0.30216652154922485,
      "learning_rate": 0.0001418013691477734,
      "loss": 0.661,
      "step": 2533
    },
    {
      "epoch": 0.38721014631164763,
      "grad_norm": 0.309682697057724,
      "learning_rate": 0.00014175601995270747,
      "loss": 0.8284,
      "step": 2534
    },
    {
      "epoch": 0.3873629522099553,
      "grad_norm": 0.40115198493003845,
      "learning_rate": 0.00014171066035447965,
      "loss": 0.7166,
      "step": 2535
    },
    {
      "epoch": 0.387515758108263,
      "grad_norm": 0.27580732107162476,
      "learning_rate": 0.00014166529036439094,
      "loss": 0.7984,
      "step": 2536
    },
    {
      "epoch": 0.3876685640065706,
      "grad_norm": 0.30666035413742065,
      "learning_rate": 0.00014161990999374488,
      "loss": 0.6308,
      "step": 2537
    },
    {
      "epoch": 0.3878213699048783,
      "grad_norm": 0.3423399031162262,
      "learning_rate": 0.00014157451925384763,
      "loss": 0.6894,
      "step": 2538
    },
    {
      "epoch": 0.387974175803186,
      "grad_norm": 0.3036220073699951,
      "learning_rate": 0.00014152911815600784,
      "loss": 0.8044,
      "step": 2539
    },
    {
      "epoch": 0.3881269817014937,
      "grad_norm": 0.35811495780944824,
      "learning_rate": 0.00014148370671153692,
      "loss": 0.7668,
      "step": 2540
    },
    {
      "epoch": 0.38827978759980136,
      "grad_norm": 0.29517245292663574,
      "learning_rate": 0.00014143828493174866,
      "loss": 0.7531,
      "step": 2541
    },
    {
      "epoch": 0.38843259349810905,
      "grad_norm": 0.30313557386398315,
      "learning_rate": 0.0001413928528279596,
      "loss": 0.7122,
      "step": 2542
    },
    {
      "epoch": 0.3885853993964167,
      "grad_norm": 0.4556387662887573,
      "learning_rate": 0.0001413474104114887,
      "loss": 0.8926,
      "step": 2543
    },
    {
      "epoch": 0.38873820529472436,
      "grad_norm": 0.30476585030555725,
      "learning_rate": 0.00014130195769365757,
      "loss": 0.8802,
      "step": 2544
    },
    {
      "epoch": 0.38889101119303204,
      "grad_norm": 0.3249836564064026,
      "learning_rate": 0.00014125649468579038,
      "loss": 0.8169,
      "step": 2545
    },
    {
      "epoch": 0.3890438170913397,
      "grad_norm": 0.30261462926864624,
      "learning_rate": 0.00014121102139921386,
      "loss": 0.9638,
      "step": 2546
    },
    {
      "epoch": 0.3891966229896474,
      "grad_norm": 0.27610981464385986,
      "learning_rate": 0.0001411655378452573,
      "loss": 0.6815,
      "step": 2547
    },
    {
      "epoch": 0.3893494288879551,
      "grad_norm": 0.2926682233810425,
      "learning_rate": 0.00014112004403525253,
      "loss": 0.5433,
      "step": 2548
    },
    {
      "epoch": 0.3895022347862627,
      "grad_norm": 0.4395153820514679,
      "learning_rate": 0.00014107453998053396,
      "loss": 0.7815,
      "step": 2549
    },
    {
      "epoch": 0.3896550406845704,
      "grad_norm": 0.287105530500412,
      "learning_rate": 0.00014102902569243855,
      "loss": 0.6523,
      "step": 2550
    },
    {
      "epoch": 0.3898078465828781,
      "grad_norm": 0.34142303466796875,
      "learning_rate": 0.0001409835011823058,
      "loss": 0.7796,
      "step": 2551
    },
    {
      "epoch": 0.3899606524811858,
      "grad_norm": 0.26499852538108826,
      "learning_rate": 0.0001409379664614777,
      "loss": 0.7323,
      "step": 2552
    },
    {
      "epoch": 0.39011345837949346,
      "grad_norm": 0.29665425419807434,
      "learning_rate": 0.00014089242154129898,
      "loss": 0.4781,
      "step": 2553
    },
    {
      "epoch": 0.39026626427780114,
      "grad_norm": 0.271915078163147,
      "learning_rate": 0.00014084686643311666,
      "loss": 0.6668,
      "step": 2554
    },
    {
      "epoch": 0.3904190701761088,
      "grad_norm": 0.2694081962108612,
      "learning_rate": 0.00014080130114828046,
      "loss": 0.7001,
      "step": 2555
    },
    {
      "epoch": 0.39057187607441646,
      "grad_norm": 0.3542138338088989,
      "learning_rate": 0.00014075572569814256,
      "loss": 0.701,
      "step": 2556
    },
    {
      "epoch": 0.39072468197272414,
      "grad_norm": 0.31724610924720764,
      "learning_rate": 0.0001407101400940577,
      "loss": 0.9051,
      "step": 2557
    },
    {
      "epoch": 0.3908774878710318,
      "grad_norm": 0.48276618123054504,
      "learning_rate": 0.00014066454434738318,
      "loss": 0.7013,
      "step": 2558
    },
    {
      "epoch": 0.3910302937693395,
      "grad_norm": 0.34542426466941833,
      "learning_rate": 0.0001406189384694788,
      "loss": 0.7639,
      "step": 2559
    },
    {
      "epoch": 0.3911830996676472,
      "grad_norm": 0.3011816143989563,
      "learning_rate": 0.00014057332247170685,
      "loss": 0.6921,
      "step": 2560
    },
    {
      "epoch": 0.3913359055659548,
      "grad_norm": 0.3123289942741394,
      "learning_rate": 0.0001405276963654322,
      "loss": 0.7067,
      "step": 2561
    },
    {
      "epoch": 0.3914887114642625,
      "grad_norm": 0.3149774670600891,
      "learning_rate": 0.0001404820601620222,
      "loss": 0.7665,
      "step": 2562
    },
    {
      "epoch": 0.3916415173625702,
      "grad_norm": 0.2675241231918335,
      "learning_rate": 0.0001404364138728467,
      "loss": 0.7803,
      "step": 2563
    },
    {
      "epoch": 0.3917943232608779,
      "grad_norm": 0.3044669032096863,
      "learning_rate": 0.00014039075750927813,
      "loss": 0.7445,
      "step": 2564
    },
    {
      "epoch": 0.39194712915918556,
      "grad_norm": 0.27285170555114746,
      "learning_rate": 0.00014034509108269138,
      "loss": 0.7312,
      "step": 2565
    },
    {
      "epoch": 0.39209993505749324,
      "grad_norm": 0.2783736288547516,
      "learning_rate": 0.00014029941460446389,
      "loss": 0.84,
      "step": 2566
    },
    {
      "epoch": 0.39225274095580087,
      "grad_norm": 0.3714994192123413,
      "learning_rate": 0.00014025372808597548,
      "loss": 0.6991,
      "step": 2567
    },
    {
      "epoch": 0.39240554685410856,
      "grad_norm": 0.28046417236328125,
      "learning_rate": 0.00014020803153860865,
      "loss": 0.6944,
      "step": 2568
    },
    {
      "epoch": 0.39255835275241624,
      "grad_norm": 0.28387904167175293,
      "learning_rate": 0.00014016232497374823,
      "loss": 0.7067,
      "step": 2569
    },
    {
      "epoch": 0.3927111586507239,
      "grad_norm": 0.3740023970603943,
      "learning_rate": 0.00014011660840278174,
      "loss": 0.7416,
      "step": 2570
    },
    {
      "epoch": 0.3928639645490316,
      "grad_norm": 0.5043659806251526,
      "learning_rate": 0.00014007088183709895,
      "loss": 0.7961,
      "step": 2571
    },
    {
      "epoch": 0.3930167704473393,
      "grad_norm": 0.3045665919780731,
      "learning_rate": 0.00014002514528809235,
      "loss": 0.6823,
      "step": 2572
    },
    {
      "epoch": 0.3931695763456469,
      "grad_norm": 0.23655778169631958,
      "learning_rate": 0.0001399793987671568,
      "loss": 0.651,
      "step": 2573
    },
    {
      "epoch": 0.3933223822439546,
      "grad_norm": 0.362617552280426,
      "learning_rate": 0.0001399336422856896,
      "loss": 0.6424,
      "step": 2574
    },
    {
      "epoch": 0.3934751881422623,
      "grad_norm": 0.2810218632221222,
      "learning_rate": 0.0001398878758550907,
      "loss": 0.5367,
      "step": 2575
    },
    {
      "epoch": 0.39362799404057,
      "grad_norm": 0.28525862097740173,
      "learning_rate": 0.00013984209948676233,
      "loss": 0.6672,
      "step": 2576
    },
    {
      "epoch": 0.39378079993887766,
      "grad_norm": 0.5437533259391785,
      "learning_rate": 0.00013979631319210932,
      "loss": 0.6273,
      "step": 2577
    },
    {
      "epoch": 0.39393360583718534,
      "grad_norm": 0.29662612080574036,
      "learning_rate": 0.0001397505169825389,
      "loss": 0.6011,
      "step": 2578
    },
    {
      "epoch": 0.39408641173549297,
      "grad_norm": 0.274076908826828,
      "learning_rate": 0.00013970471086946091,
      "loss": 0.6626,
      "step": 2579
    },
    {
      "epoch": 0.39423921763380065,
      "grad_norm": 0.26067155599594116,
      "learning_rate": 0.00013965889486428743,
      "loss": 0.666,
      "step": 2580
    },
    {
      "epoch": 0.39439202353210834,
      "grad_norm": 0.335151731967926,
      "learning_rate": 0.00013961306897843328,
      "loss": 0.7958,
      "step": 2581
    },
    {
      "epoch": 0.394544829430416,
      "grad_norm": 0.29889029264450073,
      "learning_rate": 0.00013956723322331544,
      "loss": 0.6133,
      "step": 2582
    },
    {
      "epoch": 0.3946976353287237,
      "grad_norm": 0.28185123205184937,
      "learning_rate": 0.00013952138761035363,
      "loss": 0.6197,
      "step": 2583
    },
    {
      "epoch": 0.3948504412270314,
      "grad_norm": 0.2703631520271301,
      "learning_rate": 0.00013947553215096982,
      "loss": 0.7928,
      "step": 2584
    },
    {
      "epoch": 0.395003247125339,
      "grad_norm": 0.3054632544517517,
      "learning_rate": 0.00013942966685658855,
      "loss": 0.6414,
      "step": 2585
    },
    {
      "epoch": 0.3951560530236467,
      "grad_norm": 0.28667205572128296,
      "learning_rate": 0.00013938379173863679,
      "loss": 0.6163,
      "step": 2586
    },
    {
      "epoch": 0.3953088589219544,
      "grad_norm": 0.31871435046195984,
      "learning_rate": 0.00013933790680854387,
      "loss": 0.7422,
      "step": 2587
    },
    {
      "epoch": 0.39546166482026207,
      "grad_norm": 0.2837061285972595,
      "learning_rate": 0.0001392920120777417,
      "loss": 0.7655,
      "step": 2588
    },
    {
      "epoch": 0.39561447071856976,
      "grad_norm": 0.2924594581127167,
      "learning_rate": 0.00013924610755766456,
      "loss": 0.5866,
      "step": 2589
    },
    {
      "epoch": 0.39576727661687744,
      "grad_norm": 0.27115708589553833,
      "learning_rate": 0.00013920019325974916,
      "loss": 0.9004,
      "step": 2590
    },
    {
      "epoch": 0.39592008251518507,
      "grad_norm": 0.3006618916988373,
      "learning_rate": 0.00013915426919543466,
      "loss": 0.8016,
      "step": 2591
    },
    {
      "epoch": 0.39607288841349275,
      "grad_norm": 0.3554551601409912,
      "learning_rate": 0.00013910833537616264,
      "loss": 0.5658,
      "step": 2592
    },
    {
      "epoch": 0.39622569431180044,
      "grad_norm": 0.35639873147010803,
      "learning_rate": 0.00013906239181337717,
      "loss": 0.5948,
      "step": 2593
    },
    {
      "epoch": 0.3963785002101081,
      "grad_norm": 0.2902330160140991,
      "learning_rate": 0.0001390164385185247,
      "loss": 0.6889,
      "step": 2594
    },
    {
      "epoch": 0.3965313061084158,
      "grad_norm": 0.3317681550979614,
      "learning_rate": 0.00013897047550305404,
      "loss": 0.6601,
      "step": 2595
    },
    {
      "epoch": 0.39668411200672343,
      "grad_norm": 0.3392220139503479,
      "learning_rate": 0.00013892450277841654,
      "loss": 0.7833,
      "step": 2596
    },
    {
      "epoch": 0.3968369179050311,
      "grad_norm": 0.30571088194847107,
      "learning_rate": 0.00013887852035606596,
      "loss": 0.5213,
      "step": 2597
    },
    {
      "epoch": 0.3969897238033388,
      "grad_norm": 0.3831685781478882,
      "learning_rate": 0.00013883252824745834,
      "loss": 0.7385,
      "step": 2598
    },
    {
      "epoch": 0.3971425297016465,
      "grad_norm": 0.5502047538757324,
      "learning_rate": 0.0001387865264640523,
      "loss": 0.6916,
      "step": 2599
    },
    {
      "epoch": 0.39729533559995417,
      "grad_norm": 0.2634164094924927,
      "learning_rate": 0.0001387405150173088,
      "loss": 0.6008,
      "step": 2600
    },
    {
      "epoch": 0.39744814149826185,
      "grad_norm": 0.4813648760318756,
      "learning_rate": 0.00013869449391869113,
      "loss": 0.8057,
      "step": 2601
    },
    {
      "epoch": 0.3976009473965695,
      "grad_norm": 0.26484498381614685,
      "learning_rate": 0.00013864846317966515,
      "loss": 0.647,
      "step": 2602
    },
    {
      "epoch": 0.39775375329487717,
      "grad_norm": 0.2711394131183624,
      "learning_rate": 0.00013860242281169897,
      "loss": 0.647,
      "step": 2603
    },
    {
      "epoch": 0.39790655919318485,
      "grad_norm": 0.4527345597743988,
      "learning_rate": 0.00013855637282626318,
      "loss": 0.804,
      "step": 2604
    },
    {
      "epoch": 0.39805936509149253,
      "grad_norm": 0.3270074427127838,
      "learning_rate": 0.00013851031323483076,
      "loss": 0.7399,
      "step": 2605
    },
    {
      "epoch": 0.3982121709898002,
      "grad_norm": 0.34323227405548096,
      "learning_rate": 0.0001384642440488771,
      "loss": 0.6582,
      "step": 2606
    },
    {
      "epoch": 0.3983649768881079,
      "grad_norm": 0.2863471210002899,
      "learning_rate": 0.00013841816527987986,
      "loss": 0.4844,
      "step": 2607
    },
    {
      "epoch": 0.39851778278641553,
      "grad_norm": 0.3035363256931305,
      "learning_rate": 0.00013837207693931925,
      "loss": 0.748,
      "step": 2608
    },
    {
      "epoch": 0.3986705886847232,
      "grad_norm": 0.31653252243995667,
      "learning_rate": 0.00013832597903867775,
      "loss": 0.7071,
      "step": 2609
    },
    {
      "epoch": 0.3988233945830309,
      "grad_norm": 0.30605781078338623,
      "learning_rate": 0.00013827987158944035,
      "loss": 0.6058,
      "step": 2610
    },
    {
      "epoch": 0.3989762004813386,
      "grad_norm": 0.37771061062812805,
      "learning_rate": 0.00013823375460309423,
      "loss": 0.6425,
      "step": 2611
    },
    {
      "epoch": 0.39912900637964627,
      "grad_norm": 0.26124832034111023,
      "learning_rate": 0.0001381876280911291,
      "loss": 0.6056,
      "step": 2612
    },
    {
      "epoch": 0.39928181227795395,
      "grad_norm": 0.30022165179252625,
      "learning_rate": 0.000138141492065037,
      "loss": 0.6605,
      "step": 2613
    },
    {
      "epoch": 0.3994346181762616,
      "grad_norm": 0.34445032477378845,
      "learning_rate": 0.00013809534653631237,
      "loss": 0.6027,
      "step": 2614
    },
    {
      "epoch": 0.39958742407456926,
      "grad_norm": 0.27877411246299744,
      "learning_rate": 0.00013804919151645182,
      "loss": 0.805,
      "step": 2615
    },
    {
      "epoch": 0.39974022997287695,
      "grad_norm": 0.2860463559627533,
      "learning_rate": 0.00013800302701695469,
      "loss": 0.6378,
      "step": 2616
    },
    {
      "epoch": 0.39989303587118463,
      "grad_norm": 0.3459800183773041,
      "learning_rate": 0.00013795685304932232,
      "loss": 0.5781,
      "step": 2617
    },
    {
      "epoch": 0.4000458417694923,
      "grad_norm": 3.20552659034729,
      "learning_rate": 0.00013791066962505868,
      "loss": 0.6375,
      "step": 2618
    },
    {
      "epoch": 0.4001986476678,
      "grad_norm": 0.30947524309158325,
      "learning_rate": 0.0001378644767556699,
      "loss": 0.842,
      "step": 2619
    },
    {
      "epoch": 0.40035145356610763,
      "grad_norm": 1.742050290107727,
      "learning_rate": 0.0001378182744526646,
      "loss": 0.8278,
      "step": 2620
    },
    {
      "epoch": 0.4005042594644153,
      "grad_norm": 0.2930509150028229,
      "learning_rate": 0.0001377720627275537,
      "loss": 0.7768,
      "step": 2621
    },
    {
      "epoch": 0.400657065362723,
      "grad_norm": 0.3572491407394409,
      "learning_rate": 0.00013772584159185038,
      "loss": 0.7051,
      "step": 2622
    },
    {
      "epoch": 0.4008098712610307,
      "grad_norm": 0.2924429178237915,
      "learning_rate": 0.00013767961105707035,
      "loss": 0.6823,
      "step": 2623
    },
    {
      "epoch": 0.40096267715933837,
      "grad_norm": 0.3206632733345032,
      "learning_rate": 0.0001376333711347315,
      "loss": 0.6994,
      "step": 2624
    },
    {
      "epoch": 0.40111548305764605,
      "grad_norm": 0.27822092175483704,
      "learning_rate": 0.00013758712183635415,
      "loss": 0.7541,
      "step": 2625
    },
    {
      "epoch": 0.4012682889559537,
      "grad_norm": 0.2822110056877136,
      "learning_rate": 0.0001375408631734609,
      "loss": 0.7284,
      "step": 2626
    },
    {
      "epoch": 0.40142109485426136,
      "grad_norm": 0.26143571734428406,
      "learning_rate": 0.00013749459515757673,
      "loss": 0.7453,
      "step": 2627
    },
    {
      "epoch": 0.40157390075256905,
      "grad_norm": 0.27988147735595703,
      "learning_rate": 0.0001374483178002289,
      "loss": 0.8043,
      "step": 2628
    },
    {
      "epoch": 0.40172670665087673,
      "grad_norm": 0.31378600001335144,
      "learning_rate": 0.00013740203111294703,
      "loss": 0.6827,
      "step": 2629
    },
    {
      "epoch": 0.4018795125491844,
      "grad_norm": 0.30002671480178833,
      "learning_rate": 0.0001373557351072631,
      "loss": 0.818,
      "step": 2630
    },
    {
      "epoch": 0.4020323184474921,
      "grad_norm": 0.3368836045265198,
      "learning_rate": 0.0001373094297947113,
      "loss": 0.682,
      "step": 2631
    },
    {
      "epoch": 0.4021851243457997,
      "grad_norm": 0.3038204610347748,
      "learning_rate": 0.00013726311518682827,
      "loss": 0.5353,
      "step": 2632
    },
    {
      "epoch": 0.4023379302441074,
      "grad_norm": 0.31147778034210205,
      "learning_rate": 0.0001372167912951529,
      "loss": 0.7181,
      "step": 2633
    },
    {
      "epoch": 0.4024907361424151,
      "grad_norm": 0.34573498368263245,
      "learning_rate": 0.00013717045813122639,
      "loss": 0.649,
      "step": 2634
    },
    {
      "epoch": 0.4026435420407228,
      "grad_norm": 0.29101598262786865,
      "learning_rate": 0.00013712411570659223,
      "loss": 0.745,
      "step": 2635
    },
    {
      "epoch": 0.40279634793903046,
      "grad_norm": 0.4098125398159027,
      "learning_rate": 0.00013707776403279627,
      "loss": 0.4228,
      "step": 2636
    },
    {
      "epoch": 0.40294915383733815,
      "grad_norm": 0.29835259914398193,
      "learning_rate": 0.00013703140312138666,
      "loss": 0.75,
      "step": 2637
    },
    {
      "epoch": 0.4031019597356458,
      "grad_norm": 0.2908041477203369,
      "learning_rate": 0.00013698503298391384,
      "loss": 0.5537,
      "step": 2638
    },
    {
      "epoch": 0.40325476563395346,
      "grad_norm": 0.34891489148139954,
      "learning_rate": 0.00013693865363193045,
      "loss": 0.7482,
      "step": 2639
    },
    {
      "epoch": 0.40340757153226114,
      "grad_norm": 0.3375150263309479,
      "learning_rate": 0.0001368922650769916,
      "loss": 0.65,
      "step": 2640
    },
    {
      "epoch": 0.40356037743056883,
      "grad_norm": 0.25705015659332275,
      "learning_rate": 0.00013684586733065464,
      "loss": 0.5924,
      "step": 2641
    },
    {
      "epoch": 0.4037131833288765,
      "grad_norm": 0.4616255760192871,
      "learning_rate": 0.00013679946040447906,
      "loss": 0.8208,
      "step": 2642
    },
    {
      "epoch": 0.4038659892271842,
      "grad_norm": 0.332537978887558,
      "learning_rate": 0.00013675304431002688,
      "loss": 0.7843,
      "step": 2643
    },
    {
      "epoch": 0.4040187951254918,
      "grad_norm": 0.28260141611099243,
      "learning_rate": 0.00013670661905886217,
      "loss": 0.6487,
      "step": 2644
    },
    {
      "epoch": 0.4041716010237995,
      "grad_norm": 0.2809610366821289,
      "learning_rate": 0.00013666018466255148,
      "loss": 0.6374,
      "step": 2645
    },
    {
      "epoch": 0.4043244069221072,
      "grad_norm": 0.26803719997406006,
      "learning_rate": 0.0001366137411326635,
      "loss": 0.5907,
      "step": 2646
    },
    {
      "epoch": 0.4044772128204149,
      "grad_norm": 0.3342551589012146,
      "learning_rate": 0.00013656728848076928,
      "loss": 0.7485,
      "step": 2647
    },
    {
      "epoch": 0.40463001871872256,
      "grad_norm": 0.26108020544052124,
      "learning_rate": 0.00013652082671844205,
      "loss": 0.569,
      "step": 2648
    },
    {
      "epoch": 0.4047828246170302,
      "grad_norm": 0.2628275454044342,
      "learning_rate": 0.00013647435585725746,
      "loss": 0.5711,
      "step": 2649
    },
    {
      "epoch": 0.4049356305153379,
      "grad_norm": 0.42528602480888367,
      "learning_rate": 0.00013642787590879325,
      "loss": 0.8466,
      "step": 2650
    },
    {
      "epoch": 0.40508843641364556,
      "grad_norm": 0.29589298367500305,
      "learning_rate": 0.00013638138688462957,
      "loss": 0.6615,
      "step": 2651
    },
    {
      "epoch": 0.40524124231195324,
      "grad_norm": 0.2670883238315582,
      "learning_rate": 0.0001363348887963487,
      "loss": 0.5713,
      "step": 2652
    },
    {
      "epoch": 0.4053940482102609,
      "grad_norm": 0.27636924386024475,
      "learning_rate": 0.00013628838165553533,
      "loss": 0.8193,
      "step": 2653
    },
    {
      "epoch": 0.4055468541085686,
      "grad_norm": 0.3108629584312439,
      "learning_rate": 0.00013624186547377628,
      "loss": 0.6917,
      "step": 2654
    },
    {
      "epoch": 0.40569966000687624,
      "grad_norm": 0.32495611906051636,
      "learning_rate": 0.00013619534026266064,
      "loss": 0.8104,
      "step": 2655
    },
    {
      "epoch": 0.4058524659051839,
      "grad_norm": 0.4790588617324829,
      "learning_rate": 0.00013614880603377979,
      "loss": 0.6834,
      "step": 2656
    },
    {
      "epoch": 0.4060052718034916,
      "grad_norm": 0.2443542331457138,
      "learning_rate": 0.0001361022627987274,
      "loss": 0.5103,
      "step": 2657
    },
    {
      "epoch": 0.4061580777017993,
      "grad_norm": 0.3075079321861267,
      "learning_rate": 0.0001360557105690993,
      "loss": 0.5277,
      "step": 2658
    },
    {
      "epoch": 0.406310883600107,
      "grad_norm": 0.2426033914089203,
      "learning_rate": 0.00013600914935649354,
      "loss": 0.6479,
      "step": 2659
    },
    {
      "epoch": 0.40646368949841466,
      "grad_norm": 0.3688972592353821,
      "learning_rate": 0.0001359625791725105,
      "loss": 0.6978,
      "step": 2660
    },
    {
      "epoch": 0.4066164953967223,
      "grad_norm": 0.31058305501937866,
      "learning_rate": 0.00013591600002875272,
      "loss": 0.5861,
      "step": 2661
    },
    {
      "epoch": 0.40676930129503,
      "grad_norm": 0.3048050105571747,
      "learning_rate": 0.00013586941193682506,
      "loss": 0.5262,
      "step": 2662
    },
    {
      "epoch": 0.40692210719333766,
      "grad_norm": 0.25697362422943115,
      "learning_rate": 0.00013582281490833446,
      "loss": 0.6828,
      "step": 2663
    },
    {
      "epoch": 0.40707491309164534,
      "grad_norm": 0.28246739506721497,
      "learning_rate": 0.00013577620895489028,
      "loss": 0.6223,
      "step": 2664
    },
    {
      "epoch": 0.407227718989953,
      "grad_norm": 0.3349422216415405,
      "learning_rate": 0.0001357295940881039,
      "loss": 0.5698,
      "step": 2665
    },
    {
      "epoch": 0.4073805248882607,
      "grad_norm": 0.3754185140132904,
      "learning_rate": 0.00013568297031958912,
      "loss": 0.6353,
      "step": 2666
    },
    {
      "epoch": 0.40753333078656834,
      "grad_norm": 0.26379403471946716,
      "learning_rate": 0.00013563633766096179,
      "loss": 0.6373,
      "step": 2667
    },
    {
      "epoch": 0.407686136684876,
      "grad_norm": 0.388322651386261,
      "learning_rate": 0.00013558969612384008,
      "loss": 0.709,
      "step": 2668
    },
    {
      "epoch": 0.4078389425831837,
      "grad_norm": 0.28406521677970886,
      "learning_rate": 0.00013554304571984437,
      "loss": 0.7033,
      "step": 2669
    },
    {
      "epoch": 0.4079917484814914,
      "grad_norm": 0.293530136346817,
      "learning_rate": 0.00013549638646059712,
      "loss": 0.6015,
      "step": 2670
    },
    {
      "epoch": 0.4081445543797991,
      "grad_norm": 0.6281304359436035,
      "learning_rate": 0.0001354497183577232,
      "loss": 0.5931,
      "step": 2671
    },
    {
      "epoch": 0.40829736027810676,
      "grad_norm": 0.2629290521144867,
      "learning_rate": 0.00013540304142284945,
      "loss": 0.6174,
      "step": 2672
    },
    {
      "epoch": 0.4084501661764144,
      "grad_norm": 0.7760477662086487,
      "learning_rate": 0.00013535635566760517,
      "loss": 0.5549,
      "step": 2673
    },
    {
      "epoch": 0.40860297207472207,
      "grad_norm": 0.2590596377849579,
      "learning_rate": 0.00013530966110362165,
      "loss": 0.6117,
      "step": 2674
    },
    {
      "epoch": 0.40875577797302975,
      "grad_norm": 0.24774251878261566,
      "learning_rate": 0.00013526295774253248,
      "loss": 0.4853,
      "step": 2675
    },
    {
      "epoch": 0.40890858387133744,
      "grad_norm": 0.5391387939453125,
      "learning_rate": 0.00013521624559597337,
      "loss": 0.7386,
      "step": 2676
    },
    {
      "epoch": 0.4090613897696451,
      "grad_norm": 0.2838054299354553,
      "learning_rate": 0.0001351695246755823,
      "loss": 0.7681,
      "step": 2677
    },
    {
      "epoch": 0.4092141956679528,
      "grad_norm": 0.28494569659233093,
      "learning_rate": 0.00013512279499299935,
      "loss": 0.7326,
      "step": 2678
    },
    {
      "epoch": 0.40936700156626044,
      "grad_norm": 0.27946600317955017,
      "learning_rate": 0.0001350760565598669,
      "loss": 0.7292,
      "step": 2679
    },
    {
      "epoch": 0.4095198074645681,
      "grad_norm": 0.3265629708766937,
      "learning_rate": 0.00013502930938782937,
      "loss": 0.7616,
      "step": 2680
    },
    {
      "epoch": 0.4096726133628758,
      "grad_norm": 0.3024129867553711,
      "learning_rate": 0.00013498255348853342,
      "loss": 0.7034,
      "step": 2681
    },
    {
      "epoch": 0.4098254192611835,
      "grad_norm": 0.33738934993743896,
      "learning_rate": 0.00013493578887362797,
      "loss": 0.8399,
      "step": 2682
    },
    {
      "epoch": 0.40997822515949117,
      "grad_norm": 0.28026413917541504,
      "learning_rate": 0.00013488901555476395,
      "loss": 0.6149,
      "step": 2683
    },
    {
      "epoch": 0.41013103105779886,
      "grad_norm": 0.2940625548362732,
      "learning_rate": 0.0001348422335435946,
      "loss": 0.583,
      "step": 2684
    },
    {
      "epoch": 0.4102838369561065,
      "grad_norm": 0.30337145924568176,
      "learning_rate": 0.00013479544285177524,
      "loss": 0.6673,
      "step": 2685
    },
    {
      "epoch": 0.41043664285441417,
      "grad_norm": 0.27657046914100647,
      "learning_rate": 0.00013474864349096333,
      "loss": 0.652,
      "step": 2686
    },
    {
      "epoch": 0.41058944875272185,
      "grad_norm": 0.2847646474838257,
      "learning_rate": 0.00013470183547281862,
      "loss": 0.8841,
      "step": 2687
    },
    {
      "epoch": 0.41074225465102954,
      "grad_norm": 0.28591471910476685,
      "learning_rate": 0.0001346550188090029,
      "loss": 0.7805,
      "step": 2688
    },
    {
      "epoch": 0.4108950605493372,
      "grad_norm": 0.2631331980228424,
      "learning_rate": 0.00013460819351118013,
      "loss": 0.5712,
      "step": 2689
    },
    {
      "epoch": 0.4110478664476449,
      "grad_norm": 0.2960440218448639,
      "learning_rate": 0.00013456135959101644,
      "loss": 0.5894,
      "step": 2690
    },
    {
      "epoch": 0.41120067234595253,
      "grad_norm": 0.2985890507698059,
      "learning_rate": 0.00013451451706018017,
      "loss": 0.7551,
      "step": 2691
    },
    {
      "epoch": 0.4113534782442602,
      "grad_norm": 0.3147627115249634,
      "learning_rate": 0.00013446766593034167,
      "loss": 0.6941,
      "step": 2692
    },
    {
      "epoch": 0.4115062841425679,
      "grad_norm": 0.3025978207588196,
      "learning_rate": 0.00013442080621317354,
      "loss": 0.8067,
      "step": 2693
    },
    {
      "epoch": 0.4116590900408756,
      "grad_norm": 0.4465163052082062,
      "learning_rate": 0.00013437393792035046,
      "loss": 0.7604,
      "step": 2694
    },
    {
      "epoch": 0.41181189593918327,
      "grad_norm": 0.27825966477394104,
      "learning_rate": 0.00013432706106354932,
      "loss": 0.5674,
      "step": 2695
    },
    {
      "epoch": 0.41196470183749095,
      "grad_norm": 0.3376471698284149,
      "learning_rate": 0.00013428017565444904,
      "loss": 0.4975,
      "step": 2696
    },
    {
      "epoch": 0.4121175077357986,
      "grad_norm": 0.3866771459579468,
      "learning_rate": 0.00013423328170473076,
      "loss": 0.8205,
      "step": 2697
    },
    {
      "epoch": 0.41227031363410627,
      "grad_norm": 0.30912765860557556,
      "learning_rate": 0.0001341863792260777,
      "loss": 0.6016,
      "step": 2698
    },
    {
      "epoch": 0.41242311953241395,
      "grad_norm": 0.3049004077911377,
      "learning_rate": 0.00013413946823017528,
      "loss": 0.7691,
      "step": 2699
    },
    {
      "epoch": 0.41257592543072164,
      "grad_norm": 0.2947705090045929,
      "learning_rate": 0.00013409254872871084,
      "loss": 0.7282,
      "step": 2700
    },
    {
      "epoch": 0.4127287313290293,
      "grad_norm": 0.31972742080688477,
      "learning_rate": 0.00013404562073337413,
      "loss": 0.606,
      "step": 2701
    },
    {
      "epoch": 0.41288153722733695,
      "grad_norm": 0.33832690119743347,
      "learning_rate": 0.00013399868425585676,
      "loss": 0.6662,
      "step": 2702
    },
    {
      "epoch": 0.41303434312564463,
      "grad_norm": 0.36278048157691956,
      "learning_rate": 0.00013395173930785261,
      "loss": 0.807,
      "step": 2703
    },
    {
      "epoch": 0.4131871490239523,
      "grad_norm": 0.28764086961746216,
      "learning_rate": 0.00013390478590105762,
      "loss": 0.6506,
      "step": 2704
    },
    {
      "epoch": 0.41333995492226,
      "grad_norm": 0.2584592401981354,
      "learning_rate": 0.00013385782404716983,
      "loss": 0.4927,
      "step": 2705
    },
    {
      "epoch": 0.4134927608205677,
      "grad_norm": 0.32915428280830383,
      "learning_rate": 0.00013381085375788939,
      "loss": 0.6524,
      "step": 2706
    },
    {
      "epoch": 0.41364556671887537,
      "grad_norm": 0.29130539298057556,
      "learning_rate": 0.00013376387504491854,
      "loss": 0.5596,
      "step": 2707
    },
    {
      "epoch": 0.413798372617183,
      "grad_norm": 0.293236643075943,
      "learning_rate": 0.00013371688791996168,
      "loss": 0.7917,
      "step": 2708
    },
    {
      "epoch": 0.4139511785154907,
      "grad_norm": 0.34430694580078125,
      "learning_rate": 0.00013366989239472517,
      "loss": 0.6377,
      "step": 2709
    },
    {
      "epoch": 0.41410398441379836,
      "grad_norm": 0.3663583993911743,
      "learning_rate": 0.00013362288848091765,
      "loss": 0.7631,
      "step": 2710
    },
    {
      "epoch": 0.41425679031210605,
      "grad_norm": 0.41205117106437683,
      "learning_rate": 0.00013357587619024965,
      "loss": 0.8005,
      "step": 2711
    },
    {
      "epoch": 0.41440959621041373,
      "grad_norm": 0.3227595388889313,
      "learning_rate": 0.00013352885553443399,
      "loss": 0.614,
      "step": 2712
    },
    {
      "epoch": 0.4145624021087214,
      "grad_norm": 0.3226085305213928,
      "learning_rate": 0.0001334818265251854,
      "loss": 0.679,
      "step": 2713
    },
    {
      "epoch": 0.41471520800702905,
      "grad_norm": 0.3061399459838867,
      "learning_rate": 0.00013343478917422077,
      "loss": 0.6796,
      "step": 2714
    },
    {
      "epoch": 0.41486801390533673,
      "grad_norm": 0.3472737669944763,
      "learning_rate": 0.00013338774349325912,
      "loss": 0.8371,
      "step": 2715
    },
    {
      "epoch": 0.4150208198036444,
      "grad_norm": 0.33723995089530945,
      "learning_rate": 0.00013334068949402141,
      "loss": 0.7688,
      "step": 2716
    },
    {
      "epoch": 0.4151736257019521,
      "grad_norm": 0.24828742444515228,
      "learning_rate": 0.0001332936271882308,
      "loss": 0.8392,
      "step": 2717
    },
    {
      "epoch": 0.4153264316002598,
      "grad_norm": 0.36368826031684875,
      "learning_rate": 0.00013324655658761246,
      "loss": 0.5834,
      "step": 2718
    },
    {
      "epoch": 0.41547923749856747,
      "grad_norm": 0.27683743834495544,
      "learning_rate": 0.00013319947770389364,
      "loss": 0.6469,
      "step": 2719
    },
    {
      "epoch": 0.4156320433968751,
      "grad_norm": 0.31676921248435974,
      "learning_rate": 0.00013315239054880354,
      "loss": 0.7522,
      "step": 2720
    },
    {
      "epoch": 0.4157848492951828,
      "grad_norm": 0.3366953134536743,
      "learning_rate": 0.00013310529513407374,
      "loss": 0.7079,
      "step": 2721
    },
    {
      "epoch": 0.41593765519349046,
      "grad_norm": 0.36535075306892395,
      "learning_rate": 0.00013305819147143747,
      "loss": 0.8439,
      "step": 2722
    },
    {
      "epoch": 0.41609046109179815,
      "grad_norm": 0.3025410771369934,
      "learning_rate": 0.00013301107957263035,
      "loss": 0.6305,
      "step": 2723
    },
    {
      "epoch": 0.41624326699010583,
      "grad_norm": 0.32860392332077026,
      "learning_rate": 0.00013296395944938983,
      "loss": 0.6886,
      "step": 2724
    },
    {
      "epoch": 0.4163960728884135,
      "grad_norm": 0.6233853697776794,
      "learning_rate": 0.00013291683111345552,
      "loss": 0.6825,
      "step": 2725
    },
    {
      "epoch": 0.41654887878672114,
      "grad_norm": 0.26388707756996155,
      "learning_rate": 0.00013286969457656906,
      "loss": 0.848,
      "step": 2726
    },
    {
      "epoch": 0.41670168468502883,
      "grad_norm": 0.30272993445396423,
      "learning_rate": 0.0001328225498504741,
      "loss": 0.5376,
      "step": 2727
    },
    {
      "epoch": 0.4168544905833365,
      "grad_norm": 0.283623605966568,
      "learning_rate": 0.00013277539694691635,
      "loss": 0.6954,
      "step": 2728
    },
    {
      "epoch": 0.4170072964816442,
      "grad_norm": 0.3102206885814667,
      "learning_rate": 0.0001327282358776436,
      "loss": 0.6589,
      "step": 2729
    },
    {
      "epoch": 0.4171601023799519,
      "grad_norm": 0.31666800379753113,
      "learning_rate": 0.0001326810666544056,
      "loss": 0.7268,
      "step": 2730
    },
    {
      "epoch": 0.41731290827825956,
      "grad_norm": 0.3100956380367279,
      "learning_rate": 0.0001326338892889542,
      "loss": 0.6853,
      "step": 2731
    },
    {
      "epoch": 0.4174657141765672,
      "grad_norm": 0.2755642533302307,
      "learning_rate": 0.00013258670379304318,
      "loss": 0.6981,
      "step": 2732
    },
    {
      "epoch": 0.4176185200748749,
      "grad_norm": 0.3609178960323334,
      "learning_rate": 0.0001325395101784285,
      "loss": 0.6268,
      "step": 2733
    },
    {
      "epoch": 0.41777132597318256,
      "grad_norm": 0.30639350414276123,
      "learning_rate": 0.00013249230845686796,
      "loss": 0.6244,
      "step": 2734
    },
    {
      "epoch": 0.41792413187149025,
      "grad_norm": 0.3014542758464813,
      "learning_rate": 0.00013244509864012154,
      "loss": 0.7873,
      "step": 2735
    },
    {
      "epoch": 0.41807693776979793,
      "grad_norm": 0.31064677238464355,
      "learning_rate": 0.00013239788073995113,
      "loss": 0.8028,
      "step": 2736
    },
    {
      "epoch": 0.4182297436681056,
      "grad_norm": 0.36065420508384705,
      "learning_rate": 0.0001323506547681207,
      "loss": 0.738,
      "step": 2737
    },
    {
      "epoch": 0.41838254956641324,
      "grad_norm": 0.8031928539276123,
      "learning_rate": 0.0001323034207363962,
      "loss": 0.7281,
      "step": 2738
    },
    {
      "epoch": 0.4185353554647209,
      "grad_norm": 0.3118455708026886,
      "learning_rate": 0.0001322561786565456,
      "loss": 0.7196,
      "step": 2739
    },
    {
      "epoch": 0.4186881613630286,
      "grad_norm": 0.7187873721122742,
      "learning_rate": 0.0001322089285403388,
      "loss": 0.6527,
      "step": 2740
    },
    {
      "epoch": 0.4188409672613363,
      "grad_norm": 0.31127819418907166,
      "learning_rate": 0.00013216167039954786,
      "loss": 0.7949,
      "step": 2741
    },
    {
      "epoch": 0.418993773159644,
      "grad_norm": 0.2882727086544037,
      "learning_rate": 0.0001321144042459467,
      "loss": 0.7491,
      "step": 2742
    },
    {
      "epoch": 0.41914657905795166,
      "grad_norm": 0.2354152351617813,
      "learning_rate": 0.00013206713009131132,
      "loss": 0.6037,
      "step": 2743
    },
    {
      "epoch": 0.4192993849562593,
      "grad_norm": 0.32294926047325134,
      "learning_rate": 0.00013201984794741965,
      "loss": 0.5798,
      "step": 2744
    },
    {
      "epoch": 0.419452190854567,
      "grad_norm": 0.3169757127761841,
      "learning_rate": 0.00013197255782605163,
      "loss": 0.6941,
      "step": 2745
    },
    {
      "epoch": 0.41960499675287466,
      "grad_norm": 0.28408145904541016,
      "learning_rate": 0.0001319252597389892,
      "loss": 0.719,
      "step": 2746
    },
    {
      "epoch": 0.41975780265118234,
      "grad_norm": 0.3238529860973358,
      "learning_rate": 0.00013187795369801634,
      "loss": 0.7321,
      "step": 2747
    },
    {
      "epoch": 0.41991060854949,
      "grad_norm": 0.29003897309303284,
      "learning_rate": 0.00013183063971491889,
      "loss": 0.7617,
      "step": 2748
    },
    {
      "epoch": 0.4200634144477977,
      "grad_norm": 0.3293977677822113,
      "learning_rate": 0.00013178331780148474,
      "loss": 0.8128,
      "step": 2749
    },
    {
      "epoch": 0.42021622034610534,
      "grad_norm": 0.5713270902633667,
      "learning_rate": 0.00013173598796950375,
      "loss": 0.6511,
      "step": 2750
    },
    {
      "epoch": 0.420369026244413,
      "grad_norm": 0.39325082302093506,
      "learning_rate": 0.00013168865023076778,
      "loss": 0.773,
      "step": 2751
    },
    {
      "epoch": 0.4205218321427207,
      "grad_norm": 0.31554874777793884,
      "learning_rate": 0.00013164130459707057,
      "loss": 0.6337,
      "step": 2752
    },
    {
      "epoch": 0.4206746380410284,
      "grad_norm": 0.5043233036994934,
      "learning_rate": 0.00013159395108020797,
      "loss": 0.9153,
      "step": 2753
    },
    {
      "epoch": 0.4208274439393361,
      "grad_norm": 0.3268618583679199,
      "learning_rate": 0.00013154658969197767,
      "loss": 1.1463,
      "step": 2754
    },
    {
      "epoch": 0.42098024983764376,
      "grad_norm": 0.25591176748275757,
      "learning_rate": 0.0001314992204441793,
      "loss": 0.6545,
      "step": 2755
    },
    {
      "epoch": 0.4211330557359514,
      "grad_norm": 0.31433895230293274,
      "learning_rate": 0.00013145184334861462,
      "loss": 0.7142,
      "step": 2756
    },
    {
      "epoch": 0.4212858616342591,
      "grad_norm": 0.33507999777793884,
      "learning_rate": 0.00013140445841708715,
      "loss": 0.8627,
      "step": 2757
    },
    {
      "epoch": 0.42143866753256676,
      "grad_norm": 0.2889297604560852,
      "learning_rate": 0.0001313570656614025,
      "loss": 0.6666,
      "step": 2758
    },
    {
      "epoch": 0.42159147343087444,
      "grad_norm": 0.34413883090019226,
      "learning_rate": 0.00013130966509336816,
      "loss": 0.7164,
      "step": 2759
    },
    {
      "epoch": 0.4217442793291821,
      "grad_norm": 0.302048921585083,
      "learning_rate": 0.0001312622567247936,
      "loss": 0.524,
      "step": 2760
    },
    {
      "epoch": 0.42189708522748975,
      "grad_norm": 0.30027899146080017,
      "learning_rate": 0.00013121484056749017,
      "loss": 0.6239,
      "step": 2761
    },
    {
      "epoch": 0.42204989112579744,
      "grad_norm": 0.29137787222862244,
      "learning_rate": 0.00013116741663327124,
      "loss": 0.6321,
      "step": 2762
    },
    {
      "epoch": 0.4222026970241051,
      "grad_norm": 0.2735944986343384,
      "learning_rate": 0.0001311199849339521,
      "loss": 0.7902,
      "step": 2763
    },
    {
      "epoch": 0.4223555029224128,
      "grad_norm": 0.2861863970756531,
      "learning_rate": 0.0001310725454813499,
      "loss": 0.5792,
      "step": 2764
    },
    {
      "epoch": 0.4225083088207205,
      "grad_norm": 0.39001768827438354,
      "learning_rate": 0.00013102509828728388,
      "loss": 0.7467,
      "step": 2765
    },
    {
      "epoch": 0.4226611147190282,
      "grad_norm": 0.28480300307273865,
      "learning_rate": 0.00013097764336357504,
      "loss": 0.7347,
      "step": 2766
    },
    {
      "epoch": 0.4228139206173358,
      "grad_norm": 0.3337126076221466,
      "learning_rate": 0.0001309301807220464,
      "loss": 0.7443,
      "step": 2767
    },
    {
      "epoch": 0.4229667265156435,
      "grad_norm": 0.27228620648384094,
      "learning_rate": 0.0001308827103745228,
      "loss": 0.7698,
      "step": 2768
    },
    {
      "epoch": 0.42311953241395117,
      "grad_norm": 0.288928747177124,
      "learning_rate": 0.00013083523233283124,
      "loss": 0.6021,
      "step": 2769
    },
    {
      "epoch": 0.42327233831225886,
      "grad_norm": 0.3781624138355255,
      "learning_rate": 0.00013078774660880033,
      "loss": 0.7343,
      "step": 2770
    },
    {
      "epoch": 0.42342514421056654,
      "grad_norm": 0.354174941778183,
      "learning_rate": 0.0001307402532142608,
      "loss": 0.585,
      "step": 2771
    },
    {
      "epoch": 0.4235779501088742,
      "grad_norm": 0.31593069434165955,
      "learning_rate": 0.00013069275216104521,
      "loss": 0.8165,
      "step": 2772
    },
    {
      "epoch": 0.42373075600718185,
      "grad_norm": 0.4373694956302643,
      "learning_rate": 0.00013064524346098808,
      "loss": 0.6946,
      "step": 2773
    },
    {
      "epoch": 0.42388356190548954,
      "grad_norm": 0.3665942847728729,
      "learning_rate": 0.00013059772712592578,
      "loss": 0.7237,
      "step": 2774
    },
    {
      "epoch": 0.4240363678037972,
      "grad_norm": 0.3371705412864685,
      "learning_rate": 0.0001305502031676966,
      "loss": 1.1382,
      "step": 2775
    },
    {
      "epoch": 0.4241891737021049,
      "grad_norm": 0.2612996995449066,
      "learning_rate": 0.0001305026715981408,
      "loss": 0.7192,
      "step": 2776
    },
    {
      "epoch": 0.4243419796004126,
      "grad_norm": 0.26436302065849304,
      "learning_rate": 0.00013045513242910032,
      "loss": 0.7453,
      "step": 2777
    },
    {
      "epoch": 0.4244947854987203,
      "grad_norm": 0.3628009855747223,
      "learning_rate": 0.00013040758567241933,
      "loss": 0.5983,
      "step": 2778
    },
    {
      "epoch": 0.4246475913970279,
      "grad_norm": 0.32881370186805725,
      "learning_rate": 0.0001303600313399436,
      "loss": 0.7943,
      "step": 2779
    },
    {
      "epoch": 0.4248003972953356,
      "grad_norm": 0.30668795108795166,
      "learning_rate": 0.0001303124694435209,
      "loss": 0.6752,
      "step": 2780
    },
    {
      "epoch": 0.42495320319364327,
      "grad_norm": 0.3583773672580719,
      "learning_rate": 0.00013026489999500086,
      "loss": 0.6052,
      "step": 2781
    },
    {
      "epoch": 0.42510600909195095,
      "grad_norm": 0.3048308491706848,
      "learning_rate": 0.00013021732300623508,
      "loss": 0.5376,
      "step": 2782
    },
    {
      "epoch": 0.42525881499025864,
      "grad_norm": 0.3251284658908844,
      "learning_rate": 0.0001301697384890769,
      "loss": 0.5928,
      "step": 2783
    },
    {
      "epoch": 0.4254116208885663,
      "grad_norm": 0.3076835870742798,
      "learning_rate": 0.00013012214645538163,
      "loss": 0.605,
      "step": 2784
    },
    {
      "epoch": 0.42556442678687395,
      "grad_norm": 0.24366116523742676,
      "learning_rate": 0.00013007454691700644,
      "loss": 0.7375,
      "step": 2785
    },
    {
      "epoch": 0.42571723268518163,
      "grad_norm": 0.3150011897087097,
      "learning_rate": 0.00013002693988581034,
      "loss": 0.7008,
      "step": 2786
    },
    {
      "epoch": 0.4258700385834893,
      "grad_norm": 0.26339590549468994,
      "learning_rate": 0.0001299793253736542,
      "loss": 0.5094,
      "step": 2787
    },
    {
      "epoch": 0.426022844481797,
      "grad_norm": 0.31079646944999695,
      "learning_rate": 0.00012993170339240082,
      "loss": 0.5418,
      "step": 2788
    },
    {
      "epoch": 0.4261756503801047,
      "grad_norm": 0.3271988332271576,
      "learning_rate": 0.00012988407395391477,
      "loss": 0.7808,
      "step": 2789
    },
    {
      "epoch": 0.42632845627841237,
      "grad_norm": 2.406322956085205,
      "learning_rate": 0.00012983643707006258,
      "loss": 0.6183,
      "step": 2790
    },
    {
      "epoch": 0.42648126217672,
      "grad_norm": 0.28407543897628784,
      "learning_rate": 0.00012978879275271253,
      "loss": 0.5404,
      "step": 2791
    },
    {
      "epoch": 0.4266340680750277,
      "grad_norm": 0.2678498923778534,
      "learning_rate": 0.0001297411410137348,
      "loss": 0.8858,
      "step": 2792
    },
    {
      "epoch": 0.42678687397333537,
      "grad_norm": 0.3212999999523163,
      "learning_rate": 0.00012969348186500147,
      "loss": 0.6627,
      "step": 2793
    },
    {
      "epoch": 0.42693967987164305,
      "grad_norm": 0.2900780737400055,
      "learning_rate": 0.00012964581531838636,
      "loss": 0.5738,
      "step": 2794
    },
    {
      "epoch": 0.42709248576995074,
      "grad_norm": 0.3582835793495178,
      "learning_rate": 0.00012959814138576524,
      "loss": 0.6817,
      "step": 2795
    },
    {
      "epoch": 0.4272452916682584,
      "grad_norm": 0.5339453220367432,
      "learning_rate": 0.00012955046007901563,
      "loss": 0.5825,
      "step": 2796
    },
    {
      "epoch": 0.42739809756656605,
      "grad_norm": 0.3053556978702545,
      "learning_rate": 0.00012950277141001695,
      "loss": 0.9986,
      "step": 2797
    },
    {
      "epoch": 0.42755090346487373,
      "grad_norm": 0.29578697681427,
      "learning_rate": 0.00012945507539065046,
      "loss": 0.7364,
      "step": 2798
    },
    {
      "epoch": 0.4277037093631814,
      "grad_norm": 0.2910451292991638,
      "learning_rate": 0.00012940737203279916,
      "loss": 0.7474,
      "step": 2799
    },
    {
      "epoch": 0.4278565152614891,
      "grad_norm": 0.32356956601142883,
      "learning_rate": 0.00012935966134834797,
      "loss": 0.7036,
      "step": 2800
    },
    {
      "epoch": 0.4280093211597968,
      "grad_norm": 0.3562777042388916,
      "learning_rate": 0.0001293119433491836,
      "loss": 0.6056,
      "step": 2801
    },
    {
      "epoch": 0.42816212705810447,
      "grad_norm": 0.2624085247516632,
      "learning_rate": 0.0001292642180471946,
      "loss": 0.6888,
      "step": 2802
    },
    {
      "epoch": 0.4283149329564121,
      "grad_norm": 0.307565838098526,
      "learning_rate": 0.00012921648545427135,
      "loss": 0.6338,
      "step": 2803
    },
    {
      "epoch": 0.4284677388547198,
      "grad_norm": 0.334005743265152,
      "learning_rate": 0.00012916874558230597,
      "loss": 0.5713,
      "step": 2804
    },
    {
      "epoch": 0.42862054475302747,
      "grad_norm": 0.2838148772716522,
      "learning_rate": 0.00012912099844319247,
      "loss": 0.5971,
      "step": 2805
    },
    {
      "epoch": 0.42877335065133515,
      "grad_norm": 0.3633905053138733,
      "learning_rate": 0.0001290732440488267,
      "loss": 0.7187,
      "step": 2806
    },
    {
      "epoch": 0.42892615654964283,
      "grad_norm": 0.2693686783313751,
      "learning_rate": 0.00012902548241110618,
      "loss": 0.6844,
      "step": 2807
    },
    {
      "epoch": 0.4290789624479505,
      "grad_norm": 0.6584002375602722,
      "learning_rate": 0.00012897771354193038,
      "loss": 0.6379,
      "step": 2808
    },
    {
      "epoch": 0.42923176834625815,
      "grad_norm": 0.29742875695228577,
      "learning_rate": 0.00012892993745320053,
      "loss": 0.783,
      "step": 2809
    },
    {
      "epoch": 0.42938457424456583,
      "grad_norm": 0.3576945662498474,
      "learning_rate": 0.00012888215415681956,
      "loss": 0.6983,
      "step": 2810
    },
    {
      "epoch": 0.4295373801428735,
      "grad_norm": 0.3622451424598694,
      "learning_rate": 0.00012883436366469236,
      "loss": 0.6491,
      "step": 2811
    },
    {
      "epoch": 0.4296901860411812,
      "grad_norm": 0.2713620662689209,
      "learning_rate": 0.00012878656598872546,
      "loss": 0.7308,
      "step": 2812
    },
    {
      "epoch": 0.4298429919394889,
      "grad_norm": 0.273732453584671,
      "learning_rate": 0.00012873876114082733,
      "loss": 0.6912,
      "step": 2813
    },
    {
      "epoch": 0.4299957978377965,
      "grad_norm": 0.2648273706436157,
      "learning_rate": 0.00012869094913290805,
      "loss": 0.7097,
      "step": 2814
    },
    {
      "epoch": 0.4301486037361042,
      "grad_norm": 0.32749706506729126,
      "learning_rate": 0.0001286431299768797,
      "loss": 0.7119,
      "step": 2815
    },
    {
      "epoch": 0.4303014096344119,
      "grad_norm": 0.4028230309486389,
      "learning_rate": 0.00012859530368465586,
      "loss": 0.6675,
      "step": 2816
    },
    {
      "epoch": 0.43045421553271956,
      "grad_norm": 0.27643126249313354,
      "learning_rate": 0.0001285474702681522,
      "loss": 0.7513,
      "step": 2817
    },
    {
      "epoch": 0.43060702143102725,
      "grad_norm": 0.2783336341381073,
      "learning_rate": 0.00012849962973928596,
      "loss": 0.6643,
      "step": 2818
    },
    {
      "epoch": 0.43075982732933493,
      "grad_norm": 0.3845579922199249,
      "learning_rate": 0.00012845178210997622,
      "loss": 0.5968,
      "step": 2819
    },
    {
      "epoch": 0.43091263322764256,
      "grad_norm": 0.26863181591033936,
      "learning_rate": 0.00012840392739214376,
      "loss": 0.7512,
      "step": 2820
    },
    {
      "epoch": 0.43106543912595024,
      "grad_norm": 0.3777031898498535,
      "learning_rate": 0.00012835606559771123,
      "loss": 0.6785,
      "step": 2821
    },
    {
      "epoch": 0.43121824502425793,
      "grad_norm": 0.44814273715019226,
      "learning_rate": 0.000128308196738603,
      "loss": 0.8311,
      "step": 2822
    },
    {
      "epoch": 0.4313710509225656,
      "grad_norm": 0.3343289792537689,
      "learning_rate": 0.00012826032082674516,
      "loss": 0.7952,
      "step": 2823
    },
    {
      "epoch": 0.4315238568208733,
      "grad_norm": 0.25728681683540344,
      "learning_rate": 0.00012821243787406562,
      "loss": 0.6728,
      "step": 2824
    },
    {
      "epoch": 0.431676662719181,
      "grad_norm": 0.35816818475723267,
      "learning_rate": 0.000128164547892494,
      "loss": 0.6913,
      "step": 2825
    },
    {
      "epoch": 0.4318294686174886,
      "grad_norm": 0.8182726502418518,
      "learning_rate": 0.0001281166508939617,
      "loss": 0.602,
      "step": 2826
    },
    {
      "epoch": 0.4319822745157963,
      "grad_norm": 0.2920895218849182,
      "learning_rate": 0.00012806874689040186,
      "loss": 0.72,
      "step": 2827
    },
    {
      "epoch": 0.432135080414104,
      "grad_norm": 0.35942891240119934,
      "learning_rate": 0.0001280208358937493,
      "loss": 0.7262,
      "step": 2828
    },
    {
      "epoch": 0.43228788631241166,
      "grad_norm": 0.2746555805206299,
      "learning_rate": 0.00012797291791594073,
      "loss": 0.6001,
      "step": 2829
    },
    {
      "epoch": 0.43244069221071935,
      "grad_norm": 0.32052844762802124,
      "learning_rate": 0.00012792499296891447,
      "loss": 0.6371,
      "step": 2830
    },
    {
      "epoch": 0.43259349810902703,
      "grad_norm": 0.30219176411628723,
      "learning_rate": 0.00012787706106461063,
      "loss": 0.8482,
      "step": 2831
    },
    {
      "epoch": 0.43274630400733466,
      "grad_norm": 0.30528518557548523,
      "learning_rate": 0.000127829122214971,
      "loss": 0.7601,
      "step": 2832
    },
    {
      "epoch": 0.43289910990564234,
      "grad_norm": 0.26077762246131897,
      "learning_rate": 0.0001277811764319392,
      "loss": 0.7087,
      "step": 2833
    },
    {
      "epoch": 0.43305191580395,
      "grad_norm": 0.36096397042274475,
      "learning_rate": 0.00012773322372746049,
      "loss": 0.9584,
      "step": 2834
    },
    {
      "epoch": 0.4332047217022577,
      "grad_norm": 0.29656782746315,
      "learning_rate": 0.00012768526411348187,
      "loss": 0.5632,
      "step": 2835
    },
    {
      "epoch": 0.4333575276005654,
      "grad_norm": 0.29737043380737305,
      "learning_rate": 0.0001276372976019521,
      "loss": 0.7245,
      "step": 2836
    },
    {
      "epoch": 0.4335103334988731,
      "grad_norm": 0.3119308650493622,
      "learning_rate": 0.00012758932420482163,
      "loss": 0.739,
      "step": 2837
    },
    {
      "epoch": 0.4336631393971807,
      "grad_norm": 0.35479971766471863,
      "learning_rate": 0.00012754134393404265,
      "loss": 0.753,
      "step": 2838
    },
    {
      "epoch": 0.4338159452954884,
      "grad_norm": 0.291146457195282,
      "learning_rate": 0.000127493356801569,
      "loss": 0.7229,
      "step": 2839
    },
    {
      "epoch": 0.4339687511937961,
      "grad_norm": 0.28508853912353516,
      "learning_rate": 0.00012744536281935628,
      "loss": 0.7438,
      "step": 2840
    },
    {
      "epoch": 0.43412155709210376,
      "grad_norm": 0.3319436311721802,
      "learning_rate": 0.00012739736199936182,
      "loss": 0.7025,
      "step": 2841
    },
    {
      "epoch": 0.43427436299041144,
      "grad_norm": 0.33289408683776855,
      "learning_rate": 0.00012734935435354457,
      "loss": 0.688,
      "step": 2842
    },
    {
      "epoch": 0.43442716888871913,
      "grad_norm": 0.32559987902641296,
      "learning_rate": 0.00012730133989386524,
      "loss": 0.8483,
      "step": 2843
    },
    {
      "epoch": 0.43457997478702676,
      "grad_norm": 0.2847137749195099,
      "learning_rate": 0.0001272533186322863,
      "loss": 0.7268,
      "step": 2844
    },
    {
      "epoch": 0.43473278068533444,
      "grad_norm": 0.35314276814460754,
      "learning_rate": 0.00012720529058077176,
      "loss": 0.7862,
      "step": 2845
    },
    {
      "epoch": 0.4348855865836421,
      "grad_norm": 0.2800363302230835,
      "learning_rate": 0.00012715725575128745,
      "loss": 0.6973,
      "step": 2846
    },
    {
      "epoch": 0.4350383924819498,
      "grad_norm": 0.8330638408660889,
      "learning_rate": 0.00012710921415580085,
      "loss": 0.9033,
      "step": 2847
    },
    {
      "epoch": 0.4351911983802575,
      "grad_norm": 0.3423483073711395,
      "learning_rate": 0.00012706116580628112,
      "loss": 0.6541,
      "step": 2848
    },
    {
      "epoch": 0.4353440042785652,
      "grad_norm": 0.3231146037578583,
      "learning_rate": 0.00012701311071469903,
      "loss": 0.5636,
      "step": 2849
    },
    {
      "epoch": 0.4354968101768728,
      "grad_norm": 0.5048816204071045,
      "learning_rate": 0.0001269650488930272,
      "loss": 0.8825,
      "step": 2850
    },
    {
      "epoch": 0.4356496160751805,
      "grad_norm": 0.2932036221027374,
      "learning_rate": 0.00012691698035323978,
      "loss": 0.7126,
      "step": 2851
    },
    {
      "epoch": 0.4358024219734882,
      "grad_norm": 0.5563439130783081,
      "learning_rate": 0.00012686890510731267,
      "loss": 0.6056,
      "step": 2852
    },
    {
      "epoch": 0.43595522787179586,
      "grad_norm": 0.28055623173713684,
      "learning_rate": 0.00012682082316722336,
      "loss": 0.823,
      "step": 2853
    },
    {
      "epoch": 0.43610803377010354,
      "grad_norm": 0.28064948320388794,
      "learning_rate": 0.00012677273454495113,
      "loss": 0.6092,
      "step": 2854
    },
    {
      "epoch": 0.4362608396684112,
      "grad_norm": 0.3126406669616699,
      "learning_rate": 0.0001267246392524768,
      "loss": 0.7116,
      "step": 2855
    },
    {
      "epoch": 0.43641364556671886,
      "grad_norm": 0.31279459595680237,
      "learning_rate": 0.00012667653730178292,
      "loss": 0.7781,
      "step": 2856
    },
    {
      "epoch": 0.43656645146502654,
      "grad_norm": 0.2848126292228699,
      "learning_rate": 0.00012662842870485376,
      "loss": 0.586,
      "step": 2857
    },
    {
      "epoch": 0.4367192573633342,
      "grad_norm": 0.28570377826690674,
      "learning_rate": 0.00012658031347367505,
      "loss": 0.5861,
      "step": 2858
    },
    {
      "epoch": 0.4368720632616419,
      "grad_norm": 0.2820393145084381,
      "learning_rate": 0.0001265321916202344,
      "loss": 0.659,
      "step": 2859
    },
    {
      "epoch": 0.4370248691599496,
      "grad_norm": 0.27809035778045654,
      "learning_rate": 0.0001264840631565209,
      "loss": 0.573,
      "step": 2860
    },
    {
      "epoch": 0.4371776750582573,
      "grad_norm": 0.38270965218544006,
      "learning_rate": 0.00012643592809452543,
      "loss": 0.7039,
      "step": 2861
    },
    {
      "epoch": 0.4373304809565649,
      "grad_norm": 0.32795193791389465,
      "learning_rate": 0.00012638778644624032,
      "loss": 0.7147,
      "step": 2862
    },
    {
      "epoch": 0.4374832868548726,
      "grad_norm": 0.32430192828178406,
      "learning_rate": 0.00012633963822365976,
      "loss": 0.9189,
      "step": 2863
    },
    {
      "epoch": 0.4376360927531803,
      "grad_norm": 0.2981487214565277,
      "learning_rate": 0.00012629148343877943,
      "loss": 0.6675,
      "step": 2864
    },
    {
      "epoch": 0.43778889865148796,
      "grad_norm": 0.30008915066719055,
      "learning_rate": 0.0001262433221035967,
      "loss": 0.7412,
      "step": 2865
    },
    {
      "epoch": 0.43794170454979564,
      "grad_norm": 0.31011244654655457,
      "learning_rate": 0.00012619515423011057,
      "loss": 0.8016,
      "step": 2866
    },
    {
      "epoch": 0.43809451044810327,
      "grad_norm": 0.2737204432487488,
      "learning_rate": 0.00012614697983032164,
      "loss": 0.5848,
      "step": 2867
    },
    {
      "epoch": 0.43824731634641095,
      "grad_norm": 0.2696418762207031,
      "learning_rate": 0.00012609879891623216,
      "loss": 0.5662,
      "step": 2868
    },
    {
      "epoch": 0.43840012224471864,
      "grad_norm": 0.31181618571281433,
      "learning_rate": 0.000126050611499846,
      "loss": 0.6401,
      "step": 2869
    },
    {
      "epoch": 0.4385529281430263,
      "grad_norm": 0.3828161060810089,
      "learning_rate": 0.0001260024175931687,
      "loss": 0.6144,
      "step": 2870
    },
    {
      "epoch": 0.438705734041334,
      "grad_norm": 0.26452359557151794,
      "learning_rate": 0.0001259542172082073,
      "loss": 0.624,
      "step": 2871
    },
    {
      "epoch": 0.4388585399396417,
      "grad_norm": 0.776643693447113,
      "learning_rate": 0.00012590601035697055,
      "loss": 0.7748,
      "step": 2872
    },
    {
      "epoch": 0.4390113458379493,
      "grad_norm": 0.4832134246826172,
      "learning_rate": 0.0001258577970514688,
      "loss": 0.6519,
      "step": 2873
    },
    {
      "epoch": 0.439164151736257,
      "grad_norm": 0.305779367685318,
      "learning_rate": 0.00012580957730371395,
      "loss": 0.8206,
      "step": 2874
    },
    {
      "epoch": 0.4393169576345647,
      "grad_norm": 0.3510475754737854,
      "learning_rate": 0.00012576135112571957,
      "loss": 0.7114,
      "step": 2875
    },
    {
      "epoch": 0.43946976353287237,
      "grad_norm": 0.4784543514251709,
      "learning_rate": 0.0001257131185295008,
      "loss": 0.8677,
      "step": 2876
    },
    {
      "epoch": 0.43962256943118005,
      "grad_norm": 0.2720498740673065,
      "learning_rate": 0.0001256648795270744,
      "loss": 0.796,
      "step": 2877
    },
    {
      "epoch": 0.43977537532948774,
      "grad_norm": 0.31961312890052795,
      "learning_rate": 0.0001256166341304587,
      "loss": 0.6601,
      "step": 2878
    },
    {
      "epoch": 0.43992818122779537,
      "grad_norm": 0.2913792133331299,
      "learning_rate": 0.00012556838235167365,
      "loss": 0.6879,
      "step": 2879
    },
    {
      "epoch": 0.44008098712610305,
      "grad_norm": 0.2850216329097748,
      "learning_rate": 0.00012552012420274076,
      "loss": 0.6935,
      "step": 2880
    },
    {
      "epoch": 0.44023379302441074,
      "grad_norm": 0.2468993067741394,
      "learning_rate": 0.00012547185969568312,
      "loss": 0.7689,
      "step": 2881
    },
    {
      "epoch": 0.4403865989227184,
      "grad_norm": 0.3027266561985016,
      "learning_rate": 0.00012542358884252546,
      "loss": 0.6852,
      "step": 2882
    },
    {
      "epoch": 0.4405394048210261,
      "grad_norm": 0.30375269055366516,
      "learning_rate": 0.00012537531165529407,
      "loss": 0.5691,
      "step": 2883
    },
    {
      "epoch": 0.4406922107193338,
      "grad_norm": 0.28959324955940247,
      "learning_rate": 0.0001253270281460168,
      "loss": 0.6491,
      "step": 2884
    },
    {
      "epoch": 0.4408450166176414,
      "grad_norm": 0.32386699318885803,
      "learning_rate": 0.00012527873832672305,
      "loss": 0.6175,
      "step": 2885
    },
    {
      "epoch": 0.4409978225159491,
      "grad_norm": 0.34364500641822815,
      "learning_rate": 0.00012523044220944383,
      "loss": 0.6779,
      "step": 2886
    },
    {
      "epoch": 0.4411506284142568,
      "grad_norm": 0.37659937143325806,
      "learning_rate": 0.00012518213980621177,
      "loss": 0.6467,
      "step": 2887
    },
    {
      "epoch": 0.44130343431256447,
      "grad_norm": 0.34209194779396057,
      "learning_rate": 0.00012513383112906093,
      "loss": 0.5235,
      "step": 2888
    },
    {
      "epoch": 0.44145624021087215,
      "grad_norm": 0.3095417320728302,
      "learning_rate": 0.00012508551619002701,
      "loss": 0.6551,
      "step": 2889
    },
    {
      "epoch": 0.44160904610917984,
      "grad_norm": 0.29345428943634033,
      "learning_rate": 0.00012503719500114735,
      "loss": 0.6686,
      "step": 2890
    },
    {
      "epoch": 0.44176185200748747,
      "grad_norm": 0.2837190330028534,
      "learning_rate": 0.0001249888675744607,
      "loss": 0.7411,
      "step": 2891
    },
    {
      "epoch": 0.44191465790579515,
      "grad_norm": 0.26552635431289673,
      "learning_rate": 0.0001249405339220075,
      "loss": 0.5733,
      "step": 2892
    },
    {
      "epoch": 0.44206746380410283,
      "grad_norm": 0.3099066913127899,
      "learning_rate": 0.0001248921940558296,
      "loss": 0.6688,
      "step": 2893
    },
    {
      "epoch": 0.4422202697024105,
      "grad_norm": 0.35833939909935,
      "learning_rate": 0.00012484384798797048,
      "loss": 0.7451,
      "step": 2894
    },
    {
      "epoch": 0.4423730756007182,
      "grad_norm": 0.2927980422973633,
      "learning_rate": 0.00012479549573047522,
      "loss": 0.5564,
      "step": 2895
    },
    {
      "epoch": 0.4425258814990259,
      "grad_norm": 0.4316510260105133,
      "learning_rate": 0.00012474713729539034,
      "loss": 0.5236,
      "step": 2896
    },
    {
      "epoch": 0.4426786873973335,
      "grad_norm": 0.2684415280818939,
      "learning_rate": 0.00012469877269476388,
      "loss": 0.697,
      "step": 2897
    },
    {
      "epoch": 0.4428314932956412,
      "grad_norm": 0.31690576672554016,
      "learning_rate": 0.00012465040194064558,
      "loss": 0.7508,
      "step": 2898
    },
    {
      "epoch": 0.4429842991939489,
      "grad_norm": 0.32044708728790283,
      "learning_rate": 0.00012460202504508653,
      "loss": 0.8633,
      "step": 2899
    },
    {
      "epoch": 0.44313710509225657,
      "grad_norm": 0.31981486082077026,
      "learning_rate": 0.0001245536420201395,
      "loss": 0.6791,
      "step": 2900
    },
    {
      "epoch": 0.44328991099056425,
      "grad_norm": 0.32003486156463623,
      "learning_rate": 0.00012450525287785861,
      "loss": 0.6707,
      "step": 2901
    },
    {
      "epoch": 0.44344271688887194,
      "grad_norm": 0.3154270052909851,
      "learning_rate": 0.0001244568576302997,
      "loss": 0.6449,
      "step": 2902
    },
    {
      "epoch": 0.44359552278717956,
      "grad_norm": 0.3915046453475952,
      "learning_rate": 0.00012440845628952004,
      "loss": 0.7843,
      "step": 2903
    },
    {
      "epoch": 0.44374832868548725,
      "grad_norm": 0.3003976345062256,
      "learning_rate": 0.00012436004886757831,
      "loss": 0.705,
      "step": 2904
    },
    {
      "epoch": 0.44390113458379493,
      "grad_norm": 0.2850950062274933,
      "learning_rate": 0.00012431163537653496,
      "loss": 0.6981,
      "step": 2905
    },
    {
      "epoch": 0.4440539404821026,
      "grad_norm": 0.38096436858177185,
      "learning_rate": 0.00012426321582845168,
      "loss": 0.7158,
      "step": 2906
    },
    {
      "epoch": 0.4442067463804103,
      "grad_norm": 0.3688443899154663,
      "learning_rate": 0.00012421479023539192,
      "loss": 0.699,
      "step": 2907
    },
    {
      "epoch": 0.444359552278718,
      "grad_norm": 0.3340109884738922,
      "learning_rate": 0.00012416635860942033,
      "loss": 0.8428,
      "step": 2908
    },
    {
      "epoch": 0.4445123581770256,
      "grad_norm": 0.2632228136062622,
      "learning_rate": 0.00012411792096260347,
      "loss": 0.6115,
      "step": 2909
    },
    {
      "epoch": 0.4446651640753333,
      "grad_norm": 0.26376640796661377,
      "learning_rate": 0.00012406947730700895,
      "loss": 0.7574,
      "step": 2910
    },
    {
      "epoch": 0.444817969973641,
      "grad_norm": 0.31339865922927856,
      "learning_rate": 0.00012402102765470628,
      "loss": 0.5751,
      "step": 2911
    },
    {
      "epoch": 0.44497077587194866,
      "grad_norm": 0.32284119725227356,
      "learning_rate": 0.0001239725720177662,
      "loss": 0.6088,
      "step": 2912
    },
    {
      "epoch": 0.44512358177025635,
      "grad_norm": 0.301904559135437,
      "learning_rate": 0.00012392411040826099,
      "loss": 0.6401,
      "step": 2913
    },
    {
      "epoch": 0.44527638766856403,
      "grad_norm": 0.30717435479164124,
      "learning_rate": 0.00012387564283826451,
      "loss": 0.6669,
      "step": 2914
    },
    {
      "epoch": 0.44542919356687166,
      "grad_norm": 0.3378068804740906,
      "learning_rate": 0.00012382716931985202,
      "loss": 0.6117,
      "step": 2915
    },
    {
      "epoch": 0.44558199946517935,
      "grad_norm": 0.3473984897136688,
      "learning_rate": 0.00012377868986510035,
      "loss": 0.9922,
      "step": 2916
    },
    {
      "epoch": 0.44573480536348703,
      "grad_norm": 0.3443201184272766,
      "learning_rate": 0.00012373020448608766,
      "loss": 0.7179,
      "step": 2917
    },
    {
      "epoch": 0.4458876112617947,
      "grad_norm": 0.3572174608707428,
      "learning_rate": 0.00012368171319489376,
      "loss": 0.6572,
      "step": 2918
    },
    {
      "epoch": 0.4460404171601024,
      "grad_norm": 0.23893767595291138,
      "learning_rate": 0.00012363321600359977,
      "loss": 0.452,
      "step": 2919
    },
    {
      "epoch": 0.4461932230584101,
      "grad_norm": 0.3510747253894806,
      "learning_rate": 0.00012358471292428844,
      "loss": 0.7885,
      "step": 2920
    },
    {
      "epoch": 0.4463460289567177,
      "grad_norm": 0.2646324634552002,
      "learning_rate": 0.00012353620396904382,
      "loss": 0.5921,
      "step": 2921
    },
    {
      "epoch": 0.4464988348550254,
      "grad_norm": 0.2995966970920563,
      "learning_rate": 0.00012348768914995157,
      "loss": 0.6149,
      "step": 2922
    },
    {
      "epoch": 0.4466516407533331,
      "grad_norm": 0.27304011583328247,
      "learning_rate": 0.0001234391684790987,
      "loss": 0.8127,
      "step": 2923
    },
    {
      "epoch": 0.44680444665164076,
      "grad_norm": 0.301516056060791,
      "learning_rate": 0.00012339064196857378,
      "loss": 0.6597,
      "step": 2924
    },
    {
      "epoch": 0.44695725254994845,
      "grad_norm": 0.4759582579135895,
      "learning_rate": 0.00012334210963046679,
      "loss": 0.753,
      "step": 2925
    },
    {
      "epoch": 0.4471100584482561,
      "grad_norm": 0.39895206689834595,
      "learning_rate": 0.0001232935714768691,
      "loss": 0.6516,
      "step": 2926
    },
    {
      "epoch": 0.44726286434656376,
      "grad_norm": 0.30540645122528076,
      "learning_rate": 0.0001232450275198736,
      "loss": 0.7916,
      "step": 2927
    },
    {
      "epoch": 0.44741567024487144,
      "grad_norm": 0.3424038887023926,
      "learning_rate": 0.0001231964777715746,
      "loss": 0.6381,
      "step": 2928
    },
    {
      "epoch": 0.44756847614317913,
      "grad_norm": 0.37364235520362854,
      "learning_rate": 0.00012314792224406792,
      "loss": 0.7826,
      "step": 2929
    },
    {
      "epoch": 0.4477212820414868,
      "grad_norm": 0.2799992561340332,
      "learning_rate": 0.00012309936094945072,
      "loss": 0.6587,
      "step": 2930
    },
    {
      "epoch": 0.4478740879397945,
      "grad_norm": 0.306768000125885,
      "learning_rate": 0.00012305079389982162,
      "loss": 0.7384,
      "step": 2931
    },
    {
      "epoch": 0.4480268938381021,
      "grad_norm": 0.3117838501930237,
      "learning_rate": 0.0001230022211072807,
      "loss": 0.8259,
      "step": 2932
    },
    {
      "epoch": 0.4481796997364098,
      "grad_norm": 0.34458303451538086,
      "learning_rate": 0.0001229536425839295,
      "loss": 0.7534,
      "step": 2933
    },
    {
      "epoch": 0.4483325056347175,
      "grad_norm": 0.30791348218917847,
      "learning_rate": 0.00012290505834187094,
      "loss": 0.8358,
      "step": 2934
    },
    {
      "epoch": 0.4484853115330252,
      "grad_norm": 0.327889621257782,
      "learning_rate": 0.00012285646839320935,
      "loss": 0.7923,
      "step": 2935
    },
    {
      "epoch": 0.44863811743133286,
      "grad_norm": 0.2725731432437897,
      "learning_rate": 0.0001228078727500505,
      "loss": 0.7498,
      "step": 2936
    },
    {
      "epoch": 0.44879092332964055,
      "grad_norm": 0.4868723154067993,
      "learning_rate": 0.00012275927142450164,
      "loss": 0.5499,
      "step": 2937
    },
    {
      "epoch": 0.4489437292279482,
      "grad_norm": 0.33403563499450684,
      "learning_rate": 0.00012271066442867137,
      "loss": 0.7104,
      "step": 2938
    },
    {
      "epoch": 0.44909653512625586,
      "grad_norm": 0.323974609375,
      "learning_rate": 0.00012266205177466965,
      "loss": 0.6424,
      "step": 2939
    },
    {
      "epoch": 0.44924934102456354,
      "grad_norm": 0.33368954062461853,
      "learning_rate": 0.00012261343347460797,
      "loss": 0.6989,
      "step": 2940
    },
    {
      "epoch": 0.4494021469228712,
      "grad_norm": 0.22383123636245728,
      "learning_rate": 0.0001225648095405992,
      "loss": 0.5866,
      "step": 2941
    },
    {
      "epoch": 0.4495549528211789,
      "grad_norm": 0.3466974198818207,
      "learning_rate": 0.00012251617998475752,
      "loss": 0.7301,
      "step": 2942
    },
    {
      "epoch": 0.4497077587194866,
      "grad_norm": 0.3107375204563141,
      "learning_rate": 0.0001224675448191986,
      "loss": 0.8147,
      "step": 2943
    },
    {
      "epoch": 0.4498605646177942,
      "grad_norm": 0.32883167266845703,
      "learning_rate": 0.0001224189040560395,
      "loss": 0.8292,
      "step": 2944
    },
    {
      "epoch": 0.4500133705161019,
      "grad_norm": 0.2791670262813568,
      "learning_rate": 0.00012237025770739862,
      "loss": 0.6703,
      "step": 2945
    },
    {
      "epoch": 0.4501661764144096,
      "grad_norm": 0.27861130237579346,
      "learning_rate": 0.00012232160578539586,
      "loss": 0.65,
      "step": 2946
    },
    {
      "epoch": 0.4503189823127173,
      "grad_norm": 15.20201301574707,
      "learning_rate": 0.00012227294830215234,
      "loss": 0.8696,
      "step": 2947
    },
    {
      "epoch": 0.45047178821102496,
      "grad_norm": 0.31514841318130493,
      "learning_rate": 0.00012222428526979074,
      "loss": 0.634,
      "step": 2948
    },
    {
      "epoch": 0.45062459410933264,
      "grad_norm": 0.3868434727191925,
      "learning_rate": 0.000122175616700435,
      "loss": 0.7436,
      "step": 2949
    },
    {
      "epoch": 0.45077740000764027,
      "grad_norm": 0.2911074459552765,
      "learning_rate": 0.00012212694260621052,
      "loss": 0.6778,
      "step": 2950
    },
    {
      "epoch": 0.45093020590594796,
      "grad_norm": 0.3343454599380493,
      "learning_rate": 0.00012207826299924407,
      "loss": 0.6356,
      "step": 2951
    },
    {
      "epoch": 0.45108301180425564,
      "grad_norm": 0.2641962766647339,
      "learning_rate": 0.00012202957789166365,
      "loss": 0.8245,
      "step": 2952
    },
    {
      "epoch": 0.4512358177025633,
      "grad_norm": 0.41699346899986267,
      "learning_rate": 0.00012198088729559889,
      "loss": 0.5049,
      "step": 2953
    },
    {
      "epoch": 0.451388623600871,
      "grad_norm": 0.2433166354894638,
      "learning_rate": 0.00012193219122318052,
      "loss": 0.567,
      "step": 2954
    },
    {
      "epoch": 0.4515414294991787,
      "grad_norm": 0.37394678592681885,
      "learning_rate": 0.00012188348968654084,
      "loss": 0.6925,
      "step": 2955
    },
    {
      "epoch": 0.4516942353974863,
      "grad_norm": 0.6773134469985962,
      "learning_rate": 0.00012183478269781337,
      "loss": 0.7749,
      "step": 2956
    },
    {
      "epoch": 0.451847041295794,
      "grad_norm": 0.3066105544567108,
      "learning_rate": 0.00012178607026913311,
      "loss": 0.6992,
      "step": 2957
    },
    {
      "epoch": 0.4519998471941017,
      "grad_norm": 0.33363470435142517,
      "learning_rate": 0.00012173735241263631,
      "loss": 0.667,
      "step": 2958
    },
    {
      "epoch": 0.4521526530924094,
      "grad_norm": 0.3259199261665344,
      "learning_rate": 0.00012168862914046063,
      "loss": 0.82,
      "step": 2959
    },
    {
      "epoch": 0.45230545899071706,
      "grad_norm": 0.25989770889282227,
      "learning_rate": 0.00012163990046474505,
      "loss": 0.7487,
      "step": 2960
    },
    {
      "epoch": 0.45245826488902474,
      "grad_norm": 0.2744223475456238,
      "learning_rate": 0.00012159116639762991,
      "loss": 0.6466,
      "step": 2961
    },
    {
      "epoch": 0.45261107078733237,
      "grad_norm": 0.2962299585342407,
      "learning_rate": 0.00012154242695125692,
      "loss": 0.634,
      "step": 2962
    },
    {
      "epoch": 0.45276387668564005,
      "grad_norm": 0.319975346326828,
      "learning_rate": 0.00012149368213776906,
      "loss": 0.6443,
      "step": 2963
    },
    {
      "epoch": 0.45291668258394774,
      "grad_norm": 0.2526867687702179,
      "learning_rate": 0.00012144493196931078,
      "loss": 0.6078,
      "step": 2964
    },
    {
      "epoch": 0.4530694884822554,
      "grad_norm": 0.35642507672309875,
      "learning_rate": 0.00012139617645802763,
      "loss": 0.5997,
      "step": 2965
    },
    {
      "epoch": 0.4532222943805631,
      "grad_norm": 0.28118640184402466,
      "learning_rate": 0.00012134741561606679,
      "loss": 0.7705,
      "step": 2966
    },
    {
      "epoch": 0.4533751002788708,
      "grad_norm": 0.3002743721008301,
      "learning_rate": 0.00012129864945557652,
      "loss": 0.6621,
      "step": 2967
    },
    {
      "epoch": 0.4535279061771784,
      "grad_norm": 0.33882054686546326,
      "learning_rate": 0.00012124987798870652,
      "loss": 0.8184,
      "step": 2968
    },
    {
      "epoch": 0.4536807120754861,
      "grad_norm": 0.31006500124931335,
      "learning_rate": 0.00012120110122760779,
      "loss": 0.6977,
      "step": 2969
    },
    {
      "epoch": 0.4538335179737938,
      "grad_norm": 0.3728959858417511,
      "learning_rate": 0.00012115231918443268,
      "loss": 0.6769,
      "step": 2970
    },
    {
      "epoch": 0.45398632387210147,
      "grad_norm": 0.3111363649368286,
      "learning_rate": 0.00012110353187133478,
      "loss": 0.6327,
      "step": 2971
    },
    {
      "epoch": 0.45413912977040916,
      "grad_norm": 0.27471086382865906,
      "learning_rate": 0.00012105473930046907,
      "loss": 0.6579,
      "step": 2972
    },
    {
      "epoch": 0.45429193566871684,
      "grad_norm": 0.29122394323349,
      "learning_rate": 0.0001210059414839918,
      "loss": 0.6247,
      "step": 2973
    },
    {
      "epoch": 0.45444474156702447,
      "grad_norm": 0.3433492183685303,
      "learning_rate": 0.00012095713843406056,
      "loss": 0.7394,
      "step": 2974
    },
    {
      "epoch": 0.45459754746533215,
      "grad_norm": 0.23486945033073425,
      "learning_rate": 0.00012090833016283415,
      "loss": 0.5011,
      "step": 2975
    },
    {
      "epoch": 0.45475035336363984,
      "grad_norm": 0.2754330635070801,
      "learning_rate": 0.00012085951668247284,
      "loss": 0.5579,
      "step": 2976
    },
    {
      "epoch": 0.4549031592619475,
      "grad_norm": 0.37102657556533813,
      "learning_rate": 0.00012081069800513803,
      "loss": 0.6467,
      "step": 2977
    },
    {
      "epoch": 0.4550559651602552,
      "grad_norm": 0.7223601937294006,
      "learning_rate": 0.00012076187414299249,
      "loss": 0.6745,
      "step": 2978
    },
    {
      "epoch": 0.45520877105856283,
      "grad_norm": 0.24973377585411072,
      "learning_rate": 0.00012071304510820029,
      "loss": 0.6539,
      "step": 2979
    },
    {
      "epoch": 0.4553615769568705,
      "grad_norm": 0.2775816023349762,
      "learning_rate": 0.0001206642109129268,
      "loss": 0.6408,
      "step": 2980
    },
    {
      "epoch": 0.4555143828551782,
      "grad_norm": 0.29855409264564514,
      "learning_rate": 0.0001206153715693386,
      "loss": 0.5669,
      "step": 2981
    },
    {
      "epoch": 0.4556671887534859,
      "grad_norm": 0.31334125995635986,
      "learning_rate": 0.00012056652708960361,
      "loss": 0.6411,
      "step": 2982
    },
    {
      "epoch": 0.45581999465179357,
      "grad_norm": 0.2566351294517517,
      "learning_rate": 0.00012051767748589106,
      "loss": 0.6787,
      "step": 2983
    },
    {
      "epoch": 0.45597280055010125,
      "grad_norm": 0.3497639000415802,
      "learning_rate": 0.00012046882277037136,
      "loss": 0.6258,
      "step": 2984
    },
    {
      "epoch": 0.4561256064484089,
      "grad_norm": 0.3289467990398407,
      "learning_rate": 0.00012041996295521634,
      "loss": 0.6685,
      "step": 2985
    },
    {
      "epoch": 0.45627841234671657,
      "grad_norm": 0.32322293519973755,
      "learning_rate": 0.00012037109805259892,
      "loss": 0.7568,
      "step": 2986
    },
    {
      "epoch": 0.45643121824502425,
      "grad_norm": 0.30762824416160583,
      "learning_rate": 0.00012032222807469344,
      "loss": 0.8101,
      "step": 2987
    },
    {
      "epoch": 0.45658402414333193,
      "grad_norm": 0.2983434796333313,
      "learning_rate": 0.00012027335303367542,
      "loss": 0.669,
      "step": 2988
    },
    {
      "epoch": 0.4567368300416396,
      "grad_norm": 0.5077597498893738,
      "learning_rate": 0.00012022447294172165,
      "loss": 0.5633,
      "step": 2989
    },
    {
      "epoch": 0.4568896359399473,
      "grad_norm": 0.24513135850429535,
      "learning_rate": 0.00012017558781101026,
      "loss": 0.6378,
      "step": 2990
    },
    {
      "epoch": 0.45704244183825493,
      "grad_norm": 0.3104904890060425,
      "learning_rate": 0.00012012669765372049,
      "loss": 0.7319,
      "step": 2991
    },
    {
      "epoch": 0.4571952477365626,
      "grad_norm": 0.2522958815097809,
      "learning_rate": 0.00012007780248203297,
      "loss": 0.699,
      "step": 2992
    },
    {
      "epoch": 0.4573480536348703,
      "grad_norm": 0.2799461781978607,
      "learning_rate": 0.00012002890230812947,
      "loss": 0.7926,
      "step": 2993
    },
    {
      "epoch": 0.457500859533178,
      "grad_norm": 0.2929769456386566,
      "learning_rate": 0.00011997999714419313,
      "loss": 0.7925,
      "step": 2994
    },
    {
      "epoch": 0.45765366543148567,
      "grad_norm": 0.37641918659210205,
      "learning_rate": 0.00011993108700240815,
      "loss": 0.7682,
      "step": 2995
    },
    {
      "epoch": 0.45780647132979335,
      "grad_norm": 0.3405778408050537,
      "learning_rate": 0.00011988217189496022,
      "loss": 0.5922,
      "step": 2996
    },
    {
      "epoch": 0.457959277228101,
      "grad_norm": 0.27888843417167664,
      "learning_rate": 0.00011983325183403604,
      "loss": 0.7494,
      "step": 2997
    },
    {
      "epoch": 0.45811208312640866,
      "grad_norm": 0.30546584725379944,
      "learning_rate": 0.00011978432683182364,
      "loss": 0.7148,
      "step": 2998
    },
    {
      "epoch": 0.45826488902471635,
      "grad_norm": 0.6508386731147766,
      "learning_rate": 0.0001197353969005123,
      "loss": 0.9219,
      "step": 2999
    },
    {
      "epoch": 0.45841769492302403,
      "grad_norm": 0.2737182080745697,
      "learning_rate": 0.00011968646205229244,
      "loss": 0.4652,
      "step": 3000
    },
    {
      "epoch": 0.4585705008213317,
      "grad_norm": 0.26674118638038635,
      "learning_rate": 0.00011963752229935587,
      "loss": 0.7178,
      "step": 3001
    },
    {
      "epoch": 0.4587233067196394,
      "grad_norm": 0.29134851694107056,
      "learning_rate": 0.00011958857765389541,
      "loss": 0.6097,
      "step": 3002
    },
    {
      "epoch": 0.45887611261794703,
      "grad_norm": 0.2613201141357422,
      "learning_rate": 0.00011953962812810531,
      "loss": 0.7444,
      "step": 3003
    },
    {
      "epoch": 0.4590289185162547,
      "grad_norm": 0.3279878497123718,
      "learning_rate": 0.00011949067373418084,
      "loss": 0.6885,
      "step": 3004
    },
    {
      "epoch": 0.4591817244145624,
      "grad_norm": 0.2864905595779419,
      "learning_rate": 0.00011944171448431864,
      "loss": 0.579,
      "step": 3005
    },
    {
      "epoch": 0.4593345303128701,
      "grad_norm": 0.3064310550689697,
      "learning_rate": 0.0001193927503907165,
      "loss": 0.5701,
      "step": 3006
    },
    {
      "epoch": 0.45948733621117777,
      "grad_norm": 0.265474796295166,
      "learning_rate": 0.00011934378146557335,
      "loss": 0.6268,
      "step": 3007
    },
    {
      "epoch": 0.45964014210948545,
      "grad_norm": 0.2856680750846863,
      "learning_rate": 0.00011929480772108941,
      "loss": 0.6023,
      "step": 3008
    },
    {
      "epoch": 0.4597929480077931,
      "grad_norm": 0.29818516969680786,
      "learning_rate": 0.00011924582916946612,
      "loss": 0.7667,
      "step": 3009
    },
    {
      "epoch": 0.45994575390610076,
      "grad_norm": 0.46206653118133545,
      "learning_rate": 0.00011919684582290605,
      "loss": 0.6517,
      "step": 3010
    },
    {
      "epoch": 0.46009855980440845,
      "grad_norm": 0.3467860221862793,
      "learning_rate": 0.00011914785769361294,
      "loss": 0.6512,
      "step": 3011
    },
    {
      "epoch": 0.46025136570271613,
      "grad_norm": 0.40472298860549927,
      "learning_rate": 0.00011909886479379189,
      "loss": 0.5255,
      "step": 3012
    },
    {
      "epoch": 0.4604041716010238,
      "grad_norm": 0.33601143956184387,
      "learning_rate": 0.00011904986713564896,
      "loss": 0.8582,
      "step": 3013
    },
    {
      "epoch": 0.4605569774993315,
      "grad_norm": 0.31958696246147156,
      "learning_rate": 0.00011900086473139153,
      "loss": 0.901,
      "step": 3014
    },
    {
      "epoch": 0.4607097833976391,
      "grad_norm": 0.2809063494205475,
      "learning_rate": 0.00011895185759322818,
      "loss": 0.8309,
      "step": 3015
    },
    {
      "epoch": 0.4608625892959468,
      "grad_norm": 0.27857983112335205,
      "learning_rate": 0.00011890284573336856,
      "loss": 0.5825,
      "step": 3016
    },
    {
      "epoch": 0.4610153951942545,
      "grad_norm": 0.29882699251174927,
      "learning_rate": 0.00011885382916402364,
      "loss": 0.8242,
      "step": 3017
    },
    {
      "epoch": 0.4611682010925622,
      "grad_norm": 0.2936548590660095,
      "learning_rate": 0.00011880480789740542,
      "loss": 0.8594,
      "step": 3018
    },
    {
      "epoch": 0.46132100699086986,
      "grad_norm": 0.361464262008667,
      "learning_rate": 0.00011875578194572719,
      "loss": 0.5966,
      "step": 3019
    },
    {
      "epoch": 0.46147381288917755,
      "grad_norm": 0.3106854259967804,
      "learning_rate": 0.0001187067513212033,
      "loss": 0.7327,
      "step": 3020
    },
    {
      "epoch": 0.4616266187874852,
      "grad_norm": 0.3830045759677887,
      "learning_rate": 0.00011865771603604935,
      "loss": 0.6991,
      "step": 3021
    },
    {
      "epoch": 0.46177942468579286,
      "grad_norm": 0.29338714480400085,
      "learning_rate": 0.00011860867610248208,
      "loss": 0.7067,
      "step": 3022
    },
    {
      "epoch": 0.46193223058410054,
      "grad_norm": 0.28551679849624634,
      "learning_rate": 0.00011855963153271936,
      "loss": 0.7352,
      "step": 3023
    },
    {
      "epoch": 0.46208503648240823,
      "grad_norm": 0.2840779423713684,
      "learning_rate": 0.00011851058233898025,
      "loss": 0.7279,
      "step": 3024
    },
    {
      "epoch": 0.4622378423807159,
      "grad_norm": 0.26828092336654663,
      "learning_rate": 0.00011846152853348491,
      "loss": 0.7248,
      "step": 3025
    },
    {
      "epoch": 0.4623906482790236,
      "grad_norm": 0.2917962074279785,
      "learning_rate": 0.00011841247012845471,
      "loss": 0.8556,
      "step": 3026
    },
    {
      "epoch": 0.4625434541773312,
      "grad_norm": 0.33142760396003723,
      "learning_rate": 0.00011836340713611216,
      "loss": 0.59,
      "step": 3027
    },
    {
      "epoch": 0.4626962600756389,
      "grad_norm": 0.5676470994949341,
      "learning_rate": 0.00011831433956868085,
      "loss": 0.6251,
      "step": 3028
    },
    {
      "epoch": 0.4628490659739466,
      "grad_norm": 0.36629360914230347,
      "learning_rate": 0.0001182652674383856,
      "loss": 0.7498,
      "step": 3029
    },
    {
      "epoch": 0.4630018718722543,
      "grad_norm": 0.292192667722702,
      "learning_rate": 0.00011821619075745225,
      "loss": 0.7018,
      "step": 3030
    },
    {
      "epoch": 0.46315467777056196,
      "grad_norm": 0.32250627875328064,
      "learning_rate": 0.00011816710953810788,
      "loss": 0.6218,
      "step": 3031
    },
    {
      "epoch": 0.4633074836688696,
      "grad_norm": 0.2832304835319519,
      "learning_rate": 0.0001181180237925807,
      "loss": 0.6173,
      "step": 3032
    },
    {
      "epoch": 0.4634602895671773,
      "grad_norm": 0.3310091197490692,
      "learning_rate": 0.00011806893353309995,
      "loss": 0.4714,
      "step": 3033
    },
    {
      "epoch": 0.46361309546548496,
      "grad_norm": 0.2954336702823639,
      "learning_rate": 0.0001180198387718961,
      "loss": 0.7133,
      "step": 3034
    },
    {
      "epoch": 0.46376590136379264,
      "grad_norm": 0.31061121821403503,
      "learning_rate": 0.0001179707395212007,
      "loss": 0.6204,
      "step": 3035
    },
    {
      "epoch": 0.4639187072621003,
      "grad_norm": 0.25961393117904663,
      "learning_rate": 0.0001179216357932464,
      "loss": 0.5827,
      "step": 3036
    },
    {
      "epoch": 0.464071513160408,
      "grad_norm": 0.3093631863594055,
      "learning_rate": 0.00011787252760026694,
      "loss": 0.6789,
      "step": 3037
    },
    {
      "epoch": 0.46422431905871564,
      "grad_norm": 0.35962679982185364,
      "learning_rate": 0.00011782341495449732,
      "loss": 0.7595,
      "step": 3038
    },
    {
      "epoch": 0.4643771249570233,
      "grad_norm": 0.44419047236442566,
      "learning_rate": 0.0001177742978681734,
      "loss": 0.6952,
      "step": 3039
    },
    {
      "epoch": 0.464529930855331,
      "grad_norm": 0.382176011800766,
      "learning_rate": 0.00011772517635353242,
      "loss": 0.7884,
      "step": 3040
    },
    {
      "epoch": 0.4646827367536387,
      "grad_norm": 0.302168071269989,
      "learning_rate": 0.00011767605042281251,
      "loss": 0.7756,
      "step": 3041
    },
    {
      "epoch": 0.4648355426519464,
      "grad_norm": 0.33565452694892883,
      "learning_rate": 0.00011762692008825304,
      "loss": 0.8042,
      "step": 3042
    },
    {
      "epoch": 0.46498834855025406,
      "grad_norm": 0.33202725648880005,
      "learning_rate": 0.00011757778536209438,
      "loss": 0.7221,
      "step": 3043
    },
    {
      "epoch": 0.4651411544485617,
      "grad_norm": 0.3008812963962555,
      "learning_rate": 0.00011752864625657804,
      "loss": 0.8778,
      "step": 3044
    },
    {
      "epoch": 0.4652939603468694,
      "grad_norm": 0.3398931324481964,
      "learning_rate": 0.00011747950278394668,
      "loss": 0.9344,
      "step": 3045
    },
    {
      "epoch": 0.46544676624517706,
      "grad_norm": 0.2822340726852417,
      "learning_rate": 0.00011743035495644385,
      "loss": 0.7301,
      "step": 3046
    },
    {
      "epoch": 0.46559957214348474,
      "grad_norm": 0.3987044394016266,
      "learning_rate": 0.00011738120278631445,
      "loss": 0.8121,
      "step": 3047
    },
    {
      "epoch": 0.4657523780417924,
      "grad_norm": 0.28100937604904175,
      "learning_rate": 0.00011733204628580426,
      "loss": 0.8923,
      "step": 3048
    },
    {
      "epoch": 0.4659051839401001,
      "grad_norm": 0.2732929587364197,
      "learning_rate": 0.00011728288546716024,
      "loss": 0.8098,
      "step": 3049
    },
    {
      "epoch": 0.46605798983840774,
      "grad_norm": 0.48743966221809387,
      "learning_rate": 0.00011723372034263036,
      "loss": 0.9673,
      "step": 3050
    },
    {
      "epoch": 0.4662107957367154,
      "grad_norm": 0.3390193581581116,
      "learning_rate": 0.00011718455092446375,
      "loss": 0.5456,
      "step": 3051
    },
    {
      "epoch": 0.4663636016350231,
      "grad_norm": 0.37044551968574524,
      "learning_rate": 0.0001171353772249105,
      "loss": 0.6036,
      "step": 3052
    },
    {
      "epoch": 0.4665164075333308,
      "grad_norm": 0.3185332715511322,
      "learning_rate": 0.00011708619925622188,
      "loss": 0.5297,
      "step": 3053
    },
    {
      "epoch": 0.4666692134316385,
      "grad_norm": 0.30760905146598816,
      "learning_rate": 0.00011703701703065014,
      "loss": 0.7604,
      "step": 3054
    },
    {
      "epoch": 0.46682201932994616,
      "grad_norm": 0.318132609128952,
      "learning_rate": 0.00011698783056044859,
      "loss": 0.6375,
      "step": 3055
    },
    {
      "epoch": 0.4669748252282538,
      "grad_norm": 0.3219239413738251,
      "learning_rate": 0.00011693863985787168,
      "loss": 0.8012,
      "step": 3056
    },
    {
      "epoch": 0.46712763112656147,
      "grad_norm": 0.24363091588020325,
      "learning_rate": 0.0001168894449351748,
      "loss": 0.483,
      "step": 3057
    },
    {
      "epoch": 0.46728043702486916,
      "grad_norm": 0.346457302570343,
      "learning_rate": 0.00011684024580461455,
      "loss": 0.8002,
      "step": 3058
    },
    {
      "epoch": 0.46743324292317684,
      "grad_norm": 0.3414503335952759,
      "learning_rate": 0.00011679104247844834,
      "loss": 0.6163,
      "step": 3059
    },
    {
      "epoch": 0.4675860488214845,
      "grad_norm": 0.3042216897010803,
      "learning_rate": 0.00011674183496893492,
      "loss": 0.6604,
      "step": 3060
    },
    {
      "epoch": 0.4677388547197922,
      "grad_norm": 0.29265016317367554,
      "learning_rate": 0.00011669262328833381,
      "loss": 0.6929,
      "step": 3061
    },
    {
      "epoch": 0.46789166061809984,
      "grad_norm": 0.31261003017425537,
      "learning_rate": 0.00011664340744890577,
      "loss": 0.7802,
      "step": 3062
    },
    {
      "epoch": 0.4680444665164075,
      "grad_norm": 0.3014015257358551,
      "learning_rate": 0.00011659418746291242,
      "loss": 0.6751,
      "step": 3063
    },
    {
      "epoch": 0.4681972724147152,
      "grad_norm": 0.3346925973892212,
      "learning_rate": 0.0001165449633426166,
      "loss": 0.7601,
      "step": 3064
    },
    {
      "epoch": 0.4683500783130229,
      "grad_norm": 0.5724461078643799,
      "learning_rate": 0.00011649573510028203,
      "loss": 0.7809,
      "step": 3065
    },
    {
      "epoch": 0.4685028842113306,
      "grad_norm": 0.45399367809295654,
      "learning_rate": 0.00011644650274817353,
      "loss": 0.6694,
      "step": 3066
    },
    {
      "epoch": 0.46865569010963826,
      "grad_norm": 2.10894775390625,
      "learning_rate": 0.00011639726629855691,
      "loss": 0.8659,
      "step": 3067
    },
    {
      "epoch": 0.4688084960079459,
      "grad_norm": 0.29158470034599304,
      "learning_rate": 0.00011634802576369905,
      "loss": 0.5995,
      "step": 3068
    },
    {
      "epoch": 0.46896130190625357,
      "grad_norm": 0.3776535093784332,
      "learning_rate": 0.0001162987811558678,
      "loss": 0.7662,
      "step": 3069
    },
    {
      "epoch": 0.46911410780456125,
      "grad_norm": 0.30398276448249817,
      "learning_rate": 0.00011624953248733204,
      "loss": 0.7443,
      "step": 3070
    },
    {
      "epoch": 0.46926691370286894,
      "grad_norm": 0.3071722686290741,
      "learning_rate": 0.00011620027977036168,
      "loss": 0.7196,
      "step": 3071
    },
    {
      "epoch": 0.4694197196011766,
      "grad_norm": 0.2889639735221863,
      "learning_rate": 0.00011615102301722758,
      "loss": 0.8124,
      "step": 3072
    },
    {
      "epoch": 0.4695725254994843,
      "grad_norm": 0.29236549139022827,
      "learning_rate": 0.00011610176224020168,
      "loss": 0.9651,
      "step": 3073
    },
    {
      "epoch": 0.46972533139779193,
      "grad_norm": 0.4778033494949341,
      "learning_rate": 0.00011605249745155688,
      "loss": 0.7847,
      "step": 3074
    },
    {
      "epoch": 0.4698781372960996,
      "grad_norm": 0.32045695185661316,
      "learning_rate": 0.00011600322866356708,
      "loss": 0.5641,
      "step": 3075
    },
    {
      "epoch": 0.4700309431944073,
      "grad_norm": 0.33323490619659424,
      "learning_rate": 0.00011595395588850719,
      "loss": 0.7267,
      "step": 3076
    },
    {
      "epoch": 0.470183749092715,
      "grad_norm": 0.2765256464481354,
      "learning_rate": 0.00011590467913865313,
      "loss": 0.6555,
      "step": 3077
    },
    {
      "epoch": 0.47033655499102267,
      "grad_norm": 0.36682021617889404,
      "learning_rate": 0.00011585539842628178,
      "loss": 0.7699,
      "step": 3078
    },
    {
      "epoch": 0.47048936088933035,
      "grad_norm": 0.26881060004234314,
      "learning_rate": 0.00011580611376367096,
      "loss": 0.7308,
      "step": 3079
    },
    {
      "epoch": 0.470642166787638,
      "grad_norm": 0.2899646461009979,
      "learning_rate": 0.00011575682516309963,
      "loss": 0.6116,
      "step": 3080
    },
    {
      "epoch": 0.47079497268594567,
      "grad_norm": 0.32141637802124023,
      "learning_rate": 0.00011570753263684755,
      "loss": 0.5917,
      "step": 3081
    },
    {
      "epoch": 0.47094777858425335,
      "grad_norm": 0.3428771197795868,
      "learning_rate": 0.00011565823619719556,
      "loss": 0.595,
      "step": 3082
    },
    {
      "epoch": 0.47110058448256104,
      "grad_norm": 0.31115248799324036,
      "learning_rate": 0.00011560893585642547,
      "loss": 0.5678,
      "step": 3083
    },
    {
      "epoch": 0.4712533903808687,
      "grad_norm": 0.3463020324707031,
      "learning_rate": 0.00011555963162682007,
      "loss": 0.622,
      "step": 3084
    },
    {
      "epoch": 0.4714061962791764,
      "grad_norm": 0.2892141044139862,
      "learning_rate": 0.000115510323520663,
      "loss": 0.6668,
      "step": 3085
    },
    {
      "epoch": 0.47155900217748403,
      "grad_norm": 0.34900522232055664,
      "learning_rate": 0.00011546101155023908,
      "loss": 0.6623,
      "step": 3086
    },
    {
      "epoch": 0.4717118080757917,
      "grad_norm": 0.2772337794303894,
      "learning_rate": 0.00011541169572783386,
      "loss": 0.5601,
      "step": 3087
    },
    {
      "epoch": 0.4718646139740994,
      "grad_norm": 0.26819148659706116,
      "learning_rate": 0.00011536237606573405,
      "loss": 0.6573,
      "step": 3088
    },
    {
      "epoch": 0.4720174198724071,
      "grad_norm": 0.2884041368961334,
      "learning_rate": 0.00011531305257622717,
      "loss": 0.5774,
      "step": 3089
    },
    {
      "epoch": 0.47217022577071477,
      "grad_norm": 0.4495169222354889,
      "learning_rate": 0.00011526372527160183,
      "loss": 0.7284,
      "step": 3090
    },
    {
      "epoch": 0.4723230316690224,
      "grad_norm": 0.27755841612815857,
      "learning_rate": 0.00011521439416414746,
      "loss": 0.7151,
      "step": 3091
    },
    {
      "epoch": 0.4724758375673301,
      "grad_norm": 0.2918242812156677,
      "learning_rate": 0.00011516505926615444,
      "loss": 0.7234,
      "step": 3092
    },
    {
      "epoch": 0.47262864346563777,
      "grad_norm": 0.3533172607421875,
      "learning_rate": 0.00011511572058991426,
      "loss": 0.8371,
      "step": 3093
    },
    {
      "epoch": 0.47278144936394545,
      "grad_norm": 0.5401041507720947,
      "learning_rate": 0.00011506637814771915,
      "loss": 0.9416,
      "step": 3094
    },
    {
      "epoch": 0.47293425526225313,
      "grad_norm": 0.317081093788147,
      "learning_rate": 0.00011501703195186242,
      "loss": 0.7744,
      "step": 3095
    },
    {
      "epoch": 0.4730870611605608,
      "grad_norm": 0.3578571081161499,
      "learning_rate": 0.00011496768201463822,
      "loss": 0.6039,
      "step": 3096
    },
    {
      "epoch": 0.47323986705886845,
      "grad_norm": 0.683226466178894,
      "learning_rate": 0.00011491832834834171,
      "loss": 0.7333,
      "step": 3097
    },
    {
      "epoch": 0.47339267295717613,
      "grad_norm": 0.47472453117370605,
      "learning_rate": 0.00011486897096526888,
      "loss": 0.8873,
      "step": 3098
    },
    {
      "epoch": 0.4735454788554838,
      "grad_norm": 0.31787946820259094,
      "learning_rate": 0.00011481960987771678,
      "loss": 0.7204,
      "step": 3099
    },
    {
      "epoch": 0.4736982847537915,
      "grad_norm": 0.3622148931026459,
      "learning_rate": 0.00011477024509798326,
      "loss": 0.6251,
      "step": 3100
    },
    {
      "epoch": 0.4738510906520992,
      "grad_norm": 0.31107020378112793,
      "learning_rate": 0.00011472087663836718,
      "loss": 0.6451,
      "step": 3101
    },
    {
      "epoch": 0.47400389655040687,
      "grad_norm": 0.2645438015460968,
      "learning_rate": 0.00011467150451116823,
      "loss": 0.7023,
      "step": 3102
    },
    {
      "epoch": 0.4741567024487145,
      "grad_norm": 0.2966662049293518,
      "learning_rate": 0.00011462212872868712,
      "loss": 0.8464,
      "step": 3103
    },
    {
      "epoch": 0.4743095083470222,
      "grad_norm": 0.30942660570144653,
      "learning_rate": 0.00011457274930322534,
      "loss": 0.7057,
      "step": 3104
    },
    {
      "epoch": 0.47446231424532986,
      "grad_norm": 0.271252304315567,
      "learning_rate": 0.0001145233662470854,
      "loss": 0.619,
      "step": 3105
    },
    {
      "epoch": 0.47461512014363755,
      "grad_norm": 0.3281991183757782,
      "learning_rate": 0.00011447397957257071,
      "loss": 0.9169,
      "step": 3106
    },
    {
      "epoch": 0.47476792604194523,
      "grad_norm": 0.28666695952415466,
      "learning_rate": 0.00011442458929198549,
      "loss": 0.7189,
      "step": 3107
    },
    {
      "epoch": 0.4749207319402529,
      "grad_norm": 0.27483201026916504,
      "learning_rate": 0.00011437519541763493,
      "loss": 0.7052,
      "step": 3108
    },
    {
      "epoch": 0.47507353783856054,
      "grad_norm": 0.3456527590751648,
      "learning_rate": 0.0001143257979618251,
      "loss": 0.7226,
      "step": 3109
    },
    {
      "epoch": 0.47522634373686823,
      "grad_norm": 0.2994341254234314,
      "learning_rate": 0.00011427639693686296,
      "loss": 0.728,
      "step": 3110
    },
    {
      "epoch": 0.4753791496351759,
      "grad_norm": 0.29687169194221497,
      "learning_rate": 0.00011422699235505636,
      "loss": 0.7427,
      "step": 3111
    },
    {
      "epoch": 0.4755319555334836,
      "grad_norm": 0.3335234522819519,
      "learning_rate": 0.00011417758422871405,
      "loss": 0.6418,
      "step": 3112
    },
    {
      "epoch": 0.4756847614317913,
      "grad_norm": 0.41639548540115356,
      "learning_rate": 0.00011412817257014564,
      "loss": 0.7566,
      "step": 3113
    },
    {
      "epoch": 0.47583756733009897,
      "grad_norm": 0.31691673398017883,
      "learning_rate": 0.00011407875739166161,
      "loss": 0.6892,
      "step": 3114
    },
    {
      "epoch": 0.4759903732284066,
      "grad_norm": 0.28714266419410706,
      "learning_rate": 0.00011402933870557337,
      "loss": 0.7085,
      "step": 3115
    },
    {
      "epoch": 0.4761431791267143,
      "grad_norm": 0.2862628400325775,
      "learning_rate": 0.00011397991652419316,
      "loss": 0.7797,
      "step": 3116
    },
    {
      "epoch": 0.47629598502502196,
      "grad_norm": 0.2885003983974457,
      "learning_rate": 0.00011393049085983409,
      "loss": 0.81,
      "step": 3117
    },
    {
      "epoch": 0.47644879092332965,
      "grad_norm": 0.27381911873817444,
      "learning_rate": 0.00011388106172481016,
      "loss": 0.6638,
      "step": 3118
    },
    {
      "epoch": 0.47660159682163733,
      "grad_norm": 0.3181326687335968,
      "learning_rate": 0.00011383162913143624,
      "loss": 0.7114,
      "step": 3119
    },
    {
      "epoch": 0.476754402719945,
      "grad_norm": 0.3386448919773102,
      "learning_rate": 0.000113782193092028,
      "loss": 0.7061,
      "step": 3120
    },
    {
      "epoch": 0.47690720861825264,
      "grad_norm": 0.2852921485900879,
      "learning_rate": 0.00011373275361890205,
      "loss": 0.5549,
      "step": 3121
    },
    {
      "epoch": 0.4770600145165603,
      "grad_norm": 0.30625444650650024,
      "learning_rate": 0.00011368331072437584,
      "loss": 0.7699,
      "step": 3122
    },
    {
      "epoch": 0.477212820414868,
      "grad_norm": 0.4224965274333954,
      "learning_rate": 0.0001136338644207676,
      "loss": 0.7529,
      "step": 3123
    },
    {
      "epoch": 0.4773656263131757,
      "grad_norm": 0.33408239483833313,
      "learning_rate": 0.00011358441472039647,
      "loss": 0.625,
      "step": 3124
    },
    {
      "epoch": 0.4775184322114834,
      "grad_norm": 0.34229129552841187,
      "learning_rate": 0.00011353496163558246,
      "loss": 0.6759,
      "step": 3125
    },
    {
      "epoch": 0.47767123810979106,
      "grad_norm": 0.3091820776462555,
      "learning_rate": 0.00011348550517864638,
      "loss": 0.5886,
      "step": 3126
    },
    {
      "epoch": 0.4778240440080987,
      "grad_norm": 0.2753916382789612,
      "learning_rate": 0.00011343604536190988,
      "loss": 0.8108,
      "step": 3127
    },
    {
      "epoch": 0.4779768499064064,
      "grad_norm": 0.2937089800834656,
      "learning_rate": 0.00011338658219769546,
      "loss": 0.6251,
      "step": 3128
    },
    {
      "epoch": 0.47812965580471406,
      "grad_norm": 0.2915576100349426,
      "learning_rate": 0.00011333711569832645,
      "loss": 0.773,
      "step": 3129
    },
    {
      "epoch": 0.47828246170302174,
      "grad_norm": 0.31171897053718567,
      "learning_rate": 0.00011328764587612704,
      "loss": 0.5729,
      "step": 3130
    },
    {
      "epoch": 0.47843526760132943,
      "grad_norm": 0.4770534932613373,
      "learning_rate": 0.00011323817274342219,
      "loss": 0.7378,
      "step": 3131
    },
    {
      "epoch": 0.4785880734996371,
      "grad_norm": 0.30231431126594543,
      "learning_rate": 0.00011318869631253774,
      "loss": 0.7529,
      "step": 3132
    },
    {
      "epoch": 0.47874087939794474,
      "grad_norm": 0.3131800889968872,
      "learning_rate": 0.00011313921659580028,
      "loss": 0.8394,
      "step": 3133
    },
    {
      "epoch": 0.4788936852962524,
      "grad_norm": 0.3672395348548889,
      "learning_rate": 0.00011308973360553733,
      "loss": 0.9422,
      "step": 3134
    },
    {
      "epoch": 0.4790464911945601,
      "grad_norm": 0.2536657750606537,
      "learning_rate": 0.0001130402473540771,
      "loss": 0.759,
      "step": 3135
    },
    {
      "epoch": 0.4791992970928678,
      "grad_norm": 0.30961093306541443,
      "learning_rate": 0.00011299075785374875,
      "loss": 0.5457,
      "step": 3136
    },
    {
      "epoch": 0.4793521029911755,
      "grad_norm": 0.33329442143440247,
      "learning_rate": 0.00011294126511688205,
      "loss": 0.9315,
      "step": 3137
    },
    {
      "epoch": 0.47950490888948316,
      "grad_norm": 0.3517923355102539,
      "learning_rate": 0.00011289176915580784,
      "loss": 0.6728,
      "step": 3138
    },
    {
      "epoch": 0.4796577147877908,
      "grad_norm": 0.32341015338897705,
      "learning_rate": 0.00011284226998285756,
      "loss": 0.7087,
      "step": 3139
    },
    {
      "epoch": 0.4798105206860985,
      "grad_norm": 0.37149766087532043,
      "learning_rate": 0.0001127927676103635,
      "loss": 0.8427,
      "step": 3140
    },
    {
      "epoch": 0.47996332658440616,
      "grad_norm": 0.2929363250732422,
      "learning_rate": 0.00011274326205065879,
      "loss": 0.6859,
      "step": 3141
    },
    {
      "epoch": 0.48011613248271384,
      "grad_norm": 0.24660053849220276,
      "learning_rate": 0.00011269375331607728,
      "loss": 0.6897,
      "step": 3142
    },
    {
      "epoch": 0.4802689383810215,
      "grad_norm": 0.26271674036979675,
      "learning_rate": 0.00011264424141895373,
      "loss": 0.6369,
      "step": 3143
    },
    {
      "epoch": 0.48042174427932915,
      "grad_norm": 0.3054701089859009,
      "learning_rate": 0.00011259472637162352,
      "loss": 0.5811,
      "step": 3144
    },
    {
      "epoch": 0.48057455017763684,
      "grad_norm": 0.3352110683917999,
      "learning_rate": 0.000112545208186423,
      "loss": 0.8285,
      "step": 3145
    },
    {
      "epoch": 0.4807273560759445,
      "grad_norm": 0.31057208776474,
      "learning_rate": 0.00011249568687568914,
      "loss": 0.6465,
      "step": 3146
    },
    {
      "epoch": 0.4808801619742522,
      "grad_norm": 0.2694271504878998,
      "learning_rate": 0.00011244616245175981,
      "loss": 0.713,
      "step": 3147
    },
    {
      "epoch": 0.4810329678725599,
      "grad_norm": 0.35943877696990967,
      "learning_rate": 0.00011239663492697356,
      "loss": 0.7039,
      "step": 3148
    },
    {
      "epoch": 0.4811857737708676,
      "grad_norm": 0.32773950695991516,
      "learning_rate": 0.00011234710431366979,
      "loss": 0.6115,
      "step": 3149
    },
    {
      "epoch": 0.4813385796691752,
      "grad_norm": 0.2837180495262146,
      "learning_rate": 0.00011229757062418862,
      "loss": 0.7428,
      "step": 3150
    },
    {
      "epoch": 0.4814913855674829,
      "grad_norm": 0.3680180609226227,
      "learning_rate": 0.00011224803387087095,
      "loss": 0.8842,
      "step": 3151
    },
    {
      "epoch": 0.48164419146579057,
      "grad_norm": 0.3667493760585785,
      "learning_rate": 0.00011219849406605846,
      "loss": 0.6102,
      "step": 3152
    },
    {
      "epoch": 0.48179699736409826,
      "grad_norm": 0.35641342401504517,
      "learning_rate": 0.00011214895122209356,
      "loss": 0.7404,
      "step": 3153
    },
    {
      "epoch": 0.48194980326240594,
      "grad_norm": 0.28385525941848755,
      "learning_rate": 0.00011209940535131948,
      "loss": 0.6549,
      "step": 3154
    },
    {
      "epoch": 0.4821026091607136,
      "grad_norm": 0.2514529228210449,
      "learning_rate": 0.0001120498564660801,
      "loss": 0.6631,
      "step": 3155
    },
    {
      "epoch": 0.48225541505902125,
      "grad_norm": 0.2889954447746277,
      "learning_rate": 0.00011200030457872013,
      "loss": 0.6912,
      "step": 3156
    },
    {
      "epoch": 0.48240822095732894,
      "grad_norm": 1.1534295082092285,
      "learning_rate": 0.00011195074970158502,
      "loss": 0.676,
      "step": 3157
    },
    {
      "epoch": 0.4825610268556366,
      "grad_norm": 0.287183940410614,
      "learning_rate": 0.00011190119184702092,
      "loss": 0.6186,
      "step": 3158
    },
    {
      "epoch": 0.4827138327539443,
      "grad_norm": 0.2797063887119293,
      "learning_rate": 0.00011185163102737477,
      "loss": 0.6834,
      "step": 3159
    },
    {
      "epoch": 0.482866638652252,
      "grad_norm": 0.29181644320487976,
      "learning_rate": 0.00011180206725499424,
      "loss": 0.795,
      "step": 3160
    },
    {
      "epoch": 0.4830194445505597,
      "grad_norm": 0.37816306948661804,
      "learning_rate": 0.00011175250054222774,
      "loss": 0.6745,
      "step": 3161
    },
    {
      "epoch": 0.4831722504488673,
      "grad_norm": 0.2842831611633301,
      "learning_rate": 0.00011170293090142437,
      "loss": 0.6604,
      "step": 3162
    },
    {
      "epoch": 0.483325056347175,
      "grad_norm": 0.26622524857521057,
      "learning_rate": 0.000111653358344934,
      "loss": 0.6542,
      "step": 3163
    },
    {
      "epoch": 0.48347786224548267,
      "grad_norm": 0.3658379912376404,
      "learning_rate": 0.00011160378288510723,
      "loss": 0.7897,
      "step": 3164
    },
    {
      "epoch": 0.48363066814379035,
      "grad_norm": 0.42732903361320496,
      "learning_rate": 0.00011155420453429535,
      "loss": 0.5246,
      "step": 3165
    },
    {
      "epoch": 0.48378347404209804,
      "grad_norm": 0.36560025811195374,
      "learning_rate": 0.00011150462330485041,
      "loss": 0.5862,
      "step": 3166
    },
    {
      "epoch": 0.4839362799404057,
      "grad_norm": 0.4351115822792053,
      "learning_rate": 0.00011145503920912512,
      "loss": 0.7431,
      "step": 3167
    },
    {
      "epoch": 0.48408908583871335,
      "grad_norm": 0.34151631593704224,
      "learning_rate": 0.000111405452259473,
      "loss": 0.5993,
      "step": 3168
    },
    {
      "epoch": 0.48424189173702104,
      "grad_norm": 0.35425591468811035,
      "learning_rate": 0.00011135586246824817,
      "loss": 0.5834,
      "step": 3169
    },
    {
      "epoch": 0.4843946976353287,
      "grad_norm": 0.2991638481616974,
      "learning_rate": 0.00011130626984780554,
      "loss": 0.7526,
      "step": 3170
    },
    {
      "epoch": 0.4845475035336364,
      "grad_norm": 0.5623118281364441,
      "learning_rate": 0.00011125667441050069,
      "loss": 0.666,
      "step": 3171
    },
    {
      "epoch": 0.4847003094319441,
      "grad_norm": 0.2882367968559265,
      "learning_rate": 0.00011120707616868988,
      "loss": 0.7725,
      "step": 3172
    },
    {
      "epoch": 0.48485311533025177,
      "grad_norm": 0.2950005829334259,
      "learning_rate": 0.00011115747513473014,
      "loss": 0.6603,
      "step": 3173
    },
    {
      "epoch": 0.4850059212285594,
      "grad_norm": 0.27206265926361084,
      "learning_rate": 0.0001111078713209791,
      "loss": 0.847,
      "step": 3174
    },
    {
      "epoch": 0.4851587271268671,
      "grad_norm": 0.39594581723213196,
      "learning_rate": 0.0001110582647397952,
      "loss": 0.9321,
      "step": 3175
    },
    {
      "epoch": 0.48531153302517477,
      "grad_norm": 0.3641679584980011,
      "learning_rate": 0.00011100865540353744,
      "loss": 0.6959,
      "step": 3176
    },
    {
      "epoch": 0.48546433892348245,
      "grad_norm": 0.35023003816604614,
      "learning_rate": 0.0001109590433245656,
      "loss": 0.6577,
      "step": 3177
    },
    {
      "epoch": 0.48561714482179014,
      "grad_norm": 0.5135242342948914,
      "learning_rate": 0.00011090942851524013,
      "loss": 0.9438,
      "step": 3178
    },
    {
      "epoch": 0.4857699507200978,
      "grad_norm": 0.24862836301326752,
      "learning_rate": 0.00011085981098792208,
      "loss": 0.5999,
      "step": 3179
    },
    {
      "epoch": 0.48592275661840545,
      "grad_norm": 0.5486438870429993,
      "learning_rate": 0.00011081019075497332,
      "loss": 0.7452,
      "step": 3180
    },
    {
      "epoch": 0.48607556251671313,
      "grad_norm": 0.3016669452190399,
      "learning_rate": 0.00011076056782875625,
      "loss": 0.6285,
      "step": 3181
    },
    {
      "epoch": 0.4862283684150208,
      "grad_norm": 0.32901546359062195,
      "learning_rate": 0.00011071094222163408,
      "loss": 0.6339,
      "step": 3182
    },
    {
      "epoch": 0.4863811743133285,
      "grad_norm": 0.31634917855262756,
      "learning_rate": 0.0001106613139459705,
      "loss": 0.6858,
      "step": 3183
    },
    {
      "epoch": 0.4865339802116362,
      "grad_norm": 0.28542599081993103,
      "learning_rate": 0.00011061168301413009,
      "loss": 0.8819,
      "step": 3184
    },
    {
      "epoch": 0.48668678610994387,
      "grad_norm": 0.31034329533576965,
      "learning_rate": 0.0001105620494384779,
      "loss": 0.7413,
      "step": 3185
    },
    {
      "epoch": 0.4868395920082515,
      "grad_norm": 0.32563355565071106,
      "learning_rate": 0.00011051241323137978,
      "loss": 0.717,
      "step": 3186
    },
    {
      "epoch": 0.4869923979065592,
      "grad_norm": 0.278524249792099,
      "learning_rate": 0.00011046277440520214,
      "loss": 0.7499,
      "step": 3187
    },
    {
      "epoch": 0.48714520380486687,
      "grad_norm": 0.31609123945236206,
      "learning_rate": 0.00011041313297231206,
      "loss": 0.7538,
      "step": 3188
    },
    {
      "epoch": 0.48729800970317455,
      "grad_norm": 0.3462464213371277,
      "learning_rate": 0.00011036348894507735,
      "loss": 0.7642,
      "step": 3189
    },
    {
      "epoch": 0.48745081560148223,
      "grad_norm": 0.3006207048892975,
      "learning_rate": 0.00011031384233586633,
      "loss": 0.7188,
      "step": 3190
    },
    {
      "epoch": 0.4876036214997899,
      "grad_norm": 0.29584068059921265,
      "learning_rate": 0.0001102641931570481,
      "loss": 0.429,
      "step": 3191
    },
    {
      "epoch": 0.48775642739809755,
      "grad_norm": 0.25582364201545715,
      "learning_rate": 0.00011021454142099228,
      "loss": 0.6474,
      "step": 3192
    },
    {
      "epoch": 0.48790923329640523,
      "grad_norm": 0.32200515270233154,
      "learning_rate": 0.00011016488714006923,
      "loss": 0.6822,
      "step": 3193
    },
    {
      "epoch": 0.4880620391947129,
      "grad_norm": 0.29044628143310547,
      "learning_rate": 0.00011011523032664988,
      "loss": 0.5595,
      "step": 3194
    },
    {
      "epoch": 0.4882148450930206,
      "grad_norm": 0.3656401038169861,
      "learning_rate": 0.00011006557099310577,
      "loss": 0.8375,
      "step": 3195
    },
    {
      "epoch": 0.4883676509913283,
      "grad_norm": 0.3706183135509491,
      "learning_rate": 0.00011001590915180917,
      "loss": 0.751,
      "step": 3196
    },
    {
      "epoch": 0.4885204568896359,
      "grad_norm": 0.3113393485546112,
      "learning_rate": 0.00010996624481513287,
      "loss": 0.7639,
      "step": 3197
    },
    {
      "epoch": 0.4886732627879436,
      "grad_norm": 0.2899192273616791,
      "learning_rate": 0.00010991657799545033,
      "loss": 0.5524,
      "step": 3198
    },
    {
      "epoch": 0.4888260686862513,
      "grad_norm": 0.31966841220855713,
      "learning_rate": 0.00010986690870513559,
      "loss": 0.5835,
      "step": 3199
    },
    {
      "epoch": 0.48897887458455896,
      "grad_norm": 0.26261016726493835,
      "learning_rate": 0.00010981723695656343,
      "loss": 0.7348,
      "step": 3200
    },
    {
      "epoch": 0.48913168048286665,
      "grad_norm": 0.3918934762477875,
      "learning_rate": 0.00010976756276210907,
      "loss": 0.6722,
      "step": 3201
    },
    {
      "epoch": 0.48928448638117433,
      "grad_norm": 0.40297189354896545,
      "learning_rate": 0.00010971788613414843,
      "loss": 0.6896,
      "step": 3202
    },
    {
      "epoch": 0.48943729227948196,
      "grad_norm": 0.3552076816558838,
      "learning_rate": 0.00010966820708505805,
      "loss": 0.6717,
      "step": 3203
    },
    {
      "epoch": 0.48959009817778965,
      "grad_norm": 0.3047221899032593,
      "learning_rate": 0.00010961852562721502,
      "loss": 0.5305,
      "step": 3204
    },
    {
      "epoch": 0.48974290407609733,
      "grad_norm": 0.3160412013530731,
      "learning_rate": 0.00010956884177299707,
      "loss": 0.7559,
      "step": 3205
    },
    {
      "epoch": 0.489895709974405,
      "grad_norm": 0.2890625,
      "learning_rate": 0.00010951915553478252,
      "loss": 0.6041,
      "step": 3206
    },
    {
      "epoch": 0.4900485158727127,
      "grad_norm": 0.2591745853424072,
      "learning_rate": 0.00010946946692495029,
      "loss": 0.7393,
      "step": 3207
    },
    {
      "epoch": 0.4902013217710204,
      "grad_norm": 0.2744395136833191,
      "learning_rate": 0.00010941977595587985,
      "loss": 0.6317,
      "step": 3208
    },
    {
      "epoch": 0.490354127669328,
      "grad_norm": 0.2722474932670593,
      "learning_rate": 0.00010937008263995128,
      "loss": 0.6662,
      "step": 3209
    },
    {
      "epoch": 0.4905069335676357,
      "grad_norm": 0.33023321628570557,
      "learning_rate": 0.0001093203869895453,
      "loss": 0.7126,
      "step": 3210
    },
    {
      "epoch": 0.4906597394659434,
      "grad_norm": 0.6961508989334106,
      "learning_rate": 0.00010927068901704314,
      "loss": 0.6022,
      "step": 3211
    },
    {
      "epoch": 0.49081254536425106,
      "grad_norm": 0.2518894374370575,
      "learning_rate": 0.00010922098873482663,
      "loss": 0.6411,
      "step": 3212
    },
    {
      "epoch": 0.49096535126255875,
      "grad_norm": 0.3645883798599243,
      "learning_rate": 0.00010917128615527816,
      "loss": 0.7511,
      "step": 3213
    },
    {
      "epoch": 0.49111815716086643,
      "grad_norm": 0.4825361371040344,
      "learning_rate": 0.00010912158129078074,
      "loss": 0.9103,
      "step": 3214
    },
    {
      "epoch": 0.49127096305917406,
      "grad_norm": 0.32693371176719666,
      "learning_rate": 0.00010907187415371793,
      "loss": 0.8316,
      "step": 3215
    },
    {
      "epoch": 0.49142376895748174,
      "grad_norm": 0.2648088335990906,
      "learning_rate": 0.0001090221647564738,
      "loss": 0.6512,
      "step": 3216
    },
    {
      "epoch": 0.4915765748557894,
      "grad_norm": 0.28130269050598145,
      "learning_rate": 0.0001089724531114331,
      "loss": 0.6506,
      "step": 3217
    },
    {
      "epoch": 0.4917293807540971,
      "grad_norm": 0.34511005878448486,
      "learning_rate": 0.00010892273923098098,
      "loss": 0.7288,
      "step": 3218
    },
    {
      "epoch": 0.4918821866524048,
      "grad_norm": 0.29202011227607727,
      "learning_rate": 0.00010887302312750329,
      "loss": 0.5704,
      "step": 3219
    },
    {
      "epoch": 0.4920349925507125,
      "grad_norm": 0.2937288284301758,
      "learning_rate": 0.00010882330481338636,
      "loss": 0.8524,
      "step": 3220
    },
    {
      "epoch": 0.4921877984490201,
      "grad_norm": 0.3036741614341736,
      "learning_rate": 0.00010877358430101711,
      "loss": 0.5406,
      "step": 3221
    },
    {
      "epoch": 0.4923406043473278,
      "grad_norm": 0.2834756672382355,
      "learning_rate": 0.00010872386160278298,
      "loss": 0.5422,
      "step": 3222
    },
    {
      "epoch": 0.4924934102456355,
      "grad_norm": 0.2763515114784241,
      "learning_rate": 0.00010867413673107196,
      "loss": 0.9426,
      "step": 3223
    },
    {
      "epoch": 0.49264621614394316,
      "grad_norm": 0.2803753912448883,
      "learning_rate": 0.00010862440969827262,
      "loss": 0.6358,
      "step": 3224
    },
    {
      "epoch": 0.49279902204225084,
      "grad_norm": 0.47641652822494507,
      "learning_rate": 0.00010857468051677395,
      "loss": 0.9681,
      "step": 3225
    },
    {
      "epoch": 0.49295182794055853,
      "grad_norm": 0.29467979073524475,
      "learning_rate": 0.00010852494919896565,
      "loss": 0.566,
      "step": 3226
    },
    {
      "epoch": 0.49310463383886616,
      "grad_norm": 0.28544798493385315,
      "learning_rate": 0.00010847521575723778,
      "loss": 0.7639,
      "step": 3227
    },
    {
      "epoch": 0.49325743973717384,
      "grad_norm": 0.2804313898086548,
      "learning_rate": 0.00010842548020398106,
      "loss": 0.7826,
      "step": 3228
    },
    {
      "epoch": 0.4934102456354815,
      "grad_norm": 0.2962232530117035,
      "learning_rate": 0.00010837574255158667,
      "loss": 0.7477,
      "step": 3229
    },
    {
      "epoch": 0.4935630515337892,
      "grad_norm": 0.2538807988166809,
      "learning_rate": 0.00010832600281244635,
      "loss": 0.6508,
      "step": 3230
    },
    {
      "epoch": 0.4937158574320969,
      "grad_norm": 0.3388998806476593,
      "learning_rate": 0.0001082762609989523,
      "loss": 0.9114,
      "step": 3231
    },
    {
      "epoch": 0.4938686633304046,
      "grad_norm": 0.2898162007331848,
      "learning_rate": 0.00010822651712349729,
      "loss": 0.5826,
      "step": 3232
    },
    {
      "epoch": 0.4940214692287122,
      "grad_norm": 0.2569247782230377,
      "learning_rate": 0.00010817677119847463,
      "loss": 0.683,
      "step": 3233
    },
    {
      "epoch": 0.4941742751270199,
      "grad_norm": 0.31775936484336853,
      "learning_rate": 0.00010812702323627802,
      "loss": 0.7554,
      "step": 3234
    },
    {
      "epoch": 0.4943270810253276,
      "grad_norm": 0.38079357147216797,
      "learning_rate": 0.00010807727324930181,
      "loss": 0.6775,
      "step": 3235
    },
    {
      "epoch": 0.49447988692363526,
      "grad_norm": 0.7460795044898987,
      "learning_rate": 0.00010802752124994075,
      "loss": 0.6831,
      "step": 3236
    },
    {
      "epoch": 0.49463269282194294,
      "grad_norm": 0.3023420572280884,
      "learning_rate": 0.00010797776725059021,
      "loss": 0.8218,
      "step": 3237
    },
    {
      "epoch": 0.4947854987202506,
      "grad_norm": 0.25051984190940857,
      "learning_rate": 0.00010792801126364587,
      "loss": 0.4852,
      "step": 3238
    },
    {
      "epoch": 0.49493830461855826,
      "grad_norm": 0.28263378143310547,
      "learning_rate": 0.00010787825330150412,
      "loss": 0.7961,
      "step": 3239
    },
    {
      "epoch": 0.49509111051686594,
      "grad_norm": 0.2797674238681793,
      "learning_rate": 0.00010782849337656165,
      "loss": 0.6993,
      "step": 3240
    },
    {
      "epoch": 0.4952439164151736,
      "grad_norm": 0.29567739367485046,
      "learning_rate": 0.0001077787315012158,
      "loss": 0.6962,
      "step": 3241
    },
    {
      "epoch": 0.4953967223134813,
      "grad_norm": 0.8774082064628601,
      "learning_rate": 0.0001077289676878643,
      "loss": 0.663,
      "step": 3242
    },
    {
      "epoch": 0.495549528211789,
      "grad_norm": 0.4161388874053955,
      "learning_rate": 0.00010767920194890535,
      "loss": 0.665,
      "step": 3243
    },
    {
      "epoch": 0.4957023341100967,
      "grad_norm": 0.288461834192276,
      "learning_rate": 0.0001076294342967377,
      "loss": 0.5216,
      "step": 3244
    },
    {
      "epoch": 0.4958551400084043,
      "grad_norm": 0.3518747091293335,
      "learning_rate": 0.00010757966474376056,
      "loss": 0.6696,
      "step": 3245
    },
    {
      "epoch": 0.496007945906712,
      "grad_norm": 0.25768399238586426,
      "learning_rate": 0.00010752989330237355,
      "loss": 0.5461,
      "step": 3246
    },
    {
      "epoch": 0.4961607518050197,
      "grad_norm": 0.2731454372406006,
      "learning_rate": 0.00010748011998497682,
      "loss": 0.7564,
      "step": 3247
    },
    {
      "epoch": 0.49631355770332736,
      "grad_norm": 0.4299314320087433,
      "learning_rate": 0.00010743034480397103,
      "loss": 0.732,
      "step": 3248
    },
    {
      "epoch": 0.49646636360163504,
      "grad_norm": 0.30067208409309387,
      "learning_rate": 0.00010738056777175717,
      "loss": 0.5602,
      "step": 3249
    },
    {
      "epoch": 0.4966191694999427,
      "grad_norm": 0.30349549651145935,
      "learning_rate": 0.00010733078890073683,
      "loss": 0.7734,
      "step": 3250
    },
    {
      "epoch": 0.49677197539825035,
      "grad_norm": 0.7365745902061462,
      "learning_rate": 0.00010728100820331195,
      "loss": 0.6051,
      "step": 3251
    },
    {
      "epoch": 0.49692478129655804,
      "grad_norm": 0.30216264724731445,
      "learning_rate": 0.000107231225691885,
      "loss": 0.8426,
      "step": 3252
    },
    {
      "epoch": 0.4970775871948657,
      "grad_norm": 0.2875060737133026,
      "learning_rate": 0.00010718144137885888,
      "loss": 0.6761,
      "step": 3253
    },
    {
      "epoch": 0.4972303930931734,
      "grad_norm": 0.3124886453151703,
      "learning_rate": 0.00010713165527663691,
      "loss": 0.6802,
      "step": 3254
    },
    {
      "epoch": 0.4973831989914811,
      "grad_norm": 0.2875783443450928,
      "learning_rate": 0.0001070818673976229,
      "loss": 0.6805,
      "step": 3255
    },
    {
      "epoch": 0.4975360048897887,
      "grad_norm": 0.31740424036979675,
      "learning_rate": 0.00010703207775422106,
      "loss": 0.5709,
      "step": 3256
    },
    {
      "epoch": 0.4976888107880964,
      "grad_norm": 0.3055468201637268,
      "learning_rate": 0.0001069822863588361,
      "loss": 0.5894,
      "step": 3257
    },
    {
      "epoch": 0.4978416166864041,
      "grad_norm": 0.2838101089000702,
      "learning_rate": 0.00010693249322387309,
      "loss": 0.6071,
      "step": 3258
    },
    {
      "epoch": 0.49799442258471177,
      "grad_norm": 0.29591605067253113,
      "learning_rate": 0.00010688269836173759,
      "loss": 0.7489,
      "step": 3259
    },
    {
      "epoch": 0.49814722848301946,
      "grad_norm": 0.26190677285194397,
      "learning_rate": 0.00010683290178483556,
      "loss": 0.7207,
      "step": 3260
    },
    {
      "epoch": 0.49830003438132714,
      "grad_norm": 0.3020467162132263,
      "learning_rate": 0.00010678310350557341,
      "loss": 0.7131,
      "step": 3261
    },
    {
      "epoch": 0.49845284027963477,
      "grad_norm": 0.27667558193206787,
      "learning_rate": 0.00010673330353635798,
      "loss": 0.7659,
      "step": 3262
    },
    {
      "epoch": 0.49860564617794245,
      "grad_norm": 0.3452799320220947,
      "learning_rate": 0.00010668350188959649,
      "loss": 0.5658,
      "step": 3263
    },
    {
      "epoch": 0.49875845207625014,
      "grad_norm": 0.2541445791721344,
      "learning_rate": 0.00010663369857769658,
      "loss": 0.8587,
      "step": 3264
    },
    {
      "epoch": 0.4989112579745578,
      "grad_norm": 0.4348546266555786,
      "learning_rate": 0.0001065838936130664,
      "loss": 0.7917,
      "step": 3265
    },
    {
      "epoch": 0.4990640638728655,
      "grad_norm": 1.7747349739074707,
      "learning_rate": 0.00010653408700811433,
      "loss": 0.5763,
      "step": 3266
    },
    {
      "epoch": 0.4992168697711732,
      "grad_norm": 0.23470017313957214,
      "learning_rate": 0.00010648427877524938,
      "loss": 0.6186,
      "step": 3267
    },
    {
      "epoch": 0.4993696756694808,
      "grad_norm": 0.36335036158561707,
      "learning_rate": 0.00010643446892688078,
      "loss": 0.8022,
      "step": 3268
    },
    {
      "epoch": 0.4995224815677885,
      "grad_norm": 0.3045618236064911,
      "learning_rate": 0.00010638465747541828,
      "loss": 0.5187,
      "step": 3269
    },
    {
      "epoch": 0.4996752874660962,
      "grad_norm": 0.29446032643318176,
      "learning_rate": 0.00010633484443327195,
      "loss": 0.5423,
      "step": 3270
    },
    {
      "epoch": 0.49982809336440387,
      "grad_norm": 0.33767345547676086,
      "learning_rate": 0.0001062850298128523,
      "loss": 0.679,
      "step": 3271
    },
    {
      "epoch": 0.49998089926271155,
      "grad_norm": 0.3081493079662323,
      "learning_rate": 0.00010623521362657025,
      "loss": 0.6156,
      "step": 3272
    },
    {
      "epoch": 0.5001337051610192,
      "grad_norm": 0.2943879961967468,
      "learning_rate": 0.00010618539588683705,
      "loss": 0.4835,
      "step": 3273
    },
    {
      "epoch": 0.5002865110593269,
      "grad_norm": 0.2678261697292328,
      "learning_rate": 0.00010613557660606441,
      "loss": 0.7285,
      "step": 3274
    },
    {
      "epoch": 0.5004393169576346,
      "grad_norm": 0.3967953324317932,
      "learning_rate": 0.0001060857557966643,
      "loss": 0.6834,
      "step": 3275
    },
    {
      "epoch": 0.5005921228559422,
      "grad_norm": 0.36574381589889526,
      "learning_rate": 0.0001060359334710493,
      "loss": 0.6326,
      "step": 3276
    },
    {
      "epoch": 0.5007449287542499,
      "grad_norm": 0.3894730806350708,
      "learning_rate": 0.00010598610964163208,
      "loss": 0.6009,
      "step": 3277
    },
    {
      "epoch": 0.5008977346525576,
      "grad_norm": 0.2868845462799072,
      "learning_rate": 0.00010593628432082594,
      "loss": 0.7465,
      "step": 3278
    },
    {
      "epoch": 0.5010505405508653,
      "grad_norm": 0.26092529296875,
      "learning_rate": 0.00010588645752104433,
      "loss": 0.6455,
      "step": 3279
    },
    {
      "epoch": 0.501203346449173,
      "grad_norm": 0.3582485318183899,
      "learning_rate": 0.00010583662925470128,
      "loss": 0.8203,
      "step": 3280
    },
    {
      "epoch": 0.5013561523474807,
      "grad_norm": 0.31029212474823,
      "learning_rate": 0.00010578679953421106,
      "loss": 0.7229,
      "step": 3281
    },
    {
      "epoch": 0.5015089582457883,
      "grad_norm": 0.36049965023994446,
      "learning_rate": 0.0001057369683719883,
      "loss": 0.8482,
      "step": 3282
    },
    {
      "epoch": 0.5016617641440959,
      "grad_norm": 0.38351500034332275,
      "learning_rate": 0.00010568713578044805,
      "loss": 0.794,
      "step": 3283
    },
    {
      "epoch": 0.5018145700424036,
      "grad_norm": 0.3084133565425873,
      "learning_rate": 0.0001056373017720056,
      "loss": 0.8044,
      "step": 3284
    },
    {
      "epoch": 0.5019673759407113,
      "grad_norm": 0.4007570445537567,
      "learning_rate": 0.0001055874663590768,
      "loss": 0.6948,
      "step": 3285
    },
    {
      "epoch": 0.502120181839019,
      "grad_norm": 0.3142980635166168,
      "learning_rate": 0.00010553762955407757,
      "loss": 0.7865,
      "step": 3286
    },
    {
      "epoch": 0.5022729877373266,
      "grad_norm": 0.30172571539878845,
      "learning_rate": 0.0001054877913694245,
      "loss": 0.7177,
      "step": 3287
    },
    {
      "epoch": 0.5024257936356343,
      "grad_norm": 0.3817455470561981,
      "learning_rate": 0.00010543795181753427,
      "loss": 0.7549,
      "step": 3288
    },
    {
      "epoch": 0.502578599533942,
      "grad_norm": 0.9309012293815613,
      "learning_rate": 0.00010538811091082397,
      "loss": 0.747,
      "step": 3289
    },
    {
      "epoch": 0.5027314054322497,
      "grad_norm": 0.31485238671302795,
      "learning_rate": 0.00010533826866171108,
      "loss": 0.8134,
      "step": 3290
    },
    {
      "epoch": 0.5028842113305574,
      "grad_norm": 0.3265262842178345,
      "learning_rate": 0.00010528842508261334,
      "loss": 0.5837,
      "step": 3291
    },
    {
      "epoch": 0.5030370172288651,
      "grad_norm": 0.3494139313697815,
      "learning_rate": 0.0001052385801859489,
      "loss": 0.7537,
      "step": 3292
    },
    {
      "epoch": 0.5031898231271728,
      "grad_norm": 0.2907181978225708,
      "learning_rate": 0.00010518873398413616,
      "loss": 0.7375,
      "step": 3293
    },
    {
      "epoch": 0.5033426290254804,
      "grad_norm": 0.2753676474094391,
      "learning_rate": 0.00010513888648959394,
      "loss": 0.7807,
      "step": 3294
    },
    {
      "epoch": 0.503495434923788,
      "grad_norm": 0.2893278896808624,
      "learning_rate": 0.00010508903771474128,
      "loss": 0.6039,
      "step": 3295
    },
    {
      "epoch": 0.5036482408220957,
      "grad_norm": 0.2934708297252655,
      "learning_rate": 0.00010503918767199758,
      "loss": 0.6074,
      "step": 3296
    },
    {
      "epoch": 0.5038010467204034,
      "grad_norm": 0.2802904546260834,
      "learning_rate": 0.00010498933637378257,
      "loss": 0.605,
      "step": 3297
    },
    {
      "epoch": 0.5039538526187111,
      "grad_norm": 0.29273319244384766,
      "learning_rate": 0.00010493948383251628,
      "loss": 0.845,
      "step": 3298
    },
    {
      "epoch": 0.5041066585170187,
      "grad_norm": 0.25674715638160706,
      "learning_rate": 0.00010488963006061907,
      "loss": 0.7262,
      "step": 3299
    },
    {
      "epoch": 0.5042594644153264,
      "grad_norm": 0.4300982654094696,
      "learning_rate": 0.00010483977507051157,
      "loss": 0.6937,
      "step": 3300
    },
    {
      "epoch": 0.5044122703136341,
      "grad_norm": 0.3364725410938263,
      "learning_rate": 0.00010478991887461473,
      "loss": 0.5855,
      "step": 3301
    },
    {
      "epoch": 0.5045650762119418,
      "grad_norm": 0.2849768400192261,
      "learning_rate": 0.00010474006148534983,
      "loss": 0.7837,
      "step": 3302
    },
    {
      "epoch": 0.5047178821102495,
      "grad_norm": 0.2889060378074646,
      "learning_rate": 0.00010469020291513838,
      "loss": 0.5903,
      "step": 3303
    },
    {
      "epoch": 0.5048706880085572,
      "grad_norm": 0.2896782457828522,
      "learning_rate": 0.00010464034317640225,
      "loss": 0.599,
      "step": 3304
    },
    {
      "epoch": 0.5050234939068649,
      "grad_norm": 0.26331770420074463,
      "learning_rate": 0.00010459048228156356,
      "loss": 0.6462,
      "step": 3305
    },
    {
      "epoch": 0.5051762998051725,
      "grad_norm": 0.37208205461502075,
      "learning_rate": 0.00010454062024304476,
      "loss": 0.8038,
      "step": 3306
    },
    {
      "epoch": 0.5053291057034801,
      "grad_norm": 0.41795673966407776,
      "learning_rate": 0.00010449075707326855,
      "loss": 0.7771,
      "step": 3307
    },
    {
      "epoch": 0.5054819116017878,
      "grad_norm": 0.3807390034198761,
      "learning_rate": 0.0001044408927846579,
      "loss": 0.7304,
      "step": 3308
    },
    {
      "epoch": 0.5056347175000955,
      "grad_norm": 0.33464887738227844,
      "learning_rate": 0.00010439102738963609,
      "loss": 0.8507,
      "step": 3309
    },
    {
      "epoch": 0.5057875233984032,
      "grad_norm": 0.3084365129470825,
      "learning_rate": 0.00010434116090062664,
      "loss": 0.73,
      "step": 3310
    },
    {
      "epoch": 0.5059403292967108,
      "grad_norm": 0.2747865319252014,
      "learning_rate": 0.00010429129333005345,
      "loss": 0.7288,
      "step": 3311
    },
    {
      "epoch": 0.5060931351950185,
      "grad_norm": 0.46816909313201904,
      "learning_rate": 0.00010424142469034048,
      "loss": 0.716,
      "step": 3312
    },
    {
      "epoch": 0.5062459410933262,
      "grad_norm": 0.425784170627594,
      "learning_rate": 0.0001041915549939122,
      "loss": 0.5491,
      "step": 3313
    },
    {
      "epoch": 0.5063987469916339,
      "grad_norm": 0.3221166431903839,
      "learning_rate": 0.00010414168425319315,
      "loss": 0.7381,
      "step": 3314
    },
    {
      "epoch": 0.5065515528899416,
      "grad_norm": 0.608630359172821,
      "learning_rate": 0.00010409181248060827,
      "loss": 0.5901,
      "step": 3315
    },
    {
      "epoch": 0.5067043587882493,
      "grad_norm": 0.28582873940467834,
      "learning_rate": 0.00010404193968858262,
      "loss": 0.6935,
      "step": 3316
    },
    {
      "epoch": 0.506857164686557,
      "grad_norm": 0.29004615545272827,
      "learning_rate": 0.00010399206588954164,
      "loss": 0.6994,
      "step": 3317
    },
    {
      "epoch": 0.5070099705848645,
      "grad_norm": 0.2937512993812561,
      "learning_rate": 0.00010394219109591096,
      "loss": 0.8092,
      "step": 3318
    },
    {
      "epoch": 0.5071627764831722,
      "grad_norm": 0.2914525270462036,
      "learning_rate": 0.00010389231532011647,
      "loss": 0.801,
      "step": 3319
    },
    {
      "epoch": 0.5073155823814799,
      "grad_norm": 0.2659449875354767,
      "learning_rate": 0.00010384243857458428,
      "loss": 0.6694,
      "step": 3320
    },
    {
      "epoch": 0.5074683882797876,
      "grad_norm": 0.29074615240097046,
      "learning_rate": 0.00010379256087174076,
      "loss": 0.5927,
      "step": 3321
    },
    {
      "epoch": 0.5076211941780953,
      "grad_norm": 0.33049747347831726,
      "learning_rate": 0.00010374268222401258,
      "loss": 0.7625,
      "step": 3322
    },
    {
      "epoch": 0.5077740000764029,
      "grad_norm": 0.2912755310535431,
      "learning_rate": 0.00010369280264382648,
      "loss": 0.668,
      "step": 3323
    },
    {
      "epoch": 0.5079268059747106,
      "grad_norm": 0.298967182636261,
      "learning_rate": 0.00010364292214360965,
      "loss": 0.625,
      "step": 3324
    },
    {
      "epoch": 0.5080796118730183,
      "grad_norm": 0.26732969284057617,
      "learning_rate": 0.0001035930407357893,
      "loss": 0.6825,
      "step": 3325
    },
    {
      "epoch": 0.508232417771326,
      "grad_norm": 0.27220967411994934,
      "learning_rate": 0.00010354315843279306,
      "loss": 0.85,
      "step": 3326
    },
    {
      "epoch": 0.5083852236696337,
      "grad_norm": 0.2452717274427414,
      "learning_rate": 0.00010349327524704862,
      "loss": 0.66,
      "step": 3327
    },
    {
      "epoch": 0.5085380295679414,
      "grad_norm": 0.2734704613685608,
      "learning_rate": 0.00010344339119098394,
      "loss": 0.7091,
      "step": 3328
    },
    {
      "epoch": 0.508690835466249,
      "grad_norm": 0.30528584122657776,
      "learning_rate": 0.0001033935062770273,
      "loss": 0.7044,
      "step": 3329
    },
    {
      "epoch": 0.5088436413645566,
      "grad_norm": 0.26126575469970703,
      "learning_rate": 0.00010334362051760703,
      "loss": 0.7252,
      "step": 3330
    },
    {
      "epoch": 0.5089964472628643,
      "grad_norm": 0.27342644333839417,
      "learning_rate": 0.00010329373392515179,
      "loss": 0.57,
      "step": 3331
    },
    {
      "epoch": 0.509149253161172,
      "grad_norm": 0.26855266094207764,
      "learning_rate": 0.00010324384651209036,
      "loss": 0.6485,
      "step": 3332
    },
    {
      "epoch": 0.5093020590594797,
      "grad_norm": 0.26671916246414185,
      "learning_rate": 0.00010319395829085184,
      "loss": 0.7488,
      "step": 3333
    },
    {
      "epoch": 0.5094548649577874,
      "grad_norm": 0.2993987500667572,
      "learning_rate": 0.0001031440692738654,
      "loss": 0.6563,
      "step": 3334
    },
    {
      "epoch": 0.509607670856095,
      "grad_norm": 0.2229076474905014,
      "learning_rate": 0.0001030941794735605,
      "loss": 0.4909,
      "step": 3335
    },
    {
      "epoch": 0.5097604767544027,
      "grad_norm": 0.2941783666610718,
      "learning_rate": 0.00010304428890236678,
      "loss": 0.7214,
      "step": 3336
    },
    {
      "epoch": 0.5099132826527104,
      "grad_norm": 0.2748726010322571,
      "learning_rate": 0.00010299439757271399,
      "loss": 0.6889,
      "step": 3337
    },
    {
      "epoch": 0.5100660885510181,
      "grad_norm": 0.2850393056869507,
      "learning_rate": 0.00010294450549703221,
      "loss": 0.86,
      "step": 3338
    },
    {
      "epoch": 0.5102188944493258,
      "grad_norm": 1.257244348526001,
      "learning_rate": 0.00010289461268775157,
      "loss": 0.6314,
      "step": 3339
    },
    {
      "epoch": 0.5103717003476335,
      "grad_norm": 0.28113579750061035,
      "learning_rate": 0.00010284471915730252,
      "loss": 0.8423,
      "step": 3340
    },
    {
      "epoch": 0.5105245062459411,
      "grad_norm": 0.2960244119167328,
      "learning_rate": 0.00010279482491811554,
      "loss": 0.6526,
      "step": 3341
    },
    {
      "epoch": 0.5106773121442487,
      "grad_norm": 0.26911747455596924,
      "learning_rate": 0.00010274492998262142,
      "loss": 0.7716,
      "step": 3342
    },
    {
      "epoch": 0.5108301180425564,
      "grad_norm": 0.29852014780044556,
      "learning_rate": 0.000102695034363251,
      "loss": 0.5997,
      "step": 3343
    },
    {
      "epoch": 0.5109829239408641,
      "grad_norm": 0.28390073776245117,
      "learning_rate": 0.00010264513807243543,
      "loss": 0.7266,
      "step": 3344
    },
    {
      "epoch": 0.5111357298391718,
      "grad_norm": 0.31037935614585876,
      "learning_rate": 0.00010259524112260591,
      "loss": 0.5311,
      "step": 3345
    },
    {
      "epoch": 0.5112885357374795,
      "grad_norm": 0.35973161458969116,
      "learning_rate": 0.00010254534352619381,
      "loss": 0.9332,
      "step": 3346
    },
    {
      "epoch": 0.5114413416357871,
      "grad_norm": 0.286542683839798,
      "learning_rate": 0.00010249544529563077,
      "loss": 0.7231,
      "step": 3347
    },
    {
      "epoch": 0.5115941475340948,
      "grad_norm": 0.31040236353874207,
      "learning_rate": 0.00010244554644334847,
      "loss": 0.8314,
      "step": 3348
    },
    {
      "epoch": 0.5117469534324025,
      "grad_norm": 0.29848411679267883,
      "learning_rate": 0.00010239564698177879,
      "loss": 0.7519,
      "step": 3349
    },
    {
      "epoch": 0.5118997593307102,
      "grad_norm": 0.2744828760623932,
      "learning_rate": 0.0001023457469233538,
      "loss": 0.6148,
      "step": 3350
    },
    {
      "epoch": 0.5120525652290179,
      "grad_norm": 0.30576545000076294,
      "learning_rate": 0.00010229584628050563,
      "loss": 0.5859,
      "step": 3351
    },
    {
      "epoch": 0.5122053711273256,
      "grad_norm": 0.27415305376052856,
      "learning_rate": 0.00010224594506566667,
      "loss": 0.6705,
      "step": 3352
    },
    {
      "epoch": 0.5123581770256332,
      "grad_norm": 0.30824410915374756,
      "learning_rate": 0.0001021960432912693,
      "loss": 0.8869,
      "step": 3353
    },
    {
      "epoch": 0.5125109829239408,
      "grad_norm": 0.2593754529953003,
      "learning_rate": 0.00010214614096974622,
      "loss": 0.7246,
      "step": 3354
    },
    {
      "epoch": 0.5126637888222485,
      "grad_norm": 0.30506977438926697,
      "learning_rate": 0.00010209623811353011,
      "loss": 0.8341,
      "step": 3355
    },
    {
      "epoch": 0.5128165947205562,
      "grad_norm": 0.2997819483280182,
      "learning_rate": 0.00010204633473505388,
      "loss": 0.6893,
      "step": 3356
    },
    {
      "epoch": 0.5129694006188639,
      "grad_norm": 0.3118533194065094,
      "learning_rate": 0.00010199643084675052,
      "loss": 0.914,
      "step": 3357
    },
    {
      "epoch": 0.5131222065171716,
      "grad_norm": 0.29679909348487854,
      "learning_rate": 0.00010194652646105318,
      "loss": 0.7542,
      "step": 3358
    },
    {
      "epoch": 0.5132750124154792,
      "grad_norm": 0.3198535144329071,
      "learning_rate": 0.00010189662159039512,
      "loss": 0.7142,
      "step": 3359
    },
    {
      "epoch": 0.5134278183137869,
      "grad_norm": 0.2824925482273102,
      "learning_rate": 0.0001018467162472097,
      "loss": 0.6344,
      "step": 3360
    },
    {
      "epoch": 0.5135806242120946,
      "grad_norm": 0.3698297441005707,
      "learning_rate": 0.00010179681044393042,
      "loss": 0.7198,
      "step": 3361
    },
    {
      "epoch": 0.5137334301104023,
      "grad_norm": 0.28322651982307434,
      "learning_rate": 0.0001017469041929909,
      "loss": 0.74,
      "step": 3362
    },
    {
      "epoch": 0.51388623600871,
      "grad_norm": 0.3029322326183319,
      "learning_rate": 0.00010169699750682489,
      "loss": 0.7064,
      "step": 3363
    },
    {
      "epoch": 0.5140390419070177,
      "grad_norm": 0.2545001208782196,
      "learning_rate": 0.00010164709039786618,
      "loss": 0.8169,
      "step": 3364
    },
    {
      "epoch": 0.5141918478053252,
      "grad_norm": 0.28603988885879517,
      "learning_rate": 0.00010159718287854871,
      "loss": 0.7604,
      "step": 3365
    },
    {
      "epoch": 0.5143446537036329,
      "grad_norm": 0.3488546907901764,
      "learning_rate": 0.00010154727496130658,
      "loss": 0.6961,
      "step": 3366
    },
    {
      "epoch": 0.5144974596019406,
      "grad_norm": 0.36552485823631287,
      "learning_rate": 0.00010149736665857382,
      "loss": 0.7482,
      "step": 3367
    },
    {
      "epoch": 0.5146502655002483,
      "grad_norm": 0.6362305283546448,
      "learning_rate": 0.00010144745798278479,
      "loss": 0.8138,
      "step": 3368
    },
    {
      "epoch": 0.514803071398556,
      "grad_norm": 0.28395330905914307,
      "learning_rate": 0.00010139754894637367,
      "loss": 0.7591,
      "step": 3369
    },
    {
      "epoch": 0.5149558772968637,
      "grad_norm": 0.26198312640190125,
      "learning_rate": 0.00010134763956177504,
      "loss": 0.6243,
      "step": 3370
    },
    {
      "epoch": 0.5151086831951713,
      "grad_norm": 0.2880837619304657,
      "learning_rate": 0.00010129772984142328,
      "loss": 0.6279,
      "step": 3371
    },
    {
      "epoch": 0.515261489093479,
      "grad_norm": 0.28449442982673645,
      "learning_rate": 0.00010124781979775307,
      "loss": 0.7934,
      "step": 3372
    },
    {
      "epoch": 0.5154142949917867,
      "grad_norm": 0.29876309633255005,
      "learning_rate": 0.00010119790944319899,
      "loss": 0.8046,
      "step": 3373
    },
    {
      "epoch": 0.5155671008900944,
      "grad_norm": 0.4107857048511505,
      "learning_rate": 0.00010114799879019581,
      "loss": 0.8078,
      "step": 3374
    },
    {
      "epoch": 0.5157199067884021,
      "grad_norm": 0.3255639374256134,
      "learning_rate": 0.00010109808785117843,
      "loss": 0.8144,
      "step": 3375
    },
    {
      "epoch": 0.5158727126867098,
      "grad_norm": 0.3160342276096344,
      "learning_rate": 0.00010104817663858161,
      "loss": 0.932,
      "step": 3376
    },
    {
      "epoch": 0.5160255185850173,
      "grad_norm": 0.35046565532684326,
      "learning_rate": 0.00010099826516484045,
      "loss": 0.7134,
      "step": 3377
    },
    {
      "epoch": 0.516178324483325,
      "grad_norm": 0.29910796880722046,
      "learning_rate": 0.00010094835344238984,
      "loss": 0.8236,
      "step": 3378
    },
    {
      "epoch": 0.5163311303816327,
      "grad_norm": 0.2847612202167511,
      "learning_rate": 0.00010089844148366498,
      "loss": 0.7021,
      "step": 3379
    },
    {
      "epoch": 0.5164839362799404,
      "grad_norm": 0.37408819794654846,
      "learning_rate": 0.00010084852930110094,
      "loss": 0.5381,
      "step": 3380
    },
    {
      "epoch": 0.5166367421782481,
      "grad_norm": 0.3474291265010834,
      "learning_rate": 0.00010079861690713297,
      "loss": 0.7535,
      "step": 3381
    },
    {
      "epoch": 0.5167895480765557,
      "grad_norm": 0.4439990818500519,
      "learning_rate": 0.00010074870431419627,
      "loss": 0.9417,
      "step": 3382
    },
    {
      "epoch": 0.5169423539748634,
      "grad_norm": 0.2557135224342346,
      "learning_rate": 0.0001006987915347262,
      "loss": 0.5236,
      "step": 3383
    },
    {
      "epoch": 0.5170951598731711,
      "grad_norm": 0.2894841730594635,
      "learning_rate": 0.00010064887858115808,
      "loss": 0.6814,
      "step": 3384
    },
    {
      "epoch": 0.5172479657714788,
      "grad_norm": 0.3533530533313751,
      "learning_rate": 0.00010059896546592729,
      "loss": 0.4942,
      "step": 3385
    },
    {
      "epoch": 0.5174007716697865,
      "grad_norm": 0.33828791975975037,
      "learning_rate": 0.0001005490522014693,
      "loss": 0.9148,
      "step": 3386
    },
    {
      "epoch": 0.5175535775680942,
      "grad_norm": 0.291148841381073,
      "learning_rate": 0.00010049913880021956,
      "loss": 0.7756,
      "step": 3387
    },
    {
      "epoch": 0.5177063834664019,
      "grad_norm": 0.477228581905365,
      "learning_rate": 0.00010044922527461358,
      "loss": 0.8127,
      "step": 3388
    },
    {
      "epoch": 0.5178591893647094,
      "grad_norm": 0.31533282995224,
      "learning_rate": 0.00010039931163708686,
      "loss": 0.6602,
      "step": 3389
    },
    {
      "epoch": 0.5180119952630171,
      "grad_norm": 0.31487801671028137,
      "learning_rate": 0.00010034939790007504,
      "loss": 0.7307,
      "step": 3390
    },
    {
      "epoch": 0.5181648011613248,
      "grad_norm": 0.2877635657787323,
      "learning_rate": 0.00010029948407601366,
      "loss": 0.7646,
      "step": 3391
    },
    {
      "epoch": 0.5183176070596325,
      "grad_norm": 0.5219588279724121,
      "learning_rate": 0.00010024957017733834,
      "loss": 0.6373,
      "step": 3392
    },
    {
      "epoch": 0.5184704129579402,
      "grad_norm": 0.34876278042793274,
      "learning_rate": 0.00010019965621648468,
      "loss": 0.5714,
      "step": 3393
    },
    {
      "epoch": 0.5186232188562478,
      "grad_norm": 0.3323829174041748,
      "learning_rate": 0.00010014974220588838,
      "loss": 0.5746,
      "step": 3394
    },
    {
      "epoch": 0.5187760247545555,
      "grad_norm": 0.3449549674987793,
      "learning_rate": 0.00010009982815798504,
      "loss": 0.7553,
      "step": 3395
    },
    {
      "epoch": 0.5189288306528632,
      "grad_norm": 0.3018842935562134,
      "learning_rate": 0.00010004991408521036,
      "loss": 0.7412,
      "step": 3396
    },
    {
      "epoch": 0.5190816365511709,
      "grad_norm": 0.31991279125213623,
      "learning_rate": 0.0001,
      "loss": 0.6534,
      "step": 3397
    },
    {
      "epoch": 0.5192344424494786,
      "grad_norm": 0.2634223699569702,
      "learning_rate": 9.995008591478966e-05,
      "loss": 0.6144,
      "step": 3398
    },
    {
      "epoch": 0.5193872483477863,
      "grad_norm": 0.36058294773101807,
      "learning_rate": 9.9900171842015e-05,
      "loss": 0.7775,
      "step": 3399
    },
    {
      "epoch": 0.519540054246094,
      "grad_norm": 0.287720263004303,
      "learning_rate": 9.985025779411166e-05,
      "loss": 0.7882,
      "step": 3400
    },
    {
      "epoch": 0.5196928601444015,
      "grad_norm": 0.26958808302879333,
      "learning_rate": 9.980034378351534e-05,
      "loss": 0.6573,
      "step": 3401
    },
    {
      "epoch": 0.5198456660427092,
      "grad_norm": 0.9140129685401917,
      "learning_rate": 9.975042982266167e-05,
      "loss": 0.7488,
      "step": 3402
    },
    {
      "epoch": 0.5199984719410169,
      "grad_norm": 0.30511972308158875,
      "learning_rate": 9.970051592398638e-05,
      "loss": 0.6557,
      "step": 3403
    },
    {
      "epoch": 0.5201512778393246,
      "grad_norm": 0.2656531035900116,
      "learning_rate": 9.965060209992497e-05,
      "loss": 0.6858,
      "step": 3404
    },
    {
      "epoch": 0.5203040837376323,
      "grad_norm": 0.4538237452507019,
      "learning_rate": 9.960068836291315e-05,
      "loss": 0.8245,
      "step": 3405
    },
    {
      "epoch": 0.52045688963594,
      "grad_norm": 0.3917170763015747,
      "learning_rate": 9.955077472538647e-05,
      "loss": 0.8073,
      "step": 3406
    },
    {
      "epoch": 0.5206096955342476,
      "grad_norm": 0.32771754264831543,
      "learning_rate": 9.950086119978045e-05,
      "loss": 0.5978,
      "step": 3407
    },
    {
      "epoch": 0.5207625014325553,
      "grad_norm": 0.640074610710144,
      "learning_rate": 9.945094779853073e-05,
      "loss": 0.9897,
      "step": 3408
    },
    {
      "epoch": 0.520915307330863,
      "grad_norm": 0.5286215543746948,
      "learning_rate": 9.940103453407272e-05,
      "loss": 0.7344,
      "step": 3409
    },
    {
      "epoch": 0.5210681132291707,
      "grad_norm": 0.31370532512664795,
      "learning_rate": 9.935112141884197e-05,
      "loss": 0.6146,
      "step": 3410
    },
    {
      "epoch": 0.5212209191274784,
      "grad_norm": 0.2929065525531769,
      "learning_rate": 9.930120846527381e-05,
      "loss": 0.7299,
      "step": 3411
    },
    {
      "epoch": 0.521373725025786,
      "grad_norm": 0.2866988182067871,
      "learning_rate": 9.925129568580375e-05,
      "loss": 0.6022,
      "step": 3412
    },
    {
      "epoch": 0.5215265309240936,
      "grad_norm": 0.2536863088607788,
      "learning_rate": 9.920138309286708e-05,
      "loss": 0.6714,
      "step": 3413
    },
    {
      "epoch": 0.5216793368224013,
      "grad_norm": 0.27033594250679016,
      "learning_rate": 9.91514706988991e-05,
      "loss": 0.8957,
      "step": 3414
    },
    {
      "epoch": 0.521832142720709,
      "grad_norm": 0.32144030928611755,
      "learning_rate": 9.910155851633504e-05,
      "loss": 0.8017,
      "step": 3415
    },
    {
      "epoch": 0.5219849486190167,
      "grad_norm": 0.27790653705596924,
      "learning_rate": 9.905164655761016e-05,
      "loss": 0.698,
      "step": 3416
    },
    {
      "epoch": 0.5221377545173244,
      "grad_norm": 0.3195480704307556,
      "learning_rate": 9.90017348351596e-05,
      "loss": 0.6499,
      "step": 3417
    },
    {
      "epoch": 0.522290560415632,
      "grad_norm": 0.3057548999786377,
      "learning_rate": 9.89518233614184e-05,
      "loss": 0.7596,
      "step": 3418
    },
    {
      "epoch": 0.5224433663139397,
      "grad_norm": 0.30440661311149597,
      "learning_rate": 9.89019121488216e-05,
      "loss": 0.665,
      "step": 3419
    },
    {
      "epoch": 0.5225961722122474,
      "grad_norm": 0.2874852120876312,
      "learning_rate": 9.885200120980418e-05,
      "loss": 0.6549,
      "step": 3420
    },
    {
      "epoch": 0.5227489781105551,
      "grad_norm": 0.31353822350502014,
      "learning_rate": 9.880209055680105e-05,
      "loss": 0.801,
      "step": 3421
    },
    {
      "epoch": 0.5229017840088628,
      "grad_norm": 0.2793009579181671,
      "learning_rate": 9.875218020224696e-05,
      "loss": 0.7663,
      "step": 3422
    },
    {
      "epoch": 0.5230545899071705,
      "grad_norm": 0.23979683220386505,
      "learning_rate": 9.870227015857672e-05,
      "loss": 0.5808,
      "step": 3423
    },
    {
      "epoch": 0.523207395805478,
      "grad_norm": 0.29966726899147034,
      "learning_rate": 9.8652360438225e-05,
      "loss": 0.7144,
      "step": 3424
    },
    {
      "epoch": 0.5233602017037857,
      "grad_norm": 0.3735535144805908,
      "learning_rate": 9.860245105362634e-05,
      "loss": 0.663,
      "step": 3425
    },
    {
      "epoch": 0.5235130076020934,
      "grad_norm": 0.28507325053215027,
      "learning_rate": 9.855254201721524e-05,
      "loss": 0.7955,
      "step": 3426
    },
    {
      "epoch": 0.5236658135004011,
      "grad_norm": 0.26180824637413025,
      "learning_rate": 9.850263334142618e-05,
      "loss": 0.6727,
      "step": 3427
    },
    {
      "epoch": 0.5238186193987088,
      "grad_norm": 0.3200896680355072,
      "learning_rate": 9.845272503869347e-05,
      "loss": 0.4995,
      "step": 3428
    },
    {
      "epoch": 0.5239714252970165,
      "grad_norm": 0.31497979164123535,
      "learning_rate": 9.840281712145131e-05,
      "loss": 0.6823,
      "step": 3429
    },
    {
      "epoch": 0.5241242311953241,
      "grad_norm": 0.3218442499637604,
      "learning_rate": 9.835290960213383e-05,
      "loss": 0.7584,
      "step": 3430
    },
    {
      "epoch": 0.5242770370936318,
      "grad_norm": 0.26961660385131836,
      "learning_rate": 9.830300249317515e-05,
      "loss": 0.7869,
      "step": 3431
    },
    {
      "epoch": 0.5244298429919395,
      "grad_norm": 0.3074052035808563,
      "learning_rate": 9.82530958070091e-05,
      "loss": 0.7193,
      "step": 3432
    },
    {
      "epoch": 0.5245826488902472,
      "grad_norm": 0.2740161418914795,
      "learning_rate": 9.82031895560696e-05,
      "loss": 0.5419,
      "step": 3433
    },
    {
      "epoch": 0.5247354547885549,
      "grad_norm": 0.2755180299282074,
      "learning_rate": 9.815328375279031e-05,
      "loss": 0.5791,
      "step": 3434
    },
    {
      "epoch": 0.5248882606868626,
      "grad_norm": 0.3647940456867218,
      "learning_rate": 9.810337840960491e-05,
      "loss": 0.8048,
      "step": 3435
    },
    {
      "epoch": 0.5250410665851701,
      "grad_norm": 0.26341575384140015,
      "learning_rate": 9.805347353894684e-05,
      "loss": 0.788,
      "step": 3436
    },
    {
      "epoch": 0.5251938724834778,
      "grad_norm": 0.275977224111557,
      "learning_rate": 9.800356915324948e-05,
      "loss": 0.7517,
      "step": 3437
    },
    {
      "epoch": 0.5253466783817855,
      "grad_norm": 0.28197404742240906,
      "learning_rate": 9.795366526494617e-05,
      "loss": 0.654,
      "step": 3438
    },
    {
      "epoch": 0.5254994842800932,
      "grad_norm": 0.30565693974494934,
      "learning_rate": 9.790376188646992e-05,
      "loss": 0.6654,
      "step": 3439
    },
    {
      "epoch": 0.5256522901784009,
      "grad_norm": 0.31840893626213074,
      "learning_rate": 9.78538590302538e-05,
      "loss": 0.9181,
      "step": 3440
    },
    {
      "epoch": 0.5258050960767086,
      "grad_norm": 0.30634599924087524,
      "learning_rate": 9.780395670873068e-05,
      "loss": 0.8275,
      "step": 3441
    },
    {
      "epoch": 0.5259579019750162,
      "grad_norm": 0.33374178409576416,
      "learning_rate": 9.775405493433337e-05,
      "loss": 0.492,
      "step": 3442
    },
    {
      "epoch": 0.5261107078733239,
      "grad_norm": 0.27607855200767517,
      "learning_rate": 9.770415371949438e-05,
      "loss": 0.6481,
      "step": 3443
    },
    {
      "epoch": 0.5262635137716316,
      "grad_norm": 0.3031352758407593,
      "learning_rate": 9.765425307664621e-05,
      "loss": 0.5265,
      "step": 3444
    },
    {
      "epoch": 0.5264163196699393,
      "grad_norm": 0.3562638461589813,
      "learning_rate": 9.760435301822125e-05,
      "loss": 0.7271,
      "step": 3445
    },
    {
      "epoch": 0.526569125568247,
      "grad_norm": 0.360408753156662,
      "learning_rate": 9.755445355665155e-05,
      "loss": 0.7489,
      "step": 3446
    },
    {
      "epoch": 0.5267219314665547,
      "grad_norm": 0.2757256031036377,
      "learning_rate": 9.750455470436925e-05,
      "loss": 0.6827,
      "step": 3447
    },
    {
      "epoch": 0.5268747373648622,
      "grad_norm": 0.32317423820495605,
      "learning_rate": 9.745465647380619e-05,
      "loss": 0.7025,
      "step": 3448
    },
    {
      "epoch": 0.5270275432631699,
      "grad_norm": 0.3631436824798584,
      "learning_rate": 9.740475887739416e-05,
      "loss": 0.5346,
      "step": 3449
    },
    {
      "epoch": 0.5271803491614776,
      "grad_norm": 0.29940101504325867,
      "learning_rate": 9.73548619275646e-05,
      "loss": 0.7169,
      "step": 3450
    },
    {
      "epoch": 0.5273331550597853,
      "grad_norm": 0.3059080243110657,
      "learning_rate": 9.7304965636749e-05,
      "loss": 0.7512,
      "step": 3451
    },
    {
      "epoch": 0.527485960958093,
      "grad_norm": 0.398517370223999,
      "learning_rate": 9.725507001737863e-05,
      "loss": 0.8103,
      "step": 3452
    },
    {
      "epoch": 0.5276387668564007,
      "grad_norm": 0.26001110672950745,
      "learning_rate": 9.72051750818845e-05,
      "loss": 0.8303,
      "step": 3453
    },
    {
      "epoch": 0.5277915727547083,
      "grad_norm": 0.32580479979515076,
      "learning_rate": 9.71552808426975e-05,
      "loss": 0.6707,
      "step": 3454
    },
    {
      "epoch": 0.527944378653016,
      "grad_norm": 0.3100968599319458,
      "learning_rate": 9.710538731224843e-05,
      "loss": 0.8172,
      "step": 3455
    },
    {
      "epoch": 0.5280971845513237,
      "grad_norm": 0.35659340023994446,
      "learning_rate": 9.705549450296784e-05,
      "loss": 0.7393,
      "step": 3456
    },
    {
      "epoch": 0.5282499904496314,
      "grad_norm": 0.47527024149894714,
      "learning_rate": 9.700560242728602e-05,
      "loss": 0.7251,
      "step": 3457
    },
    {
      "epoch": 0.5284027963479391,
      "grad_norm": 0.27160361409187317,
      "learning_rate": 9.695571109763326e-05,
      "loss": 0.6963,
      "step": 3458
    },
    {
      "epoch": 0.5285556022462468,
      "grad_norm": 0.40073350071907043,
      "learning_rate": 9.690582052643951e-05,
      "loss": 0.6446,
      "step": 3459
    },
    {
      "epoch": 0.5287084081445543,
      "grad_norm": 0.2409697026014328,
      "learning_rate": 9.685593072613464e-05,
      "loss": 0.5726,
      "step": 3460
    },
    {
      "epoch": 0.528861214042862,
      "grad_norm": 0.3380088806152344,
      "learning_rate": 9.680604170914817e-05,
      "loss": 0.8136,
      "step": 3461
    },
    {
      "epoch": 0.5290140199411697,
      "grad_norm": 0.2963113784790039,
      "learning_rate": 9.675615348790964e-05,
      "loss": 0.6934,
      "step": 3462
    },
    {
      "epoch": 0.5291668258394774,
      "grad_norm": 0.2802518904209137,
      "learning_rate": 9.670626607484826e-05,
      "loss": 0.5576,
      "step": 3463
    },
    {
      "epoch": 0.5293196317377851,
      "grad_norm": 0.420153945684433,
      "learning_rate": 9.665637948239301e-05,
      "loss": 0.7867,
      "step": 3464
    },
    {
      "epoch": 0.5294724376360928,
      "grad_norm": 0.3025205433368683,
      "learning_rate": 9.660649372297272e-05,
      "loss": 0.7153,
      "step": 3465
    },
    {
      "epoch": 0.5296252435344004,
      "grad_norm": 0.2623286843299866,
      "learning_rate": 9.655660880901606e-05,
      "loss": 0.6243,
      "step": 3466
    },
    {
      "epoch": 0.5297780494327081,
      "grad_norm": 0.2510450780391693,
      "learning_rate": 9.650672475295143e-05,
      "loss": 0.6112,
      "step": 3467
    },
    {
      "epoch": 0.5299308553310158,
      "grad_norm": 0.34889891743659973,
      "learning_rate": 9.645684156720697e-05,
      "loss": 0.691,
      "step": 3468
    },
    {
      "epoch": 0.5300836612293235,
      "grad_norm": 0.30945080518722534,
      "learning_rate": 9.64069592642107e-05,
      "loss": 0.8384,
      "step": 3469
    },
    {
      "epoch": 0.5302364671276312,
      "grad_norm": 0.3335753083229065,
      "learning_rate": 9.63570778563904e-05,
      "loss": 0.5605,
      "step": 3470
    },
    {
      "epoch": 0.5303892730259389,
      "grad_norm": 0.36615538597106934,
      "learning_rate": 9.630719735617354e-05,
      "loss": 0.687,
      "step": 3471
    },
    {
      "epoch": 0.5305420789242464,
      "grad_norm": 1.3929213285446167,
      "learning_rate": 9.625731777598746e-05,
      "loss": 0.7925,
      "step": 3472
    },
    {
      "epoch": 0.5306948848225541,
      "grad_norm": 0.35153627395629883,
      "learning_rate": 9.620743912825924e-05,
      "loss": 0.5368,
      "step": 3473
    },
    {
      "epoch": 0.5308476907208618,
      "grad_norm": 0.28042492270469666,
      "learning_rate": 9.615756142541575e-05,
      "loss": 0.6947,
      "step": 3474
    },
    {
      "epoch": 0.5310004966191695,
      "grad_norm": 0.22383491694927216,
      "learning_rate": 9.610768467988356e-05,
      "loss": 0.6687,
      "step": 3475
    },
    {
      "epoch": 0.5311533025174772,
      "grad_norm": 0.5090556144714355,
      "learning_rate": 9.605780890408903e-05,
      "loss": 0.8305,
      "step": 3476
    },
    {
      "epoch": 0.5313061084157849,
      "grad_norm": 0.2908128499984741,
      "learning_rate": 9.600793411045838e-05,
      "loss": 0.5973,
      "step": 3477
    },
    {
      "epoch": 0.5314589143140925,
      "grad_norm": 0.3204064667224884,
      "learning_rate": 9.595806031141739e-05,
      "loss": 0.7176,
      "step": 3478
    },
    {
      "epoch": 0.5316117202124002,
      "grad_norm": 0.3861880302429199,
      "learning_rate": 9.590818751939177e-05,
      "loss": 0.4478,
      "step": 3479
    },
    {
      "epoch": 0.5317645261107079,
      "grad_norm": 0.5264634490013123,
      "learning_rate": 9.585831574680684e-05,
      "loss": 0.7398,
      "step": 3480
    },
    {
      "epoch": 0.5319173320090156,
      "grad_norm": 0.32976770401000977,
      "learning_rate": 9.580844500608782e-05,
      "loss": 0.7962,
      "step": 3481
    },
    {
      "epoch": 0.5320701379073233,
      "grad_norm": 0.2593368589878082,
      "learning_rate": 9.575857530965953e-05,
      "loss": 0.5612,
      "step": 3482
    },
    {
      "epoch": 0.5322229438056308,
      "grad_norm": 0.3448527753353119,
      "learning_rate": 9.570870666994658e-05,
      "loss": 0.63,
      "step": 3483
    },
    {
      "epoch": 0.5323757497039385,
      "grad_norm": 0.3122631013393402,
      "learning_rate": 9.56588390993734e-05,
      "loss": 0.6706,
      "step": 3484
    },
    {
      "epoch": 0.5325285556022462,
      "grad_norm": 0.35931363701820374,
      "learning_rate": 9.560897261036395e-05,
      "loss": 0.7125,
      "step": 3485
    },
    {
      "epoch": 0.5326813615005539,
      "grad_norm": 0.28302425146102905,
      "learning_rate": 9.555910721534214e-05,
      "loss": 0.6462,
      "step": 3486
    },
    {
      "epoch": 0.5328341673988616,
      "grad_norm": 0.3118671774864197,
      "learning_rate": 9.550924292673146e-05,
      "loss": 0.6675,
      "step": 3487
    },
    {
      "epoch": 0.5329869732971693,
      "grad_norm": 0.33205705881118774,
      "learning_rate": 9.545937975695526e-05,
      "loss": 0.6899,
      "step": 3488
    },
    {
      "epoch": 0.533139779195477,
      "grad_norm": 0.3029916286468506,
      "learning_rate": 9.540951771843645e-05,
      "loss": 0.7862,
      "step": 3489
    },
    {
      "epoch": 0.5332925850937846,
      "grad_norm": 0.3043176531791687,
      "learning_rate": 9.535965682359778e-05,
      "loss": 0.8245,
      "step": 3490
    },
    {
      "epoch": 0.5334453909920923,
      "grad_norm": 0.3936460316181183,
      "learning_rate": 9.530979708486162e-05,
      "loss": 0.9028,
      "step": 3491
    },
    {
      "epoch": 0.5335981968904,
      "grad_norm": 0.3017941117286682,
      "learning_rate": 9.525993851465021e-05,
      "loss": 0.684,
      "step": 3492
    },
    {
      "epoch": 0.5337510027887077,
      "grad_norm": 0.3985665440559387,
      "learning_rate": 9.521008112538529e-05,
      "loss": 0.6063,
      "step": 3493
    },
    {
      "epoch": 0.5339038086870154,
      "grad_norm": 0.3323298394680023,
      "learning_rate": 9.516022492948845e-05,
      "loss": 0.5456,
      "step": 3494
    },
    {
      "epoch": 0.5340566145853229,
      "grad_norm": 0.31235024333000183,
      "learning_rate": 9.511036993938097e-05,
      "loss": 0.5547,
      "step": 3495
    },
    {
      "epoch": 0.5342094204836306,
      "grad_norm": 0.26063108444213867,
      "learning_rate": 9.506051616748374e-05,
      "loss": 0.7367,
      "step": 3496
    },
    {
      "epoch": 0.5343622263819383,
      "grad_norm": 0.33859163522720337,
      "learning_rate": 9.501066362621746e-05,
      "loss": 0.6035,
      "step": 3497
    },
    {
      "epoch": 0.534515032280246,
      "grad_norm": 0.274844765663147,
      "learning_rate": 9.496081232800243e-05,
      "loss": 0.7057,
      "step": 3498
    },
    {
      "epoch": 0.5346678381785537,
      "grad_norm": 0.2800372540950775,
      "learning_rate": 9.491096228525876e-05,
      "loss": 0.5468,
      "step": 3499
    },
    {
      "epoch": 0.5348206440768614,
      "grad_norm": 0.5205533504486084,
      "learning_rate": 9.486111351040607e-05,
      "loss": 0.7171,
      "step": 3500
    },
    {
      "epoch": 0.534973449975169,
      "grad_norm": 0.3389289081096649,
      "learning_rate": 9.481126601586385e-05,
      "loss": 0.6145,
      "step": 3501
    },
    {
      "epoch": 0.5351262558734767,
      "grad_norm": 0.30579712986946106,
      "learning_rate": 9.476141981405113e-05,
      "loss": 0.7139,
      "step": 3502
    },
    {
      "epoch": 0.5352790617717844,
      "grad_norm": 0.27632755041122437,
      "learning_rate": 9.471157491738667e-05,
      "loss": 0.751,
      "step": 3503
    },
    {
      "epoch": 0.5354318676700921,
      "grad_norm": 0.3189046382904053,
      "learning_rate": 9.466173133828895e-05,
      "loss": 0.8163,
      "step": 3504
    },
    {
      "epoch": 0.5355846735683998,
      "grad_norm": 0.288310170173645,
      "learning_rate": 9.461188908917605e-05,
      "loss": 0.6486,
      "step": 3505
    },
    {
      "epoch": 0.5357374794667075,
      "grad_norm": 0.2973790764808655,
      "learning_rate": 9.456204818246578e-05,
      "loss": 0.8716,
      "step": 3506
    },
    {
      "epoch": 0.535890285365015,
      "grad_norm": 0.2922728359699249,
      "learning_rate": 9.451220863057551e-05,
      "loss": 0.708,
      "step": 3507
    },
    {
      "epoch": 0.5360430912633227,
      "grad_norm": 0.3109127879142761,
      "learning_rate": 9.446237044592241e-05,
      "loss": 0.5553,
      "step": 3508
    },
    {
      "epoch": 0.5361958971616304,
      "grad_norm": 0.27865827083587646,
      "learning_rate": 9.441253364092326e-05,
      "loss": 0.8297,
      "step": 3509
    },
    {
      "epoch": 0.5363487030599381,
      "grad_norm": 0.28346171975135803,
      "learning_rate": 9.436269822799443e-05,
      "loss": 0.6596,
      "step": 3510
    },
    {
      "epoch": 0.5365015089582458,
      "grad_norm": 0.27833786606788635,
      "learning_rate": 9.431286421955199e-05,
      "loss": 0.758,
      "step": 3511
    },
    {
      "epoch": 0.5366543148565535,
      "grad_norm": 0.2874302864074707,
      "learning_rate": 9.426303162801171e-05,
      "loss": 0.58,
      "step": 3512
    },
    {
      "epoch": 0.5368071207548611,
      "grad_norm": 0.3504881262779236,
      "learning_rate": 9.421320046578896e-05,
      "loss": 0.557,
      "step": 3513
    },
    {
      "epoch": 0.5369599266531688,
      "grad_norm": 0.357403039932251,
      "learning_rate": 9.416337074529873e-05,
      "loss": 0.719,
      "step": 3514
    },
    {
      "epoch": 0.5371127325514765,
      "grad_norm": 0.35706785321235657,
      "learning_rate": 9.411354247895566e-05,
      "loss": 0.7606,
      "step": 3515
    },
    {
      "epoch": 0.5372655384497842,
      "grad_norm": 0.2595686912536621,
      "learning_rate": 9.406371567917411e-05,
      "loss": 0.8486,
      "step": 3516
    },
    {
      "epoch": 0.5374183443480919,
      "grad_norm": 0.30613166093826294,
      "learning_rate": 9.401389035836793e-05,
      "loss": 0.8481,
      "step": 3517
    },
    {
      "epoch": 0.5375711502463996,
      "grad_norm": 0.46529725193977356,
      "learning_rate": 9.396406652895072e-05,
      "loss": 0.7343,
      "step": 3518
    },
    {
      "epoch": 0.5377239561447071,
      "grad_norm": 0.34976473450660706,
      "learning_rate": 9.391424420333569e-05,
      "loss": 0.6401,
      "step": 3519
    },
    {
      "epoch": 0.5378767620430148,
      "grad_norm": 0.3024265766143799,
      "learning_rate": 9.386442339393564e-05,
      "loss": 0.6997,
      "step": 3520
    },
    {
      "epoch": 0.5380295679413225,
      "grad_norm": 0.2734369933605194,
      "learning_rate": 9.381460411316298e-05,
      "loss": 0.7366,
      "step": 3521
    },
    {
      "epoch": 0.5381823738396302,
      "grad_norm": 0.28172358870506287,
      "learning_rate": 9.376478637342976e-05,
      "loss": 0.8492,
      "step": 3522
    },
    {
      "epoch": 0.5383351797379379,
      "grad_norm": 0.32786622643470764,
      "learning_rate": 9.371497018714772e-05,
      "loss": 0.6493,
      "step": 3523
    },
    {
      "epoch": 0.5384879856362456,
      "grad_norm": 0.31345850229263306,
      "learning_rate": 9.366515556672808e-05,
      "loss": 0.8286,
      "step": 3524
    },
    {
      "epoch": 0.5386407915345532,
      "grad_norm": 0.3381461501121521,
      "learning_rate": 9.361534252458175e-05,
      "loss": 0.8441,
      "step": 3525
    },
    {
      "epoch": 0.5387935974328609,
      "grad_norm": 0.3015748858451843,
      "learning_rate": 9.356553107311921e-05,
      "loss": 0.693,
      "step": 3526
    },
    {
      "epoch": 0.5389464033311686,
      "grad_norm": 0.44636762142181396,
      "learning_rate": 9.351572122475065e-05,
      "loss": 0.6457,
      "step": 3527
    },
    {
      "epoch": 0.5390992092294763,
      "grad_norm": 0.5325556993484497,
      "learning_rate": 9.346591299188568e-05,
      "loss": 0.8459,
      "step": 3528
    },
    {
      "epoch": 0.539252015127784,
      "grad_norm": 0.2519857585430145,
      "learning_rate": 9.341610638693363e-05,
      "loss": 0.5791,
      "step": 3529
    },
    {
      "epoch": 0.5394048210260916,
      "grad_norm": 0.26468968391418457,
      "learning_rate": 9.336630142230342e-05,
      "loss": 0.6978,
      "step": 3530
    },
    {
      "epoch": 0.5395576269243992,
      "grad_norm": 0.2735024094581604,
      "learning_rate": 9.331649811040355e-05,
      "loss": 0.5179,
      "step": 3531
    },
    {
      "epoch": 0.5397104328227069,
      "grad_norm": 0.2528313398361206,
      "learning_rate": 9.326669646364205e-05,
      "loss": 0.6918,
      "step": 3532
    },
    {
      "epoch": 0.5398632387210146,
      "grad_norm": 0.4224424362182617,
      "learning_rate": 9.321689649442657e-05,
      "loss": 0.8484,
      "step": 3533
    },
    {
      "epoch": 0.5400160446193223,
      "grad_norm": 1.335054874420166,
      "learning_rate": 9.316709821516449e-05,
      "loss": 0.7747,
      "step": 3534
    },
    {
      "epoch": 0.54016885051763,
      "grad_norm": 0.2739188075065613,
      "learning_rate": 9.311730163826243e-05,
      "loss": 0.6359,
      "step": 3535
    },
    {
      "epoch": 0.5403216564159377,
      "grad_norm": 0.39737796783447266,
      "learning_rate": 9.306750677612693e-05,
      "loss": 0.767,
      "step": 3536
    },
    {
      "epoch": 0.5404744623142453,
      "grad_norm": 0.29680559039115906,
      "learning_rate": 9.301771364116391e-05,
      "loss": 0.7895,
      "step": 3537
    },
    {
      "epoch": 0.540627268212553,
      "grad_norm": 0.33182379603385925,
      "learning_rate": 9.296792224577895e-05,
      "loss": 0.5614,
      "step": 3538
    },
    {
      "epoch": 0.5407800741108607,
      "grad_norm": 0.2581312358379364,
      "learning_rate": 9.291813260237712e-05,
      "loss": 0.8357,
      "step": 3539
    },
    {
      "epoch": 0.5409328800091684,
      "grad_norm": 0.27075865864753723,
      "learning_rate": 9.286834472336311e-05,
      "loss": 0.5831,
      "step": 3540
    },
    {
      "epoch": 0.5410856859074761,
      "grad_norm": 0.36913207173347473,
      "learning_rate": 9.281855862114117e-05,
      "loss": 0.8358,
      "step": 3541
    },
    {
      "epoch": 0.5412384918057837,
      "grad_norm": 0.27564701437950134,
      "learning_rate": 9.276877430811501e-05,
      "loss": 0.7194,
      "step": 3542
    },
    {
      "epoch": 0.5413912977040913,
      "grad_norm": 0.42347952723503113,
      "learning_rate": 9.271899179668807e-05,
      "loss": 0.816,
      "step": 3543
    },
    {
      "epoch": 0.541544103602399,
      "grad_norm": 0.3452145755290985,
      "learning_rate": 9.266921109926318e-05,
      "loss": 0.6209,
      "step": 3544
    },
    {
      "epoch": 0.5416969095007067,
      "grad_norm": 0.3281693160533905,
      "learning_rate": 9.261943222824286e-05,
      "loss": 0.7045,
      "step": 3545
    },
    {
      "epoch": 0.5418497153990144,
      "grad_norm": 0.29575106501579285,
      "learning_rate": 9.2569655196029e-05,
      "loss": 0.588,
      "step": 3546
    },
    {
      "epoch": 0.5420025212973221,
      "grad_norm": 0.2839337885379791,
      "learning_rate": 9.251988001502317e-05,
      "loss": 0.6298,
      "step": 3547
    },
    {
      "epoch": 0.5421553271956298,
      "grad_norm": 0.263030469417572,
      "learning_rate": 9.24701066976265e-05,
      "loss": 0.8087,
      "step": 3548
    },
    {
      "epoch": 0.5423081330939374,
      "grad_norm": 0.3667827844619751,
      "learning_rate": 9.242033525623946e-05,
      "loss": 0.6128,
      "step": 3549
    },
    {
      "epoch": 0.5424609389922451,
      "grad_norm": 0.2590767741203308,
      "learning_rate": 9.237056570326231e-05,
      "loss": 0.6672,
      "step": 3550
    },
    {
      "epoch": 0.5426137448905528,
      "grad_norm": 0.3932031989097595,
      "learning_rate": 9.232079805109467e-05,
      "loss": 0.6827,
      "step": 3551
    },
    {
      "epoch": 0.5427665507888605,
      "grad_norm": 0.41681838035583496,
      "learning_rate": 9.227103231213575e-05,
      "loss": 0.8258,
      "step": 3552
    },
    {
      "epoch": 0.5429193566871682,
      "grad_norm": 0.2821168303489685,
      "learning_rate": 9.222126849878421e-05,
      "loss": 0.7343,
      "step": 3553
    },
    {
      "epoch": 0.5430721625854757,
      "grad_norm": 0.3376697301864624,
      "learning_rate": 9.217150662343835e-05,
      "loss": 0.6614,
      "step": 3554
    },
    {
      "epoch": 0.5432249684837834,
      "grad_norm": 0.28673434257507324,
      "learning_rate": 9.212174669849593e-05,
      "loss": 0.655,
      "step": 3555
    },
    {
      "epoch": 0.5433777743820911,
      "grad_norm": 0.8690926432609558,
      "learning_rate": 9.207198873635414e-05,
      "loss": 0.744,
      "step": 3556
    },
    {
      "epoch": 0.5435305802803988,
      "grad_norm": 0.2887100577354431,
      "learning_rate": 9.202223274940981e-05,
      "loss": 0.8725,
      "step": 3557
    },
    {
      "epoch": 0.5436833861787065,
      "grad_norm": 0.276275634765625,
      "learning_rate": 9.197247875005923e-05,
      "loss": 0.794,
      "step": 3558
    },
    {
      "epoch": 0.5438361920770142,
      "grad_norm": 0.5013990998268127,
      "learning_rate": 9.192272675069821e-05,
      "loss": 0.6538,
      "step": 3559
    },
    {
      "epoch": 0.5439889979753219,
      "grad_norm": 0.3582007586956024,
      "learning_rate": 9.1872976763722e-05,
      "loss": 0.801,
      "step": 3560
    },
    {
      "epoch": 0.5441418038736295,
      "grad_norm": 0.28688696026802063,
      "learning_rate": 9.182322880152539e-05,
      "loss": 0.8727,
      "step": 3561
    },
    {
      "epoch": 0.5442946097719372,
      "grad_norm": 0.9655963182449341,
      "learning_rate": 9.177348287650273e-05,
      "loss": 0.6883,
      "step": 3562
    },
    {
      "epoch": 0.5444474156702449,
      "grad_norm": 0.3847043812274933,
      "learning_rate": 9.172373900104774e-05,
      "loss": 0.6851,
      "step": 3563
    },
    {
      "epoch": 0.5446002215685526,
      "grad_norm": 2.0926127433776855,
      "learning_rate": 9.167399718755366e-05,
      "loss": 0.6177,
      "step": 3564
    },
    {
      "epoch": 0.5447530274668603,
      "grad_norm": 1.1532070636749268,
      "learning_rate": 9.162425744841333e-05,
      "loss": 0.8987,
      "step": 3565
    },
    {
      "epoch": 0.5449058333651678,
      "grad_norm": 0.31538552045822144,
      "learning_rate": 9.157451979601896e-05,
      "loss": 0.6536,
      "step": 3566
    },
    {
      "epoch": 0.5450586392634755,
      "grad_norm": 0.2628517746925354,
      "learning_rate": 9.152478424276226e-05,
      "loss": 0.6453,
      "step": 3567
    },
    {
      "epoch": 0.5452114451617832,
      "grad_norm": 0.3210277557373047,
      "learning_rate": 9.147505080103437e-05,
      "loss": 0.6057,
      "step": 3568
    },
    {
      "epoch": 0.5453642510600909,
      "grad_norm": 0.2707095146179199,
      "learning_rate": 9.142531948322605e-05,
      "loss": 0.6254,
      "step": 3569
    },
    {
      "epoch": 0.5455170569583986,
      "grad_norm": 0.3149011433124542,
      "learning_rate": 9.137559030172742e-05,
      "loss": 0.5751,
      "step": 3570
    },
    {
      "epoch": 0.5456698628567063,
      "grad_norm": 0.25553005933761597,
      "learning_rate": 9.132586326892805e-05,
      "loss": 0.6009,
      "step": 3571
    },
    {
      "epoch": 0.545822668755014,
      "grad_norm": 0.32813313603401184,
      "learning_rate": 9.1276138397217e-05,
      "loss": 0.551,
      "step": 3572
    },
    {
      "epoch": 0.5459754746533216,
      "grad_norm": 0.33819863200187683,
      "learning_rate": 9.12264156989829e-05,
      "loss": 0.6935,
      "step": 3573
    },
    {
      "epoch": 0.5461282805516293,
      "grad_norm": 0.2711593210697174,
      "learning_rate": 9.117669518661366e-05,
      "loss": 0.6271,
      "step": 3574
    },
    {
      "epoch": 0.546281086449937,
      "grad_norm": 0.3292696475982666,
      "learning_rate": 9.112697687249673e-05,
      "loss": 0.7504,
      "step": 3575
    },
    {
      "epoch": 0.5464338923482447,
      "grad_norm": 0.31159183382987976,
      "learning_rate": 9.107726076901903e-05,
      "loss": 0.5733,
      "step": 3576
    },
    {
      "epoch": 0.5465866982465524,
      "grad_norm": 0.29188716411590576,
      "learning_rate": 9.102754688856694e-05,
      "loss": 0.6164,
      "step": 3577
    },
    {
      "epoch": 0.54673950414486,
      "grad_norm": 0.3371030390262604,
      "learning_rate": 9.09778352435262e-05,
      "loss": 0.8755,
      "step": 3578
    },
    {
      "epoch": 0.5468923100431676,
      "grad_norm": 0.34226492047309875,
      "learning_rate": 9.092812584628208e-05,
      "loss": 0.7217,
      "step": 3579
    },
    {
      "epoch": 0.5470451159414753,
      "grad_norm": 0.2898171544075012,
      "learning_rate": 9.08784187092193e-05,
      "loss": 0.6281,
      "step": 3580
    },
    {
      "epoch": 0.547197921839783,
      "grad_norm": 0.34746459126472473,
      "learning_rate": 9.082871384472186e-05,
      "loss": 0.8541,
      "step": 3581
    },
    {
      "epoch": 0.5473507277380907,
      "grad_norm": 0.3657127916812897,
      "learning_rate": 9.077901126517341e-05,
      "loss": 0.79,
      "step": 3582
    },
    {
      "epoch": 0.5475035336363984,
      "grad_norm": 0.27212727069854736,
      "learning_rate": 9.072931098295687e-05,
      "loss": 0.8048,
      "step": 3583
    },
    {
      "epoch": 0.547656339534706,
      "grad_norm": 0.49914315342903137,
      "learning_rate": 9.067961301045472e-05,
      "loss": 0.5319,
      "step": 3584
    },
    {
      "epoch": 0.5478091454330137,
      "grad_norm": 0.31453418731689453,
      "learning_rate": 9.062991736004874e-05,
      "loss": 0.7725,
      "step": 3585
    },
    {
      "epoch": 0.5479619513313214,
      "grad_norm": 0.2770235538482666,
      "learning_rate": 9.058022404412019e-05,
      "loss": 0.7344,
      "step": 3586
    },
    {
      "epoch": 0.5481147572296291,
      "grad_norm": 0.29153865575790405,
      "learning_rate": 9.053053307504978e-05,
      "loss": 0.4709,
      "step": 3587
    },
    {
      "epoch": 0.5482675631279368,
      "grad_norm": 0.3256016969680786,
      "learning_rate": 9.04808444652175e-05,
      "loss": 0.6212,
      "step": 3588
    },
    {
      "epoch": 0.5484203690262444,
      "grad_norm": 0.2777874171733856,
      "learning_rate": 9.043115822700294e-05,
      "loss": 0.8251,
      "step": 3589
    },
    {
      "epoch": 0.548573174924552,
      "grad_norm": 0.37808412313461304,
      "learning_rate": 9.038147437278498e-05,
      "loss": 0.7221,
      "step": 3590
    },
    {
      "epoch": 0.5487259808228597,
      "grad_norm": 0.33841803669929504,
      "learning_rate": 9.0331792914942e-05,
      "loss": 0.7242,
      "step": 3591
    },
    {
      "epoch": 0.5488787867211674,
      "grad_norm": 0.4070587158203125,
      "learning_rate": 9.028211386585158e-05,
      "loss": 0.6671,
      "step": 3592
    },
    {
      "epoch": 0.5490315926194751,
      "grad_norm": 0.32144245505332947,
      "learning_rate": 9.023243723789095e-05,
      "loss": 0.7437,
      "step": 3593
    },
    {
      "epoch": 0.5491843985177828,
      "grad_norm": 0.3725501000881195,
      "learning_rate": 9.018276304343661e-05,
      "loss": 0.9447,
      "step": 3594
    },
    {
      "epoch": 0.5493372044160905,
      "grad_norm": 0.287739098072052,
      "learning_rate": 9.013309129486442e-05,
      "loss": 0.8444,
      "step": 3595
    },
    {
      "epoch": 0.5494900103143981,
      "grad_norm": 0.3222897946834564,
      "learning_rate": 9.00834220045497e-05,
      "loss": 0.8743,
      "step": 3596
    },
    {
      "epoch": 0.5496428162127058,
      "grad_norm": 0.29483258724212646,
      "learning_rate": 9.003375518486717e-05,
      "loss": 0.6778,
      "step": 3597
    },
    {
      "epoch": 0.5497956221110135,
      "grad_norm": 0.31444084644317627,
      "learning_rate": 8.998409084819088e-05,
      "loss": 0.6698,
      "step": 3598
    },
    {
      "epoch": 0.5499484280093212,
      "grad_norm": 0.935632050037384,
      "learning_rate": 8.993442900689426e-05,
      "loss": 0.735,
      "step": 3599
    },
    {
      "epoch": 0.5501012339076289,
      "grad_norm": 0.3364983797073364,
      "learning_rate": 8.988476967335015e-05,
      "loss": 0.7976,
      "step": 3600
    },
    {
      "epoch": 0.5502540398059365,
      "grad_norm": 0.2456827610731125,
      "learning_rate": 8.983511285993077e-05,
      "loss": 0.651,
      "step": 3601
    },
    {
      "epoch": 0.5504068457042441,
      "grad_norm": 0.42094314098358154,
      "learning_rate": 8.978545857900774e-05,
      "loss": 0.8525,
      "step": 3602
    },
    {
      "epoch": 0.5505596516025518,
      "grad_norm": 0.3060030937194824,
      "learning_rate": 8.973580684295191e-05,
      "loss": 0.5432,
      "step": 3603
    },
    {
      "epoch": 0.5507124575008595,
      "grad_norm": 0.3308151066303253,
      "learning_rate": 8.968615766413367e-05,
      "loss": 0.7881,
      "step": 3604
    },
    {
      "epoch": 0.5508652633991672,
      "grad_norm": 0.2703869938850403,
      "learning_rate": 8.963651105492267e-05,
      "loss": 0.7023,
      "step": 3605
    },
    {
      "epoch": 0.5510180692974749,
      "grad_norm": 0.24846504628658295,
      "learning_rate": 8.958686702768796e-05,
      "loss": 0.513,
      "step": 3606
    },
    {
      "epoch": 0.5511708751957826,
      "grad_norm": 0.2808684706687927,
      "learning_rate": 8.953722559479788e-05,
      "loss": 0.7579,
      "step": 3607
    },
    {
      "epoch": 0.5513236810940902,
      "grad_norm": 0.36039602756500244,
      "learning_rate": 8.948758676862023e-05,
      "loss": 0.6608,
      "step": 3608
    },
    {
      "epoch": 0.5514764869923979,
      "grad_norm": 0.24951785802841187,
      "learning_rate": 8.943795056152213e-05,
      "loss": 0.6244,
      "step": 3609
    },
    {
      "epoch": 0.5516292928907056,
      "grad_norm": 0.2586328983306885,
      "learning_rate": 8.938831698586993e-05,
      "loss": 0.5952,
      "step": 3610
    },
    {
      "epoch": 0.5517820987890133,
      "grad_norm": 0.2756107449531555,
      "learning_rate": 8.933868605402951e-05,
      "loss": 0.5698,
      "step": 3611
    },
    {
      "epoch": 0.551934904687321,
      "grad_norm": 0.28072118759155273,
      "learning_rate": 8.928905777836599e-05,
      "loss": 0.5509,
      "step": 3612
    },
    {
      "epoch": 0.5520877105856286,
      "grad_norm": 0.3419652581214905,
      "learning_rate": 8.923943217124377e-05,
      "loss": 0.6476,
      "step": 3613
    },
    {
      "epoch": 0.5522405164839362,
      "grad_norm": 0.28130725026130676,
      "learning_rate": 8.918980924502669e-05,
      "loss": 0.613,
      "step": 3614
    },
    {
      "epoch": 0.5523933223822439,
      "grad_norm": 0.34239283204078674,
      "learning_rate": 8.914018901207791e-05,
      "loss": 0.738,
      "step": 3615
    },
    {
      "epoch": 0.5525461282805516,
      "grad_norm": 0.2802269458770752,
      "learning_rate": 8.909057148475991e-05,
      "loss": 0.5347,
      "step": 3616
    },
    {
      "epoch": 0.5526989341788593,
      "grad_norm": 0.35925936698913574,
      "learning_rate": 8.904095667543442e-05,
      "loss": 0.7487,
      "step": 3617
    },
    {
      "epoch": 0.552851740077167,
      "grad_norm": 0.2965247631072998,
      "learning_rate": 8.899134459646257e-05,
      "loss": 0.4349,
      "step": 3618
    },
    {
      "epoch": 0.5530045459754747,
      "grad_norm": 0.2840178608894348,
      "learning_rate": 8.894173526020483e-05,
      "loss": 0.7946,
      "step": 3619
    },
    {
      "epoch": 0.5531573518737823,
      "grad_norm": 0.2992875576019287,
      "learning_rate": 8.889212867902092e-05,
      "loss": 0.7204,
      "step": 3620
    },
    {
      "epoch": 0.55331015777209,
      "grad_norm": 0.4619637429714203,
      "learning_rate": 8.88425248652699e-05,
      "loss": 0.8135,
      "step": 3621
    },
    {
      "epoch": 0.5534629636703977,
      "grad_norm": 0.2635784447193146,
      "learning_rate": 8.879292383131012e-05,
      "loss": 0.7223,
      "step": 3622
    },
    {
      "epoch": 0.5536157695687054,
      "grad_norm": 0.29897844791412354,
      "learning_rate": 8.874332558949933e-05,
      "loss": 0.5591,
      "step": 3623
    },
    {
      "epoch": 0.5537685754670131,
      "grad_norm": 0.4032471776008606,
      "learning_rate": 8.869373015219448e-05,
      "loss": 0.4957,
      "step": 3624
    },
    {
      "epoch": 0.5539213813653207,
      "grad_norm": 0.33153533935546875,
      "learning_rate": 8.864413753175183e-05,
      "loss": 0.699,
      "step": 3625
    },
    {
      "epoch": 0.5540741872636283,
      "grad_norm": 0.2981371283531189,
      "learning_rate": 8.859454774052705e-05,
      "loss": 0.7621,
      "step": 3626
    },
    {
      "epoch": 0.554226993161936,
      "grad_norm": 0.34244444966316223,
      "learning_rate": 8.854496079087489e-05,
      "loss": 0.623,
      "step": 3627
    },
    {
      "epoch": 0.5543797990602437,
      "grad_norm": 0.2774951756000519,
      "learning_rate": 8.849537669514963e-05,
      "loss": 0.7215,
      "step": 3628
    },
    {
      "epoch": 0.5545326049585514,
      "grad_norm": 0.35149478912353516,
      "learning_rate": 8.844579546570466e-05,
      "loss": 0.7571,
      "step": 3629
    },
    {
      "epoch": 0.5546854108568591,
      "grad_norm": 0.2874681055545807,
      "learning_rate": 8.839621711489278e-05,
      "loss": 0.8364,
      "step": 3630
    },
    {
      "epoch": 0.5548382167551668,
      "grad_norm": 0.34419891238212585,
      "learning_rate": 8.834664165506602e-05,
      "loss": 0.5213,
      "step": 3631
    },
    {
      "epoch": 0.5549910226534744,
      "grad_norm": 0.24731077253818512,
      "learning_rate": 8.829706909857564e-05,
      "loss": 0.7184,
      "step": 3632
    },
    {
      "epoch": 0.5551438285517821,
      "grad_norm": 0.31164079904556274,
      "learning_rate": 8.824749945777231e-05,
      "loss": 0.8348,
      "step": 3633
    },
    {
      "epoch": 0.5552966344500898,
      "grad_norm": 0.2927907705307007,
      "learning_rate": 8.819793274500577e-05,
      "loss": 0.6894,
      "step": 3634
    },
    {
      "epoch": 0.5554494403483975,
      "grad_norm": 0.32291513681411743,
      "learning_rate": 8.814836897262524e-05,
      "loss": 0.7885,
      "step": 3635
    },
    {
      "epoch": 0.5556022462467052,
      "grad_norm": 0.2853772044181824,
      "learning_rate": 8.80988081529791e-05,
      "loss": 0.6697,
      "step": 3636
    },
    {
      "epoch": 0.5557550521450128,
      "grad_norm": 0.35197320580482483,
      "learning_rate": 8.804925029841503e-05,
      "loss": 0.6366,
      "step": 3637
    },
    {
      "epoch": 0.5559078580433204,
      "grad_norm": 0.2681580185890198,
      "learning_rate": 8.79996954212799e-05,
      "loss": 0.804,
      "step": 3638
    },
    {
      "epoch": 0.5560606639416281,
      "grad_norm": 0.3117936849594116,
      "learning_rate": 8.795014353391992e-05,
      "loss": 0.7763,
      "step": 3639
    },
    {
      "epoch": 0.5562134698399358,
      "grad_norm": 0.4075622856616974,
      "learning_rate": 8.790059464868052e-05,
      "loss": 0.6972,
      "step": 3640
    },
    {
      "epoch": 0.5563662757382435,
      "grad_norm": 0.2932533919811249,
      "learning_rate": 8.785104877790646e-05,
      "loss": 0.7157,
      "step": 3641
    },
    {
      "epoch": 0.5565190816365512,
      "grad_norm": 0.3137199282646179,
      "learning_rate": 8.780150593394155e-05,
      "loss": 0.5992,
      "step": 3642
    },
    {
      "epoch": 0.5566718875348589,
      "grad_norm": 0.31674298644065857,
      "learning_rate": 8.775196612912906e-05,
      "loss": 0.5875,
      "step": 3643
    },
    {
      "epoch": 0.5568246934331665,
      "grad_norm": 0.31617283821105957,
      "learning_rate": 8.770242937581142e-05,
      "loss": 0.6944,
      "step": 3644
    },
    {
      "epoch": 0.5569774993314742,
      "grad_norm": 0.25456005334854126,
      "learning_rate": 8.765289568633023e-05,
      "loss": 0.6138,
      "step": 3645
    },
    {
      "epoch": 0.5571303052297819,
      "grad_norm": 0.35206279158592224,
      "learning_rate": 8.760336507302645e-05,
      "loss": 0.6077,
      "step": 3646
    },
    {
      "epoch": 0.5572831111280896,
      "grad_norm": 0.2751142382621765,
      "learning_rate": 8.755383754824021e-05,
      "loss": 0.545,
      "step": 3647
    },
    {
      "epoch": 0.5574359170263972,
      "grad_norm": 0.30307304859161377,
      "learning_rate": 8.750431312431088e-05,
      "loss": 0.793,
      "step": 3648
    },
    {
      "epoch": 0.5575887229247048,
      "grad_norm": 0.3258976936340332,
      "learning_rate": 8.745479181357702e-05,
      "loss": 0.8169,
      "step": 3649
    },
    {
      "epoch": 0.5577415288230125,
      "grad_norm": 0.34108075499534607,
      "learning_rate": 8.740527362837649e-05,
      "loss": 0.6695,
      "step": 3650
    },
    {
      "epoch": 0.5578943347213202,
      "grad_norm": 0.2840404510498047,
      "learning_rate": 8.735575858104632e-05,
      "loss": 0.6967,
      "step": 3651
    },
    {
      "epoch": 0.5580471406196279,
      "grad_norm": 0.2926589548587799,
      "learning_rate": 8.730624668392274e-05,
      "loss": 0.6988,
      "step": 3652
    },
    {
      "epoch": 0.5581999465179356,
      "grad_norm": 0.2742522358894348,
      "learning_rate": 8.725673794934122e-05,
      "loss": 0.5609,
      "step": 3653
    },
    {
      "epoch": 0.5583527524162433,
      "grad_norm": 0.29250505566596985,
      "learning_rate": 8.720723238963651e-05,
      "loss": 0.8384,
      "step": 3654
    },
    {
      "epoch": 0.558505558314551,
      "grad_norm": 0.297107458114624,
      "learning_rate": 8.715773001714247e-05,
      "loss": 0.7247,
      "step": 3655
    },
    {
      "epoch": 0.5586583642128586,
      "grad_norm": 0.2704477906227112,
      "learning_rate": 8.710823084419217e-05,
      "loss": 0.7114,
      "step": 3656
    },
    {
      "epoch": 0.5588111701111663,
      "grad_norm": 0.4120960235595703,
      "learning_rate": 8.705873488311793e-05,
      "loss": 0.781,
      "step": 3657
    },
    {
      "epoch": 0.558963976009474,
      "grad_norm": 0.293628066778183,
      "learning_rate": 8.70092421462513e-05,
      "loss": 0.8205,
      "step": 3658
    },
    {
      "epoch": 0.5591167819077817,
      "grad_norm": 0.43009713292121887,
      "learning_rate": 8.695975264592293e-05,
      "loss": 0.6756,
      "step": 3659
    },
    {
      "epoch": 0.5592695878060893,
      "grad_norm": 0.3064223825931549,
      "learning_rate": 8.691026639446269e-05,
      "loss": 0.6319,
      "step": 3660
    },
    {
      "epoch": 0.559422393704397,
      "grad_norm": 0.6732028722763062,
      "learning_rate": 8.686078340419973e-05,
      "loss": 0.6336,
      "step": 3661
    },
    {
      "epoch": 0.5595751996027046,
      "grad_norm": 0.28670135140419006,
      "learning_rate": 8.68113036874623e-05,
      "loss": 0.6219,
      "step": 3662
    },
    {
      "epoch": 0.5597280055010123,
      "grad_norm": 0.332643061876297,
      "learning_rate": 8.676182725657783e-05,
      "loss": 0.6776,
      "step": 3663
    },
    {
      "epoch": 0.55988081139932,
      "grad_norm": 0.34815049171447754,
      "learning_rate": 8.671235412387296e-05,
      "loss": 0.621,
      "step": 3664
    },
    {
      "epoch": 0.5600336172976277,
      "grad_norm": 0.36170151829719543,
      "learning_rate": 8.666288430167356e-05,
      "loss": 0.841,
      "step": 3665
    },
    {
      "epoch": 0.5601864231959354,
      "grad_norm": 0.2729724049568176,
      "learning_rate": 8.661341780230456e-05,
      "loss": 0.7348,
      "step": 3666
    },
    {
      "epoch": 0.560339229094243,
      "grad_norm": 0.3887978196144104,
      "learning_rate": 8.656395463809014e-05,
      "loss": 0.7487,
      "step": 3667
    },
    {
      "epoch": 0.5604920349925507,
      "grad_norm": 0.29915356636047363,
      "learning_rate": 8.651449482135362e-05,
      "loss": 0.6618,
      "step": 3668
    },
    {
      "epoch": 0.5606448408908584,
      "grad_norm": 0.35995975136756897,
      "learning_rate": 8.646503836441755e-05,
      "loss": 0.6133,
      "step": 3669
    },
    {
      "epoch": 0.5607976467891661,
      "grad_norm": 0.3829162120819092,
      "learning_rate": 8.641558527960354e-05,
      "loss": 0.5826,
      "step": 3670
    },
    {
      "epoch": 0.5609504526874738,
      "grad_norm": 0.34606584906578064,
      "learning_rate": 8.63661355792324e-05,
      "loss": 0.8346,
      "step": 3671
    },
    {
      "epoch": 0.5611032585857814,
      "grad_norm": 0.25087451934814453,
      "learning_rate": 8.631668927562421e-05,
      "loss": 0.71,
      "step": 3672
    },
    {
      "epoch": 0.561256064484089,
      "grad_norm": 0.3157608211040497,
      "learning_rate": 8.626724638109796e-05,
      "loss": 0.6984,
      "step": 3673
    },
    {
      "epoch": 0.5614088703823967,
      "grad_norm": 0.33677148818969727,
      "learning_rate": 8.6217806907972e-05,
      "loss": 0.541,
      "step": 3674
    },
    {
      "epoch": 0.5615616762807044,
      "grad_norm": 0.28932487964630127,
      "learning_rate": 8.616837086856377e-05,
      "loss": 0.819,
      "step": 3675
    },
    {
      "epoch": 0.5617144821790121,
      "grad_norm": 0.3011149764060974,
      "learning_rate": 8.611893827518987e-05,
      "loss": 0.6822,
      "step": 3676
    },
    {
      "epoch": 0.5618672880773198,
      "grad_norm": 0.3271982669830322,
      "learning_rate": 8.606950914016593e-05,
      "loss": 0.6524,
      "step": 3677
    },
    {
      "epoch": 0.5620200939756275,
      "grad_norm": 0.291826993227005,
      "learning_rate": 8.602008347580685e-05,
      "loss": 0.6323,
      "step": 3678
    },
    {
      "epoch": 0.5621728998739352,
      "grad_norm": 0.2812834680080414,
      "learning_rate": 8.597066129442663e-05,
      "loss": 0.6507,
      "step": 3679
    },
    {
      "epoch": 0.5623257057722428,
      "grad_norm": 0.35509222745895386,
      "learning_rate": 8.59212426083384e-05,
      "loss": 0.8128,
      "step": 3680
    },
    {
      "epoch": 0.5624785116705505,
      "grad_norm": 0.26385602355003357,
      "learning_rate": 8.587182742985439e-05,
      "loss": 0.5526,
      "step": 3681
    },
    {
      "epoch": 0.5626313175688582,
      "grad_norm": 0.26334047317504883,
      "learning_rate": 8.582241577128596e-05,
      "loss": 0.7467,
      "step": 3682
    },
    {
      "epoch": 0.5627841234671659,
      "grad_norm": 0.4938865303993225,
      "learning_rate": 8.577300764494369e-05,
      "loss": 0.7191,
      "step": 3683
    },
    {
      "epoch": 0.5629369293654735,
      "grad_norm": 0.2696554958820343,
      "learning_rate": 8.572360306313706e-05,
      "loss": 0.6709,
      "step": 3684
    },
    {
      "epoch": 0.5630897352637811,
      "grad_norm": 0.36236676573753357,
      "learning_rate": 8.567420203817492e-05,
      "loss": 0.6558,
      "step": 3685
    },
    {
      "epoch": 0.5632425411620888,
      "grad_norm": 0.27699196338653564,
      "learning_rate": 8.562480458236507e-05,
      "loss": 0.7543,
      "step": 3686
    },
    {
      "epoch": 0.5633953470603965,
      "grad_norm": 0.2756933569908142,
      "learning_rate": 8.557541070801455e-05,
      "loss": 0.6575,
      "step": 3687
    },
    {
      "epoch": 0.5635481529587042,
      "grad_norm": 0.3750994801521301,
      "learning_rate": 8.55260204274293e-05,
      "loss": 0.6176,
      "step": 3688
    },
    {
      "epoch": 0.5637009588570119,
      "grad_norm": 0.4120349884033203,
      "learning_rate": 8.547663375291459e-05,
      "loss": 0.6742,
      "step": 3689
    },
    {
      "epoch": 0.5638537647553196,
      "grad_norm": 0.2664812207221985,
      "learning_rate": 8.54272506967747e-05,
      "loss": 0.5904,
      "step": 3690
    },
    {
      "epoch": 0.5640065706536272,
      "grad_norm": 0.2847195565700531,
      "learning_rate": 8.537787127131292e-05,
      "loss": 0.7125,
      "step": 3691
    },
    {
      "epoch": 0.5641593765519349,
      "grad_norm": 0.27091795206069946,
      "learning_rate": 8.532849548883179e-05,
      "loss": 0.789,
      "step": 3692
    },
    {
      "epoch": 0.5643121824502426,
      "grad_norm": 0.3841065764427185,
      "learning_rate": 8.527912336163283e-05,
      "loss": 0.7313,
      "step": 3693
    },
    {
      "epoch": 0.5644649883485503,
      "grad_norm": 0.3322576582431793,
      "learning_rate": 8.522975490201677e-05,
      "loss": 0.6777,
      "step": 3694
    },
    {
      "epoch": 0.5646177942468579,
      "grad_norm": 0.26073184609413147,
      "learning_rate": 8.518039012228324e-05,
      "loss": 0.7309,
      "step": 3695
    },
    {
      "epoch": 0.5647706001451656,
      "grad_norm": 0.28975701332092285,
      "learning_rate": 8.513102903473113e-05,
      "loss": 0.6413,
      "step": 3696
    },
    {
      "epoch": 0.5649234060434732,
      "grad_norm": 0.25856003165245056,
      "learning_rate": 8.508167165165834e-05,
      "loss": 0.5668,
      "step": 3697
    },
    {
      "epoch": 0.5650762119417809,
      "grad_norm": 0.3022193908691406,
      "learning_rate": 8.50323179853618e-05,
      "loss": 0.663,
      "step": 3698
    },
    {
      "epoch": 0.5652290178400886,
      "grad_norm": 0.2755308449268341,
      "learning_rate": 8.498296804813759e-05,
      "loss": 0.6749,
      "step": 3699
    },
    {
      "epoch": 0.5653818237383963,
      "grad_norm": 0.30388593673706055,
      "learning_rate": 8.493362185228086e-05,
      "loss": 0.6675,
      "step": 3700
    },
    {
      "epoch": 0.565534629636704,
      "grad_norm": 0.3053312599658966,
      "learning_rate": 8.488427941008578e-05,
      "loss": 0.8176,
      "step": 3701
    },
    {
      "epoch": 0.5656874355350117,
      "grad_norm": 0.36438342928886414,
      "learning_rate": 8.483494073384557e-05,
      "loss": 0.6879,
      "step": 3702
    },
    {
      "epoch": 0.5658402414333193,
      "grad_norm": 0.30132386088371277,
      "learning_rate": 8.478560583585258e-05,
      "loss": 0.7079,
      "step": 3703
    },
    {
      "epoch": 0.565993047331627,
      "grad_norm": 0.2842436134815216,
      "learning_rate": 8.47362747283982e-05,
      "loss": 0.5923,
      "step": 3704
    },
    {
      "epoch": 0.5661458532299347,
      "grad_norm": 0.33494096994400024,
      "learning_rate": 8.468694742377284e-05,
      "loss": 0.6465,
      "step": 3705
    },
    {
      "epoch": 0.5662986591282424,
      "grad_norm": 0.4333784580230713,
      "learning_rate": 8.463762393426596e-05,
      "loss": 0.8571,
      "step": 3706
    },
    {
      "epoch": 0.56645146502655,
      "grad_norm": 0.29764246940612793,
      "learning_rate": 8.458830427216615e-05,
      "loss": 0.6411,
      "step": 3707
    },
    {
      "epoch": 0.5666042709248577,
      "grad_norm": 0.2814170718193054,
      "learning_rate": 8.453898844976098e-05,
      "loss": 0.7452,
      "step": 3708
    },
    {
      "epoch": 0.5667570768231653,
      "grad_norm": 0.33996063470840454,
      "learning_rate": 8.448967647933702e-05,
      "loss": 0.6065,
      "step": 3709
    },
    {
      "epoch": 0.566909882721473,
      "grad_norm": 0.3823285698890686,
      "learning_rate": 8.444036837317995e-05,
      "loss": 0.7324,
      "step": 3710
    },
    {
      "epoch": 0.5670626886197807,
      "grad_norm": 0.37926560640335083,
      "learning_rate": 8.439106414357455e-05,
      "loss": 0.6082,
      "step": 3711
    },
    {
      "epoch": 0.5672154945180884,
      "grad_norm": 0.3851792812347412,
      "learning_rate": 8.434176380280445e-05,
      "loss": 0.7381,
      "step": 3712
    },
    {
      "epoch": 0.5673683004163961,
      "grad_norm": 0.3026863932609558,
      "learning_rate": 8.429246736315248e-05,
      "loss": 0.6545,
      "step": 3713
    },
    {
      "epoch": 0.5675211063147038,
      "grad_norm": 0.3653876781463623,
      "learning_rate": 8.424317483690037e-05,
      "loss": 0.8296,
      "step": 3714
    },
    {
      "epoch": 0.5676739122130114,
      "grad_norm": 0.30067160725593567,
      "learning_rate": 8.419388623632905e-05,
      "loss": 0.6965,
      "step": 3715
    },
    {
      "epoch": 0.5678267181113191,
      "grad_norm": 0.3310571610927582,
      "learning_rate": 8.414460157371825e-05,
      "loss": 0.6493,
      "step": 3716
    },
    {
      "epoch": 0.5679795240096268,
      "grad_norm": 0.3468477725982666,
      "learning_rate": 8.409532086134688e-05,
      "loss": 0.6634,
      "step": 3717
    },
    {
      "epoch": 0.5681323299079345,
      "grad_norm": 0.29771387577056885,
      "learning_rate": 8.40460441114928e-05,
      "loss": 0.5265,
      "step": 3718
    },
    {
      "epoch": 0.5682851358062421,
      "grad_norm": 0.4328177571296692,
      "learning_rate": 8.399677133643294e-05,
      "loss": 0.7287,
      "step": 3719
    },
    {
      "epoch": 0.5684379417045498,
      "grad_norm": 0.3208015263080597,
      "learning_rate": 8.394750254844314e-05,
      "loss": 0.7538,
      "step": 3720
    },
    {
      "epoch": 0.5685907476028574,
      "grad_norm": 0.27956128120422363,
      "learning_rate": 8.389823775979833e-05,
      "loss": 0.608,
      "step": 3721
    },
    {
      "epoch": 0.5687435535011651,
      "grad_norm": 0.2791298031806946,
      "learning_rate": 8.384897698277246e-05,
      "loss": 0.6882,
      "step": 3722
    },
    {
      "epoch": 0.5688963593994728,
      "grad_norm": 0.28099286556243896,
      "learning_rate": 8.379972022963835e-05,
      "loss": 0.7346,
      "step": 3723
    },
    {
      "epoch": 0.5690491652977805,
      "grad_norm": 0.27625101804733276,
      "learning_rate": 8.375046751266797e-05,
      "loss": 0.6558,
      "step": 3724
    },
    {
      "epoch": 0.5692019711960882,
      "grad_norm": 0.26803532242774963,
      "learning_rate": 8.37012188441322e-05,
      "loss": 0.6644,
      "step": 3725
    },
    {
      "epoch": 0.5693547770943959,
      "grad_norm": 0.28423699736595154,
      "learning_rate": 8.365197423630097e-05,
      "loss": 0.7188,
      "step": 3726
    },
    {
      "epoch": 0.5695075829927035,
      "grad_norm": 0.3047555387020111,
      "learning_rate": 8.36027337014431e-05,
      "loss": 0.7798,
      "step": 3727
    },
    {
      "epoch": 0.5696603888910112,
      "grad_norm": 0.27898523211479187,
      "learning_rate": 8.355349725182651e-05,
      "loss": 0.6601,
      "step": 3728
    },
    {
      "epoch": 0.5698131947893189,
      "grad_norm": 0.2902171015739441,
      "learning_rate": 8.350426489971802e-05,
      "loss": 0.7398,
      "step": 3729
    },
    {
      "epoch": 0.5699660006876266,
      "grad_norm": 0.3205011487007141,
      "learning_rate": 8.345503665738343e-05,
      "loss": 0.8977,
      "step": 3730
    },
    {
      "epoch": 0.5701188065859342,
      "grad_norm": 0.2823057770729065,
      "learning_rate": 8.340581253708759e-05,
      "loss": 0.6605,
      "step": 3731
    },
    {
      "epoch": 0.5702716124842419,
      "grad_norm": 0.2640000283718109,
      "learning_rate": 8.335659255109424e-05,
      "loss": 0.704,
      "step": 3732
    },
    {
      "epoch": 0.5704244183825495,
      "grad_norm": 0.32330089807510376,
      "learning_rate": 8.330737671166622e-05,
      "loss": 0.6702,
      "step": 3733
    },
    {
      "epoch": 0.5705772242808572,
      "grad_norm": 0.29183852672576904,
      "learning_rate": 8.32581650310651e-05,
      "loss": 0.7842,
      "step": 3734
    },
    {
      "epoch": 0.5707300301791649,
      "grad_norm": 0.28813636302948,
      "learning_rate": 8.320895752155165e-05,
      "loss": 0.6337,
      "step": 3735
    },
    {
      "epoch": 0.5708828360774726,
      "grad_norm": 0.265868604183197,
      "learning_rate": 8.315975419538551e-05,
      "loss": 0.7946,
      "step": 3736
    },
    {
      "epoch": 0.5710356419757803,
      "grad_norm": 0.2624013423919678,
      "learning_rate": 8.311055506482522e-05,
      "loss": 0.5877,
      "step": 3737
    },
    {
      "epoch": 0.571188447874088,
      "grad_norm": 0.5272555947303772,
      "learning_rate": 8.306136014212836e-05,
      "loss": 0.7125,
      "step": 3738
    },
    {
      "epoch": 0.5713412537723956,
      "grad_norm": 0.30128014087677,
      "learning_rate": 8.301216943955143e-05,
      "loss": 0.7108,
      "step": 3739
    },
    {
      "epoch": 0.5714940596707033,
      "grad_norm": 0.26696425676345825,
      "learning_rate": 8.296298296934993e-05,
      "loss": 0.6307,
      "step": 3740
    },
    {
      "epoch": 0.571646865569011,
      "grad_norm": 0.3078870177268982,
      "learning_rate": 8.291380074377815e-05,
      "loss": 0.6569,
      "step": 3741
    },
    {
      "epoch": 0.5717996714673187,
      "grad_norm": 0.3038552403450012,
      "learning_rate": 8.286462277508951e-05,
      "loss": 0.6657,
      "step": 3742
    },
    {
      "epoch": 0.5719524773656263,
      "grad_norm": 0.3003843426704407,
      "learning_rate": 8.281544907553629e-05,
      "loss": 0.7251,
      "step": 3743
    },
    {
      "epoch": 0.572105283263934,
      "grad_norm": 0.31399810314178467,
      "learning_rate": 8.276627965736968e-05,
      "loss": 0.8504,
      "step": 3744
    },
    {
      "epoch": 0.5722580891622416,
      "grad_norm": 0.3033444881439209,
      "learning_rate": 8.271711453283978e-05,
      "loss": 0.8417,
      "step": 3745
    },
    {
      "epoch": 0.5724108950605493,
      "grad_norm": 0.325181782245636,
      "learning_rate": 8.266795371419574e-05,
      "loss": 0.5664,
      "step": 3746
    },
    {
      "epoch": 0.572563700958857,
      "grad_norm": 0.31936803460121155,
      "learning_rate": 8.261879721368558e-05,
      "loss": 0.6776,
      "step": 3747
    },
    {
      "epoch": 0.5727165068571647,
      "grad_norm": 0.34658119082450867,
      "learning_rate": 8.256964504355617e-05,
      "loss": 0.8581,
      "step": 3748
    },
    {
      "epoch": 0.5728693127554724,
      "grad_norm": 0.288990318775177,
      "learning_rate": 8.252049721605335e-05,
      "loss": 0.6763,
      "step": 3749
    },
    {
      "epoch": 0.57302211865378,
      "grad_norm": 0.3504142165184021,
      "learning_rate": 8.247135374342196e-05,
      "loss": 0.7964,
      "step": 3750
    },
    {
      "epoch": 0.5731749245520877,
      "grad_norm": 0.3110313415527344,
      "learning_rate": 8.242221463790565e-05,
      "loss": 0.7416,
      "step": 3751
    },
    {
      "epoch": 0.5733277304503954,
      "grad_norm": 0.28872978687286377,
      "learning_rate": 8.237307991174697e-05,
      "loss": 0.6734,
      "step": 3752
    },
    {
      "epoch": 0.5734805363487031,
      "grad_norm": 0.24102354049682617,
      "learning_rate": 8.232394957718749e-05,
      "loss": 0.8467,
      "step": 3753
    },
    {
      "epoch": 0.5736333422470107,
      "grad_norm": 0.28960511088371277,
      "learning_rate": 8.227482364646762e-05,
      "loss": 0.7903,
      "step": 3754
    },
    {
      "epoch": 0.5737861481453184,
      "grad_norm": 0.28464069962501526,
      "learning_rate": 8.222570213182662e-05,
      "loss": 0.8631,
      "step": 3755
    },
    {
      "epoch": 0.573938954043626,
      "grad_norm": 0.3860986828804016,
      "learning_rate": 8.217658504550272e-05,
      "loss": 0.8208,
      "step": 3756
    },
    {
      "epoch": 0.5740917599419337,
      "grad_norm": 0.29846811294555664,
      "learning_rate": 8.212747239973306e-05,
      "loss": 0.7068,
      "step": 3757
    },
    {
      "epoch": 0.5742445658402414,
      "grad_norm": 0.37197667360305786,
      "learning_rate": 8.207836420675365e-05,
      "loss": 0.7763,
      "step": 3758
    },
    {
      "epoch": 0.5743973717385491,
      "grad_norm": 0.3524726927280426,
      "learning_rate": 8.202926047879933e-05,
      "loss": 0.6656,
      "step": 3759
    },
    {
      "epoch": 0.5745501776368568,
      "grad_norm": 0.30434712767601013,
      "learning_rate": 8.198016122810388e-05,
      "loss": 0.6682,
      "step": 3760
    },
    {
      "epoch": 0.5747029835351645,
      "grad_norm": 0.3250044584274292,
      "learning_rate": 8.193106646690006e-05,
      "loss": 0.5475,
      "step": 3761
    },
    {
      "epoch": 0.5748557894334722,
      "grad_norm": 0.2767269015312195,
      "learning_rate": 8.188197620741933e-05,
      "loss": 0.8508,
      "step": 3762
    },
    {
      "epoch": 0.5750085953317798,
      "grad_norm": 0.3154396414756775,
      "learning_rate": 8.183289046189213e-05,
      "loss": 0.782,
      "step": 3763
    },
    {
      "epoch": 0.5751614012300875,
      "grad_norm": 0.3278322219848633,
      "learning_rate": 8.178380924254775e-05,
      "loss": 0.5591,
      "step": 3764
    },
    {
      "epoch": 0.5753142071283952,
      "grad_norm": 0.29018348455429077,
      "learning_rate": 8.173473256161445e-05,
      "loss": 0.7719,
      "step": 3765
    },
    {
      "epoch": 0.5754670130267028,
      "grad_norm": 0.307338148355484,
      "learning_rate": 8.168566043131917e-05,
      "loss": 0.8133,
      "step": 3766
    },
    {
      "epoch": 0.5756198189250105,
      "grad_norm": 0.35957232117652893,
      "learning_rate": 8.163659286388784e-05,
      "loss": 0.7925,
      "step": 3767
    },
    {
      "epoch": 0.5757726248233181,
      "grad_norm": 0.29846546053886414,
      "learning_rate": 8.158752987154533e-05,
      "loss": 0.6603,
      "step": 3768
    },
    {
      "epoch": 0.5759254307216258,
      "grad_norm": 0.3277988135814667,
      "learning_rate": 8.153847146651511e-05,
      "loss": 0.7112,
      "step": 3769
    },
    {
      "epoch": 0.5760782366199335,
      "grad_norm": 0.3009068965911865,
      "learning_rate": 8.148941766101979e-05,
      "loss": 0.7852,
      "step": 3770
    },
    {
      "epoch": 0.5762310425182412,
      "grad_norm": 0.3635782301425934,
      "learning_rate": 8.144036846728063e-05,
      "loss": 0.6492,
      "step": 3771
    },
    {
      "epoch": 0.5763838484165489,
      "grad_norm": 0.3081236779689789,
      "learning_rate": 8.139132389751793e-05,
      "loss": 0.8141,
      "step": 3772
    },
    {
      "epoch": 0.5765366543148566,
      "grad_norm": 0.2913459837436676,
      "learning_rate": 8.134228396395067e-05,
      "loss": 0.6704,
      "step": 3773
    },
    {
      "epoch": 0.5766894602131643,
      "grad_norm": 0.29093053936958313,
      "learning_rate": 8.129324867879673e-05,
      "loss": 0.7357,
      "step": 3774
    },
    {
      "epoch": 0.5768422661114719,
      "grad_norm": 0.39256051182746887,
      "learning_rate": 8.124421805427286e-05,
      "loss": 0.7393,
      "step": 3775
    },
    {
      "epoch": 0.5769950720097796,
      "grad_norm": 0.54695063829422,
      "learning_rate": 8.11951921025946e-05,
      "loss": 0.6199,
      "step": 3776
    },
    {
      "epoch": 0.5771478779080873,
      "grad_norm": 0.27664807438850403,
      "learning_rate": 8.114617083597639e-05,
      "loss": 0.523,
      "step": 3777
    },
    {
      "epoch": 0.5773006838063949,
      "grad_norm": 0.27972468733787537,
      "learning_rate": 8.109715426663145e-05,
      "loss": 0.6728,
      "step": 3778
    },
    {
      "epoch": 0.5774534897047026,
      "grad_norm": 0.3046027719974518,
      "learning_rate": 8.104814240677188e-05,
      "loss": 0.5586,
      "step": 3779
    },
    {
      "epoch": 0.5776062956030102,
      "grad_norm": 0.3104955852031708,
      "learning_rate": 8.099913526860849e-05,
      "loss": 0.7716,
      "step": 3780
    },
    {
      "epoch": 0.5777591015013179,
      "grad_norm": 0.28801658749580383,
      "learning_rate": 8.095013286435107e-05,
      "loss": 0.7354,
      "step": 3781
    },
    {
      "epoch": 0.5779119073996256,
      "grad_norm": 0.2638983428478241,
      "learning_rate": 8.090113520620816e-05,
      "loss": 0.6428,
      "step": 3782
    },
    {
      "epoch": 0.5780647132979333,
      "grad_norm": 0.4122447669506073,
      "learning_rate": 8.085214230638707e-05,
      "loss": 0.6169,
      "step": 3783
    },
    {
      "epoch": 0.578217519196241,
      "grad_norm": 0.36378583312034607,
      "learning_rate": 8.080315417709398e-05,
      "loss": 0.5359,
      "step": 3784
    },
    {
      "epoch": 0.5783703250945487,
      "grad_norm": 0.2803877592086792,
      "learning_rate": 8.075417083053389e-05,
      "loss": 0.8017,
      "step": 3785
    },
    {
      "epoch": 0.5785231309928563,
      "grad_norm": 0.3822747468948364,
      "learning_rate": 8.070519227891063e-05,
      "loss": 1.0106,
      "step": 3786
    },
    {
      "epoch": 0.578675936891164,
      "grad_norm": 0.31396767497062683,
      "learning_rate": 8.065621853442669e-05,
      "loss": 0.5438,
      "step": 3787
    },
    {
      "epoch": 0.5788287427894717,
      "grad_norm": 0.3391607105731964,
      "learning_rate": 8.060724960928354e-05,
      "loss": 1.0131,
      "step": 3788
    },
    {
      "epoch": 0.5789815486877794,
      "grad_norm": 0.3262687027454376,
      "learning_rate": 8.055828551568138e-05,
      "loss": 0.8778,
      "step": 3789
    },
    {
      "epoch": 0.579134354586087,
      "grad_norm": 0.2590049207210541,
      "learning_rate": 8.050932626581918e-05,
      "loss": 0.6414,
      "step": 3790
    },
    {
      "epoch": 0.5792871604843947,
      "grad_norm": 0.31911787390708923,
      "learning_rate": 8.046037187189471e-05,
      "loss": 0.7003,
      "step": 3791
    },
    {
      "epoch": 0.5794399663827023,
      "grad_norm": 0.281447172164917,
      "learning_rate": 8.04114223461046e-05,
      "loss": 0.711,
      "step": 3792
    },
    {
      "epoch": 0.57959277228101,
      "grad_norm": 0.3643721640110016,
      "learning_rate": 8.036247770064418e-05,
      "loss": 0.5737,
      "step": 3793
    },
    {
      "epoch": 0.5797455781793177,
      "grad_norm": 0.3160751760005951,
      "learning_rate": 8.031353794770757e-05,
      "loss": 0.6465,
      "step": 3794
    },
    {
      "epoch": 0.5798983840776254,
      "grad_norm": 0.28902941942214966,
      "learning_rate": 8.026460309948774e-05,
      "loss": 0.5288,
      "step": 3795
    },
    {
      "epoch": 0.5800511899759331,
      "grad_norm": 0.28848549723625183,
      "learning_rate": 8.021567316817637e-05,
      "loss": 0.7132,
      "step": 3796
    },
    {
      "epoch": 0.5802039958742408,
      "grad_norm": 0.35009557008743286,
      "learning_rate": 8.0166748165964e-05,
      "loss": 0.6089,
      "step": 3797
    },
    {
      "epoch": 0.5803568017725484,
      "grad_norm": 0.31023460626602173,
      "learning_rate": 8.011782810503979e-05,
      "loss": 0.6298,
      "step": 3798
    },
    {
      "epoch": 0.5805096076708561,
      "grad_norm": 0.32872024178504944,
      "learning_rate": 8.006891299759183e-05,
      "loss": 0.6994,
      "step": 3799
    },
    {
      "epoch": 0.5806624135691638,
      "grad_norm": 0.25840136408805847,
      "learning_rate": 8.002000285580692e-05,
      "loss": 0.681,
      "step": 3800
    },
    {
      "epoch": 0.5808152194674715,
      "grad_norm": 0.3060307502746582,
      "learning_rate": 7.997109769187054e-05,
      "loss": 0.7211,
      "step": 3801
    },
    {
      "epoch": 0.5809680253657791,
      "grad_norm": 0.43128442764282227,
      "learning_rate": 7.992219751796704e-05,
      "loss": 0.9828,
      "step": 3802
    },
    {
      "epoch": 0.5811208312640868,
      "grad_norm": 0.3138619661331177,
      "learning_rate": 7.987330234627951e-05,
      "loss": 0.7311,
      "step": 3803
    },
    {
      "epoch": 0.5812736371623944,
      "grad_norm": 0.39342501759529114,
      "learning_rate": 7.982441218898977e-05,
      "loss": 0.7003,
      "step": 3804
    },
    {
      "epoch": 0.5814264430607021,
      "grad_norm": 1.905755639076233,
      "learning_rate": 7.977552705827836e-05,
      "loss": 0.8023,
      "step": 3805
    },
    {
      "epoch": 0.5815792489590098,
      "grad_norm": 0.26288965344429016,
      "learning_rate": 7.972664696632458e-05,
      "loss": 0.621,
      "step": 3806
    },
    {
      "epoch": 0.5817320548573175,
      "grad_norm": 0.2712186276912689,
      "learning_rate": 7.967777192530658e-05,
      "loss": 0.572,
      "step": 3807
    },
    {
      "epoch": 0.5818848607556252,
      "grad_norm": 0.28509825468063354,
      "learning_rate": 7.962890194740109e-05,
      "loss": 0.6408,
      "step": 3808
    },
    {
      "epoch": 0.5820376666539329,
      "grad_norm": 0.2955816984176636,
      "learning_rate": 7.958003704478368e-05,
      "loss": 0.5941,
      "step": 3809
    },
    {
      "epoch": 0.5821904725522405,
      "grad_norm": 0.3763854205608368,
      "learning_rate": 7.953117722962862e-05,
      "loss": 0.6688,
      "step": 3810
    },
    {
      "epoch": 0.5823432784505482,
      "grad_norm": 0.4830414652824402,
      "learning_rate": 7.948232251410896e-05,
      "loss": 0.6907,
      "step": 3811
    },
    {
      "epoch": 0.5824960843488559,
      "grad_norm": 0.4125008285045624,
      "learning_rate": 7.94334729103964e-05,
      "loss": 0.7622,
      "step": 3812
    },
    {
      "epoch": 0.5826488902471635,
      "grad_norm": 0.45555487275123596,
      "learning_rate": 7.938462843066142e-05,
      "loss": 0.903,
      "step": 3813
    },
    {
      "epoch": 0.5828016961454712,
      "grad_norm": 0.3383921682834625,
      "learning_rate": 7.933578908707326e-05,
      "loss": 0.77,
      "step": 3814
    },
    {
      "epoch": 0.5829545020437789,
      "grad_norm": 0.28087118268013,
      "learning_rate": 7.928695489179972e-05,
      "loss": 0.5502,
      "step": 3815
    },
    {
      "epoch": 0.5831073079420865,
      "grad_norm": 0.32582974433898926,
      "learning_rate": 7.923812585700753e-05,
      "loss": 0.6114,
      "step": 3816
    },
    {
      "epoch": 0.5832601138403942,
      "grad_norm": 0.33687683939933777,
      "learning_rate": 7.918930199486197e-05,
      "loss": 0.7654,
      "step": 3817
    },
    {
      "epoch": 0.5834129197387019,
      "grad_norm": 0.35183513164520264,
      "learning_rate": 7.914048331752719e-05,
      "loss": 0.6804,
      "step": 3818
    },
    {
      "epoch": 0.5835657256370096,
      "grad_norm": 0.35134196281433105,
      "learning_rate": 7.909166983716586e-05,
      "loss": 0.8217,
      "step": 3819
    },
    {
      "epoch": 0.5837185315353173,
      "grad_norm": 0.24550901353359222,
      "learning_rate": 7.904286156593948e-05,
      "loss": 0.6473,
      "step": 3820
    },
    {
      "epoch": 0.583871337433625,
      "grad_norm": 0.28883498907089233,
      "learning_rate": 7.899405851600822e-05,
      "loss": 0.6587,
      "step": 3821
    },
    {
      "epoch": 0.5840241433319326,
      "grad_norm": 0.29037415981292725,
      "learning_rate": 7.894526069953094e-05,
      "loss": 0.6768,
      "step": 3822
    },
    {
      "epoch": 0.5841769492302403,
      "grad_norm": 0.2939176857471466,
      "learning_rate": 7.889646812866524e-05,
      "loss": 0.5691,
      "step": 3823
    },
    {
      "epoch": 0.584329755128548,
      "grad_norm": 0.29959577322006226,
      "learning_rate": 7.884768081556735e-05,
      "loss": 0.7104,
      "step": 3824
    },
    {
      "epoch": 0.5844825610268556,
      "grad_norm": 0.2616795003414154,
      "learning_rate": 7.879889877239224e-05,
      "loss": 0.6442,
      "step": 3825
    },
    {
      "epoch": 0.5846353669251633,
      "grad_norm": 0.29472115635871887,
      "learning_rate": 7.87501220112935e-05,
      "loss": 0.3929,
      "step": 3826
    },
    {
      "epoch": 0.584788172823471,
      "grad_norm": 0.23962584137916565,
      "learning_rate": 7.87013505444235e-05,
      "loss": 0.6744,
      "step": 3827
    },
    {
      "epoch": 0.5849409787217786,
      "grad_norm": 0.26623886823654175,
      "learning_rate": 7.865258438393322e-05,
      "loss": 0.6706,
      "step": 3828
    },
    {
      "epoch": 0.5850937846200863,
      "grad_norm": 0.3564209043979645,
      "learning_rate": 7.860382354197239e-05,
      "loss": 0.8078,
      "step": 3829
    },
    {
      "epoch": 0.585246590518394,
      "grad_norm": 0.2812064588069916,
      "learning_rate": 7.855506803068926e-05,
      "loss": 0.6951,
      "step": 3830
    },
    {
      "epoch": 0.5853993964167017,
      "grad_norm": 0.29761627316474915,
      "learning_rate": 7.850631786223093e-05,
      "loss": 0.5924,
      "step": 3831
    },
    {
      "epoch": 0.5855522023150094,
      "grad_norm": 0.2774466276168823,
      "learning_rate": 7.845757304874313e-05,
      "loss": 0.5304,
      "step": 3832
    },
    {
      "epoch": 0.5857050082133171,
      "grad_norm": 0.31062594056129456,
      "learning_rate": 7.84088336023701e-05,
      "loss": 0.6803,
      "step": 3833
    },
    {
      "epoch": 0.5858578141116247,
      "grad_norm": 0.30979427695274353,
      "learning_rate": 7.836009953525499e-05,
      "loss": 0.7159,
      "step": 3834
    },
    {
      "epoch": 0.5860106200099324,
      "grad_norm": 0.30622562766075134,
      "learning_rate": 7.83113708595394e-05,
      "loss": 0.6932,
      "step": 3835
    },
    {
      "epoch": 0.5861634259082401,
      "grad_norm": 0.28833743929862976,
      "learning_rate": 7.826264758736374e-05,
      "loss": 0.8625,
      "step": 3836
    },
    {
      "epoch": 0.5863162318065477,
      "grad_norm": 0.30535200238227844,
      "learning_rate": 7.821392973086691e-05,
      "loss": 0.9028,
      "step": 3837
    },
    {
      "epoch": 0.5864690377048554,
      "grad_norm": 0.3745479881763458,
      "learning_rate": 7.816521730218663e-05,
      "loss": 0.8378,
      "step": 3838
    },
    {
      "epoch": 0.586621843603163,
      "grad_norm": 0.35396698117256165,
      "learning_rate": 7.811651031345921e-05,
      "loss": 0.7586,
      "step": 3839
    },
    {
      "epoch": 0.5867746495014707,
      "grad_norm": 0.29585057497024536,
      "learning_rate": 7.806780877681952e-05,
      "loss": 0.6075,
      "step": 3840
    },
    {
      "epoch": 0.5869274553997784,
      "grad_norm": 0.30357035994529724,
      "learning_rate": 7.801911270440114e-05,
      "loss": 0.6288,
      "step": 3841
    },
    {
      "epoch": 0.5870802612980861,
      "grad_norm": 0.28360724449157715,
      "learning_rate": 7.797042210833635e-05,
      "loss": 0.6806,
      "step": 3842
    },
    {
      "epoch": 0.5872330671963938,
      "grad_norm": 0.30030885338783264,
      "learning_rate": 7.792173700075598e-05,
      "loss": 0.6808,
      "step": 3843
    },
    {
      "epoch": 0.5873858730947015,
      "grad_norm": 0.31683292984962463,
      "learning_rate": 7.78730573937895e-05,
      "loss": 0.8747,
      "step": 3844
    },
    {
      "epoch": 0.5875386789930092,
      "grad_norm": 0.34894859790802,
      "learning_rate": 7.7824383299565e-05,
      "loss": 0.9388,
      "step": 3845
    },
    {
      "epoch": 0.5876914848913168,
      "grad_norm": 1.2283755540847778,
      "learning_rate": 7.777571473020931e-05,
      "loss": 0.5487,
      "step": 3846
    },
    {
      "epoch": 0.5878442907896245,
      "grad_norm": 0.3542500138282776,
      "learning_rate": 7.772705169784769e-05,
      "loss": 0.7024,
      "step": 3847
    },
    {
      "epoch": 0.5879970966879322,
      "grad_norm": 0.3123841881752014,
      "learning_rate": 7.767839421460417e-05,
      "loss": 0.6317,
      "step": 3848
    },
    {
      "epoch": 0.5881499025862398,
      "grad_norm": 0.3913807272911072,
      "learning_rate": 7.762974229260138e-05,
      "loss": 0.7937,
      "step": 3849
    },
    {
      "epoch": 0.5883027084845475,
      "grad_norm": 0.24539242684841156,
      "learning_rate": 7.758109594396054e-05,
      "loss": 0.6266,
      "step": 3850
    },
    {
      "epoch": 0.5884555143828551,
      "grad_norm": 0.2929461598396301,
      "learning_rate": 7.753245518080143e-05,
      "loss": 0.6406,
      "step": 3851
    },
    {
      "epoch": 0.5886083202811628,
      "grad_norm": 0.5563262701034546,
      "learning_rate": 7.748382001524249e-05,
      "loss": 0.9369,
      "step": 3852
    },
    {
      "epoch": 0.5887611261794705,
      "grad_norm": 0.30135178565979004,
      "learning_rate": 7.743519045940083e-05,
      "loss": 0.4966,
      "step": 3853
    },
    {
      "epoch": 0.5889139320777782,
      "grad_norm": 0.2895030975341797,
      "learning_rate": 7.738656652539204e-05,
      "loss": 0.6125,
      "step": 3854
    },
    {
      "epoch": 0.5890667379760859,
      "grad_norm": 0.33835187554359436,
      "learning_rate": 7.733794822533038e-05,
      "loss": 0.5807,
      "step": 3855
    },
    {
      "epoch": 0.5892195438743936,
      "grad_norm": 0.2818509042263031,
      "learning_rate": 7.728933557132864e-05,
      "loss": 0.6172,
      "step": 3856
    },
    {
      "epoch": 0.5893723497727013,
      "grad_norm": 0.30797627568244934,
      "learning_rate": 7.724072857549838e-05,
      "loss": 0.7899,
      "step": 3857
    },
    {
      "epoch": 0.5895251556710089,
      "grad_norm": 0.30569693446159363,
      "learning_rate": 7.719212724994951e-05,
      "loss": 0.6897,
      "step": 3858
    },
    {
      "epoch": 0.5896779615693166,
      "grad_norm": 0.255311518907547,
      "learning_rate": 7.714353160679066e-05,
      "loss": 0.7461,
      "step": 3859
    },
    {
      "epoch": 0.5898307674676242,
      "grad_norm": 0.2892606556415558,
      "learning_rate": 7.709494165812907e-05,
      "loss": 0.5905,
      "step": 3860
    },
    {
      "epoch": 0.5899835733659319,
      "grad_norm": 0.24588391184806824,
      "learning_rate": 7.704635741607052e-05,
      "loss": 0.5826,
      "step": 3861
    },
    {
      "epoch": 0.5901363792642396,
      "grad_norm": 0.25809404253959656,
      "learning_rate": 7.69977788927193e-05,
      "loss": 0.6255,
      "step": 3862
    },
    {
      "epoch": 0.5902891851625472,
      "grad_norm": 0.3097207546234131,
      "learning_rate": 7.69492061001784e-05,
      "loss": 0.6712,
      "step": 3863
    },
    {
      "epoch": 0.5904419910608549,
      "grad_norm": 0.31905776262283325,
      "learning_rate": 7.690063905054933e-05,
      "loss": 0.9132,
      "step": 3864
    },
    {
      "epoch": 0.5905947969591626,
      "grad_norm": 0.3189091384410858,
      "learning_rate": 7.685207775593211e-05,
      "loss": 0.727,
      "step": 3865
    },
    {
      "epoch": 0.5907476028574703,
      "grad_norm": 0.3288535177707672,
      "learning_rate": 7.680352222842541e-05,
      "loss": 0.5702,
      "step": 3866
    },
    {
      "epoch": 0.590900408755778,
      "grad_norm": 0.2501838207244873,
      "learning_rate": 7.67549724801264e-05,
      "loss": 0.5476,
      "step": 3867
    },
    {
      "epoch": 0.5910532146540857,
      "grad_norm": 0.25239741802215576,
      "learning_rate": 7.670642852313094e-05,
      "loss": 0.6705,
      "step": 3868
    },
    {
      "epoch": 0.5912060205523934,
      "grad_norm": 0.3150840401649475,
      "learning_rate": 7.665789036953324e-05,
      "loss": 0.6463,
      "step": 3869
    },
    {
      "epoch": 0.591358826450701,
      "grad_norm": 0.29708942770957947,
      "learning_rate": 7.660935803142621e-05,
      "loss": 0.9097,
      "step": 3870
    },
    {
      "epoch": 0.5915116323490087,
      "grad_norm": 0.3262752592563629,
      "learning_rate": 7.656083152090133e-05,
      "loss": 0.6029,
      "step": 3871
    },
    {
      "epoch": 0.5916644382473163,
      "grad_norm": 0.3073914647102356,
      "learning_rate": 7.651231085004845e-05,
      "loss": 0.7531,
      "step": 3872
    },
    {
      "epoch": 0.591817244145624,
      "grad_norm": 0.3142286241054535,
      "learning_rate": 7.646379603095619e-05,
      "loss": 0.8999,
      "step": 3873
    },
    {
      "epoch": 0.5919700500439317,
      "grad_norm": 0.30331626534461975,
      "learning_rate": 7.641528707571157e-05,
      "loss": 0.5738,
      "step": 3874
    },
    {
      "epoch": 0.5921228559422393,
      "grad_norm": 0.2707924246788025,
      "learning_rate": 7.636678399640026e-05,
      "loss": 0.6544,
      "step": 3875
    },
    {
      "epoch": 0.592275661840547,
      "grad_norm": 0.4686855375766754,
      "learning_rate": 7.631828680510626e-05,
      "loss": 0.5677,
      "step": 3876
    },
    {
      "epoch": 0.5924284677388547,
      "grad_norm": 0.2566758096218109,
      "learning_rate": 7.626979551391235e-05,
      "loss": 0.6577,
      "step": 3877
    },
    {
      "epoch": 0.5925812736371624,
      "grad_norm": 0.30719277262687683,
      "learning_rate": 7.622131013489971e-05,
      "loss": 0.6697,
      "step": 3878
    },
    {
      "epoch": 0.5927340795354701,
      "grad_norm": 0.349299818277359,
      "learning_rate": 7.617283068014797e-05,
      "loss": 0.8471,
      "step": 3879
    },
    {
      "epoch": 0.5928868854337778,
      "grad_norm": 0.31798675656318665,
      "learning_rate": 7.612435716173552e-05,
      "loss": 0.9319,
      "step": 3880
    },
    {
      "epoch": 0.5930396913320855,
      "grad_norm": 0.34878382086753845,
      "learning_rate": 7.607588959173904e-05,
      "loss": 0.7974,
      "step": 3881
    },
    {
      "epoch": 0.5931924972303931,
      "grad_norm": 0.3770315945148468,
      "learning_rate": 7.602742798223388e-05,
      "loss": 0.6537,
      "step": 3882
    },
    {
      "epoch": 0.5933453031287008,
      "grad_norm": 0.2860184907913208,
      "learning_rate": 7.597897234529374e-05,
      "loss": 0.6633,
      "step": 3883
    },
    {
      "epoch": 0.5934981090270084,
      "grad_norm": 0.27172017097473145,
      "learning_rate": 7.593052269299105e-05,
      "loss": 0.724,
      "step": 3884
    },
    {
      "epoch": 0.5936509149253161,
      "grad_norm": 0.3685009479522705,
      "learning_rate": 7.58820790373966e-05,
      "loss": 0.4991,
      "step": 3885
    },
    {
      "epoch": 0.5938037208236238,
      "grad_norm": 0.3282112181186676,
      "learning_rate": 7.583364139057966e-05,
      "loss": 0.6445,
      "step": 3886
    },
    {
      "epoch": 0.5939565267219314,
      "grad_norm": 0.28819167613983154,
      "learning_rate": 7.578520976460813e-05,
      "loss": 0.7517,
      "step": 3887
    },
    {
      "epoch": 0.5941093326202391,
      "grad_norm": 0.34896764159202576,
      "learning_rate": 7.573678417154831e-05,
      "loss": 0.7079,
      "step": 3888
    },
    {
      "epoch": 0.5942621385185468,
      "grad_norm": 0.28771957755088806,
      "learning_rate": 7.568836462346509e-05,
      "loss": 0.7737,
      "step": 3889
    },
    {
      "epoch": 0.5944149444168545,
      "grad_norm": 0.2801218330860138,
      "learning_rate": 7.563995113242171e-05,
      "loss": 0.6842,
      "step": 3890
    },
    {
      "epoch": 0.5945677503151622,
      "grad_norm": 0.30863484740257263,
      "learning_rate": 7.559154371048e-05,
      "loss": 0.7982,
      "step": 3891
    },
    {
      "epoch": 0.5947205562134699,
      "grad_norm": 0.30108898878097534,
      "learning_rate": 7.554314236970032e-05,
      "loss": 0.6757,
      "step": 3892
    },
    {
      "epoch": 0.5948733621117775,
      "grad_norm": 0.25410279631614685,
      "learning_rate": 7.549474712214141e-05,
      "loss": 0.7674,
      "step": 3893
    },
    {
      "epoch": 0.5950261680100852,
      "grad_norm": 0.27434930205345154,
      "learning_rate": 7.544635797986053e-05,
      "loss": 0.7742,
      "step": 3894
    },
    {
      "epoch": 0.5951789739083929,
      "grad_norm": 0.3767421245574951,
      "learning_rate": 7.539797495491347e-05,
      "loss": 0.6442,
      "step": 3895
    },
    {
      "epoch": 0.5953317798067005,
      "grad_norm": 2.1998231410980225,
      "learning_rate": 7.534959805935444e-05,
      "loss": 0.9633,
      "step": 3896
    },
    {
      "epoch": 0.5954845857050082,
      "grad_norm": 0.28787243366241455,
      "learning_rate": 7.530122730523613e-05,
      "loss": 0.6614,
      "step": 3897
    },
    {
      "epoch": 0.5956373916033159,
      "grad_norm": 0.2915334105491638,
      "learning_rate": 7.52528627046097e-05,
      "loss": 0.6121,
      "step": 3898
    },
    {
      "epoch": 0.5957901975016235,
      "grad_norm": 0.40670573711395264,
      "learning_rate": 7.520450426952479e-05,
      "loss": 0.5696,
      "step": 3899
    },
    {
      "epoch": 0.5959430033999312,
      "grad_norm": 0.25353896617889404,
      "learning_rate": 7.515615201202953e-05,
      "loss": 0.6941,
      "step": 3900
    },
    {
      "epoch": 0.5960958092982389,
      "grad_norm": 0.5514530539512634,
      "learning_rate": 7.510780594417043e-05,
      "loss": 0.6979,
      "step": 3901
    },
    {
      "epoch": 0.5962486151965466,
      "grad_norm": 0.28294044733047485,
      "learning_rate": 7.505946607799251e-05,
      "loss": 0.5892,
      "step": 3902
    },
    {
      "epoch": 0.5964014210948543,
      "grad_norm": 0.2962487041950226,
      "learning_rate": 7.50111324255393e-05,
      "loss": 0.6492,
      "step": 3903
    },
    {
      "epoch": 0.596554226993162,
      "grad_norm": 0.34200263023376465,
      "learning_rate": 7.496280499885267e-05,
      "loss": 0.5383,
      "step": 3904
    },
    {
      "epoch": 0.5967070328914696,
      "grad_norm": 0.5539739727973938,
      "learning_rate": 7.4914483809973e-05,
      "loss": 0.8527,
      "step": 3905
    },
    {
      "epoch": 0.5968598387897773,
      "grad_norm": 0.28289106488227844,
      "learning_rate": 7.48661688709391e-05,
      "loss": 0.6176,
      "step": 3906
    },
    {
      "epoch": 0.597012644688085,
      "grad_norm": 0.2907141447067261,
      "learning_rate": 7.481786019378827e-05,
      "loss": 1.0129,
      "step": 3907
    },
    {
      "epoch": 0.5971654505863926,
      "grad_norm": 0.3366968035697937,
      "learning_rate": 7.476955779055618e-05,
      "loss": 0.7976,
      "step": 3908
    },
    {
      "epoch": 0.5973182564847003,
      "grad_norm": 0.2729245722293854,
      "learning_rate": 7.472126167327695e-05,
      "loss": 0.7484,
      "step": 3909
    },
    {
      "epoch": 0.597471062383008,
      "grad_norm": 0.29188403487205505,
      "learning_rate": 7.467297185398324e-05,
      "loss": 0.6826,
      "step": 3910
    },
    {
      "epoch": 0.5976238682813156,
      "grad_norm": 0.3058101236820221,
      "learning_rate": 7.462468834470592e-05,
      "loss": 0.6161,
      "step": 3911
    },
    {
      "epoch": 0.5977766741796233,
      "grad_norm": 0.29125460982322693,
      "learning_rate": 7.457641115747453e-05,
      "loss": 0.6507,
      "step": 3912
    },
    {
      "epoch": 0.597929480077931,
      "grad_norm": 0.322819322347641,
      "learning_rate": 7.452814030431687e-05,
      "loss": 0.7652,
      "step": 3913
    },
    {
      "epoch": 0.5980822859762387,
      "grad_norm": 0.27412402629852295,
      "learning_rate": 7.447987579725928e-05,
      "loss": 0.8564,
      "step": 3914
    },
    {
      "epoch": 0.5982350918745464,
      "grad_norm": 0.2602679133415222,
      "learning_rate": 7.443161764832638e-05,
      "loss": 0.6872,
      "step": 3915
    },
    {
      "epoch": 0.5983878977728541,
      "grad_norm": 0.3177022337913513,
      "learning_rate": 7.438336586954131e-05,
      "loss": 0.6176,
      "step": 3916
    },
    {
      "epoch": 0.5985407036711617,
      "grad_norm": 0.2916209101676941,
      "learning_rate": 7.433512047292563e-05,
      "loss": 0.5914,
      "step": 3917
    },
    {
      "epoch": 0.5986935095694694,
      "grad_norm": 0.2940508723258972,
      "learning_rate": 7.428688147049921e-05,
      "loss": 0.6788,
      "step": 3918
    },
    {
      "epoch": 0.598846315467777,
      "grad_norm": 0.31359565258026123,
      "learning_rate": 7.423864887428044e-05,
      "loss": 0.8232,
      "step": 3919
    },
    {
      "epoch": 0.5989991213660847,
      "grad_norm": 0.33102843165397644,
      "learning_rate": 7.419042269628606e-05,
      "loss": 0.8431,
      "step": 3920
    },
    {
      "epoch": 0.5991519272643924,
      "grad_norm": 0.3415786027908325,
      "learning_rate": 7.414220294853125e-05,
      "loss": 0.8176,
      "step": 3921
    },
    {
      "epoch": 0.5993047331627,
      "grad_norm": 0.2847096025943756,
      "learning_rate": 7.409398964302947e-05,
      "loss": 0.6231,
      "step": 3922
    },
    {
      "epoch": 0.5994575390610077,
      "grad_norm": 0.3127872347831726,
      "learning_rate": 7.404578279179273e-05,
      "loss": 0.5949,
      "step": 3923
    },
    {
      "epoch": 0.5996103449593154,
      "grad_norm": 0.31126832962036133,
      "learning_rate": 7.399758240683134e-05,
      "loss": 0.6723,
      "step": 3924
    },
    {
      "epoch": 0.5997631508576231,
      "grad_norm": 0.26205122470855713,
      "learning_rate": 7.394938850015402e-05,
      "loss": 0.6486,
      "step": 3925
    },
    {
      "epoch": 0.5999159567559308,
      "grad_norm": 0.9165391325950623,
      "learning_rate": 7.390120108376785e-05,
      "loss": 0.788,
      "step": 3926
    },
    {
      "epoch": 0.6000687626542385,
      "grad_norm": 0.32874244451522827,
      "learning_rate": 7.385302016967839e-05,
      "loss": 0.6388,
      "step": 3927
    },
    {
      "epoch": 0.6002215685525462,
      "grad_norm": 0.7829940319061279,
      "learning_rate": 7.380484576988948e-05,
      "loss": 0.5911,
      "step": 3928
    },
    {
      "epoch": 0.6003743744508538,
      "grad_norm": 0.5211532711982727,
      "learning_rate": 7.375667789640331e-05,
      "loss": 0.8848,
      "step": 3929
    },
    {
      "epoch": 0.6005271803491615,
      "grad_norm": 0.3158925175666809,
      "learning_rate": 7.370851656122058e-05,
      "loss": 0.6837,
      "step": 3930
    },
    {
      "epoch": 0.6006799862474691,
      "grad_norm": 0.3350100815296173,
      "learning_rate": 7.366036177634027e-05,
      "loss": 0.7339,
      "step": 3931
    },
    {
      "epoch": 0.6008327921457768,
      "grad_norm": 0.28904014825820923,
      "learning_rate": 7.36122135537597e-05,
      "loss": 0.5846,
      "step": 3932
    },
    {
      "epoch": 0.6009855980440845,
      "grad_norm": 0.27264395356178284,
      "learning_rate": 7.356407190547459e-05,
      "loss": 0.7204,
      "step": 3933
    },
    {
      "epoch": 0.6011384039423922,
      "grad_norm": 0.34374403953552246,
      "learning_rate": 7.351593684347909e-05,
      "loss": 0.7039,
      "step": 3934
    },
    {
      "epoch": 0.6012912098406998,
      "grad_norm": 0.3035162091255188,
      "learning_rate": 7.346780837976563e-05,
      "loss": 1.0995,
      "step": 3935
    },
    {
      "epoch": 0.6014440157390075,
      "grad_norm": 0.3120017349720001,
      "learning_rate": 7.341968652632496e-05,
      "loss": 0.7253,
      "step": 3936
    },
    {
      "epoch": 0.6015968216373152,
      "grad_norm": 0.38665685057640076,
      "learning_rate": 7.337157129514627e-05,
      "loss": 0.8983,
      "step": 3937
    },
    {
      "epoch": 0.6017496275356229,
      "grad_norm": 0.34627819061279297,
      "learning_rate": 7.332346269821706e-05,
      "loss": 0.8088,
      "step": 3938
    },
    {
      "epoch": 0.6019024334339306,
      "grad_norm": 0.29683157801628113,
      "learning_rate": 7.327536074752324e-05,
      "loss": 0.7064,
      "step": 3939
    },
    {
      "epoch": 0.6020552393322383,
      "grad_norm": 0.5646716952323914,
      "learning_rate": 7.32272654550489e-05,
      "loss": 0.7454,
      "step": 3940
    },
    {
      "epoch": 0.6022080452305459,
      "grad_norm": 0.462716668844223,
      "learning_rate": 7.317917683277665e-05,
      "loss": 0.6755,
      "step": 3941
    },
    {
      "epoch": 0.6023608511288536,
      "grad_norm": 0.31523165106773376,
      "learning_rate": 7.313109489268738e-05,
      "loss": 0.779,
      "step": 3942
    },
    {
      "epoch": 0.6025136570271612,
      "grad_norm": 0.29431718587875366,
      "learning_rate": 7.308301964676026e-05,
      "loss": 0.6833,
      "step": 3943
    },
    {
      "epoch": 0.6026664629254689,
      "grad_norm": 0.29744040966033936,
      "learning_rate": 7.303495110697281e-05,
      "loss": 0.7451,
      "step": 3944
    },
    {
      "epoch": 0.6028192688237766,
      "grad_norm": 0.3033977448940277,
      "learning_rate": 7.298688928530098e-05,
      "loss": 0.937,
      "step": 3945
    },
    {
      "epoch": 0.6029720747220843,
      "grad_norm": 0.3462549149990082,
      "learning_rate": 7.293883419371893e-05,
      "loss": 0.8325,
      "step": 3946
    },
    {
      "epoch": 0.6031248806203919,
      "grad_norm": 0.28772634267807007,
      "learning_rate": 7.289078584419918e-05,
      "loss": 0.7318,
      "step": 3947
    },
    {
      "epoch": 0.6032776865186996,
      "grad_norm": 0.2670397162437439,
      "learning_rate": 7.284274424871254e-05,
      "loss": 0.5443,
      "step": 3948
    },
    {
      "epoch": 0.6034304924170073,
      "grad_norm": 0.3772238790988922,
      "learning_rate": 7.279470941922826e-05,
      "loss": 0.7199,
      "step": 3949
    },
    {
      "epoch": 0.603583298315315,
      "grad_norm": 0.27530890703201294,
      "learning_rate": 7.274668136771373e-05,
      "loss": 0.7777,
      "step": 3950
    },
    {
      "epoch": 0.6037361042136227,
      "grad_norm": 0.3031236529350281,
      "learning_rate": 7.269866010613477e-05,
      "loss": 0.7969,
      "step": 3951
    },
    {
      "epoch": 0.6038889101119304,
      "grad_norm": 0.4699702262878418,
      "learning_rate": 7.265064564645545e-05,
      "loss": 0.8742,
      "step": 3952
    },
    {
      "epoch": 0.604041716010238,
      "grad_norm": 0.2931947708129883,
      "learning_rate": 7.260263800063822e-05,
      "loss": 0.6974,
      "step": 3953
    },
    {
      "epoch": 0.6041945219085457,
      "grad_norm": 0.2625153660774231,
      "learning_rate": 7.255463718064375e-05,
      "loss": 0.6238,
      "step": 3954
    },
    {
      "epoch": 0.6043473278068533,
      "grad_norm": 0.2817601263523102,
      "learning_rate": 7.250664319843101e-05,
      "loss": 0.6791,
      "step": 3955
    },
    {
      "epoch": 0.604500133705161,
      "grad_norm": 0.29988333582878113,
      "learning_rate": 7.245865606595741e-05,
      "loss": 0.6681,
      "step": 3956
    },
    {
      "epoch": 0.6046529396034687,
      "grad_norm": 0.29616379737854004,
      "learning_rate": 7.241067579517837e-05,
      "loss": 0.7775,
      "step": 3957
    },
    {
      "epoch": 0.6048057455017763,
      "grad_norm": 0.28116655349731445,
      "learning_rate": 7.236270239804792e-05,
      "loss": 0.8737,
      "step": 3958
    },
    {
      "epoch": 0.604958551400084,
      "grad_norm": 0.30657532811164856,
      "learning_rate": 7.231473588651814e-05,
      "loss": 0.8031,
      "step": 3959
    },
    {
      "epoch": 0.6051113572983917,
      "grad_norm": 0.30859723687171936,
      "learning_rate": 7.226677627253955e-05,
      "loss": 0.6121,
      "step": 3960
    },
    {
      "epoch": 0.6052641631966994,
      "grad_norm": 0.23964034020900726,
      "learning_rate": 7.221882356806083e-05,
      "loss": 0.6389,
      "step": 3961
    },
    {
      "epoch": 0.6054169690950071,
      "grad_norm": 0.26439711451530457,
      "learning_rate": 7.217087778502903e-05,
      "loss": 0.6267,
      "step": 3962
    },
    {
      "epoch": 0.6055697749933148,
      "grad_norm": 0.9159783124923706,
      "learning_rate": 7.212293893538944e-05,
      "loss": 0.4435,
      "step": 3963
    },
    {
      "epoch": 0.6057225808916225,
      "grad_norm": 0.7968850135803223,
      "learning_rate": 7.207500703108556e-05,
      "loss": 0.7617,
      "step": 3964
    },
    {
      "epoch": 0.6058753867899301,
      "grad_norm": 0.4541511535644531,
      "learning_rate": 7.202708208405928e-05,
      "loss": 0.6902,
      "step": 3965
    },
    {
      "epoch": 0.6060281926882378,
      "grad_norm": 0.3079363703727722,
      "learning_rate": 7.197916410625072e-05,
      "loss": 0.5515,
      "step": 3966
    },
    {
      "epoch": 0.6061809985865454,
      "grad_norm": 0.287811279296875,
      "learning_rate": 7.193125310959821e-05,
      "loss": 0.7739,
      "step": 3967
    },
    {
      "epoch": 0.6063338044848531,
      "grad_norm": 0.3375343978404999,
      "learning_rate": 7.188334910603832e-05,
      "loss": 0.6862,
      "step": 3968
    },
    {
      "epoch": 0.6064866103831608,
      "grad_norm": 0.3060528039932251,
      "learning_rate": 7.183545210750602e-05,
      "loss": 0.7394,
      "step": 3969
    },
    {
      "epoch": 0.6066394162814684,
      "grad_norm": 0.279608815908432,
      "learning_rate": 7.178756212593443e-05,
      "loss": 0.6801,
      "step": 3970
    },
    {
      "epoch": 0.6067922221797761,
      "grad_norm": 0.33723247051239014,
      "learning_rate": 7.173967917325488e-05,
      "loss": 0.675,
      "step": 3971
    },
    {
      "epoch": 0.6069450280780838,
      "grad_norm": 0.32487475872039795,
      "learning_rate": 7.169180326139702e-05,
      "loss": 0.7913,
      "step": 3972
    },
    {
      "epoch": 0.6070978339763915,
      "grad_norm": 0.2952229678630829,
      "learning_rate": 7.164393440228878e-05,
      "loss": 0.6479,
      "step": 3973
    },
    {
      "epoch": 0.6072506398746992,
      "grad_norm": 0.2784630060195923,
      "learning_rate": 7.159607260785627e-05,
      "loss": 0.8433,
      "step": 3974
    },
    {
      "epoch": 0.6074034457730069,
      "grad_norm": 0.2817748785018921,
      "learning_rate": 7.15482178900238e-05,
      "loss": 0.6597,
      "step": 3975
    },
    {
      "epoch": 0.6075562516713146,
      "grad_norm": 0.32576805353164673,
      "learning_rate": 7.150037026071405e-05,
      "loss": 0.9512,
      "step": 3976
    },
    {
      "epoch": 0.6077090575696222,
      "grad_norm": 0.2870212495326996,
      "learning_rate": 7.145252973184779e-05,
      "loss": 0.7329,
      "step": 3977
    },
    {
      "epoch": 0.6078618634679298,
      "grad_norm": 0.2842814326286316,
      "learning_rate": 7.140469631534414e-05,
      "loss": 0.8501,
      "step": 3978
    },
    {
      "epoch": 0.6080146693662375,
      "grad_norm": 0.3353877663612366,
      "learning_rate": 7.135687002312035e-05,
      "loss": 0.6133,
      "step": 3979
    },
    {
      "epoch": 0.6081674752645452,
      "grad_norm": 0.33758804202079773,
      "learning_rate": 7.130905086709196e-05,
      "loss": 0.5174,
      "step": 3980
    },
    {
      "epoch": 0.6083202811628529,
      "grad_norm": 0.2900623083114624,
      "learning_rate": 7.126123885917272e-05,
      "loss": 0.4506,
      "step": 3981
    },
    {
      "epoch": 0.6084730870611605,
      "grad_norm": 0.3299383819103241,
      "learning_rate": 7.121343401127456e-05,
      "loss": 0.6244,
      "step": 3982
    },
    {
      "epoch": 0.6086258929594682,
      "grad_norm": 0.25950318574905396,
      "learning_rate": 7.116563633530766e-05,
      "loss": 0.6782,
      "step": 3983
    },
    {
      "epoch": 0.6087786988577759,
      "grad_norm": 0.3207615613937378,
      "learning_rate": 7.111784584318044e-05,
      "loss": 0.7453,
      "step": 3984
    },
    {
      "epoch": 0.6089315047560836,
      "grad_norm": 0.30822837352752686,
      "learning_rate": 7.107006254679951e-05,
      "loss": 0.7912,
      "step": 3985
    },
    {
      "epoch": 0.6090843106543913,
      "grad_norm": 0.38215330243110657,
      "learning_rate": 7.102228645806963e-05,
      "loss": 0.7137,
      "step": 3986
    },
    {
      "epoch": 0.609237116552699,
      "grad_norm": 0.32029587030410767,
      "learning_rate": 7.097451758889382e-05,
      "loss": 0.762,
      "step": 3987
    },
    {
      "epoch": 0.6093899224510066,
      "grad_norm": 0.3142178952693939,
      "learning_rate": 7.092675595117333e-05,
      "loss": 0.6819,
      "step": 3988
    },
    {
      "epoch": 0.6095427283493143,
      "grad_norm": 0.28147318959236145,
      "learning_rate": 7.087900155680754e-05,
      "loss": 0.7674,
      "step": 3989
    },
    {
      "epoch": 0.6096955342476219,
      "grad_norm": 0.2938244938850403,
      "learning_rate": 7.083125441769402e-05,
      "loss": 0.6486,
      "step": 3990
    },
    {
      "epoch": 0.6098483401459296,
      "grad_norm": 0.3013629615306854,
      "learning_rate": 7.078351454572867e-05,
      "loss": 0.7303,
      "step": 3991
    },
    {
      "epoch": 0.6100011460442373,
      "grad_norm": 0.3084275722503662,
      "learning_rate": 7.073578195280541e-05,
      "loss": 0.7825,
      "step": 3992
    },
    {
      "epoch": 0.610153951942545,
      "grad_norm": 0.29917213320732117,
      "learning_rate": 7.068805665081641e-05,
      "loss": 0.7427,
      "step": 3993
    },
    {
      "epoch": 0.6103067578408526,
      "grad_norm": 0.26484012603759766,
      "learning_rate": 7.064033865165204e-05,
      "loss": 0.6877,
      "step": 3994
    },
    {
      "epoch": 0.6104595637391603,
      "grad_norm": 0.30923646688461304,
      "learning_rate": 7.059262796720088e-05,
      "loss": 0.7605,
      "step": 3995
    },
    {
      "epoch": 0.610612369637468,
      "grad_norm": 0.3539402484893799,
      "learning_rate": 7.054492460934958e-05,
      "loss": 0.6913,
      "step": 3996
    },
    {
      "epoch": 0.6107651755357757,
      "grad_norm": 0.3845704197883606,
      "learning_rate": 7.049722858998307e-05,
      "loss": 0.7764,
      "step": 3997
    },
    {
      "epoch": 0.6109179814340834,
      "grad_norm": 0.26904961466789246,
      "learning_rate": 7.044953992098436e-05,
      "loss": 0.6718,
      "step": 3998
    },
    {
      "epoch": 0.6110707873323911,
      "grad_norm": 0.31562212109565735,
      "learning_rate": 7.040185861423478e-05,
      "loss": 0.5668,
      "step": 3999
    },
    {
      "epoch": 0.6112235932306987,
      "grad_norm": 0.31812769174575806,
      "learning_rate": 7.035418468161365e-05,
      "loss": 0.7084,
      "step": 4000
    },
    {
      "epoch": 0.6113763991290064,
      "grad_norm": 0.28301897644996643,
      "learning_rate": 7.030651813499854e-05,
      "loss": 0.6407,
      "step": 4001
    },
    {
      "epoch": 0.611529205027314,
      "grad_norm": 0.3221738636493683,
      "learning_rate": 7.025885898626525e-05,
      "loss": 0.6902,
      "step": 4002
    },
    {
      "epoch": 0.6116820109256217,
      "grad_norm": 0.25532403588294983,
      "learning_rate": 7.021120724728751e-05,
      "loss": 0.7482,
      "step": 4003
    },
    {
      "epoch": 0.6118348168239294,
      "grad_norm": 0.2903250753879547,
      "learning_rate": 7.016356292993746e-05,
      "loss": 0.6027,
      "step": 4004
    },
    {
      "epoch": 0.6119876227222371,
      "grad_norm": 0.3196435272693634,
      "learning_rate": 7.011592604608523e-05,
      "loss": 0.6325,
      "step": 4005
    },
    {
      "epoch": 0.6121404286205447,
      "grad_norm": 0.251808226108551,
      "learning_rate": 7.006829660759923e-05,
      "loss": 0.6652,
      "step": 4006
    },
    {
      "epoch": 0.6122932345188524,
      "grad_norm": 0.3030737042427063,
      "learning_rate": 7.002067462634582e-05,
      "loss": 0.5607,
      "step": 4007
    },
    {
      "epoch": 0.6124460404171601,
      "grad_norm": 0.3054194748401642,
      "learning_rate": 6.99730601141897e-05,
      "loss": 0.6044,
      "step": 4008
    },
    {
      "epoch": 0.6125988463154678,
      "grad_norm": 0.2582065761089325,
      "learning_rate": 6.992545308299355e-05,
      "loss": 0.6113,
      "step": 4009
    },
    {
      "epoch": 0.6127516522137755,
      "grad_norm": 0.35208937525749207,
      "learning_rate": 6.987785354461838e-05,
      "loss": 0.6342,
      "step": 4010
    },
    {
      "epoch": 0.6129044581120832,
      "grad_norm": 0.35848256945610046,
      "learning_rate": 6.98302615109231e-05,
      "loss": 0.6829,
      "step": 4011
    },
    {
      "epoch": 0.6130572640103908,
      "grad_norm": 0.29076623916625977,
      "learning_rate": 6.978267699376494e-05,
      "loss": 0.8626,
      "step": 4012
    },
    {
      "epoch": 0.6132100699086985,
      "grad_norm": 0.28895699977874756,
      "learning_rate": 6.973510000499916e-05,
      "loss": 0.8021,
      "step": 4013
    },
    {
      "epoch": 0.6133628758070061,
      "grad_norm": 0.42235711216926575,
      "learning_rate": 6.968753055647915e-05,
      "loss": 1.0156,
      "step": 4014
    },
    {
      "epoch": 0.6135156817053138,
      "grad_norm": 0.2926298975944519,
      "learning_rate": 6.963996866005644e-05,
      "loss": 0.6561,
      "step": 4015
    },
    {
      "epoch": 0.6136684876036215,
      "grad_norm": 0.3840494751930237,
      "learning_rate": 6.959241432758067e-05,
      "loss": 0.6022,
      "step": 4016
    },
    {
      "epoch": 0.6138212935019292,
      "grad_norm": 0.28409114480018616,
      "learning_rate": 6.954486757089968e-05,
      "loss": 0.7626,
      "step": 4017
    },
    {
      "epoch": 0.6139740994002368,
      "grad_norm": 0.37249550223350525,
      "learning_rate": 6.949732840185926e-05,
      "loss": 0.8332,
      "step": 4018
    },
    {
      "epoch": 0.6141269052985445,
      "grad_norm": 0.2957054376602173,
      "learning_rate": 6.94497968323034e-05,
      "loss": 0.669,
      "step": 4019
    },
    {
      "epoch": 0.6142797111968522,
      "grad_norm": 0.24224689602851868,
      "learning_rate": 6.940227287407426e-05,
      "loss": 0.373,
      "step": 4020
    },
    {
      "epoch": 0.6144325170951599,
      "grad_norm": 0.4046684503555298,
      "learning_rate": 6.935475653901194e-05,
      "loss": 1.0801,
      "step": 4021
    },
    {
      "epoch": 0.6145853229934676,
      "grad_norm": 0.33295106887817383,
      "learning_rate": 6.930724783895481e-05,
      "loss": 0.8235,
      "step": 4022
    },
    {
      "epoch": 0.6147381288917753,
      "grad_norm": 0.2868274748325348,
      "learning_rate": 6.925974678573923e-05,
      "loss": 0.7053,
      "step": 4023
    },
    {
      "epoch": 0.6148909347900829,
      "grad_norm": 0.28332453966140747,
      "learning_rate": 6.921225339119972e-05,
      "loss": 0.7562,
      "step": 4024
    },
    {
      "epoch": 0.6150437406883905,
      "grad_norm": 0.3626477122306824,
      "learning_rate": 6.91647676671688e-05,
      "loss": 0.5815,
      "step": 4025
    },
    {
      "epoch": 0.6151965465866982,
      "grad_norm": 0.30137038230895996,
      "learning_rate": 6.911728962547719e-05,
      "loss": 0.6479,
      "step": 4026
    },
    {
      "epoch": 0.6153493524850059,
      "grad_norm": 0.29259753227233887,
      "learning_rate": 6.906981927795366e-05,
      "loss": 0.6555,
      "step": 4027
    },
    {
      "epoch": 0.6155021583833136,
      "grad_norm": 0.27849724888801575,
      "learning_rate": 6.9022356636425e-05,
      "loss": 0.6151,
      "step": 4028
    },
    {
      "epoch": 0.6156549642816213,
      "grad_norm": 0.36362895369529724,
      "learning_rate": 6.897490171271614e-05,
      "loss": 0.6835,
      "step": 4029
    },
    {
      "epoch": 0.6158077701799289,
      "grad_norm": 0.3303452432155609,
      "learning_rate": 6.892745451865008e-05,
      "loss": 0.7131,
      "step": 4030
    },
    {
      "epoch": 0.6159605760782366,
      "grad_norm": 0.3015969693660736,
      "learning_rate": 6.888001506604794e-05,
      "loss": 0.7166,
      "step": 4031
    },
    {
      "epoch": 0.6161133819765443,
      "grad_norm": 0.33131933212280273,
      "learning_rate": 6.883258336672879e-05,
      "loss": 0.8487,
      "step": 4032
    },
    {
      "epoch": 0.616266187874852,
      "grad_norm": 0.2961571216583252,
      "learning_rate": 6.878515943250985e-05,
      "loss": 0.5713,
      "step": 4033
    },
    {
      "epoch": 0.6164189937731597,
      "grad_norm": 0.28777509927749634,
      "learning_rate": 6.873774327520644e-05,
      "loss": 0.702,
      "step": 4034
    },
    {
      "epoch": 0.6165717996714674,
      "grad_norm": 0.29192861914634705,
      "learning_rate": 6.869033490663187e-05,
      "loss": 0.5561,
      "step": 4035
    },
    {
      "epoch": 0.616724605569775,
      "grad_norm": 0.31917914748191833,
      "learning_rate": 6.86429343385975e-05,
      "loss": 0.7583,
      "step": 4036
    },
    {
      "epoch": 0.6168774114680826,
      "grad_norm": 0.29778870940208435,
      "learning_rate": 6.859554158291285e-05,
      "loss": 0.6645,
      "step": 4037
    },
    {
      "epoch": 0.6170302173663903,
      "grad_norm": 0.2581726610660553,
      "learning_rate": 6.854815665138541e-05,
      "loss": 0.66,
      "step": 4038
    },
    {
      "epoch": 0.617183023264698,
      "grad_norm": 0.2948669493198395,
      "learning_rate": 6.850077955582072e-05,
      "loss": 0.6895,
      "step": 4039
    },
    {
      "epoch": 0.6173358291630057,
      "grad_norm": 0.2632709741592407,
      "learning_rate": 6.845341030802236e-05,
      "loss": 0.4614,
      "step": 4040
    },
    {
      "epoch": 0.6174886350613134,
      "grad_norm": 0.29188868403434753,
      "learning_rate": 6.840604891979205e-05,
      "loss": 0.8561,
      "step": 4041
    },
    {
      "epoch": 0.617641440959621,
      "grad_norm": 0.26110169291496277,
      "learning_rate": 6.835869540292943e-05,
      "loss": 0.8258,
      "step": 4042
    },
    {
      "epoch": 0.6177942468579287,
      "grad_norm": 0.313664048910141,
      "learning_rate": 6.831134976923224e-05,
      "loss": 0.5098,
      "step": 4043
    },
    {
      "epoch": 0.6179470527562364,
      "grad_norm": 0.3582458198070526,
      "learning_rate": 6.826401203049624e-05,
      "loss": 0.7877,
      "step": 4044
    },
    {
      "epoch": 0.6180998586545441,
      "grad_norm": 0.335791677236557,
      "learning_rate": 6.821668219851529e-05,
      "loss": 0.8319,
      "step": 4045
    },
    {
      "epoch": 0.6182526645528518,
      "grad_norm": 0.31981077790260315,
      "learning_rate": 6.816936028508114e-05,
      "loss": 0.7662,
      "step": 4046
    },
    {
      "epoch": 0.6184054704511595,
      "grad_norm": 0.3540882170200348,
      "learning_rate": 6.812204630198369e-05,
      "loss": 0.7006,
      "step": 4047
    },
    {
      "epoch": 0.6185582763494671,
      "grad_norm": 0.33336111903190613,
      "learning_rate": 6.807474026101079e-05,
      "loss": 0.6572,
      "step": 4048
    },
    {
      "epoch": 0.6187110822477747,
      "grad_norm": 0.34919145703315735,
      "learning_rate": 6.80274421739484e-05,
      "loss": 0.8531,
      "step": 4049
    },
    {
      "epoch": 0.6188638881460824,
      "grad_norm": 0.2679178714752197,
      "learning_rate": 6.798015205258039e-05,
      "loss": 0.6045,
      "step": 4050
    },
    {
      "epoch": 0.6190166940443901,
      "grad_norm": 0.35538893938064575,
      "learning_rate": 6.793286990868869e-05,
      "loss": 0.6461,
      "step": 4051
    },
    {
      "epoch": 0.6191694999426978,
      "grad_norm": 0.32986605167388916,
      "learning_rate": 6.788559575405333e-05,
      "loss": 0.8395,
      "step": 4052
    },
    {
      "epoch": 0.6193223058410054,
      "grad_norm": 0.3114609122276306,
      "learning_rate": 6.783832960045215e-05,
      "loss": 0.858,
      "step": 4053
    },
    {
      "epoch": 0.6194751117393131,
      "grad_norm": 0.28365322947502136,
      "learning_rate": 6.779107145966122e-05,
      "loss": 0.715,
      "step": 4054
    },
    {
      "epoch": 0.6196279176376208,
      "grad_norm": 0.3411410450935364,
      "learning_rate": 6.774382134345442e-05,
      "loss": 0.8184,
      "step": 4055
    },
    {
      "epoch": 0.6197807235359285,
      "grad_norm": 0.26076486706733704,
      "learning_rate": 6.769657926360382e-05,
      "loss": 0.7415,
      "step": 4056
    },
    {
      "epoch": 0.6199335294342362,
      "grad_norm": 0.24735046923160553,
      "learning_rate": 6.764934523187931e-05,
      "loss": 0.5559,
      "step": 4057
    },
    {
      "epoch": 0.6200863353325439,
      "grad_norm": 0.3628125488758087,
      "learning_rate": 6.760211926004889e-05,
      "loss": 0.6857,
      "step": 4058
    },
    {
      "epoch": 0.6202391412308516,
      "grad_norm": 0.4325239360332489,
      "learning_rate": 6.75549013598785e-05,
      "loss": 0.8027,
      "step": 4059
    },
    {
      "epoch": 0.6203919471291592,
      "grad_norm": 0.3012774586677551,
      "learning_rate": 6.750769154313206e-05,
      "loss": 0.7281,
      "step": 4060
    },
    {
      "epoch": 0.6205447530274668,
      "grad_norm": 0.2891152501106262,
      "learning_rate": 6.746048982157154e-05,
      "loss": 0.9022,
      "step": 4061
    },
    {
      "epoch": 0.6206975589257745,
      "grad_norm": 0.40588390827178955,
      "learning_rate": 6.74132962069568e-05,
      "loss": 0.8497,
      "step": 4062
    },
    {
      "epoch": 0.6208503648240822,
      "grad_norm": 0.26198041439056396,
      "learning_rate": 6.736611071104583e-05,
      "loss": 0.705,
      "step": 4063
    },
    {
      "epoch": 0.6210031707223899,
      "grad_norm": 0.29433444142341614,
      "learning_rate": 6.731893334559441e-05,
      "loss": 0.7995,
      "step": 4064
    },
    {
      "epoch": 0.6211559766206975,
      "grad_norm": 0.32308998703956604,
      "learning_rate": 6.727176412235641e-05,
      "loss": 0.7435,
      "step": 4065
    },
    {
      "epoch": 0.6213087825190052,
      "grad_norm": 0.32107511162757874,
      "learning_rate": 6.722460305308369e-05,
      "loss": 0.6198,
      "step": 4066
    },
    {
      "epoch": 0.6214615884173129,
      "grad_norm": 1.0581830739974976,
      "learning_rate": 6.717745014952594e-05,
      "loss": 0.6794,
      "step": 4067
    },
    {
      "epoch": 0.6216143943156206,
      "grad_norm": 0.45635101199150085,
      "learning_rate": 6.713030542343097e-05,
      "loss": 0.8428,
      "step": 4068
    },
    {
      "epoch": 0.6217672002139283,
      "grad_norm": 0.2648105323314667,
      "learning_rate": 6.70831688865445e-05,
      "loss": 0.7215,
      "step": 4069
    },
    {
      "epoch": 0.621920006112236,
      "grad_norm": 0.32674112915992737,
      "learning_rate": 6.703604055061022e-05,
      "loss": 0.6808,
      "step": 4070
    },
    {
      "epoch": 0.6220728120105437,
      "grad_norm": 0.2716309428215027,
      "learning_rate": 6.698892042736969e-05,
      "loss": 0.6815,
      "step": 4071
    },
    {
      "epoch": 0.6222256179088513,
      "grad_norm": 0.2864760160446167,
      "learning_rate": 6.694180852856254e-05,
      "loss": 0.6477,
      "step": 4072
    },
    {
      "epoch": 0.6223784238071589,
      "grad_norm": 0.30836066603660583,
      "learning_rate": 6.68947048659263e-05,
      "loss": 0.8132,
      "step": 4073
    },
    {
      "epoch": 0.6225312297054666,
      "grad_norm": 0.3510795533657074,
      "learning_rate": 6.684760945119645e-05,
      "loss": 0.8555,
      "step": 4074
    },
    {
      "epoch": 0.6226840356037743,
      "grad_norm": 0.3028537929058075,
      "learning_rate": 6.68005222961064e-05,
      "loss": 0.647,
      "step": 4075
    },
    {
      "epoch": 0.622836841502082,
      "grad_norm": 0.2671966254711151,
      "learning_rate": 6.675344341238757e-05,
      "loss": 0.6059,
      "step": 4076
    },
    {
      "epoch": 0.6229896474003896,
      "grad_norm": 0.39240017533302307,
      "learning_rate": 6.670637281176923e-05,
      "loss": 0.8019,
      "step": 4077
    },
    {
      "epoch": 0.6231424532986973,
      "grad_norm": 0.3353138267993927,
      "learning_rate": 6.66593105059786e-05,
      "loss": 0.7382,
      "step": 4078
    },
    {
      "epoch": 0.623295259197005,
      "grad_norm": 0.32272979617118835,
      "learning_rate": 6.661225650674089e-05,
      "loss": 0.7867,
      "step": 4079
    },
    {
      "epoch": 0.6234480650953127,
      "grad_norm": 0.3123871684074402,
      "learning_rate": 6.656521082577925e-05,
      "loss": 0.7258,
      "step": 4080
    },
    {
      "epoch": 0.6236008709936204,
      "grad_norm": 0.2753034830093384,
      "learning_rate": 6.651817347481462e-05,
      "loss": 0.5895,
      "step": 4081
    },
    {
      "epoch": 0.6237536768919281,
      "grad_norm": 0.28939950466156006,
      "learning_rate": 6.647114446556601e-05,
      "loss": 0.5629,
      "step": 4082
    },
    {
      "epoch": 0.6239064827902358,
      "grad_norm": 0.29780569672584534,
      "learning_rate": 6.642412380975033e-05,
      "loss": 0.8147,
      "step": 4083
    },
    {
      "epoch": 0.6240592886885433,
      "grad_norm": 0.3016960024833679,
      "learning_rate": 6.637711151908239e-05,
      "loss": 0.8671,
      "step": 4084
    },
    {
      "epoch": 0.624212094586851,
      "grad_norm": 0.33941328525543213,
      "learning_rate": 6.633010760527485e-05,
      "loss": 0.6496,
      "step": 4085
    },
    {
      "epoch": 0.6243649004851587,
      "grad_norm": 0.2641719877719879,
      "learning_rate": 6.628311208003834e-05,
      "loss": 0.639,
      "step": 4086
    },
    {
      "epoch": 0.6245177063834664,
      "grad_norm": 0.2600893974304199,
      "learning_rate": 6.623612495508146e-05,
      "loss": 0.6703,
      "step": 4087
    },
    {
      "epoch": 0.6246705122817741,
      "grad_norm": 0.29402458667755127,
      "learning_rate": 6.618914624211064e-05,
      "loss": 0.5691,
      "step": 4088
    },
    {
      "epoch": 0.6248233181800817,
      "grad_norm": 0.3072538673877716,
      "learning_rate": 6.614217595283019e-05,
      "loss": 0.758,
      "step": 4089
    },
    {
      "epoch": 0.6249761240783894,
      "grad_norm": 0.26396307349205017,
      "learning_rate": 6.609521409894237e-05,
      "loss": 0.7844,
      "step": 4090
    },
    {
      "epoch": 0.6251289299766971,
      "grad_norm": 0.3354957699775696,
      "learning_rate": 6.60482606921474e-05,
      "loss": 0.7251,
      "step": 4091
    },
    {
      "epoch": 0.6252817358750048,
      "grad_norm": 0.2975131869316101,
      "learning_rate": 6.600131574414325e-05,
      "loss": 0.7848,
      "step": 4092
    },
    {
      "epoch": 0.6254345417733125,
      "grad_norm": 0.3199508488178253,
      "learning_rate": 6.59543792666259e-05,
      "loss": 0.7637,
      "step": 4093
    },
    {
      "epoch": 0.6255873476716202,
      "grad_norm": 0.2629062533378601,
      "learning_rate": 6.590745127128914e-05,
      "loss": 0.5365,
      "step": 4094
    },
    {
      "epoch": 0.6257401535699278,
      "grad_norm": 0.3765850067138672,
      "learning_rate": 6.586053176982476e-05,
      "loss": 0.6494,
      "step": 4095
    },
    {
      "epoch": 0.6258929594682354,
      "grad_norm": 0.28707364201545715,
      "learning_rate": 6.58136207739223e-05,
      "loss": 0.7073,
      "step": 4096
    },
    {
      "epoch": 0.6260457653665431,
      "grad_norm": 0.339830607175827,
      "learning_rate": 6.576671829526923e-05,
      "loss": 0.6416,
      "step": 4097
    },
    {
      "epoch": 0.6261985712648508,
      "grad_norm": 0.28438472747802734,
      "learning_rate": 6.5719824345551e-05,
      "loss": 0.6059,
      "step": 4098
    },
    {
      "epoch": 0.6263513771631585,
      "grad_norm": 0.2813330590724945,
      "learning_rate": 6.56729389364507e-05,
      "loss": 0.7748,
      "step": 4099
    },
    {
      "epoch": 0.6265041830614662,
      "grad_norm": 0.27409806847572327,
      "learning_rate": 6.562606207964954e-05,
      "loss": 0.736,
      "step": 4100
    },
    {
      "epoch": 0.6266569889597738,
      "grad_norm": 0.28923875093460083,
      "learning_rate": 6.557919378682646e-05,
      "loss": 0.8389,
      "step": 4101
    },
    {
      "epoch": 0.6268097948580815,
      "grad_norm": 0.2727113366127014,
      "learning_rate": 6.553233406965835e-05,
      "loss": 0.6921,
      "step": 4102
    },
    {
      "epoch": 0.6269626007563892,
      "grad_norm": 0.2747400104999542,
      "learning_rate": 6.548548293981985e-05,
      "loss": 0.7585,
      "step": 4103
    },
    {
      "epoch": 0.6271154066546969,
      "grad_norm": 0.2889161705970764,
      "learning_rate": 6.543864040898355e-05,
      "loss": 0.6042,
      "step": 4104
    },
    {
      "epoch": 0.6272682125530046,
      "grad_norm": 0.28560250997543335,
      "learning_rate": 6.539180648881991e-05,
      "loss": 0.7497,
      "step": 4105
    },
    {
      "epoch": 0.6274210184513123,
      "grad_norm": 0.43822401762008667,
      "learning_rate": 6.534498119099712e-05,
      "loss": 0.7462,
      "step": 4106
    },
    {
      "epoch": 0.62757382434962,
      "grad_norm": 0.40729212760925293,
      "learning_rate": 6.529816452718139e-05,
      "loss": 0.8604,
      "step": 4107
    },
    {
      "epoch": 0.6277266302479275,
      "grad_norm": 0.2853194773197174,
      "learning_rate": 6.525135650903666e-05,
      "loss": 0.6286,
      "step": 4108
    },
    {
      "epoch": 0.6278794361462352,
      "grad_norm": 0.3668119013309479,
      "learning_rate": 6.520455714822481e-05,
      "loss": 0.5869,
      "step": 4109
    },
    {
      "epoch": 0.6280322420445429,
      "grad_norm": 0.34195056557655334,
      "learning_rate": 6.515776645640541e-05,
      "loss": 0.8816,
      "step": 4110
    },
    {
      "epoch": 0.6281850479428506,
      "grad_norm": 0.34236249327659607,
      "learning_rate": 6.511098444523604e-05,
      "loss": 0.8364,
      "step": 4111
    },
    {
      "epoch": 0.6283378538411583,
      "grad_norm": 0.27742037177085876,
      "learning_rate": 6.506421112637207e-05,
      "loss": 0.5767,
      "step": 4112
    },
    {
      "epoch": 0.6284906597394659,
      "grad_norm": 0.300037145614624,
      "learning_rate": 6.50174465114666e-05,
      "loss": 0.8639,
      "step": 4113
    },
    {
      "epoch": 0.6286434656377736,
      "grad_norm": 0.32458987832069397,
      "learning_rate": 6.497069061217065e-05,
      "loss": 0.672,
      "step": 4114
    },
    {
      "epoch": 0.6287962715360813,
      "grad_norm": 0.3458729386329651,
      "learning_rate": 6.492394344013313e-05,
      "loss": 0.6846,
      "step": 4115
    },
    {
      "epoch": 0.628949077434389,
      "grad_norm": 0.21836614608764648,
      "learning_rate": 6.487720500700067e-05,
      "loss": 0.7049,
      "step": 4116
    },
    {
      "epoch": 0.6291018833326967,
      "grad_norm": 0.2723524868488312,
      "learning_rate": 6.483047532441773e-05,
      "loss": 0.6425,
      "step": 4117
    },
    {
      "epoch": 0.6292546892310044,
      "grad_norm": 0.2965027093887329,
      "learning_rate": 6.478375440402664e-05,
      "loss": 0.7011,
      "step": 4118
    },
    {
      "epoch": 0.629407495129312,
      "grad_norm": 0.2943280041217804,
      "learning_rate": 6.473704225746755e-05,
      "loss": 0.5876,
      "step": 4119
    },
    {
      "epoch": 0.6295603010276196,
      "grad_norm": 0.3261384963989258,
      "learning_rate": 6.469033889637837e-05,
      "loss": 0.8015,
      "step": 4120
    },
    {
      "epoch": 0.6297131069259273,
      "grad_norm": 0.4228057265281677,
      "learning_rate": 6.464364433239484e-05,
      "loss": 0.7216,
      "step": 4121
    },
    {
      "epoch": 0.629865912824235,
      "grad_norm": 0.49428024888038635,
      "learning_rate": 6.459695857715053e-05,
      "loss": 0.7154,
      "step": 4122
    },
    {
      "epoch": 0.6300187187225427,
      "grad_norm": 0.45371702313423157,
      "learning_rate": 6.455028164227685e-05,
      "loss": 0.6947,
      "step": 4123
    },
    {
      "epoch": 0.6301715246208504,
      "grad_norm": 0.3593444526195526,
      "learning_rate": 6.45036135394029e-05,
      "loss": 0.709,
      "step": 4124
    },
    {
      "epoch": 0.630324330519158,
      "grad_norm": 0.38711681962013245,
      "learning_rate": 6.445695428015566e-05,
      "loss": 0.6442,
      "step": 4125
    },
    {
      "epoch": 0.6304771364174657,
      "grad_norm": 0.2977801561355591,
      "learning_rate": 6.44103038761599e-05,
      "loss": 0.7507,
      "step": 4126
    },
    {
      "epoch": 0.6306299423157734,
      "grad_norm": 0.25699782371520996,
      "learning_rate": 6.436366233903822e-05,
      "loss": 0.6813,
      "step": 4127
    },
    {
      "epoch": 0.6307827482140811,
      "grad_norm": 0.251458078622818,
      "learning_rate": 6.431702968041091e-05,
      "loss": 0.8123,
      "step": 4128
    },
    {
      "epoch": 0.6309355541123888,
      "grad_norm": 0.3088221251964569,
      "learning_rate": 6.427040591189609e-05,
      "loss": 0.9976,
      "step": 4129
    },
    {
      "epoch": 0.6310883600106965,
      "grad_norm": 0.35455629229545593,
      "learning_rate": 6.422379104510976e-05,
      "loss": 0.8277,
      "step": 4130
    },
    {
      "epoch": 0.6312411659090041,
      "grad_norm": 0.2564350366592407,
      "learning_rate": 6.417718509166557e-05,
      "loss": 0.5566,
      "step": 4131
    },
    {
      "epoch": 0.6313939718073117,
      "grad_norm": 0.3636449873447418,
      "learning_rate": 6.413058806317496e-05,
      "loss": 0.7471,
      "step": 4132
    },
    {
      "epoch": 0.6315467777056194,
      "grad_norm": 0.25471046566963196,
      "learning_rate": 6.408399997124728e-05,
      "loss": 0.6974,
      "step": 4133
    },
    {
      "epoch": 0.6316995836039271,
      "grad_norm": 0.2742546796798706,
      "learning_rate": 6.403742082748954e-05,
      "loss": 0.5548,
      "step": 4134
    },
    {
      "epoch": 0.6318523895022348,
      "grad_norm": 0.29743149876594543,
      "learning_rate": 6.399085064350648e-05,
      "loss": 0.7215,
      "step": 4135
    },
    {
      "epoch": 0.6320051954005425,
      "grad_norm": 0.34070295095443726,
      "learning_rate": 6.394428943090071e-05,
      "loss": 0.8442,
      "step": 4136
    },
    {
      "epoch": 0.6321580012988501,
      "grad_norm": 0.3170759975910187,
      "learning_rate": 6.389773720127262e-05,
      "loss": 0.5968,
      "step": 4137
    },
    {
      "epoch": 0.6323108071971578,
      "grad_norm": 0.3096469044685364,
      "learning_rate": 6.385119396622021e-05,
      "loss": 0.8517,
      "step": 4138
    },
    {
      "epoch": 0.6324636130954655,
      "grad_norm": 0.3050990104675293,
      "learning_rate": 6.38046597373394e-05,
      "loss": 0.5937,
      "step": 4139
    },
    {
      "epoch": 0.6326164189937732,
      "grad_norm": 0.30861058831214905,
      "learning_rate": 6.375813452622375e-05,
      "loss": 0.6394,
      "step": 4140
    },
    {
      "epoch": 0.6327692248920809,
      "grad_norm": 0.270451158285141,
      "learning_rate": 6.37116183444647e-05,
      "loss": 0.719,
      "step": 4141
    },
    {
      "epoch": 0.6329220307903886,
      "grad_norm": 0.29544568061828613,
      "learning_rate": 6.366511120365132e-05,
      "loss": 0.759,
      "step": 4142
    },
    {
      "epoch": 0.6330748366886961,
      "grad_norm": 0.34251758456230164,
      "learning_rate": 6.361861311537046e-05,
      "loss": 0.6881,
      "step": 4143
    },
    {
      "epoch": 0.6332276425870038,
      "grad_norm": 0.36020627617836,
      "learning_rate": 6.357212409120679e-05,
      "loss": 0.8744,
      "step": 4144
    },
    {
      "epoch": 0.6333804484853115,
      "grad_norm": 0.3259471654891968,
      "learning_rate": 6.352564414274256e-05,
      "loss": 0.5879,
      "step": 4145
    },
    {
      "epoch": 0.6335332543836192,
      "grad_norm": 0.2928166687488556,
      "learning_rate": 6.347917328155795e-05,
      "loss": 0.7869,
      "step": 4146
    },
    {
      "epoch": 0.6336860602819269,
      "grad_norm": 0.316599577665329,
      "learning_rate": 6.343271151923074e-05,
      "loss": 0.8952,
      "step": 4147
    },
    {
      "epoch": 0.6338388661802346,
      "grad_norm": 0.30253708362579346,
      "learning_rate": 6.338625886733654e-05,
      "loss": 0.6866,
      "step": 4148
    },
    {
      "epoch": 0.6339916720785422,
      "grad_norm": 0.29290080070495605,
      "learning_rate": 6.333981533744856e-05,
      "loss": 0.681,
      "step": 4149
    },
    {
      "epoch": 0.6341444779768499,
      "grad_norm": 0.26810938119888306,
      "learning_rate": 6.329338094113785e-05,
      "loss": 0.4452,
      "step": 4150
    },
    {
      "epoch": 0.6342972838751576,
      "grad_norm": 0.3918895423412323,
      "learning_rate": 6.324695568997319e-05,
      "loss": 0.6125,
      "step": 4151
    },
    {
      "epoch": 0.6344500897734653,
      "grad_norm": 0.2929452657699585,
      "learning_rate": 6.320053959552095e-05,
      "loss": 0.5832,
      "step": 4152
    },
    {
      "epoch": 0.634602895671773,
      "grad_norm": 0.31379982829093933,
      "learning_rate": 6.31541326693454e-05,
      "loss": 0.5962,
      "step": 4153
    },
    {
      "epoch": 0.6347557015700807,
      "grad_norm": 0.8252871036529541,
      "learning_rate": 6.310773492300839e-05,
      "loss": 0.6811,
      "step": 4154
    },
    {
      "epoch": 0.6349085074683882,
      "grad_norm": 0.2837304472923279,
      "learning_rate": 6.306134636806957e-05,
      "loss": 0.5664,
      "step": 4155
    },
    {
      "epoch": 0.6350613133666959,
      "grad_norm": 0.32201525568962097,
      "learning_rate": 6.30149670160862e-05,
      "loss": 0.7099,
      "step": 4156
    },
    {
      "epoch": 0.6352141192650036,
      "grad_norm": 0.30925363302230835,
      "learning_rate": 6.296859687861335e-05,
      "loss": 0.6987,
      "step": 4157
    },
    {
      "epoch": 0.6353669251633113,
      "grad_norm": 0.3547913134098053,
      "learning_rate": 6.292223596720371e-05,
      "loss": 0.6015,
      "step": 4158
    },
    {
      "epoch": 0.635519731061619,
      "grad_norm": 0.28169745206832886,
      "learning_rate": 6.287588429340781e-05,
      "loss": 0.5393,
      "step": 4159
    },
    {
      "epoch": 0.6356725369599266,
      "grad_norm": 0.2913646996021271,
      "learning_rate": 6.282954186877364e-05,
      "loss": 0.6671,
      "step": 4160
    },
    {
      "epoch": 0.6358253428582343,
      "grad_norm": 0.38874661922454834,
      "learning_rate": 6.27832087048471e-05,
      "loss": 0.7775,
      "step": 4161
    },
    {
      "epoch": 0.635978148756542,
      "grad_norm": 0.26316070556640625,
      "learning_rate": 6.273688481317175e-05,
      "loss": 0.6152,
      "step": 4162
    },
    {
      "epoch": 0.6361309546548497,
      "grad_norm": 0.398821622133255,
      "learning_rate": 6.269057020528872e-05,
      "loss": 0.5058,
      "step": 4163
    },
    {
      "epoch": 0.6362837605531574,
      "grad_norm": 0.3221498727798462,
      "learning_rate": 6.264426489273694e-05,
      "loss": 0.6687,
      "step": 4164
    },
    {
      "epoch": 0.6364365664514651,
      "grad_norm": 0.27947044372558594,
      "learning_rate": 6.259796888705298e-05,
      "loss": 0.673,
      "step": 4165
    },
    {
      "epoch": 0.6365893723497728,
      "grad_norm": 0.27743926644325256,
      "learning_rate": 6.255168219977114e-05,
      "loss": 0.7665,
      "step": 4166
    },
    {
      "epoch": 0.6367421782480803,
      "grad_norm": 0.24967680871486664,
      "learning_rate": 6.250540484242331e-05,
      "loss": 0.584,
      "step": 4167
    },
    {
      "epoch": 0.636894984146388,
      "grad_norm": 0.2937239408493042,
      "learning_rate": 6.245913682653912e-05,
      "loss": 0.6989,
      "step": 4168
    },
    {
      "epoch": 0.6370477900446957,
      "grad_norm": 0.7510557770729065,
      "learning_rate": 6.24128781636459e-05,
      "loss": 0.5784,
      "step": 4169
    },
    {
      "epoch": 0.6372005959430034,
      "grad_norm": 0.2786187529563904,
      "learning_rate": 6.236662886526854e-05,
      "loss": 0.6723,
      "step": 4170
    },
    {
      "epoch": 0.6373534018413111,
      "grad_norm": 0.2596394419670105,
      "learning_rate": 6.232038894292966e-05,
      "loss": 0.7527,
      "step": 4171
    },
    {
      "epoch": 0.6375062077396187,
      "grad_norm": 0.3109414577484131,
      "learning_rate": 6.227415840814963e-05,
      "loss": 0.6461,
      "step": 4172
    },
    {
      "epoch": 0.6376590136379264,
      "grad_norm": 0.314042866230011,
      "learning_rate": 6.222793727244635e-05,
      "loss": 0.6459,
      "step": 4173
    },
    {
      "epoch": 0.6378118195362341,
      "grad_norm": 0.2707376480102539,
      "learning_rate": 6.218172554733543e-05,
      "loss": 0.5276,
      "step": 4174
    },
    {
      "epoch": 0.6379646254345418,
      "grad_norm": 0.3110902011394501,
      "learning_rate": 6.21355232443301e-05,
      "loss": 0.7231,
      "step": 4175
    },
    {
      "epoch": 0.6381174313328495,
      "grad_norm": 0.29810798168182373,
      "learning_rate": 6.208933037494136e-05,
      "loss": 0.6088,
      "step": 4176
    },
    {
      "epoch": 0.6382702372311572,
      "grad_norm": 0.32729408144950867,
      "learning_rate": 6.20431469506777e-05,
      "loss": 0.6172,
      "step": 4177
    },
    {
      "epoch": 0.6384230431294649,
      "grad_norm": 0.3452955484390259,
      "learning_rate": 6.199697298304534e-05,
      "loss": 0.7917,
      "step": 4178
    },
    {
      "epoch": 0.6385758490277724,
      "grad_norm": 0.28180643916130066,
      "learning_rate": 6.195080848354818e-05,
      "loss": 0.7181,
      "step": 4179
    },
    {
      "epoch": 0.6387286549260801,
      "grad_norm": 0.3455478250980377,
      "learning_rate": 6.19046534636877e-05,
      "loss": 0.653,
      "step": 4180
    },
    {
      "epoch": 0.6388814608243878,
      "grad_norm": 0.28653568029403687,
      "learning_rate": 6.185850793496301e-05,
      "loss": 0.6431,
      "step": 4181
    },
    {
      "epoch": 0.6390342667226955,
      "grad_norm": 0.268694132566452,
      "learning_rate": 6.181237190887088e-05,
      "loss": 0.6316,
      "step": 4182
    },
    {
      "epoch": 0.6391870726210032,
      "grad_norm": 0.2896341383457184,
      "learning_rate": 6.176624539690579e-05,
      "loss": 0.537,
      "step": 4183
    },
    {
      "epoch": 0.6393398785193108,
      "grad_norm": 0.2786364257335663,
      "learning_rate": 6.172012841055968e-05,
      "loss": 0.7144,
      "step": 4184
    },
    {
      "epoch": 0.6394926844176185,
      "grad_norm": 0.33958667516708374,
      "learning_rate": 6.167402096132224e-05,
      "loss": 0.7105,
      "step": 4185
    },
    {
      "epoch": 0.6396454903159262,
      "grad_norm": 0.27773991227149963,
      "learning_rate": 6.162792306068075e-05,
      "loss": 0.7349,
      "step": 4186
    },
    {
      "epoch": 0.6397982962142339,
      "grad_norm": 0.3180773854255676,
      "learning_rate": 6.158183472012015e-05,
      "loss": 0.614,
      "step": 4187
    },
    {
      "epoch": 0.6399511021125416,
      "grad_norm": 0.2762540578842163,
      "learning_rate": 6.153575595112295e-05,
      "loss": 0.5515,
      "step": 4188
    },
    {
      "epoch": 0.6401039080108493,
      "grad_norm": 0.28452420234680176,
      "learning_rate": 6.148968676516925e-05,
      "loss": 0.7795,
      "step": 4189
    },
    {
      "epoch": 0.6402567139091568,
      "grad_norm": 0.2750689387321472,
      "learning_rate": 6.144362717373686e-05,
      "loss": 0.7882,
      "step": 4190
    },
    {
      "epoch": 0.6404095198074645,
      "grad_norm": 0.2844794988632202,
      "learning_rate": 6.139757718830106e-05,
      "loss": 0.6313,
      "step": 4191
    },
    {
      "epoch": 0.6405623257057722,
      "grad_norm": 0.2462836503982544,
      "learning_rate": 6.135153682033489e-05,
      "loss": 0.4304,
      "step": 4192
    },
    {
      "epoch": 0.6407151316040799,
      "grad_norm": 0.45701074600219727,
      "learning_rate": 6.130550608130887e-05,
      "loss": 0.7714,
      "step": 4193
    },
    {
      "epoch": 0.6408679375023876,
      "grad_norm": 0.270158976316452,
      "learning_rate": 6.125948498269126e-05,
      "loss": 0.7841,
      "step": 4194
    },
    {
      "epoch": 0.6410207434006953,
      "grad_norm": 0.30690333247184753,
      "learning_rate": 6.12134735359477e-05,
      "loss": 0.5731,
      "step": 4195
    },
    {
      "epoch": 0.6411735492990029,
      "grad_norm": 0.3889475166797638,
      "learning_rate": 6.116747175254167e-05,
      "loss": 0.5577,
      "step": 4196
    },
    {
      "epoch": 0.6413263551973106,
      "grad_norm": 0.2712765336036682,
      "learning_rate": 6.112147964393405e-05,
      "loss": 0.6571,
      "step": 4197
    },
    {
      "epoch": 0.6414791610956183,
      "grad_norm": 0.3843899667263031,
      "learning_rate": 6.107549722158347e-05,
      "loss": 0.6538,
      "step": 4198
    },
    {
      "epoch": 0.641631966993926,
      "grad_norm": 0.24763554334640503,
      "learning_rate": 6.102952449694599e-05,
      "loss": 0.5702,
      "step": 4199
    },
    {
      "epoch": 0.6417847728922337,
      "grad_norm": 0.2887122929096222,
      "learning_rate": 6.098356148147535e-05,
      "loss": 0.8121,
      "step": 4200
    },
    {
      "epoch": 0.6419375787905414,
      "grad_norm": 0.3069363534450531,
      "learning_rate": 6.0937608186622865e-05,
      "loss": 0.6811,
      "step": 4201
    },
    {
      "epoch": 0.6420903846888489,
      "grad_norm": 0.28866079449653625,
      "learning_rate": 6.0891664623837374e-05,
      "loss": 0.7553,
      "step": 4202
    },
    {
      "epoch": 0.6422431905871566,
      "grad_norm": 0.299434632062912,
      "learning_rate": 6.084573080456537e-05,
      "loss": 0.664,
      "step": 4203
    },
    {
      "epoch": 0.6423959964854643,
      "grad_norm": 0.350629985332489,
      "learning_rate": 6.0799806740250854e-05,
      "loss": 0.6892,
      "step": 4204
    },
    {
      "epoch": 0.642548802383772,
      "grad_norm": 0.3066038489341736,
      "learning_rate": 6.075389244233549e-05,
      "loss": 0.7243,
      "step": 4205
    },
    {
      "epoch": 0.6427016082820797,
      "grad_norm": 0.2728354334831238,
      "learning_rate": 6.0707987922258316e-05,
      "loss": 0.635,
      "step": 4206
    },
    {
      "epoch": 0.6428544141803874,
      "grad_norm": 0.2741679549217224,
      "learning_rate": 6.066209319145615e-05,
      "loss": 0.7023,
      "step": 4207
    },
    {
      "epoch": 0.643007220078695,
      "grad_norm": 0.30276694893836975,
      "learning_rate": 6.061620826136327e-05,
      "loss": 0.6974,
      "step": 4208
    },
    {
      "epoch": 0.6431600259770027,
      "grad_norm": 0.28418371081352234,
      "learning_rate": 6.0570333143411476e-05,
      "loss": 0.5183,
      "step": 4209
    },
    {
      "epoch": 0.6433128318753104,
      "grad_norm": 0.26944833993911743,
      "learning_rate": 6.0524467849030206e-05,
      "loss": 0.6816,
      "step": 4210
    },
    {
      "epoch": 0.6434656377736181,
      "grad_norm": 0.25730451941490173,
      "learning_rate": 6.0478612389646404e-05,
      "loss": 0.732,
      "step": 4211
    },
    {
      "epoch": 0.6436184436719258,
      "grad_norm": 0.2732875347137451,
      "learning_rate": 6.043276677668459e-05,
      "loss": 0.5747,
      "step": 4212
    },
    {
      "epoch": 0.6437712495702335,
      "grad_norm": 0.2730986773967743,
      "learning_rate": 6.038693102156676e-05,
      "loss": 0.63,
      "step": 4213
    },
    {
      "epoch": 0.643924055468541,
      "grad_norm": 0.5691524744033813,
      "learning_rate": 6.034110513571257e-05,
      "loss": 0.7707,
      "step": 4214
    },
    {
      "epoch": 0.6440768613668487,
      "grad_norm": 0.2587032616138458,
      "learning_rate": 6.029528913053914e-05,
      "loss": 0.6522,
      "step": 4215
    },
    {
      "epoch": 0.6442296672651564,
      "grad_norm": 0.46911007165908813,
      "learning_rate": 6.0249483017461117e-05,
      "loss": 0.6487,
      "step": 4216
    },
    {
      "epoch": 0.6443824731634641,
      "grad_norm": 0.39704567193984985,
      "learning_rate": 6.0203686807890704e-05,
      "loss": 0.5755,
      "step": 4217
    },
    {
      "epoch": 0.6445352790617718,
      "grad_norm": 0.4068554639816284,
      "learning_rate": 6.015790051323769e-05,
      "loss": 0.6695,
      "step": 4218
    },
    {
      "epoch": 0.6446880849600795,
      "grad_norm": 0.354889839887619,
      "learning_rate": 6.0112124144909335e-05,
      "loss": 0.7831,
      "step": 4219
    },
    {
      "epoch": 0.6448408908583871,
      "grad_norm": 0.2730399966239929,
      "learning_rate": 6.006635771431039e-05,
      "loss": 0.8288,
      "step": 4220
    },
    {
      "epoch": 0.6449936967566948,
      "grad_norm": 0.30042764544487,
      "learning_rate": 6.002060123284321e-05,
      "loss": 0.7643,
      "step": 4221
    },
    {
      "epoch": 0.6451465026550025,
      "grad_norm": 0.3824443519115448,
      "learning_rate": 5.9974854711907646e-05,
      "loss": 0.7536,
      "step": 4222
    },
    {
      "epoch": 0.6452993085533102,
      "grad_norm": 0.2480231523513794,
      "learning_rate": 5.9929118162901056e-05,
      "loss": 0.847,
      "step": 4223
    },
    {
      "epoch": 0.6454521144516179,
      "grad_norm": 0.4015941023826599,
      "learning_rate": 5.988339159721828e-05,
      "loss": 0.7163,
      "step": 4224
    },
    {
      "epoch": 0.6456049203499256,
      "grad_norm": 0.3213943839073181,
      "learning_rate": 5.983767502625176e-05,
      "loss": 0.7023,
      "step": 4225
    },
    {
      "epoch": 0.6457577262482331,
      "grad_norm": 0.3295423090457916,
      "learning_rate": 5.979196846139139e-05,
      "loss": 0.8445,
      "step": 4226
    },
    {
      "epoch": 0.6459105321465408,
      "grad_norm": 0.3309463858604431,
      "learning_rate": 5.9746271914024554e-05,
      "loss": 0.8821,
      "step": 4227
    },
    {
      "epoch": 0.6460633380448485,
      "grad_norm": 0.28392040729522705,
      "learning_rate": 5.970058539553614e-05,
      "loss": 0.5415,
      "step": 4228
    },
    {
      "epoch": 0.6462161439431562,
      "grad_norm": 0.2704792022705078,
      "learning_rate": 5.965490891730863e-05,
      "loss": 0.8293,
      "step": 4229
    },
    {
      "epoch": 0.6463689498414639,
      "grad_norm": 0.30566468834877014,
      "learning_rate": 5.9609242490721884e-05,
      "loss": 0.5895,
      "step": 4230
    },
    {
      "epoch": 0.6465217557397716,
      "grad_norm": 0.4203466475009918,
      "learning_rate": 5.9563586127153315e-05,
      "loss": 0.7199,
      "step": 4231
    },
    {
      "epoch": 0.6466745616380792,
      "grad_norm": 0.3807709813117981,
      "learning_rate": 5.951793983797782e-05,
      "loss": 0.6698,
      "step": 4232
    },
    {
      "epoch": 0.6468273675363869,
      "grad_norm": 0.37328287959098816,
      "learning_rate": 5.9472303634567836e-05,
      "loss": 0.6147,
      "step": 4233
    },
    {
      "epoch": 0.6469801734346946,
      "grad_norm": 0.33843472599983215,
      "learning_rate": 5.942667752829317e-05,
      "loss": 0.6556,
      "step": 4234
    },
    {
      "epoch": 0.6471329793330023,
      "grad_norm": 0.34154462814331055,
      "learning_rate": 5.938106153052123e-05,
      "loss": 0.8309,
      "step": 4235
    },
    {
      "epoch": 0.64728578523131,
      "grad_norm": 0.27381810545921326,
      "learning_rate": 5.933545565261682e-05,
      "loss": 0.8016,
      "step": 4236
    },
    {
      "epoch": 0.6474385911296177,
      "grad_norm": 0.2713511884212494,
      "learning_rate": 5.928985990594231e-05,
      "loss": 0.5769,
      "step": 4237
    },
    {
      "epoch": 0.6475913970279252,
      "grad_norm": 0.3166002333164215,
      "learning_rate": 5.9244274301857484e-05,
      "loss": 0.9896,
      "step": 4238
    },
    {
      "epoch": 0.6477442029262329,
      "grad_norm": 0.3390193581581116,
      "learning_rate": 5.919869885171956e-05,
      "loss": 0.707,
      "step": 4239
    },
    {
      "epoch": 0.6478970088245406,
      "grad_norm": 0.4529277980327606,
      "learning_rate": 5.915313356688339e-05,
      "loss": 0.7401,
      "step": 4240
    },
    {
      "epoch": 0.6480498147228483,
      "grad_norm": 0.37623921036720276,
      "learning_rate": 5.910757845870105e-05,
      "loss": 0.6358,
      "step": 4241
    },
    {
      "epoch": 0.648202620621156,
      "grad_norm": 0.29530203342437744,
      "learning_rate": 5.9062033538522286e-05,
      "loss": 0.7905,
      "step": 4242
    },
    {
      "epoch": 0.6483554265194637,
      "grad_norm": 0.2699858248233795,
      "learning_rate": 5.901649881769422e-05,
      "loss": 0.6672,
      "step": 4243
    },
    {
      "epoch": 0.6485082324177713,
      "grad_norm": 0.24652545154094696,
      "learning_rate": 5.8970974307561475e-05,
      "loss": 0.6473,
      "step": 4244
    },
    {
      "epoch": 0.648661038316079,
      "grad_norm": 0.26167502999305725,
      "learning_rate": 5.892546001946606e-05,
      "loss": 0.5892,
      "step": 4245
    },
    {
      "epoch": 0.6488138442143867,
      "grad_norm": 0.3461175560951233,
      "learning_rate": 5.887995596474749e-05,
      "loss": 0.6392,
      "step": 4246
    },
    {
      "epoch": 0.6489666501126944,
      "grad_norm": 0.2689460515975952,
      "learning_rate": 5.8834462154742745e-05,
      "loss": 0.6877,
      "step": 4247
    },
    {
      "epoch": 0.6491194560110021,
      "grad_norm": 0.3303474485874176,
      "learning_rate": 5.878897860078616e-05,
      "loss": 0.6899,
      "step": 4248
    },
    {
      "epoch": 0.6492722619093096,
      "grad_norm": 0.29973793029785156,
      "learning_rate": 5.8743505314209634e-05,
      "loss": 0.8927,
      "step": 4249
    },
    {
      "epoch": 0.6494250678076173,
      "grad_norm": 0.30865025520324707,
      "learning_rate": 5.8698042306342416e-05,
      "loss": 0.6779,
      "step": 4250
    },
    {
      "epoch": 0.649577873705925,
      "grad_norm": 0.3161505162715912,
      "learning_rate": 5.865258958851134e-05,
      "loss": 0.8618,
      "step": 4251
    },
    {
      "epoch": 0.6497306796042327,
      "grad_norm": 0.27236294746398926,
      "learning_rate": 5.860714717204041e-05,
      "loss": 0.5867,
      "step": 4252
    },
    {
      "epoch": 0.6498834855025404,
      "grad_norm": 0.2424437701702118,
      "learning_rate": 5.856171506825132e-05,
      "loss": 0.6115,
      "step": 4253
    },
    {
      "epoch": 0.6500362914008481,
      "grad_norm": 0.2960748076438904,
      "learning_rate": 5.851629328846311e-05,
      "loss": 0.7064,
      "step": 4254
    },
    {
      "epoch": 0.6501890972991557,
      "grad_norm": 0.31836503744125366,
      "learning_rate": 5.8470881843992185e-05,
      "loss": 0.6482,
      "step": 4255
    },
    {
      "epoch": 0.6503419031974634,
      "grad_norm": 0.24373292922973633,
      "learning_rate": 5.842548074615242e-05,
      "loss": 0.5645,
      "step": 4256
    },
    {
      "epoch": 0.6504947090957711,
      "grad_norm": 0.2876763343811035,
      "learning_rate": 5.838009000625515e-05,
      "loss": 0.7036,
      "step": 4257
    },
    {
      "epoch": 0.6506475149940788,
      "grad_norm": 0.27968302369117737,
      "learning_rate": 5.8334709635609106e-05,
      "loss": 0.8507,
      "step": 4258
    },
    {
      "epoch": 0.6508003208923865,
      "grad_norm": 0.33190199732780457,
      "learning_rate": 5.828933964552037e-05,
      "loss": 0.6497,
      "step": 4259
    },
    {
      "epoch": 0.6509531267906942,
      "grad_norm": 0.28241148591041565,
      "learning_rate": 5.8243980047292545e-05,
      "loss": 0.6532,
      "step": 4260
    },
    {
      "epoch": 0.6511059326890017,
      "grad_norm": 0.30200818181037903,
      "learning_rate": 5.819863085222665e-05,
      "loss": 0.715,
      "step": 4261
    },
    {
      "epoch": 0.6512587385873094,
      "grad_norm": 0.31453654170036316,
      "learning_rate": 5.81532920716209e-05,
      "loss": 0.6918,
      "step": 4262
    },
    {
      "epoch": 0.6514115444856171,
      "grad_norm": 0.31839510798454285,
      "learning_rate": 5.810796371677117e-05,
      "loss": 0.7786,
      "step": 4263
    },
    {
      "epoch": 0.6515643503839248,
      "grad_norm": 0.28044262528419495,
      "learning_rate": 5.806264579897063e-05,
      "loss": 0.7164,
      "step": 4264
    },
    {
      "epoch": 0.6517171562822325,
      "grad_norm": 0.31478336453437805,
      "learning_rate": 5.8017338329509926e-05,
      "loss": 0.6987,
      "step": 4265
    },
    {
      "epoch": 0.6518699621805402,
      "grad_norm": 0.5099149346351624,
      "learning_rate": 5.797204131967691e-05,
      "loss": 0.6539,
      "step": 4266
    },
    {
      "epoch": 0.6520227680788478,
      "grad_norm": 0.3031832277774811,
      "learning_rate": 5.792675478075697e-05,
      "loss": 0.7614,
      "step": 4267
    },
    {
      "epoch": 0.6521755739771555,
      "grad_norm": 0.2523060142993927,
      "learning_rate": 5.788147872403293e-05,
      "loss": 0.6402,
      "step": 4268
    },
    {
      "epoch": 0.6523283798754632,
      "grad_norm": 0.31935545802116394,
      "learning_rate": 5.783621316078495e-05,
      "loss": 0.7183,
      "step": 4269
    },
    {
      "epoch": 0.6524811857737709,
      "grad_norm": 0.27997279167175293,
      "learning_rate": 5.779095810229052e-05,
      "loss": 0.6922,
      "step": 4270
    },
    {
      "epoch": 0.6526339916720786,
      "grad_norm": 0.3088814318180084,
      "learning_rate": 5.774571355982452e-05,
      "loss": 0.6417,
      "step": 4271
    },
    {
      "epoch": 0.6527867975703863,
      "grad_norm": 0.35518980026245117,
      "learning_rate": 5.7700479544659346e-05,
      "loss": 0.8312,
      "step": 4272
    },
    {
      "epoch": 0.6529396034686938,
      "grad_norm": 0.32195112109184265,
      "learning_rate": 5.7655256068064576e-05,
      "loss": 0.6058,
      "step": 4273
    },
    {
      "epoch": 0.6530924093670015,
      "grad_norm": 0.27744877338409424,
      "learning_rate": 5.7610043141307345e-05,
      "loss": 0.75,
      "step": 4274
    },
    {
      "epoch": 0.6532452152653092,
      "grad_norm": 0.34689977765083313,
      "learning_rate": 5.7564840775651994e-05,
      "loss": 0.6277,
      "step": 4275
    },
    {
      "epoch": 0.6533980211636169,
      "grad_norm": 0.3278833329677582,
      "learning_rate": 5.7519648982360395e-05,
      "loss": 0.7029,
      "step": 4276
    },
    {
      "epoch": 0.6535508270619246,
      "grad_norm": 0.3406006395816803,
      "learning_rate": 5.7474467772691606e-05,
      "loss": 0.577,
      "step": 4277
    },
    {
      "epoch": 0.6537036329602323,
      "grad_norm": 0.32342788577079773,
      "learning_rate": 5.7429297157902264e-05,
      "loss": 0.6111,
      "step": 4278
    },
    {
      "epoch": 0.65385643885854,
      "grad_norm": 0.27723947167396545,
      "learning_rate": 5.7384137149246175e-05,
      "loss": 0.7135,
      "step": 4279
    },
    {
      "epoch": 0.6540092447568476,
      "grad_norm": 0.2640012502670288,
      "learning_rate": 5.733898775797455e-05,
      "loss": 0.62,
      "step": 4280
    },
    {
      "epoch": 0.6541620506551553,
      "grad_norm": 0.2819145619869232,
      "learning_rate": 5.729384899533602e-05,
      "loss": 0.5432,
      "step": 4281
    },
    {
      "epoch": 0.654314856553463,
      "grad_norm": 0.36311617493629456,
      "learning_rate": 5.724872087257657e-05,
      "loss": 0.5153,
      "step": 4282
    },
    {
      "epoch": 0.6544676624517707,
      "grad_norm": 0.3131016790866852,
      "learning_rate": 5.7203603400939445e-05,
      "loss": 0.5944,
      "step": 4283
    },
    {
      "epoch": 0.6546204683500784,
      "grad_norm": 0.2309597134590149,
      "learning_rate": 5.715849659166525e-05,
      "loss": 0.6252,
      "step": 4284
    },
    {
      "epoch": 0.6547732742483859,
      "grad_norm": 0.44529998302459717,
      "learning_rate": 5.7113400455992e-05,
      "loss": 0.8177,
      "step": 4285
    },
    {
      "epoch": 0.6549260801466936,
      "grad_norm": 0.3021618127822876,
      "learning_rate": 5.706831500515507e-05,
      "loss": 0.7102,
      "step": 4286
    },
    {
      "epoch": 0.6550788860450013,
      "grad_norm": 0.30636557936668396,
      "learning_rate": 5.7023240250387075e-05,
      "loss": 0.8765,
      "step": 4287
    },
    {
      "epoch": 0.655231691943309,
      "grad_norm": 0.27565455436706543,
      "learning_rate": 5.697817620291799e-05,
      "loss": 0.6337,
      "step": 4288
    },
    {
      "epoch": 0.6553844978416167,
      "grad_norm": 0.30019816756248474,
      "learning_rate": 5.693312287397515e-05,
      "loss": 0.825,
      "step": 4289
    },
    {
      "epoch": 0.6555373037399244,
      "grad_norm": 0.33282437920570374,
      "learning_rate": 5.688808027478328e-05,
      "loss": 0.6767,
      "step": 4290
    },
    {
      "epoch": 0.655690109638232,
      "grad_norm": 0.31007322669029236,
      "learning_rate": 5.6843048416564314e-05,
      "loss": 0.8461,
      "step": 4291
    },
    {
      "epoch": 0.6558429155365397,
      "grad_norm": 0.37334969639778137,
      "learning_rate": 5.679802731053754e-05,
      "loss": 0.867,
      "step": 4292
    },
    {
      "epoch": 0.6559957214348474,
      "grad_norm": 0.3802035450935364,
      "learning_rate": 5.6753016967919633e-05,
      "loss": 0.7248,
      "step": 4293
    },
    {
      "epoch": 0.6561485273331551,
      "grad_norm": 0.29199114441871643,
      "learning_rate": 5.6708017399924485e-05,
      "loss": 0.7837,
      "step": 4294
    },
    {
      "epoch": 0.6563013332314628,
      "grad_norm": 0.27255427837371826,
      "learning_rate": 5.6663028617763415e-05,
      "loss": 0.6914,
      "step": 4295
    },
    {
      "epoch": 0.6564541391297705,
      "grad_norm": 0.28946343064308167,
      "learning_rate": 5.6618050632645e-05,
      "loss": 0.7849,
      "step": 4296
    },
    {
      "epoch": 0.656606945028078,
      "grad_norm": 0.28791841864585876,
      "learning_rate": 5.6573083455775136e-05,
      "loss": 0.6734,
      "step": 4297
    },
    {
      "epoch": 0.6567597509263857,
      "grad_norm": 0.3184029161930084,
      "learning_rate": 5.652812709835694e-05,
      "loss": 0.6667,
      "step": 4298
    },
    {
      "epoch": 0.6569125568246934,
      "grad_norm": 0.48238903284072876,
      "learning_rate": 5.648318157159096e-05,
      "loss": 0.7216,
      "step": 4299
    },
    {
      "epoch": 0.6570653627230011,
      "grad_norm": 0.3703603148460388,
      "learning_rate": 5.643824688667505e-05,
      "loss": 0.6124,
      "step": 4300
    },
    {
      "epoch": 0.6572181686213088,
      "grad_norm": 0.3254699110984802,
      "learning_rate": 5.639332305480426e-05,
      "loss": 0.7546,
      "step": 4301
    },
    {
      "epoch": 0.6573709745196165,
      "grad_norm": 0.2918962240219116,
      "learning_rate": 5.634841008717093e-05,
      "loss": 0.5583,
      "step": 4302
    },
    {
      "epoch": 0.6575237804179241,
      "grad_norm": 0.28097614645957947,
      "learning_rate": 5.630350799496482e-05,
      "loss": 0.6999,
      "step": 4303
    },
    {
      "epoch": 0.6576765863162318,
      "grad_norm": 0.30386725068092346,
      "learning_rate": 5.625861678937294e-05,
      "loss": 0.7967,
      "step": 4304
    },
    {
      "epoch": 0.6578293922145395,
      "grad_norm": 0.2628733515739441,
      "learning_rate": 5.62137364815795e-05,
      "loss": 0.785,
      "step": 4305
    },
    {
      "epoch": 0.6579821981128472,
      "grad_norm": 0.2997375726699829,
      "learning_rate": 5.616886708276603e-05,
      "loss": 0.6496,
      "step": 4306
    },
    {
      "epoch": 0.6581350040111549,
      "grad_norm": 0.37791678309440613,
      "learning_rate": 5.612400860411139e-05,
      "loss": 0.7869,
      "step": 4307
    },
    {
      "epoch": 0.6582878099094625,
      "grad_norm": 0.2886675298213959,
      "learning_rate": 5.607916105679174e-05,
      "loss": 0.671,
      "step": 4308
    },
    {
      "epoch": 0.6584406158077701,
      "grad_norm": 0.28003209829330444,
      "learning_rate": 5.6034324451980425e-05,
      "loss": 0.7855,
      "step": 4309
    },
    {
      "epoch": 0.6585934217060778,
      "grad_norm": 0.3257627487182617,
      "learning_rate": 5.5989498800848094e-05,
      "loss": 0.8834,
      "step": 4310
    },
    {
      "epoch": 0.6587462276043855,
      "grad_norm": 0.29753580689430237,
      "learning_rate": 5.594468411456273e-05,
      "loss": 0.7202,
      "step": 4311
    },
    {
      "epoch": 0.6588990335026932,
      "grad_norm": 0.29642051458358765,
      "learning_rate": 5.5899880404289465e-05,
      "loss": 0.7634,
      "step": 4312
    },
    {
      "epoch": 0.6590518394010009,
      "grad_norm": 0.2864471673965454,
      "learning_rate": 5.585508768119085e-05,
      "loss": 0.7543,
      "step": 4313
    },
    {
      "epoch": 0.6592046452993086,
      "grad_norm": 0.3022642135620117,
      "learning_rate": 5.581030595642653e-05,
      "loss": 0.8052,
      "step": 4314
    },
    {
      "epoch": 0.6593574511976162,
      "grad_norm": 0.31377336382865906,
      "learning_rate": 5.5765535241153596e-05,
      "loss": 0.8731,
      "step": 4315
    },
    {
      "epoch": 0.6595102570959239,
      "grad_norm": 0.2930757701396942,
      "learning_rate": 5.5720775546526205e-05,
      "loss": 0.7746,
      "step": 4316
    },
    {
      "epoch": 0.6596630629942316,
      "grad_norm": 0.3031260371208191,
      "learning_rate": 5.567602688369593e-05,
      "loss": 0.7174,
      "step": 4317
    },
    {
      "epoch": 0.6598158688925393,
      "grad_norm": 0.3256378471851349,
      "learning_rate": 5.5631289263811495e-05,
      "loss": 0.7988,
      "step": 4318
    },
    {
      "epoch": 0.659968674790847,
      "grad_norm": 0.2856435477733612,
      "learning_rate": 5.558656269801884e-05,
      "loss": 0.7675,
      "step": 4319
    },
    {
      "epoch": 0.6601214806891545,
      "grad_norm": 0.2758930027484894,
      "learning_rate": 5.5541847197461296e-05,
      "loss": 0.7446,
      "step": 4320
    },
    {
      "epoch": 0.6602742865874622,
      "grad_norm": 0.2451760172843933,
      "learning_rate": 5.549714277327931e-05,
      "loss": 0.6915,
      "step": 4321
    },
    {
      "epoch": 0.6604270924857699,
      "grad_norm": 0.3307189643383026,
      "learning_rate": 5.545244943661072e-05,
      "loss": 0.7638,
      "step": 4322
    },
    {
      "epoch": 0.6605798983840776,
      "grad_norm": 0.35473012924194336,
      "learning_rate": 5.5407767198590335e-05,
      "loss": 0.6032,
      "step": 4323
    },
    {
      "epoch": 0.6607327042823853,
      "grad_norm": 0.2760302722454071,
      "learning_rate": 5.536309607035043e-05,
      "loss": 0.6474,
      "step": 4324
    },
    {
      "epoch": 0.660885510180693,
      "grad_norm": 0.45210763812065125,
      "learning_rate": 5.5318436063020485e-05,
      "loss": 0.823,
      "step": 4325
    },
    {
      "epoch": 0.6610383160790007,
      "grad_norm": 0.30650877952575684,
      "learning_rate": 5.527378718772713e-05,
      "loss": 0.7758,
      "step": 4326
    },
    {
      "epoch": 0.6611911219773083,
      "grad_norm": 0.2780720591545105,
      "learning_rate": 5.522914945559421e-05,
      "loss": 0.6157,
      "step": 4327
    },
    {
      "epoch": 0.661343927875616,
      "grad_norm": 0.2897791862487793,
      "learning_rate": 5.518452287774289e-05,
      "loss": 0.772,
      "step": 4328
    },
    {
      "epoch": 0.6614967337739237,
      "grad_norm": 0.3310716152191162,
      "learning_rate": 5.513990746529154e-05,
      "loss": 0.8176,
      "step": 4329
    },
    {
      "epoch": 0.6616495396722314,
      "grad_norm": 0.29766175150871277,
      "learning_rate": 5.509530322935565e-05,
      "loss": 0.7393,
      "step": 4330
    },
    {
      "epoch": 0.6618023455705391,
      "grad_norm": 0.2601233124732971,
      "learning_rate": 5.505071018104804e-05,
      "loss": 0.6172,
      "step": 4331
    },
    {
      "epoch": 0.6619551514688466,
      "grad_norm": 0.3184444010257721,
      "learning_rate": 5.500612833147869e-05,
      "loss": 0.7818,
      "step": 4332
    },
    {
      "epoch": 0.6621079573671543,
      "grad_norm": 0.31976786255836487,
      "learning_rate": 5.4961557691754727e-05,
      "loss": 0.8395,
      "step": 4333
    },
    {
      "epoch": 0.662260763265462,
      "grad_norm": 0.26618340611457825,
      "learning_rate": 5.49169982729806e-05,
      "loss": 0.7842,
      "step": 4334
    },
    {
      "epoch": 0.6624135691637697,
      "grad_norm": 0.29280707240104675,
      "learning_rate": 5.487245008625796e-05,
      "loss": 0.6204,
      "step": 4335
    },
    {
      "epoch": 0.6625663750620774,
      "grad_norm": 0.29666972160339355,
      "learning_rate": 5.4827913142685586e-05,
      "loss": 0.6752,
      "step": 4336
    },
    {
      "epoch": 0.6627191809603851,
      "grad_norm": 0.2703108787536621,
      "learning_rate": 5.47833874533594e-05,
      "loss": 0.6249,
      "step": 4337
    },
    {
      "epoch": 0.6628719868586928,
      "grad_norm": 0.2508685886859894,
      "learning_rate": 5.473887302937268e-05,
      "loss": 0.7276,
      "step": 4338
    },
    {
      "epoch": 0.6630247927570004,
      "grad_norm": 0.28797996044158936,
      "learning_rate": 5.469436988181585e-05,
      "loss": 0.6227,
      "step": 4339
    },
    {
      "epoch": 0.6631775986553081,
      "grad_norm": 0.27974840998649597,
      "learning_rate": 5.464987802177646e-05,
      "loss": 0.5932,
      "step": 4340
    },
    {
      "epoch": 0.6633304045536158,
      "grad_norm": 0.4193362295627594,
      "learning_rate": 5.460539746033925e-05,
      "loss": 0.6765,
      "step": 4341
    },
    {
      "epoch": 0.6634832104519235,
      "grad_norm": 0.32927194237709045,
      "learning_rate": 5.4560928208586205e-05,
      "loss": 0.6033,
      "step": 4342
    },
    {
      "epoch": 0.6636360163502312,
      "grad_norm": 0.3113420009613037,
      "learning_rate": 5.45164702775965e-05,
      "loss": 0.7376,
      "step": 4343
    },
    {
      "epoch": 0.6637888222485387,
      "grad_norm": 0.2748812735080719,
      "learning_rate": 5.447202367844644e-05,
      "loss": 0.7156,
      "step": 4344
    },
    {
      "epoch": 0.6639416281468464,
      "grad_norm": 0.2942165732383728,
      "learning_rate": 5.4427588422209455e-05,
      "loss": 0.6998,
      "step": 4345
    },
    {
      "epoch": 0.6640944340451541,
      "grad_norm": 0.32953986525535583,
      "learning_rate": 5.438316451995626e-05,
      "loss": 0.5686,
      "step": 4346
    },
    {
      "epoch": 0.6642472399434618,
      "grad_norm": 0.2765321731567383,
      "learning_rate": 5.4338751982754766e-05,
      "loss": 0.6983,
      "step": 4347
    },
    {
      "epoch": 0.6644000458417695,
      "grad_norm": 0.31935787200927734,
      "learning_rate": 5.429435082166992e-05,
      "loss": 0.6046,
      "step": 4348
    },
    {
      "epoch": 0.6645528517400772,
      "grad_norm": 0.2649092972278595,
      "learning_rate": 5.424996104776385e-05,
      "loss": 0.8986,
      "step": 4349
    },
    {
      "epoch": 0.6647056576383849,
      "grad_norm": 0.8897063136100769,
      "learning_rate": 5.4205582672096e-05,
      "loss": 0.5864,
      "step": 4350
    },
    {
      "epoch": 0.6648584635366925,
      "grad_norm": 0.2820856273174286,
      "learning_rate": 5.416121570572278e-05,
      "loss": 0.6672,
      "step": 4351
    },
    {
      "epoch": 0.6650112694350002,
      "grad_norm": 0.3262161612510681,
      "learning_rate": 5.4116860159697926e-05,
      "loss": 0.6761,
      "step": 4352
    },
    {
      "epoch": 0.6651640753333079,
      "grad_norm": 0.2937242090702057,
      "learning_rate": 5.407251604507215e-05,
      "loss": 0.6514,
      "step": 4353
    },
    {
      "epoch": 0.6653168812316156,
      "grad_norm": 0.30296847224235535,
      "learning_rate": 5.402818337289353e-05,
      "loss": 0.7588,
      "step": 4354
    },
    {
      "epoch": 0.6654696871299232,
      "grad_norm": 0.29046115279197693,
      "learning_rate": 5.398386215420708e-05,
      "loss": 0.7606,
      "step": 4355
    },
    {
      "epoch": 0.6656224930282308,
      "grad_norm": 0.3020663261413574,
      "learning_rate": 5.393955240005511e-05,
      "loss": 0.7264,
      "step": 4356
    },
    {
      "epoch": 0.6657752989265385,
      "grad_norm": 0.26482436060905457,
      "learning_rate": 5.389525412147709e-05,
      "loss": 0.7413,
      "step": 4357
    },
    {
      "epoch": 0.6659281048248462,
      "grad_norm": 0.3416441082954407,
      "learning_rate": 5.3850967329509416e-05,
      "loss": 0.7522,
      "step": 4358
    },
    {
      "epoch": 0.6660809107231539,
      "grad_norm": 0.8151885271072388,
      "learning_rate": 5.380669203518585e-05,
      "loss": 0.6949,
      "step": 4359
    },
    {
      "epoch": 0.6662337166214616,
      "grad_norm": 0.4770559072494507,
      "learning_rate": 5.376242824953719e-05,
      "loss": 0.8184,
      "step": 4360
    },
    {
      "epoch": 0.6663865225197693,
      "grad_norm": 0.7262836694717407,
      "learning_rate": 5.371817598359146e-05,
      "loss": 0.7664,
      "step": 4361
    },
    {
      "epoch": 0.666539328418077,
      "grad_norm": 0.3330950140953064,
      "learning_rate": 5.3673935248373666e-05,
      "loss": 0.8146,
      "step": 4362
    },
    {
      "epoch": 0.6666921343163846,
      "grad_norm": 0.24412184953689575,
      "learning_rate": 5.3629706054906006e-05,
      "loss": 0.821,
      "step": 4363
    },
    {
      "epoch": 0.6668449402146923,
      "grad_norm": 0.2575673460960388,
      "learning_rate": 5.358548841420787e-05,
      "loss": 0.6553,
      "step": 4364
    },
    {
      "epoch": 0.6668449402146923,
      "eval_loss": 0.7003983855247498,
      "eval_runtime": 1444.4156,
      "eval_samples_per_second": 7.721,
      "eval_steps_per_second": 3.86,
      "step": 4364
    },
    {
      "epoch": 0.666997746113,
      "grad_norm": 0.30834752321243286,
      "learning_rate": 5.354128233729564e-05,
      "loss": 0.7385,
      "step": 4365
    },
    {
      "epoch": 0.6671505520113077,
      "grad_norm": 0.28052300214767456,
      "learning_rate": 5.349708783518297e-05,
      "loss": 0.9207,
      "step": 4366
    },
    {
      "epoch": 0.6673033579096153,
      "grad_norm": 0.39452987909317017,
      "learning_rate": 5.345290491888047e-05,
      "loss": 0.7037,
      "step": 4367
    },
    {
      "epoch": 0.6674561638079229,
      "grad_norm": 0.28399553894996643,
      "learning_rate": 5.3408733599396034e-05,
      "loss": 0.833,
      "step": 4368
    },
    {
      "epoch": 0.6676089697062306,
      "grad_norm": 0.3832983672618866,
      "learning_rate": 5.336457388773447e-05,
      "loss": 0.7027,
      "step": 4369
    },
    {
      "epoch": 0.6677617756045383,
      "grad_norm": 0.3385736346244812,
      "learning_rate": 5.33204257948979e-05,
      "loss": 0.6905,
      "step": 4370
    },
    {
      "epoch": 0.667914581502846,
      "grad_norm": 0.39011090993881226,
      "learning_rate": 5.32762893318854e-05,
      "loss": 0.7341,
      "step": 4371
    },
    {
      "epoch": 0.6680673874011537,
      "grad_norm": 0.35168904066085815,
      "learning_rate": 5.323216450969316e-05,
      "loss": 0.6786,
      "step": 4372
    },
    {
      "epoch": 0.6682201932994614,
      "grad_norm": 0.2878551483154297,
      "learning_rate": 5.318805133931456e-05,
      "loss": 0.461,
      "step": 4373
    },
    {
      "epoch": 0.668372999197769,
      "grad_norm": 0.295841783285141,
      "learning_rate": 5.314394983174005e-05,
      "loss": 0.5517,
      "step": 4374
    },
    {
      "epoch": 0.6685258050960767,
      "grad_norm": 0.2754735052585602,
      "learning_rate": 5.3099859997957126e-05,
      "loss": 0.6457,
      "step": 4375
    },
    {
      "epoch": 0.6686786109943844,
      "grad_norm": 0.27244895696640015,
      "learning_rate": 5.305578184895035e-05,
      "loss": 0.6681,
      "step": 4376
    },
    {
      "epoch": 0.6688314168926921,
      "grad_norm": 0.28593409061431885,
      "learning_rate": 5.301171539570146e-05,
      "loss": 0.6394,
      "step": 4377
    },
    {
      "epoch": 0.6689842227909998,
      "grad_norm": 0.26024940609931946,
      "learning_rate": 5.296766064918929e-05,
      "loss": 0.6228,
      "step": 4378
    },
    {
      "epoch": 0.6691370286893074,
      "grad_norm": 0.26800084114074707,
      "learning_rate": 5.292361762038967e-05,
      "loss": 0.4603,
      "step": 4379
    },
    {
      "epoch": 0.669289834587615,
      "grad_norm": 0.2919711768627167,
      "learning_rate": 5.28795863202755e-05,
      "loss": 0.7626,
      "step": 4380
    },
    {
      "epoch": 0.6694426404859227,
      "grad_norm": 0.2848813831806183,
      "learning_rate": 5.2835566759816865e-05,
      "loss": 0.6784,
      "step": 4381
    },
    {
      "epoch": 0.6695954463842304,
      "grad_norm": 0.2870771586894989,
      "learning_rate": 5.2791558949980915e-05,
      "loss": 0.7467,
      "step": 4382
    },
    {
      "epoch": 0.6697482522825381,
      "grad_norm": 0.33729174733161926,
      "learning_rate": 5.274756290173175e-05,
      "loss": 0.7281,
      "step": 4383
    },
    {
      "epoch": 0.6699010581808458,
      "grad_norm": 0.3717024326324463,
      "learning_rate": 5.2703578626030614e-05,
      "loss": 0.7451,
      "step": 4384
    },
    {
      "epoch": 0.6700538640791535,
      "grad_norm": 0.28417110443115234,
      "learning_rate": 5.265960613383585e-05,
      "loss": 0.5677,
      "step": 4385
    },
    {
      "epoch": 0.6702066699774611,
      "grad_norm": 0.44522276520729065,
      "learning_rate": 5.261564543610287e-05,
      "loss": 0.8297,
      "step": 4386
    },
    {
      "epoch": 0.6703594758757688,
      "grad_norm": 0.29529058933258057,
      "learning_rate": 5.257169654378405e-05,
      "loss": 0.4472,
      "step": 4387
    },
    {
      "epoch": 0.6705122817740765,
      "grad_norm": 0.27961522340774536,
      "learning_rate": 5.25277594678289e-05,
      "loss": 0.7851,
      "step": 4388
    },
    {
      "epoch": 0.6706650876723842,
      "grad_norm": 0.31498804688453674,
      "learning_rate": 5.248383421918401e-05,
      "loss": 0.6458,
      "step": 4389
    },
    {
      "epoch": 0.6708178935706919,
      "grad_norm": 0.3034273386001587,
      "learning_rate": 5.243992080879292e-05,
      "loss": 0.6193,
      "step": 4390
    },
    {
      "epoch": 0.6709706994689995,
      "grad_norm": 0.3019751310348511,
      "learning_rate": 5.239601924759634e-05,
      "loss": 0.7018,
      "step": 4391
    },
    {
      "epoch": 0.6711235053673071,
      "grad_norm": 0.3216398060321808,
      "learning_rate": 5.2352129546532e-05,
      "loss": 0.9789,
      "step": 4392
    },
    {
      "epoch": 0.6712763112656148,
      "grad_norm": 0.32603368163108826,
      "learning_rate": 5.2308251716534614e-05,
      "loss": 0.789,
      "step": 4393
    },
    {
      "epoch": 0.6714291171639225,
      "grad_norm": 0.28429511189460754,
      "learning_rate": 5.226438576853594e-05,
      "loss": 0.789,
      "step": 4394
    },
    {
      "epoch": 0.6715819230622302,
      "grad_norm": 0.2872902452945709,
      "learning_rate": 5.222053171346486e-05,
      "loss": 0.676,
      "step": 4395
    },
    {
      "epoch": 0.6717347289605379,
      "grad_norm": 0.31137508153915405,
      "learning_rate": 5.217668956224725e-05,
      "loss": 0.6885,
      "step": 4396
    },
    {
      "epoch": 0.6718875348588456,
      "grad_norm": 0.344843327999115,
      "learning_rate": 5.2132859325806003e-05,
      "loss": 0.6596,
      "step": 4397
    },
    {
      "epoch": 0.6720403407571532,
      "grad_norm": 0.26653701066970825,
      "learning_rate": 5.2089041015061e-05,
      "loss": 0.8741,
      "step": 4398
    },
    {
      "epoch": 0.6721931466554609,
      "grad_norm": 0.3506641387939453,
      "learning_rate": 5.2045234640929266e-05,
      "loss": 0.7127,
      "step": 4399
    },
    {
      "epoch": 0.6723459525537686,
      "grad_norm": 0.2936185300350189,
      "learning_rate": 5.2001440214324804e-05,
      "loss": 0.6775,
      "step": 4400
    },
    {
      "epoch": 0.6724987584520763,
      "grad_norm": 0.3009645938873291,
      "learning_rate": 5.1957657746158616e-05,
      "loss": 0.6648,
      "step": 4401
    },
    {
      "epoch": 0.672651564350384,
      "grad_norm": 0.26885971426963806,
      "learning_rate": 5.1913887247338664e-05,
      "loss": 0.8003,
      "step": 4402
    },
    {
      "epoch": 0.6728043702486916,
      "grad_norm": 0.304047554731369,
      "learning_rate": 5.1870128728770105e-05,
      "loss": 0.5644,
      "step": 4403
    },
    {
      "epoch": 0.6729571761469992,
      "grad_norm": 0.37616226077079773,
      "learning_rate": 5.182638220135492e-05,
      "loss": 0.7497,
      "step": 4404
    },
    {
      "epoch": 0.6731099820453069,
      "grad_norm": 0.27434805035591125,
      "learning_rate": 5.178264767599227e-05,
      "loss": 0.6505,
      "step": 4405
    },
    {
      "epoch": 0.6732627879436146,
      "grad_norm": 0.32058754563331604,
      "learning_rate": 5.1738925163578165e-05,
      "loss": 0.7122,
      "step": 4406
    },
    {
      "epoch": 0.6734155938419223,
      "grad_norm": 0.3557422459125519,
      "learning_rate": 5.169521467500578e-05,
      "loss": 0.4908,
      "step": 4407
    },
    {
      "epoch": 0.67356839974023,
      "grad_norm": 0.8077619075775146,
      "learning_rate": 5.165151622116513e-05,
      "loss": 0.6526,
      "step": 4408
    },
    {
      "epoch": 0.6737212056385377,
      "grad_norm": 0.26014748215675354,
      "learning_rate": 5.160782981294341e-05,
      "loss": 0.5546,
      "step": 4409
    },
    {
      "epoch": 0.6738740115368453,
      "grad_norm": 0.4278123676776886,
      "learning_rate": 5.156415546122467e-05,
      "loss": 0.7425,
      "step": 4410
    },
    {
      "epoch": 0.674026817435153,
      "grad_norm": 0.41125205159187317,
      "learning_rate": 5.1520493176889987e-05,
      "loss": 0.8237,
      "step": 4411
    },
    {
      "epoch": 0.6741796233334607,
      "grad_norm": 0.24250277876853943,
      "learning_rate": 5.147684297081747e-05,
      "loss": 0.652,
      "step": 4412
    },
    {
      "epoch": 0.6743324292317684,
      "grad_norm": 0.29083165526390076,
      "learning_rate": 5.143320485388226e-05,
      "loss": 0.6756,
      "step": 4413
    },
    {
      "epoch": 0.674485235130076,
      "grad_norm": 0.34704911708831787,
      "learning_rate": 5.1389578836956365e-05,
      "loss": 0.8026,
      "step": 4414
    },
    {
      "epoch": 0.6746380410283837,
      "grad_norm": 0.28725364804267883,
      "learning_rate": 5.134596493090882e-05,
      "loss": 0.8781,
      "step": 4415
    },
    {
      "epoch": 0.6747908469266913,
      "grad_norm": 0.34252092242240906,
      "learning_rate": 5.13023631466057e-05,
      "loss": 0.6341,
      "step": 4416
    },
    {
      "epoch": 0.674943652824999,
      "grad_norm": 0.2834259271621704,
      "learning_rate": 5.1258773494910025e-05,
      "loss": 0.7033,
      "step": 4417
    },
    {
      "epoch": 0.6750964587233067,
      "grad_norm": 0.2758314609527588,
      "learning_rate": 5.121519598668188e-05,
      "loss": 0.729,
      "step": 4418
    },
    {
      "epoch": 0.6752492646216144,
      "grad_norm": 0.2702345848083496,
      "learning_rate": 5.1171630632778035e-05,
      "loss": 0.6454,
      "step": 4419
    },
    {
      "epoch": 0.6754020705199221,
      "grad_norm": 0.38108593225479126,
      "learning_rate": 5.112807744405257e-05,
      "loss": 0.6539,
      "step": 4420
    },
    {
      "epoch": 0.6755548764182298,
      "grad_norm": 0.3102193772792816,
      "learning_rate": 5.108453643135638e-05,
      "loss": 0.6399,
      "step": 4421
    },
    {
      "epoch": 0.6757076823165374,
      "grad_norm": 0.2749772369861603,
      "learning_rate": 5.104100760553731e-05,
      "loss": 0.7171,
      "step": 4422
    },
    {
      "epoch": 0.6758604882148451,
      "grad_norm": 0.25176766514778137,
      "learning_rate": 5.099749097744024e-05,
      "loss": 0.6431,
      "step": 4423
    },
    {
      "epoch": 0.6760132941131528,
      "grad_norm": 0.3452298045158386,
      "learning_rate": 5.095398655790694e-05,
      "loss": 0.8327,
      "step": 4424
    },
    {
      "epoch": 0.6761661000114605,
      "grad_norm": 0.2980501651763916,
      "learning_rate": 5.091049435777622e-05,
      "loss": 0.8754,
      "step": 4425
    },
    {
      "epoch": 0.6763189059097681,
      "grad_norm": 0.2884484529495239,
      "learning_rate": 5.0867014387883706e-05,
      "loss": 0.8527,
      "step": 4426
    },
    {
      "epoch": 0.6764717118080757,
      "grad_norm": 0.3645104467868805,
      "learning_rate": 5.082354665906217e-05,
      "loss": 0.6624,
      "step": 4427
    },
    {
      "epoch": 0.6766245177063834,
      "grad_norm": 0.5415614247322083,
      "learning_rate": 5.078009118214119e-05,
      "loss": 0.5252,
      "step": 4428
    },
    {
      "epoch": 0.6767773236046911,
      "grad_norm": 0.3443562984466553,
      "learning_rate": 5.073664796794728e-05,
      "loss": 0.7448,
      "step": 4429
    },
    {
      "epoch": 0.6769301295029988,
      "grad_norm": 0.3706625699996948,
      "learning_rate": 5.069321702730401e-05,
      "loss": 0.9034,
      "step": 4430
    },
    {
      "epoch": 0.6770829354013065,
      "grad_norm": 0.2914830446243286,
      "learning_rate": 5.064979837103185e-05,
      "loss": 0.712,
      "step": 4431
    },
    {
      "epoch": 0.6772357412996142,
      "grad_norm": 0.3097488284111023,
      "learning_rate": 5.060639200994819e-05,
      "loss": 0.6135,
      "step": 4432
    },
    {
      "epoch": 0.6773885471979219,
      "grad_norm": 0.3045434057712555,
      "learning_rate": 5.056299795486728e-05,
      "loss": 0.6765,
      "step": 4433
    },
    {
      "epoch": 0.6775413530962295,
      "grad_norm": 0.3066057562828064,
      "learning_rate": 5.0519616216600453e-05,
      "loss": 0.7716,
      "step": 4434
    },
    {
      "epoch": 0.6776941589945372,
      "grad_norm": 0.2661169767379761,
      "learning_rate": 5.047624680595593e-05,
      "loss": 0.6888,
      "step": 4435
    },
    {
      "epoch": 0.6778469648928449,
      "grad_norm": 0.35913899540901184,
      "learning_rate": 5.043288973373881e-05,
      "loss": 0.9291,
      "step": 4436
    },
    {
      "epoch": 0.6779997707911526,
      "grad_norm": 0.26369667053222656,
      "learning_rate": 5.038954501075108e-05,
      "loss": 0.6952,
      "step": 4437
    },
    {
      "epoch": 0.6781525766894602,
      "grad_norm": 0.30785226821899414,
      "learning_rate": 5.034621264779178e-05,
      "loss": 0.7973,
      "step": 4438
    },
    {
      "epoch": 0.6783053825877678,
      "grad_norm": 0.2791886627674103,
      "learning_rate": 5.030289265565682e-05,
      "loss": 0.5642,
      "step": 4439
    },
    {
      "epoch": 0.6784581884860755,
      "grad_norm": 0.2842103838920593,
      "learning_rate": 5.025958504513899e-05,
      "loss": 0.7388,
      "step": 4440
    },
    {
      "epoch": 0.6786109943843832,
      "grad_norm": 0.3138188421726227,
      "learning_rate": 5.0216289827027986e-05,
      "loss": 0.773,
      "step": 4441
    },
    {
      "epoch": 0.6787638002826909,
      "grad_norm": 0.4475540220737457,
      "learning_rate": 5.017300701211049e-05,
      "loss": 0.89,
      "step": 4442
    },
    {
      "epoch": 0.6789166061809986,
      "grad_norm": 0.3245783746242523,
      "learning_rate": 5.012973661117002e-05,
      "loss": 0.8021,
      "step": 4443
    },
    {
      "epoch": 0.6790694120793063,
      "grad_norm": 0.29661089181900024,
      "learning_rate": 5.008647863498709e-05,
      "loss": 0.8557,
      "step": 4444
    },
    {
      "epoch": 0.679222217977614,
      "grad_norm": 0.38732847571372986,
      "learning_rate": 5.0043233094338985e-05,
      "loss": 0.793,
      "step": 4445
    },
    {
      "epoch": 0.6793750238759216,
      "grad_norm": 0.3176628053188324,
      "learning_rate": 5.000000000000002e-05,
      "loss": 0.6918,
      "step": 4446
    },
    {
      "epoch": 0.6795278297742293,
      "grad_norm": 0.2673543691635132,
      "learning_rate": 4.995677936274132e-05,
      "loss": 0.7953,
      "step": 4447
    },
    {
      "epoch": 0.679680635672537,
      "grad_norm": 0.2867792546749115,
      "learning_rate": 4.9913571193331e-05,
      "loss": 0.6188,
      "step": 4448
    },
    {
      "epoch": 0.6798334415708447,
      "grad_norm": 0.27831536531448364,
      "learning_rate": 4.987037550253398e-05,
      "loss": 0.6003,
      "step": 4449
    },
    {
      "epoch": 0.6799862474691523,
      "grad_norm": 0.2510976493358612,
      "learning_rate": 4.982719230111208e-05,
      "loss": 0.7919,
      "step": 4450
    },
    {
      "epoch": 0.68013905336746,
      "grad_norm": 0.29773804545402527,
      "learning_rate": 4.978402159982404e-05,
      "loss": 0.6,
      "step": 4451
    },
    {
      "epoch": 0.6802918592657676,
      "grad_norm": 0.26814860105514526,
      "learning_rate": 4.97408634094255e-05,
      "loss": 0.7553,
      "step": 4452
    },
    {
      "epoch": 0.6804446651640753,
      "grad_norm": 0.30513063073158264,
      "learning_rate": 4.9697717740669025e-05,
      "loss": 0.7529,
      "step": 4453
    },
    {
      "epoch": 0.680597471062383,
      "grad_norm": 0.27793049812316895,
      "learning_rate": 4.9654584604303845e-05,
      "loss": 0.6122,
      "step": 4454
    },
    {
      "epoch": 0.6807502769606907,
      "grad_norm": 0.26808398962020874,
      "learning_rate": 4.961146401107632e-05,
      "loss": 0.5882,
      "step": 4455
    },
    {
      "epoch": 0.6809030828589984,
      "grad_norm": 0.4149441123008728,
      "learning_rate": 4.956835597172954e-05,
      "loss": 0.8469,
      "step": 4456
    },
    {
      "epoch": 0.681055888757306,
      "grad_norm": 0.31907710433006287,
      "learning_rate": 4.952526049700358e-05,
      "loss": 0.6695,
      "step": 4457
    },
    {
      "epoch": 0.6812086946556137,
      "grad_norm": 0.2895703613758087,
      "learning_rate": 4.948217759763527e-05,
      "loss": 0.618,
      "step": 4458
    },
    {
      "epoch": 0.6813615005539214,
      "grad_norm": 0.3641390800476074,
      "learning_rate": 4.943910728435831e-05,
      "loss": 0.7025,
      "step": 4459
    },
    {
      "epoch": 0.6815143064522291,
      "grad_norm": 0.26010552048683167,
      "learning_rate": 4.939604956790339e-05,
      "loss": 0.5716,
      "step": 4460
    },
    {
      "epoch": 0.6816671123505368,
      "grad_norm": 0.28951773047447205,
      "learning_rate": 4.935300445899791e-05,
      "loss": 0.4312,
      "step": 4461
    },
    {
      "epoch": 0.6818199182488444,
      "grad_norm": 0.29047438502311707,
      "learning_rate": 4.930997196836625e-05,
      "loss": 0.7299,
      "step": 4462
    },
    {
      "epoch": 0.681972724147152,
      "grad_norm": 0.2965889871120453,
      "learning_rate": 4.926695210672955e-05,
      "loss": 0.6235,
      "step": 4463
    },
    {
      "epoch": 0.6821255300454597,
      "grad_norm": 0.3306009769439697,
      "learning_rate": 4.922394488480588e-05,
      "loss": 0.6667,
      "step": 4464
    },
    {
      "epoch": 0.6822783359437674,
      "grad_norm": 0.4301811754703522,
      "learning_rate": 4.918095031331011e-05,
      "loss": 0.588,
      "step": 4465
    },
    {
      "epoch": 0.6824311418420751,
      "grad_norm": 0.3095620572566986,
      "learning_rate": 4.913796840295399e-05,
      "loss": 0.7026,
      "step": 4466
    },
    {
      "epoch": 0.6825839477403828,
      "grad_norm": 0.27729034423828125,
      "learning_rate": 4.909499916444611e-05,
      "loss": 0.5636,
      "step": 4467
    },
    {
      "epoch": 0.6827367536386905,
      "grad_norm": 0.5523043870925903,
      "learning_rate": 4.905204260849183e-05,
      "loss": 0.8391,
      "step": 4468
    },
    {
      "epoch": 0.6828895595369981,
      "grad_norm": 0.3127119541168213,
      "learning_rate": 4.900909874579347e-05,
      "loss": 0.6599,
      "step": 4469
    },
    {
      "epoch": 0.6830423654353058,
      "grad_norm": 0.2766704261302948,
      "learning_rate": 4.896616758705017e-05,
      "loss": 0.6034,
      "step": 4470
    },
    {
      "epoch": 0.6831951713336135,
      "grad_norm": 0.31232303380966187,
      "learning_rate": 4.8923249142957816e-05,
      "loss": 0.8211,
      "step": 4471
    },
    {
      "epoch": 0.6833479772319212,
      "grad_norm": 0.2655163109302521,
      "learning_rate": 4.888034342420916e-05,
      "loss": 0.5255,
      "step": 4472
    },
    {
      "epoch": 0.6835007831302288,
      "grad_norm": 0.3622485101222992,
      "learning_rate": 4.8837450441493824e-05,
      "loss": 0.7362,
      "step": 4473
    },
    {
      "epoch": 0.6836535890285365,
      "grad_norm": 0.2688015401363373,
      "learning_rate": 4.879457020549828e-05,
      "loss": 0.7041,
      "step": 4474
    },
    {
      "epoch": 0.6838063949268441,
      "grad_norm": 0.28353452682495117,
      "learning_rate": 4.8751702726905733e-05,
      "loss": 0.7001,
      "step": 4475
    },
    {
      "epoch": 0.6839592008251518,
      "grad_norm": 0.36507824063301086,
      "learning_rate": 4.870884801639622e-05,
      "loss": 0.8537,
      "step": 4476
    },
    {
      "epoch": 0.6841120067234595,
      "grad_norm": 0.3329671621322632,
      "learning_rate": 4.866600608464669e-05,
      "loss": 0.7158,
      "step": 4477
    },
    {
      "epoch": 0.6842648126217672,
      "grad_norm": 0.2636788785457611,
      "learning_rate": 4.862317694233085e-05,
      "loss": 0.7174,
      "step": 4478
    },
    {
      "epoch": 0.6844176185200749,
      "grad_norm": 0.3977915346622467,
      "learning_rate": 4.858036060011922e-05,
      "loss": 0.8131,
      "step": 4479
    },
    {
      "epoch": 0.6845704244183826,
      "grad_norm": 0.3472137749195099,
      "learning_rate": 4.8537557068679075e-05,
      "loss": 0.7707,
      "step": 4480
    },
    {
      "epoch": 0.6847232303166902,
      "grad_norm": 0.27570462226867676,
      "learning_rate": 4.849476635867464e-05,
      "loss": 0.5611,
      "step": 4481
    },
    {
      "epoch": 0.6848760362149979,
      "grad_norm": 0.2932675778865814,
      "learning_rate": 4.845198848076678e-05,
      "loss": 0.7531,
      "step": 4482
    },
    {
      "epoch": 0.6850288421133056,
      "grad_norm": 0.28090453147888184,
      "learning_rate": 4.840922344561328e-05,
      "loss": 0.7064,
      "step": 4483
    },
    {
      "epoch": 0.6851816480116133,
      "grad_norm": 0.4638606607913971,
      "learning_rate": 4.8366471263868726e-05,
      "loss": 0.6788,
      "step": 4484
    },
    {
      "epoch": 0.6853344539099209,
      "grad_norm": 0.3164824843406677,
      "learning_rate": 4.8323731946184446e-05,
      "loss": 0.8831,
      "step": 4485
    },
    {
      "epoch": 0.6854872598082286,
      "grad_norm": 0.5579379200935364,
      "learning_rate": 4.828100550320852e-05,
      "loss": 0.5889,
      "step": 4486
    },
    {
      "epoch": 0.6856400657065362,
      "grad_norm": 0.2748773694038391,
      "learning_rate": 4.823829194558593e-05,
      "loss": 0.5735,
      "step": 4487
    },
    {
      "epoch": 0.6857928716048439,
      "grad_norm": 0.3306643068790436,
      "learning_rate": 4.8195591283958483e-05,
      "loss": 0.7205,
      "step": 4488
    },
    {
      "epoch": 0.6859456775031516,
      "grad_norm": 0.4027121365070343,
      "learning_rate": 4.815290352896453e-05,
      "loss": 0.8095,
      "step": 4489
    },
    {
      "epoch": 0.6860984834014593,
      "grad_norm": 0.3824899196624756,
      "learning_rate": 4.8110228691239453e-05,
      "loss": 0.6471,
      "step": 4490
    },
    {
      "epoch": 0.686251289299767,
      "grad_norm": 0.2688082456588745,
      "learning_rate": 4.806756678141532e-05,
      "loss": 0.5867,
      "step": 4491
    },
    {
      "epoch": 0.6864040951980747,
      "grad_norm": 0.3712558448314667,
      "learning_rate": 4.8024917810121015e-05,
      "loss": 0.7572,
      "step": 4492
    },
    {
      "epoch": 0.6865569010963823,
      "grad_norm": 0.26351073384284973,
      "learning_rate": 4.7982281787982165e-05,
      "loss": 0.6777,
      "step": 4493
    },
    {
      "epoch": 0.68670970699469,
      "grad_norm": 0.5376992225646973,
      "learning_rate": 4.7939658725621104e-05,
      "loss": 0.7894,
      "step": 4494
    },
    {
      "epoch": 0.6868625128929977,
      "grad_norm": 0.34455183148384094,
      "learning_rate": 4.789704863365707e-05,
      "loss": 0.4973,
      "step": 4495
    },
    {
      "epoch": 0.6870153187913054,
      "grad_norm": 0.28120651841163635,
      "learning_rate": 4.7854451522706044e-05,
      "loss": 0.6821,
      "step": 4496
    },
    {
      "epoch": 0.687168124689613,
      "grad_norm": 0.36648836731910706,
      "learning_rate": 4.7811867403380696e-05,
      "loss": 0.6997,
      "step": 4497
    },
    {
      "epoch": 0.6873209305879207,
      "grad_norm": 0.3456977605819702,
      "learning_rate": 4.776929628629047e-05,
      "loss": 0.8019,
      "step": 4498
    },
    {
      "epoch": 0.6874737364862283,
      "grad_norm": 0.32706472277641296,
      "learning_rate": 4.7726738182041674e-05,
      "loss": 0.761,
      "step": 4499
    },
    {
      "epoch": 0.687626542384536,
      "grad_norm": 0.2870117723941803,
      "learning_rate": 4.768419310123723e-05,
      "loss": 0.6598,
      "step": 4500
    },
    {
      "epoch": 0.6877793482828437,
      "grad_norm": 0.4577259421348572,
      "learning_rate": 4.7641661054476946e-05,
      "loss": 0.7088,
      "step": 4501
    },
    {
      "epoch": 0.6879321541811514,
      "grad_norm": 0.260759562253952,
      "learning_rate": 4.759914205235728e-05,
      "loss": 0.7125,
      "step": 4502
    },
    {
      "epoch": 0.6880849600794591,
      "grad_norm": 0.3623206317424774,
      "learning_rate": 4.755663610547154e-05,
      "loss": 0.6856,
      "step": 4503
    },
    {
      "epoch": 0.6882377659777668,
      "grad_norm": 0.3873855471611023,
      "learning_rate": 4.751414322440966e-05,
      "loss": 0.545,
      "step": 4504
    },
    {
      "epoch": 0.6883905718760744,
      "grad_norm": 0.4338493049144745,
      "learning_rate": 4.747166341975844e-05,
      "loss": 0.8404,
      "step": 4505
    },
    {
      "epoch": 0.6885433777743821,
      "grad_norm": 0.23762832581996918,
      "learning_rate": 4.742919670210135e-05,
      "loss": 0.7745,
      "step": 4506
    },
    {
      "epoch": 0.6886961836726898,
      "grad_norm": 0.3028179109096527,
      "learning_rate": 4.738674308201858e-05,
      "loss": 0.6844,
      "step": 4507
    },
    {
      "epoch": 0.6888489895709975,
      "grad_norm": 0.3360441327095032,
      "learning_rate": 4.7344302570087115e-05,
      "loss": 0.773,
      "step": 4508
    },
    {
      "epoch": 0.6890017954693051,
      "grad_norm": 0.27509480714797974,
      "learning_rate": 4.730187517688069e-05,
      "loss": 0.6513,
      "step": 4509
    },
    {
      "epoch": 0.6891546013676128,
      "grad_norm": 0.4068647027015686,
      "learning_rate": 4.725946091296972e-05,
      "loss": 0.6546,
      "step": 4510
    },
    {
      "epoch": 0.6893074072659204,
      "grad_norm": 0.31606605648994446,
      "learning_rate": 4.72170597889213e-05,
      "loss": 0.5298,
      "step": 4511
    },
    {
      "epoch": 0.6894602131642281,
      "grad_norm": 0.2957019805908203,
      "learning_rate": 4.717467181529937e-05,
      "loss": 0.7674,
      "step": 4512
    },
    {
      "epoch": 0.6896130190625358,
      "grad_norm": 0.30565783381462097,
      "learning_rate": 4.713229700266455e-05,
      "loss": 0.5802,
      "step": 4513
    },
    {
      "epoch": 0.6897658249608435,
      "grad_norm": 0.30693966150283813,
      "learning_rate": 4.7089935361574154e-05,
      "loss": 0.5424,
      "step": 4514
    },
    {
      "epoch": 0.6899186308591512,
      "grad_norm": 0.2552562654018402,
      "learning_rate": 4.704758690258218e-05,
      "loss": 0.719,
      "step": 4515
    },
    {
      "epoch": 0.6900714367574589,
      "grad_norm": 0.2818084955215454,
      "learning_rate": 4.700525163623944e-05,
      "loss": 0.7768,
      "step": 4516
    },
    {
      "epoch": 0.6902242426557665,
      "grad_norm": 0.2802093029022217,
      "learning_rate": 4.696292957309345e-05,
      "loss": 0.6998,
      "step": 4517
    },
    {
      "epoch": 0.6903770485540742,
      "grad_norm": 0.4027109444141388,
      "learning_rate": 4.69206207236883e-05,
      "loss": 0.8152,
      "step": 4518
    },
    {
      "epoch": 0.6905298544523819,
      "grad_norm": 0.38950568437576294,
      "learning_rate": 4.687832509856498e-05,
      "loss": 0.6509,
      "step": 4519
    },
    {
      "epoch": 0.6906826603506895,
      "grad_norm": 0.2784578204154968,
      "learning_rate": 4.6836042708261044e-05,
      "loss": 0.7362,
      "step": 4520
    },
    {
      "epoch": 0.6908354662489972,
      "grad_norm": 0.3080911338329315,
      "learning_rate": 4.679377356331076e-05,
      "loss": 0.6629,
      "step": 4521
    },
    {
      "epoch": 0.6909882721473048,
      "grad_norm": 0.3341425359249115,
      "learning_rate": 4.675151767424516e-05,
      "loss": 0.6944,
      "step": 4522
    },
    {
      "epoch": 0.6911410780456125,
      "grad_norm": 0.3041728734970093,
      "learning_rate": 4.670927505159199e-05,
      "loss": 0.7363,
      "step": 4523
    },
    {
      "epoch": 0.6912938839439202,
      "grad_norm": 0.33875536918640137,
      "learning_rate": 4.666704570587561e-05,
      "loss": 0.6821,
      "step": 4524
    },
    {
      "epoch": 0.6914466898422279,
      "grad_norm": 0.34854626655578613,
      "learning_rate": 4.662482964761707e-05,
      "loss": 0.6976,
      "step": 4525
    },
    {
      "epoch": 0.6915994957405356,
      "grad_norm": 0.3705041706562042,
      "learning_rate": 4.6582626887334166e-05,
      "loss": 0.7212,
      "step": 4526
    },
    {
      "epoch": 0.6917523016388433,
      "grad_norm": 0.26057520508766174,
      "learning_rate": 4.654043743554143e-05,
      "loss": 0.6315,
      "step": 4527
    },
    {
      "epoch": 0.691905107537151,
      "grad_norm": 0.2733753025531769,
      "learning_rate": 4.649826130274993e-05,
      "loss": 0.7938,
      "step": 4528
    },
    {
      "epoch": 0.6920579134354586,
      "grad_norm": 0.3895609676837921,
      "learning_rate": 4.6456098499467504e-05,
      "loss": 0.693,
      "step": 4529
    },
    {
      "epoch": 0.6922107193337663,
      "grad_norm": 0.2385978400707245,
      "learning_rate": 4.6413949036198665e-05,
      "loss": 0.7292,
      "step": 4530
    },
    {
      "epoch": 0.692363525232074,
      "grad_norm": 0.31826111674308777,
      "learning_rate": 4.6371812923444645e-05,
      "loss": 0.6661,
      "step": 4531
    },
    {
      "epoch": 0.6925163311303816,
      "grad_norm": 0.2785007357597351,
      "learning_rate": 4.632969017170328e-05,
      "loss": 0.7982,
      "step": 4532
    },
    {
      "epoch": 0.6926691370286893,
      "grad_norm": 0.32651248574256897,
      "learning_rate": 4.628758079146904e-05,
      "loss": 0.6782,
      "step": 4533
    },
    {
      "epoch": 0.692821942926997,
      "grad_norm": 0.3232291638851166,
      "learning_rate": 4.6245484793233174e-05,
      "loss": 0.7127,
      "step": 4534
    },
    {
      "epoch": 0.6929747488253046,
      "grad_norm": 0.3334408700466156,
      "learning_rate": 4.620340218748358e-05,
      "loss": 0.77,
      "step": 4535
    },
    {
      "epoch": 0.6931275547236123,
      "grad_norm": 0.28033608198165894,
      "learning_rate": 4.6161332984704745e-05,
      "loss": 0.6162,
      "step": 4536
    },
    {
      "epoch": 0.69328036062192,
      "grad_norm": 0.30538347363471985,
      "learning_rate": 4.611927719537783e-05,
      "loss": 0.7848,
      "step": 4537
    },
    {
      "epoch": 0.6934331665202277,
      "grad_norm": 0.3362586796283722,
      "learning_rate": 4.6077234829980744e-05,
      "loss": 0.6955,
      "step": 4538
    },
    {
      "epoch": 0.6935859724185354,
      "grad_norm": 0.2605217695236206,
      "learning_rate": 4.603520589898792e-05,
      "loss": 0.8394,
      "step": 4539
    },
    {
      "epoch": 0.693738778316843,
      "grad_norm": 0.27571675181388855,
      "learning_rate": 4.59931904128706e-05,
      "loss": 0.5412,
      "step": 4540
    },
    {
      "epoch": 0.6938915842151507,
      "grad_norm": 0.28749173879623413,
      "learning_rate": 4.59511883820965e-05,
      "loss": 0.6878,
      "step": 4541
    },
    {
      "epoch": 0.6940443901134584,
      "grad_norm": 0.49778303503990173,
      "learning_rate": 4.590919981713016e-05,
      "loss": 0.9041,
      "step": 4542
    },
    {
      "epoch": 0.6941971960117661,
      "grad_norm": 0.23859632015228271,
      "learning_rate": 4.586722472843259e-05,
      "loss": 0.7782,
      "step": 4543
    },
    {
      "epoch": 0.6943500019100737,
      "grad_norm": 0.3211337625980377,
      "learning_rate": 4.582526312646158e-05,
      "loss": 0.7629,
      "step": 4544
    },
    {
      "epoch": 0.6945028078083814,
      "grad_norm": 0.3579085171222687,
      "learning_rate": 4.578331502167157e-05,
      "loss": 0.7138,
      "step": 4545
    },
    {
      "epoch": 0.694655613706689,
      "grad_norm": 0.2770020365715027,
      "learning_rate": 4.5741380424513446e-05,
      "loss": 0.5127,
      "step": 4546
    },
    {
      "epoch": 0.6948084196049967,
      "grad_norm": 0.35301145911216736,
      "learning_rate": 4.5699459345434937e-05,
      "loss": 0.854,
      "step": 4547
    },
    {
      "epoch": 0.6949612255033044,
      "grad_norm": 0.48538297414779663,
      "learning_rate": 4.5657551794880316e-05,
      "loss": 0.7938,
      "step": 4548
    },
    {
      "epoch": 0.6951140314016121,
      "grad_norm": 0.2835939824581146,
      "learning_rate": 4.561565778329057e-05,
      "loss": 0.7409,
      "step": 4549
    },
    {
      "epoch": 0.6952668372999198,
      "grad_norm": 0.28189727663993835,
      "learning_rate": 4.557377732110309e-05,
      "loss": 0.6853,
      "step": 4550
    },
    {
      "epoch": 0.6954196431982275,
      "grad_norm": 0.2865130603313446,
      "learning_rate": 4.553191041875214e-05,
      "loss": 0.8017,
      "step": 4551
    },
    {
      "epoch": 0.6955724490965352,
      "grad_norm": 0.31105712056159973,
      "learning_rate": 4.549005708666852e-05,
      "loss": 0.7171,
      "step": 4552
    },
    {
      "epoch": 0.6957252549948428,
      "grad_norm": 0.29811492562294006,
      "learning_rate": 4.544821733527958e-05,
      "loss": 0.7542,
      "step": 4553
    },
    {
      "epoch": 0.6958780608931505,
      "grad_norm": 0.29602208733558655,
      "learning_rate": 4.54063911750094e-05,
      "loss": 0.8067,
      "step": 4554
    },
    {
      "epoch": 0.6960308667914582,
      "grad_norm": 0.27338042855262756,
      "learning_rate": 4.536457861627854e-05,
      "loss": 0.6514,
      "step": 4555
    },
    {
      "epoch": 0.6961836726897658,
      "grad_norm": 0.3065601885318756,
      "learning_rate": 4.5322779669504344e-05,
      "loss": 0.6071,
      "step": 4556
    },
    {
      "epoch": 0.6963364785880735,
      "grad_norm": 0.253862202167511,
      "learning_rate": 4.528099434510058e-05,
      "loss": 0.5923,
      "step": 4557
    },
    {
      "epoch": 0.6964892844863811,
      "grad_norm": 0.2711423337459564,
      "learning_rate": 4.5239222653477786e-05,
      "loss": 0.616,
      "step": 4558
    },
    {
      "epoch": 0.6966420903846888,
      "grad_norm": 0.31371334195137024,
      "learning_rate": 4.5197464605043e-05,
      "loss": 0.7606,
      "step": 4559
    },
    {
      "epoch": 0.6967948962829965,
      "grad_norm": 0.3264442980289459,
      "learning_rate": 4.515572021019984e-05,
      "loss": 0.8191,
      "step": 4560
    },
    {
      "epoch": 0.6969477021813042,
      "grad_norm": 0.3145497739315033,
      "learning_rate": 4.511398947934861e-05,
      "loss": 0.6609,
      "step": 4561
    },
    {
      "epoch": 0.6971005080796119,
      "grad_norm": 0.4659073054790497,
      "learning_rate": 4.507227242288621e-05,
      "loss": 0.9295,
      "step": 4562
    },
    {
      "epoch": 0.6972533139779196,
      "grad_norm": 0.36418986320495605,
      "learning_rate": 4.503056905120606e-05,
      "loss": 0.6698,
      "step": 4563
    },
    {
      "epoch": 0.6974061198762272,
      "grad_norm": 0.27688685059547424,
      "learning_rate": 4.4988879374698165e-05,
      "loss": 0.7545,
      "step": 4564
    },
    {
      "epoch": 0.6975589257745349,
      "grad_norm": 0.4654596149921417,
      "learning_rate": 4.49472034037492e-05,
      "loss": 0.7939,
      "step": 4565
    },
    {
      "epoch": 0.6977117316728426,
      "grad_norm": 0.27792832255363464,
      "learning_rate": 4.4905541148742426e-05,
      "loss": 0.5877,
      "step": 4566
    },
    {
      "epoch": 0.6978645375711503,
      "grad_norm": 0.36160188913345337,
      "learning_rate": 4.486389262005759e-05,
      "loss": 0.7599,
      "step": 4567
    },
    {
      "epoch": 0.6980173434694579,
      "grad_norm": 0.28020909428596497,
      "learning_rate": 4.4822257828071046e-05,
      "loss": 0.4777,
      "step": 4568
    },
    {
      "epoch": 0.6981701493677656,
      "grad_norm": 0.27438125014305115,
      "learning_rate": 4.478063678315578e-05,
      "loss": 0.5457,
      "step": 4569
    },
    {
      "epoch": 0.6983229552660732,
      "grad_norm": 0.3062969744205475,
      "learning_rate": 4.473902949568138e-05,
      "loss": 0.7137,
      "step": 4570
    },
    {
      "epoch": 0.6984757611643809,
      "grad_norm": 0.29249826073646545,
      "learning_rate": 4.469743597601391e-05,
      "loss": 0.6558,
      "step": 4571
    },
    {
      "epoch": 0.6986285670626886,
      "grad_norm": 0.404161661863327,
      "learning_rate": 4.465585623451601e-05,
      "loss": 0.6618,
      "step": 4572
    },
    {
      "epoch": 0.6987813729609963,
      "grad_norm": 0.3219901919364929,
      "learning_rate": 4.4614290281546945e-05,
      "loss": 0.7573,
      "step": 4573
    },
    {
      "epoch": 0.698934178859304,
      "grad_norm": 0.30580899119377136,
      "learning_rate": 4.457273812746257e-05,
      "loss": 0.8471,
      "step": 4574
    },
    {
      "epoch": 0.6990869847576117,
      "grad_norm": 0.46826639771461487,
      "learning_rate": 4.453119978261524e-05,
      "loss": 0.7451,
      "step": 4575
    },
    {
      "epoch": 0.6992397906559193,
      "grad_norm": 0.3270047605037689,
      "learning_rate": 4.448967525735381e-05,
      "loss": 0.6547,
      "step": 4576
    },
    {
      "epoch": 0.699392596554227,
      "grad_norm": 0.27389249205589294,
      "learning_rate": 4.444816456202388e-05,
      "loss": 0.8253,
      "step": 4577
    },
    {
      "epoch": 0.6995454024525347,
      "grad_norm": 0.5950169563293457,
      "learning_rate": 4.4406667706967375e-05,
      "loss": 0.7164,
      "step": 4578
    },
    {
      "epoch": 0.6996982083508423,
      "grad_norm": 0.28673499822616577,
      "learning_rate": 4.4365184702522956e-05,
      "loss": 0.8878,
      "step": 4579
    },
    {
      "epoch": 0.69985101424915,
      "grad_norm": 0.2988281846046448,
      "learning_rate": 4.432371555902579e-05,
      "loss": 0.6067,
      "step": 4580
    },
    {
      "epoch": 0.7000038201474577,
      "grad_norm": 0.2832471430301666,
      "learning_rate": 4.428226028680754e-05,
      "loss": 0.6567,
      "step": 4581
    },
    {
      "epoch": 0.7001566260457653,
      "grad_norm": 0.31266599893569946,
      "learning_rate": 4.424081889619639e-05,
      "loss": 0.6556,
      "step": 4582
    },
    {
      "epoch": 0.700309431944073,
      "grad_norm": 0.3293362855911255,
      "learning_rate": 4.4199391397517154e-05,
      "loss": 0.7416,
      "step": 4583
    },
    {
      "epoch": 0.7004622378423807,
      "grad_norm": 0.3653784394264221,
      "learning_rate": 4.415797780109118e-05,
      "loss": 0.8261,
      "step": 4584
    },
    {
      "epoch": 0.7006150437406884,
      "grad_norm": 0.5519762635231018,
      "learning_rate": 4.4116578117236296e-05,
      "loss": 0.5055,
      "step": 4585
    },
    {
      "epoch": 0.7007678496389961,
      "grad_norm": 0.26612091064453125,
      "learning_rate": 4.407519235626683e-05,
      "loss": 0.6856,
      "step": 4586
    },
    {
      "epoch": 0.7009206555373038,
      "grad_norm": 0.3700180947780609,
      "learning_rate": 4.403382052849374e-05,
      "loss": 0.7334,
      "step": 4587
    },
    {
      "epoch": 0.7010734614356114,
      "grad_norm": 0.8454820513725281,
      "learning_rate": 4.399246264422452e-05,
      "loss": 0.5672,
      "step": 4588
    },
    {
      "epoch": 0.7012262673339191,
      "grad_norm": 0.2447136491537094,
      "learning_rate": 4.395111871376308e-05,
      "loss": 0.6859,
      "step": 4589
    },
    {
      "epoch": 0.7013790732322268,
      "grad_norm": 0.48516905307769775,
      "learning_rate": 4.39097887474099e-05,
      "loss": 0.6208,
      "step": 4590
    },
    {
      "epoch": 0.7015318791305344,
      "grad_norm": 0.3230639398097992,
      "learning_rate": 4.3868472755462043e-05,
      "loss": 0.8115,
      "step": 4591
    },
    {
      "epoch": 0.7016846850288421,
      "grad_norm": 0.2862485647201538,
      "learning_rate": 4.3827170748212985e-05,
      "loss": 0.6882,
      "step": 4592
    },
    {
      "epoch": 0.7018374909271498,
      "grad_norm": 0.2857116758823395,
      "learning_rate": 4.3785882735952844e-05,
      "loss": 0.5287,
      "step": 4593
    },
    {
      "epoch": 0.7019902968254574,
      "grad_norm": 0.31349948048591614,
      "learning_rate": 4.3744608728968104e-05,
      "loss": 0.7222,
      "step": 4594
    },
    {
      "epoch": 0.7021431027237651,
      "grad_norm": 0.3246481120586395,
      "learning_rate": 4.3703348737541914e-05,
      "loss": 0.7359,
      "step": 4595
    },
    {
      "epoch": 0.7022959086220728,
      "grad_norm": 0.2840207517147064,
      "learning_rate": 4.3662102771953785e-05,
      "loss": 0.7063,
      "step": 4596
    },
    {
      "epoch": 0.7024487145203805,
      "grad_norm": 0.4355444610118866,
      "learning_rate": 4.362087084247988e-05,
      "loss": 0.7508,
      "step": 4597
    },
    {
      "epoch": 0.7026015204186882,
      "grad_norm": 0.30774471163749695,
      "learning_rate": 4.3579652959392736e-05,
      "loss": 0.9731,
      "step": 4598
    },
    {
      "epoch": 0.7027543263169959,
      "grad_norm": 0.3269573748111725,
      "learning_rate": 4.3538449132961415e-05,
      "loss": 0.5729,
      "step": 4599
    },
    {
      "epoch": 0.7029071322153035,
      "grad_norm": 0.4266010820865631,
      "learning_rate": 4.3497259373451536e-05,
      "loss": 0.4978,
      "step": 4600
    },
    {
      "epoch": 0.7030599381136112,
      "grad_norm": 0.2678755521774292,
      "learning_rate": 4.345608369112523e-05,
      "loss": 0.8741,
      "step": 4601
    },
    {
      "epoch": 0.7032127440119189,
      "grad_norm": 0.558524489402771,
      "learning_rate": 4.3414922096241025e-05,
      "loss": 0.5434,
      "step": 4602
    },
    {
      "epoch": 0.7033655499102265,
      "grad_norm": 0.43573448061943054,
      "learning_rate": 4.3373774599053966e-05,
      "loss": 0.6227,
      "step": 4603
    },
    {
      "epoch": 0.7035183558085342,
      "grad_norm": 0.2877878248691559,
      "learning_rate": 4.3332641209815615e-05,
      "loss": 0.5567,
      "step": 4604
    },
    {
      "epoch": 0.7036711617068419,
      "grad_norm": 0.28066501021385193,
      "learning_rate": 4.329152193877404e-05,
      "loss": 0.6789,
      "step": 4605
    },
    {
      "epoch": 0.7038239676051495,
      "grad_norm": 0.2897006571292877,
      "learning_rate": 4.325041679617381e-05,
      "loss": 0.6965,
      "step": 4606
    },
    {
      "epoch": 0.7039767735034572,
      "grad_norm": 0.40210720896720886,
      "learning_rate": 4.3209325792255796e-05,
      "loss": 0.7664,
      "step": 4607
    },
    {
      "epoch": 0.7041295794017649,
      "grad_norm": 0.5120120048522949,
      "learning_rate": 4.316824893725755e-05,
      "loss": 0.618,
      "step": 4608
    },
    {
      "epoch": 0.7042823853000726,
      "grad_norm": 0.30149465799331665,
      "learning_rate": 4.3127186241413055e-05,
      "loss": 0.5665,
      "step": 4609
    },
    {
      "epoch": 0.7044351911983803,
      "grad_norm": 0.3012610673904419,
      "learning_rate": 4.308613771495267e-05,
      "loss": 0.8612,
      "step": 4610
    },
    {
      "epoch": 0.704587997096688,
      "grad_norm": 0.29444053769111633,
      "learning_rate": 4.3045103368103355e-05,
      "loss": 0.7114,
      "step": 4611
    },
    {
      "epoch": 0.7047408029949956,
      "grad_norm": 0.3533160388469696,
      "learning_rate": 4.300408321108842e-05,
      "loss": 0.8258,
      "step": 4612
    },
    {
      "epoch": 0.7048936088933033,
      "grad_norm": 0.25161516666412354,
      "learning_rate": 4.296307725412774e-05,
      "loss": 0.5679,
      "step": 4613
    },
    {
      "epoch": 0.705046414791611,
      "grad_norm": 0.2681884169578552,
      "learning_rate": 4.292208550743755e-05,
      "loss": 0.8816,
      "step": 4614
    },
    {
      "epoch": 0.7051992206899186,
      "grad_norm": 0.2852892279624939,
      "learning_rate": 4.288110798123066e-05,
      "loss": 0.6218,
      "step": 4615
    },
    {
      "epoch": 0.7053520265882263,
      "grad_norm": 0.27947360277175903,
      "learning_rate": 4.2840144685716245e-05,
      "loss": 0.6808,
      "step": 4616
    },
    {
      "epoch": 0.705504832486534,
      "grad_norm": 0.25709283351898193,
      "learning_rate": 4.2799195631099944e-05,
      "loss": 0.7561,
      "step": 4617
    },
    {
      "epoch": 0.7056576383848416,
      "grad_norm": 0.33592861890792847,
      "learning_rate": 4.275826082758388e-05,
      "loss": 0.7795,
      "step": 4618
    },
    {
      "epoch": 0.7058104442831493,
      "grad_norm": 0.3069939613342285,
      "learning_rate": 4.271734028536667e-05,
      "loss": 0.5641,
      "step": 4619
    },
    {
      "epoch": 0.705963250181457,
      "grad_norm": 0.2989427447319031,
      "learning_rate": 4.2676434014643285e-05,
      "loss": 0.7034,
      "step": 4620
    },
    {
      "epoch": 0.7061160560797647,
      "grad_norm": 0.2989204227924347,
      "learning_rate": 4.2635542025605146e-05,
      "loss": 0.6892,
      "step": 4621
    },
    {
      "epoch": 0.7062688619780724,
      "grad_norm": 0.29351404309272766,
      "learning_rate": 4.259466432844017e-05,
      "loss": 0.6761,
      "step": 4622
    },
    {
      "epoch": 0.70642166787638,
      "grad_norm": 0.27882349491119385,
      "learning_rate": 4.255380093333274e-05,
      "loss": 0.6945,
      "step": 4623
    },
    {
      "epoch": 0.7065744737746877,
      "grad_norm": 0.28001776337623596,
      "learning_rate": 4.25129518504636e-05,
      "loss": 0.7171,
      "step": 4624
    },
    {
      "epoch": 0.7067272796729954,
      "grad_norm": 0.3076488673686981,
      "learning_rate": 4.247211709000991e-05,
      "loss": 0.6851,
      "step": 4625
    },
    {
      "epoch": 0.7068800855713031,
      "grad_norm": 0.43300819396972656,
      "learning_rate": 4.243129666214534e-05,
      "loss": 0.6699,
      "step": 4626
    },
    {
      "epoch": 0.7070328914696107,
      "grad_norm": 0.31589996814727783,
      "learning_rate": 4.239049057703999e-05,
      "loss": 0.5801,
      "step": 4627
    },
    {
      "epoch": 0.7071856973679184,
      "grad_norm": 0.28026047348976135,
      "learning_rate": 4.234969884486033e-05,
      "loss": 0.7313,
      "step": 4628
    },
    {
      "epoch": 0.707338503266226,
      "grad_norm": 0.31301337480545044,
      "learning_rate": 4.230892147576924e-05,
      "loss": 0.7132,
      "step": 4629
    },
    {
      "epoch": 0.7074913091645337,
      "grad_norm": 0.3458845913410187,
      "learning_rate": 4.226815847992611e-05,
      "loss": 0.6742,
      "step": 4630
    },
    {
      "epoch": 0.7076441150628414,
      "grad_norm": 0.2839435636997223,
      "learning_rate": 4.2227409867486665e-05,
      "loss": 0.7291,
      "step": 4631
    },
    {
      "epoch": 0.7077969209611491,
      "grad_norm": 0.3225105404853821,
      "learning_rate": 4.2186675648603125e-05,
      "loss": 0.7466,
      "step": 4632
    },
    {
      "epoch": 0.7079497268594568,
      "grad_norm": 0.2577323913574219,
      "learning_rate": 4.2145955833424e-05,
      "loss": 0.7672,
      "step": 4633
    },
    {
      "epoch": 0.7081025327577645,
      "grad_norm": 0.3790148198604584,
      "learning_rate": 4.210525043209439e-05,
      "loss": 0.637,
      "step": 4634
    },
    {
      "epoch": 0.7082553386560722,
      "grad_norm": 0.30979880690574646,
      "learning_rate": 4.20645594547556e-05,
      "loss": 0.8716,
      "step": 4635
    },
    {
      "epoch": 0.7084081445543798,
      "grad_norm": 0.28678464889526367,
      "learning_rate": 4.202388291154555e-05,
      "loss": 0.801,
      "step": 4636
    },
    {
      "epoch": 0.7085609504526875,
      "grad_norm": 0.2768631875514984,
      "learning_rate": 4.19832208125984e-05,
      "loss": 0.8203,
      "step": 4637
    },
    {
      "epoch": 0.7087137563509951,
      "grad_norm": 0.2772439122200012,
      "learning_rate": 4.1942573168044743e-05,
      "loss": 0.7469,
      "step": 4638
    },
    {
      "epoch": 0.7088665622493028,
      "grad_norm": 0.3146333694458008,
      "learning_rate": 4.1901939988011626e-05,
      "loss": 0.8942,
      "step": 4639
    },
    {
      "epoch": 0.7090193681476105,
      "grad_norm": 0.26574763655662537,
      "learning_rate": 4.186132128262248e-05,
      "loss": 0.6315,
      "step": 4640
    },
    {
      "epoch": 0.7091721740459181,
      "grad_norm": 0.31069281697273254,
      "learning_rate": 4.182071706199717e-05,
      "loss": 0.6383,
      "step": 4641
    },
    {
      "epoch": 0.7093249799442258,
      "grad_norm": 0.359215646982193,
      "learning_rate": 4.1780127336251776e-05,
      "loss": 0.7897,
      "step": 4642
    },
    {
      "epoch": 0.7094777858425335,
      "grad_norm": 0.3164921700954437,
      "learning_rate": 4.1739552115498924e-05,
      "loss": 0.8371,
      "step": 4643
    },
    {
      "epoch": 0.7096305917408412,
      "grad_norm": 0.4171659052371979,
      "learning_rate": 4.169899140984763e-05,
      "loss": 0.8449,
      "step": 4644
    },
    {
      "epoch": 0.7097833976391489,
      "grad_norm": 0.3271016776561737,
      "learning_rate": 4.165844522940325e-05,
      "loss": 0.7758,
      "step": 4645
    },
    {
      "epoch": 0.7099362035374566,
      "grad_norm": 0.33073553442955017,
      "learning_rate": 4.161791358426752e-05,
      "loss": 0.7159,
      "step": 4646
    },
    {
      "epoch": 0.7100890094357643,
      "grad_norm": 0.23120705783367157,
      "learning_rate": 4.157739648453851e-05,
      "loss": 0.799,
      "step": 4647
    },
    {
      "epoch": 0.7102418153340719,
      "grad_norm": 0.2860707938671112,
      "learning_rate": 4.15368939403108e-05,
      "loss": 0.7074,
      "step": 4648
    },
    {
      "epoch": 0.7103946212323796,
      "grad_norm": 0.29551446437835693,
      "learning_rate": 4.1496405961675155e-05,
      "loss": 0.5792,
      "step": 4649
    },
    {
      "epoch": 0.7105474271306872,
      "grad_norm": 0.2846836447715759,
      "learning_rate": 4.1455932558718915e-05,
      "loss": 0.7368,
      "step": 4650
    },
    {
      "epoch": 0.7107002330289949,
      "grad_norm": 0.31006723642349243,
      "learning_rate": 4.14154737415256e-05,
      "loss": 0.7059,
      "step": 4651
    },
    {
      "epoch": 0.7108530389273026,
      "grad_norm": 0.25622233748435974,
      "learning_rate": 4.137502952017528e-05,
      "loss": 0.652,
      "step": 4652
    },
    {
      "epoch": 0.7110058448256102,
      "grad_norm": 0.34584179520606995,
      "learning_rate": 4.1334599904744195e-05,
      "loss": 0.5198,
      "step": 4653
    },
    {
      "epoch": 0.7111586507239179,
      "grad_norm": 0.28006860613822937,
      "learning_rate": 4.1294184905305146e-05,
      "loss": 0.6968,
      "step": 4654
    },
    {
      "epoch": 0.7113114566222256,
      "grad_norm": 0.28837811946868896,
      "learning_rate": 4.125378453192712e-05,
      "loss": 0.7734,
      "step": 4655
    },
    {
      "epoch": 0.7114642625205333,
      "grad_norm": 0.5305294990539551,
      "learning_rate": 4.121339879467552e-05,
      "loss": 0.6769,
      "step": 4656
    },
    {
      "epoch": 0.711617068418841,
      "grad_norm": 0.3161180913448334,
      "learning_rate": 4.117302770361213e-05,
      "loss": 0.6046,
      "step": 4657
    },
    {
      "epoch": 0.7117698743171487,
      "grad_norm": 0.31682288646698,
      "learning_rate": 4.113267126879513e-05,
      "loss": 0.6814,
      "step": 4658
    },
    {
      "epoch": 0.7119226802154563,
      "grad_norm": 0.25436070561408997,
      "learning_rate": 4.109232950027893e-05,
      "loss": 0.6177,
      "step": 4659
    },
    {
      "epoch": 0.712075486113764,
      "grad_norm": 0.3298552632331848,
      "learning_rate": 4.105200240811431e-05,
      "loss": 0.7724,
      "step": 4660
    },
    {
      "epoch": 0.7122282920120717,
      "grad_norm": 0.33188796043395996,
      "learning_rate": 4.101169000234847e-05,
      "loss": 0.5912,
      "step": 4661
    },
    {
      "epoch": 0.7123810979103793,
      "grad_norm": 0.27348458766937256,
      "learning_rate": 4.0971392293024946e-05,
      "loss": 0.7854,
      "step": 4662
    },
    {
      "epoch": 0.712533903808687,
      "grad_norm": 0.30659839510917664,
      "learning_rate": 4.093110929018352e-05,
      "loss": 0.7274,
      "step": 4663
    },
    {
      "epoch": 0.7126867097069947,
      "grad_norm": 0.5424551963806152,
      "learning_rate": 4.0890841003860346e-05,
      "loss": 0.5983,
      "step": 4664
    },
    {
      "epoch": 0.7128395156053023,
      "grad_norm": 0.29840460419654846,
      "learning_rate": 4.085058744408796e-05,
      "loss": 0.836,
      "step": 4665
    },
    {
      "epoch": 0.71299232150361,
      "grad_norm": 0.25009292364120483,
      "learning_rate": 4.081034862089523e-05,
      "loss": 0.6681,
      "step": 4666
    },
    {
      "epoch": 0.7131451274019177,
      "grad_norm": 0.2948574125766754,
      "learning_rate": 4.07701245443073e-05,
      "loss": 0.7458,
      "step": 4667
    },
    {
      "epoch": 0.7132979333002254,
      "grad_norm": 0.2596202790737152,
      "learning_rate": 4.072991522434559e-05,
      "loss": 0.62,
      "step": 4668
    },
    {
      "epoch": 0.7134507391985331,
      "grad_norm": 0.29064345359802246,
      "learning_rate": 4.068972067102803e-05,
      "loss": 0.7136,
      "step": 4669
    },
    {
      "epoch": 0.7136035450968408,
      "grad_norm": 0.28021040558815,
      "learning_rate": 4.0649540894368666e-05,
      "loss": 0.5538,
      "step": 4670
    },
    {
      "epoch": 0.7137563509951484,
      "grad_norm": 0.2738005220890045,
      "learning_rate": 4.0609375904377975e-05,
      "loss": 0.6811,
      "step": 4671
    },
    {
      "epoch": 0.7139091568934561,
      "grad_norm": 0.23311470448970795,
      "learning_rate": 4.056922571106277e-05,
      "loss": 0.5184,
      "step": 4672
    },
    {
      "epoch": 0.7140619627917638,
      "grad_norm": 0.389201819896698,
      "learning_rate": 4.0529090324426125e-05,
      "loss": 0.7657,
      "step": 4673
    },
    {
      "epoch": 0.7142147686900714,
      "grad_norm": 0.27108216285705566,
      "learning_rate": 4.048896975446736e-05,
      "loss": 0.6359,
      "step": 4674
    },
    {
      "epoch": 0.7143675745883791,
      "grad_norm": 0.2944895625114441,
      "learning_rate": 4.044886401118223e-05,
      "loss": 0.6726,
      "step": 4675
    },
    {
      "epoch": 0.7145203804866868,
      "grad_norm": 0.27678659558296204,
      "learning_rate": 4.040877310456278e-05,
      "loss": 0.7396,
      "step": 4676
    },
    {
      "epoch": 0.7146731863849944,
      "grad_norm": 0.3424450755119324,
      "learning_rate": 4.036869704459729e-05,
      "loss": 0.6605,
      "step": 4677
    },
    {
      "epoch": 0.7148259922833021,
      "grad_norm": 0.9500914216041565,
      "learning_rate": 4.0328635841270346e-05,
      "loss": 0.593,
      "step": 4678
    },
    {
      "epoch": 0.7149787981816098,
      "grad_norm": 0.41044652462005615,
      "learning_rate": 4.0288589504562865e-05,
      "loss": 0.7482,
      "step": 4679
    },
    {
      "epoch": 0.7151316040799175,
      "grad_norm": 0.36469408869743347,
      "learning_rate": 4.024855804445213e-05,
      "loss": 0.7855,
      "step": 4680
    },
    {
      "epoch": 0.7152844099782252,
      "grad_norm": 0.24194401502609253,
      "learning_rate": 4.0208541470911584e-05,
      "loss": 0.6371,
      "step": 4681
    },
    {
      "epoch": 0.7154372158765329,
      "grad_norm": 0.2713262736797333,
      "learning_rate": 4.0168539793911e-05,
      "loss": 0.6222,
      "step": 4682
    },
    {
      "epoch": 0.7155900217748405,
      "grad_norm": 0.3250422477722168,
      "learning_rate": 4.012855302341647e-05,
      "loss": 0.8088,
      "step": 4683
    },
    {
      "epoch": 0.7157428276731482,
      "grad_norm": 0.3172820806503296,
      "learning_rate": 4.0088581169390424e-05,
      "loss": 0.7694,
      "step": 4684
    },
    {
      "epoch": 0.7158956335714558,
      "grad_norm": 0.3846489191055298,
      "learning_rate": 4.0048624241791464e-05,
      "loss": 0.8115,
      "step": 4685
    },
    {
      "epoch": 0.7160484394697635,
      "grad_norm": 0.2700871527194977,
      "learning_rate": 4.0008682250574504e-05,
      "loss": 0.7215,
      "step": 4686
    },
    {
      "epoch": 0.7162012453680712,
      "grad_norm": 0.33228370547294617,
      "learning_rate": 3.99687552056908e-05,
      "loss": 0.8418,
      "step": 4687
    },
    {
      "epoch": 0.7163540512663789,
      "grad_norm": 0.2891543209552765,
      "learning_rate": 3.992884311708779e-05,
      "loss": 0.52,
      "step": 4688
    },
    {
      "epoch": 0.7165068571646865,
      "grad_norm": 0.31329476833343506,
      "learning_rate": 3.9888945994709306e-05,
      "loss": 0.7719,
      "step": 4689
    },
    {
      "epoch": 0.7166596630629942,
      "grad_norm": 0.28452637791633606,
      "learning_rate": 3.9849063848495295e-05,
      "loss": 0.4992,
      "step": 4690
    },
    {
      "epoch": 0.7168124689613019,
      "grad_norm": 0.3185611069202423,
      "learning_rate": 3.9809196688382145e-05,
      "loss": 0.8112,
      "step": 4691
    },
    {
      "epoch": 0.7169652748596096,
      "grad_norm": 0.2967831790447235,
      "learning_rate": 3.9769344524302355e-05,
      "loss": 0.6697,
      "step": 4692
    },
    {
      "epoch": 0.7171180807579173,
      "grad_norm": 0.3396419584751129,
      "learning_rate": 3.972950736618482e-05,
      "loss": 0.8737,
      "step": 4693
    },
    {
      "epoch": 0.717270886656225,
      "grad_norm": 0.2848491966724396,
      "learning_rate": 3.968968522395459e-05,
      "loss": 0.7481,
      "step": 4694
    },
    {
      "epoch": 0.7174236925545326,
      "grad_norm": 0.3522728681564331,
      "learning_rate": 3.9649878107533e-05,
      "loss": 0.6715,
      "step": 4695
    },
    {
      "epoch": 0.7175764984528403,
      "grad_norm": 0.2862434685230255,
      "learning_rate": 3.961008602683768e-05,
      "loss": 0.4666,
      "step": 4696
    },
    {
      "epoch": 0.7177293043511479,
      "grad_norm": 0.32041534781455994,
      "learning_rate": 3.9570308991782534e-05,
      "loss": 0.9037,
      "step": 4697
    },
    {
      "epoch": 0.7178821102494556,
      "grad_norm": 0.29981473088264465,
      "learning_rate": 3.953054701227764e-05,
      "loss": 0.7015,
      "step": 4698
    },
    {
      "epoch": 0.7180349161477633,
      "grad_norm": 0.33403995633125305,
      "learning_rate": 3.949080009822933e-05,
      "loss": 0.5762,
      "step": 4699
    },
    {
      "epoch": 0.718187722046071,
      "grad_norm": 0.27824243903160095,
      "learning_rate": 3.9451068259540244e-05,
      "loss": 0.8935,
      "step": 4700
    },
    {
      "epoch": 0.7183405279443786,
      "grad_norm": 0.2845570743083954,
      "learning_rate": 3.941135150610929e-05,
      "loss": 0.7272,
      "step": 4701
    },
    {
      "epoch": 0.7184933338426863,
      "grad_norm": 0.34788352251052856,
      "learning_rate": 3.937164984783149e-05,
      "loss": 0.7927,
      "step": 4702
    },
    {
      "epoch": 0.718646139740994,
      "grad_norm": 0.3194750249385834,
      "learning_rate": 3.933196329459818e-05,
      "loss": 0.809,
      "step": 4703
    },
    {
      "epoch": 0.7187989456393017,
      "grad_norm": 0.3060329258441925,
      "learning_rate": 3.9292291856296945e-05,
      "loss": 0.7795,
      "step": 4704
    },
    {
      "epoch": 0.7189517515376094,
      "grad_norm": 0.26089486479759216,
      "learning_rate": 3.9252635542811645e-05,
      "loss": 0.5469,
      "step": 4705
    },
    {
      "epoch": 0.7191045574359171,
      "grad_norm": 0.32387828826904297,
      "learning_rate": 3.9212994364022224e-05,
      "loss": 0.713,
      "step": 4706
    },
    {
      "epoch": 0.7192573633342247,
      "grad_norm": 0.2914409339427948,
      "learning_rate": 3.917336832980504e-05,
      "loss": 0.663,
      "step": 4707
    },
    {
      "epoch": 0.7194101692325324,
      "grad_norm": 0.2582574486732483,
      "learning_rate": 3.913375745003254e-05,
      "loss": 0.6321,
      "step": 4708
    },
    {
      "epoch": 0.71956297513084,
      "grad_norm": 0.28708699345588684,
      "learning_rate": 3.909416173457341e-05,
      "loss": 0.642,
      "step": 4709
    },
    {
      "epoch": 0.7197157810291477,
      "grad_norm": 0.34295615553855896,
      "learning_rate": 3.905458119329262e-05,
      "loss": 0.672,
      "step": 4710
    },
    {
      "epoch": 0.7198685869274554,
      "grad_norm": 0.2741999924182892,
      "learning_rate": 3.9015015836051375e-05,
      "loss": 0.6434,
      "step": 4711
    },
    {
      "epoch": 0.720021392825763,
      "grad_norm": 0.27946770191192627,
      "learning_rate": 3.897546567270701e-05,
      "loss": 0.8582,
      "step": 4712
    },
    {
      "epoch": 0.7201741987240707,
      "grad_norm": 0.33194059133529663,
      "learning_rate": 3.893593071311309e-05,
      "loss": 0.6621,
      "step": 4713
    },
    {
      "epoch": 0.7203270046223784,
      "grad_norm": 0.335144966840744,
      "learning_rate": 3.8896410967119434e-05,
      "loss": 0.7997,
      "step": 4714
    },
    {
      "epoch": 0.7204798105206861,
      "grad_norm": 0.31013959646224976,
      "learning_rate": 3.8856906444572114e-05,
      "loss": 0.832,
      "step": 4715
    },
    {
      "epoch": 0.7206326164189938,
      "grad_norm": 0.3294447660446167,
      "learning_rate": 3.8817417155313295e-05,
      "loss": 0.7865,
      "step": 4716
    },
    {
      "epoch": 0.7207854223173015,
      "grad_norm": 0.4748566448688507,
      "learning_rate": 3.877794310918138e-05,
      "loss": 0.7879,
      "step": 4717
    },
    {
      "epoch": 0.7209382282156092,
      "grad_norm": 0.3431771695613861,
      "learning_rate": 3.873848431601102e-05,
      "loss": 0.6674,
      "step": 4718
    },
    {
      "epoch": 0.7210910341139168,
      "grad_norm": 0.2861068844795227,
      "learning_rate": 3.869904078563309e-05,
      "loss": 0.6783,
      "step": 4719
    },
    {
      "epoch": 0.7212438400122245,
      "grad_norm": 0.4855247735977173,
      "learning_rate": 3.8659612527874576e-05,
      "loss": 0.5126,
      "step": 4720
    },
    {
      "epoch": 0.7213966459105321,
      "grad_norm": 0.25908955931663513,
      "learning_rate": 3.8620199552558654e-05,
      "loss": 0.6448,
      "step": 4721
    },
    {
      "epoch": 0.7215494518088398,
      "grad_norm": 0.34213340282440186,
      "learning_rate": 3.8580801869504776e-05,
      "loss": 0.5608,
      "step": 4722
    },
    {
      "epoch": 0.7217022577071475,
      "grad_norm": 0.2678838074207306,
      "learning_rate": 3.8541419488528585e-05,
      "loss": 0.8313,
      "step": 4723
    },
    {
      "epoch": 0.7218550636054551,
      "grad_norm": 0.3597886562347412,
      "learning_rate": 3.8502052419441826e-05,
      "loss": 0.5639,
      "step": 4724
    },
    {
      "epoch": 0.7220078695037628,
      "grad_norm": 0.27323228120803833,
      "learning_rate": 3.846270067205244e-05,
      "loss": 0.7698,
      "step": 4725
    },
    {
      "epoch": 0.7221606754020705,
      "grad_norm": 0.38855069875717163,
      "learning_rate": 3.842336425616466e-05,
      "loss": 0.8291,
      "step": 4726
    },
    {
      "epoch": 0.7223134813003782,
      "grad_norm": 0.3744564354419708,
      "learning_rate": 3.838404318157875e-05,
      "loss": 0.6755,
      "step": 4727
    },
    {
      "epoch": 0.7224662871986859,
      "grad_norm": 0.448920339345932,
      "learning_rate": 3.834473745809131e-05,
      "loss": 0.6632,
      "step": 4728
    },
    {
      "epoch": 0.7226190930969936,
      "grad_norm": 0.40930065512657166,
      "learning_rate": 3.830544709549493e-05,
      "loss": 0.805,
      "step": 4729
    },
    {
      "epoch": 0.7227718989953013,
      "grad_norm": 0.33985939621925354,
      "learning_rate": 3.826617210357857e-05,
      "loss": 0.698,
      "step": 4730
    },
    {
      "epoch": 0.7229247048936089,
      "grad_norm": 0.3228268325328827,
      "learning_rate": 3.822691249212719e-05,
      "loss": 0.586,
      "step": 4731
    },
    {
      "epoch": 0.7230775107919166,
      "grad_norm": 0.2895890772342682,
      "learning_rate": 3.818766827092202e-05,
      "loss": 0.6855,
      "step": 4732
    },
    {
      "epoch": 0.7232303166902242,
      "grad_norm": 0.36268943548202515,
      "learning_rate": 3.8148439449740494e-05,
      "loss": 0.5884,
      "step": 4733
    },
    {
      "epoch": 0.7233831225885319,
      "grad_norm": 0.2500065863132477,
      "learning_rate": 3.810922603835602e-05,
      "loss": 0.6736,
      "step": 4734
    },
    {
      "epoch": 0.7235359284868396,
      "grad_norm": 0.34311750531196594,
      "learning_rate": 3.807002804653835e-05,
      "loss": 0.9017,
      "step": 4735
    },
    {
      "epoch": 0.7236887343851472,
      "grad_norm": 0.3456333577632904,
      "learning_rate": 3.803084548405335e-05,
      "loss": 0.5246,
      "step": 4736
    },
    {
      "epoch": 0.7238415402834549,
      "grad_norm": 0.2903348207473755,
      "learning_rate": 3.799167836066306e-05,
      "loss": 0.5095,
      "step": 4737
    },
    {
      "epoch": 0.7239943461817626,
      "grad_norm": 0.3143153488636017,
      "learning_rate": 3.7952526686125545e-05,
      "loss": 0.732,
      "step": 4738
    },
    {
      "epoch": 0.7241471520800703,
      "grad_norm": 0.3224197328090668,
      "learning_rate": 3.791339047019515e-05,
      "loss": 0.6884,
      "step": 4739
    },
    {
      "epoch": 0.724299957978378,
      "grad_norm": 0.297520250082016,
      "learning_rate": 3.7874269722622394e-05,
      "loss": 0.7355,
      "step": 4740
    },
    {
      "epoch": 0.7244527638766857,
      "grad_norm": 0.3008733093738556,
      "learning_rate": 3.7835164453153806e-05,
      "loss": 0.6028,
      "step": 4741
    },
    {
      "epoch": 0.7246055697749934,
      "grad_norm": 0.34514373540878296,
      "learning_rate": 3.779607467153219e-05,
      "loss": 0.8412,
      "step": 4742
    },
    {
      "epoch": 0.724758375673301,
      "grad_norm": 0.31843021512031555,
      "learning_rate": 3.775700038749639e-05,
      "loss": 0.722,
      "step": 4743
    },
    {
      "epoch": 0.7249111815716086,
      "grad_norm": 0.23146043717861176,
      "learning_rate": 3.7717941610781485e-05,
      "loss": 0.6464,
      "step": 4744
    },
    {
      "epoch": 0.7250639874699163,
      "grad_norm": 0.2926734387874603,
      "learning_rate": 3.7678898351118586e-05,
      "loss": 0.6496,
      "step": 4745
    },
    {
      "epoch": 0.725216793368224,
      "grad_norm": 0.33022475242614746,
      "learning_rate": 3.763987061823506e-05,
      "loss": 0.6851,
      "step": 4746
    },
    {
      "epoch": 0.7253695992665317,
      "grad_norm": 0.5083408951759338,
      "learning_rate": 3.760085842185431e-05,
      "loss": 0.6626,
      "step": 4747
    },
    {
      "epoch": 0.7255224051648393,
      "grad_norm": 0.2732957601547241,
      "learning_rate": 3.756186177169585e-05,
      "loss": 0.5641,
      "step": 4748
    },
    {
      "epoch": 0.725675211063147,
      "grad_norm": 0.27765700221061707,
      "learning_rate": 3.7522880677475415e-05,
      "loss": 0.7062,
      "step": 4749
    },
    {
      "epoch": 0.7258280169614547,
      "grad_norm": 0.2967795431613922,
      "learning_rate": 3.748391514890484e-05,
      "loss": 0.7701,
      "step": 4750
    },
    {
      "epoch": 0.7259808228597624,
      "grad_norm": 0.284739226102829,
      "learning_rate": 3.744496519569203e-05,
      "loss": 0.838,
      "step": 4751
    },
    {
      "epoch": 0.7261336287580701,
      "grad_norm": 0.28099575638771057,
      "learning_rate": 3.740603082754101e-05,
      "loss": 0.7573,
      "step": 4752
    },
    {
      "epoch": 0.7262864346563778,
      "grad_norm": 0.35599344968795776,
      "learning_rate": 3.7367112054151964e-05,
      "loss": 0.6278,
      "step": 4753
    },
    {
      "epoch": 0.7264392405546855,
      "grad_norm": 0.2718389630317688,
      "learning_rate": 3.732820888522124e-05,
      "loss": 0.6404,
      "step": 4754
    },
    {
      "epoch": 0.7265920464529931,
      "grad_norm": 0.2878887951374054,
      "learning_rate": 3.728932133044119e-05,
      "loss": 0.6494,
      "step": 4755
    },
    {
      "epoch": 0.7267448523513007,
      "grad_norm": 0.6028104424476624,
      "learning_rate": 3.725044939950029e-05,
      "loss": 0.7728,
      "step": 4756
    },
    {
      "epoch": 0.7268976582496084,
      "grad_norm": 0.28222134709358215,
      "learning_rate": 3.7211593102083186e-05,
      "loss": 0.7582,
      "step": 4757
    },
    {
      "epoch": 0.7270504641479161,
      "grad_norm": 0.29730039834976196,
      "learning_rate": 3.717275244787063e-05,
      "loss": 0.8627,
      "step": 4758
    },
    {
      "epoch": 0.7272032700462238,
      "grad_norm": 0.32104724645614624,
      "learning_rate": 3.713392744653942e-05,
      "loss": 0.7272,
      "step": 4759
    },
    {
      "epoch": 0.7273560759445314,
      "grad_norm": 0.2982363998889923,
      "learning_rate": 3.709511810776244e-05,
      "loss": 0.7101,
      "step": 4760
    },
    {
      "epoch": 0.7275088818428391,
      "grad_norm": 0.25882184505462646,
      "learning_rate": 3.7056324441208734e-05,
      "loss": 0.7863,
      "step": 4761
    },
    {
      "epoch": 0.7276616877411468,
      "grad_norm": 0.29439249634742737,
      "learning_rate": 3.7017546456543476e-05,
      "loss": 0.7806,
      "step": 4762
    },
    {
      "epoch": 0.7278144936394545,
      "grad_norm": 0.33841472864151,
      "learning_rate": 3.697878416342781e-05,
      "loss": 0.6692,
      "step": 4763
    },
    {
      "epoch": 0.7279672995377622,
      "grad_norm": 0.42538225650787354,
      "learning_rate": 3.694003757151904e-05,
      "loss": 0.5909,
      "step": 4764
    },
    {
      "epoch": 0.7281201054360699,
      "grad_norm": 0.30907660722732544,
      "learning_rate": 3.690130669047059e-05,
      "loss": 0.7845,
      "step": 4765
    },
    {
      "epoch": 0.7282729113343775,
      "grad_norm": 0.369582861661911,
      "learning_rate": 3.686259152993189e-05,
      "loss": 0.5686,
      "step": 4766
    },
    {
      "epoch": 0.7284257172326852,
      "grad_norm": 0.3360534906387329,
      "learning_rate": 3.6823892099548506e-05,
      "loss": 0.6306,
      "step": 4767
    },
    {
      "epoch": 0.7285785231309928,
      "grad_norm": 0.35224616527557373,
      "learning_rate": 3.6785208408962133e-05,
      "loss": 0.5248,
      "step": 4768
    },
    {
      "epoch": 0.7287313290293005,
      "grad_norm": 0.3557858467102051,
      "learning_rate": 3.674654046781044e-05,
      "loss": 0.8301,
      "step": 4769
    },
    {
      "epoch": 0.7288841349276082,
      "grad_norm": 0.3504233956336975,
      "learning_rate": 3.67078882857272e-05,
      "loss": 0.6455,
      "step": 4770
    },
    {
      "epoch": 0.7290369408259159,
      "grad_norm": 0.34271594882011414,
      "learning_rate": 3.666925187234229e-05,
      "loss": 0.9036,
      "step": 4771
    },
    {
      "epoch": 0.7291897467242235,
      "grad_norm": 0.26382726430892944,
      "learning_rate": 3.66306312372817e-05,
      "loss": 0.7683,
      "step": 4772
    },
    {
      "epoch": 0.7293425526225312,
      "grad_norm": 0.2812560498714447,
      "learning_rate": 3.6592026390167413e-05,
      "loss": 0.6227,
      "step": 4773
    },
    {
      "epoch": 0.7294953585208389,
      "grad_norm": 0.30799320340156555,
      "learning_rate": 3.6553437340617436e-05,
      "loss": 0.7421,
      "step": 4774
    },
    {
      "epoch": 0.7296481644191466,
      "grad_norm": 0.28414100408554077,
      "learning_rate": 3.651486409824597e-05,
      "loss": 0.6298,
      "step": 4775
    },
    {
      "epoch": 0.7298009703174543,
      "grad_norm": 0.34525686502456665,
      "learning_rate": 3.647630667266323e-05,
      "loss": 0.6816,
      "step": 4776
    },
    {
      "epoch": 0.729953776215762,
      "grad_norm": 0.28054291009902954,
      "learning_rate": 3.643776507347546e-05,
      "loss": 0.7858,
      "step": 4777
    },
    {
      "epoch": 0.7301065821140696,
      "grad_norm": 0.25552770495414734,
      "learning_rate": 3.639923931028493e-05,
      "loss": 0.6176,
      "step": 4778
    },
    {
      "epoch": 0.7302593880123773,
      "grad_norm": 0.2665732800960541,
      "learning_rate": 3.636072939269008e-05,
      "loss": 0.6894,
      "step": 4779
    },
    {
      "epoch": 0.7304121939106849,
      "grad_norm": 0.3016633987426758,
      "learning_rate": 3.632223533028525e-05,
      "loss": 0.8169,
      "step": 4780
    },
    {
      "epoch": 0.7305649998089926,
      "grad_norm": 0.31519678235054016,
      "learning_rate": 3.6283757132661e-05,
      "loss": 0.6808,
      "step": 4781
    },
    {
      "epoch": 0.7307178057073003,
      "grad_norm": 0.27059051394462585,
      "learning_rate": 3.624529480940379e-05,
      "loss": 0.6657,
      "step": 4782
    },
    {
      "epoch": 0.730870611605608,
      "grad_norm": 0.2791256308555603,
      "learning_rate": 3.6206848370096225e-05,
      "loss": 0.7948,
      "step": 4783
    },
    {
      "epoch": 0.7310234175039156,
      "grad_norm": 0.3773775100708008,
      "learning_rate": 3.616841782431687e-05,
      "loss": 0.7192,
      "step": 4784
    },
    {
      "epoch": 0.7311762234022233,
      "grad_norm": 0.287503719329834,
      "learning_rate": 3.6130003181640425e-05,
      "loss": 0.6652,
      "step": 4785
    },
    {
      "epoch": 0.731329029300531,
      "grad_norm": 0.3499451279640198,
      "learning_rate": 3.6091604451637516e-05,
      "loss": 0.6854,
      "step": 4786
    },
    {
      "epoch": 0.7314818351988387,
      "grad_norm": 0.331950306892395,
      "learning_rate": 3.605322164387493e-05,
      "loss": 0.9495,
      "step": 4787
    },
    {
      "epoch": 0.7316346410971464,
      "grad_norm": 0.2893081605434418,
      "learning_rate": 3.601485476791534e-05,
      "loss": 0.6678,
      "step": 4788
    },
    {
      "epoch": 0.7317874469954541,
      "grad_norm": 0.34236064553260803,
      "learning_rate": 3.597650383331762e-05,
      "loss": 0.6551,
      "step": 4789
    },
    {
      "epoch": 0.7319402528937617,
      "grad_norm": 0.33083659410476685,
      "learning_rate": 3.5938168849636544e-05,
      "loss": 0.8684,
      "step": 4790
    },
    {
      "epoch": 0.7320930587920694,
      "grad_norm": 0.23866380751132965,
      "learning_rate": 3.589984982642291e-05,
      "loss": 0.5983,
      "step": 4791
    },
    {
      "epoch": 0.732245864690377,
      "grad_norm": 0.28487899899482727,
      "learning_rate": 3.586154677322363e-05,
      "loss": 0.6288,
      "step": 4792
    },
    {
      "epoch": 0.7323986705886847,
      "grad_norm": 0.25790512561798096,
      "learning_rate": 3.582325969958157e-05,
      "loss": 0.6921,
      "step": 4793
    },
    {
      "epoch": 0.7325514764869924,
      "grad_norm": 0.5388302803039551,
      "learning_rate": 3.578498861503571e-05,
      "loss": 0.8078,
      "step": 4794
    },
    {
      "epoch": 0.7327042823853,
      "grad_norm": 0.37389835715293884,
      "learning_rate": 3.5746733529120826e-05,
      "loss": 0.7492,
      "step": 4795
    },
    {
      "epoch": 0.7328570882836077,
      "grad_norm": 0.26483863592147827,
      "learning_rate": 3.5708494451367936e-05,
      "loss": 0.8071,
      "step": 4796
    },
    {
      "epoch": 0.7330098941819154,
      "grad_norm": 0.35608604550361633,
      "learning_rate": 3.5670271391304e-05,
      "loss": 0.5763,
      "step": 4797
    },
    {
      "epoch": 0.7331627000802231,
      "grad_norm": 0.33788082003593445,
      "learning_rate": 3.563206435845196e-05,
      "loss": 0.657,
      "step": 4798
    },
    {
      "epoch": 0.7333155059785308,
      "grad_norm": 0.32307055592536926,
      "learning_rate": 3.559387336233071e-05,
      "loss": 0.8049,
      "step": 4799
    },
    {
      "epoch": 0.7334683118768385,
      "grad_norm": 0.28910204768180847,
      "learning_rate": 3.5555698412455284e-05,
      "loss": 0.8353,
      "step": 4800
    },
    {
      "epoch": 0.7336211177751462,
      "grad_norm": 0.26586541533470154,
      "learning_rate": 3.5517539518336676e-05,
      "loss": 0.7005,
      "step": 4801
    },
    {
      "epoch": 0.7337739236734538,
      "grad_norm": 0.27746787667274475,
      "learning_rate": 3.547939668948177e-05,
      "loss": 0.7271,
      "step": 4802
    },
    {
      "epoch": 0.7339267295717614,
      "grad_norm": 0.5220523476600647,
      "learning_rate": 3.544126993539362e-05,
      "loss": 0.7498,
      "step": 4803
    },
    {
      "epoch": 0.7340795354700691,
      "grad_norm": 0.39568111300468445,
      "learning_rate": 3.540315926557114e-05,
      "loss": 0.5222,
      "step": 4804
    },
    {
      "epoch": 0.7342323413683768,
      "grad_norm": 0.270342081785202,
      "learning_rate": 3.5365064689509254e-05,
      "loss": 0.4921,
      "step": 4805
    },
    {
      "epoch": 0.7343851472666845,
      "grad_norm": 0.29624781012535095,
      "learning_rate": 3.5326986216698944e-05,
      "loss": 0.7302,
      "step": 4806
    },
    {
      "epoch": 0.7345379531649922,
      "grad_norm": 0.2960861027240753,
      "learning_rate": 3.5288923856627164e-05,
      "loss": 0.7035,
      "step": 4807
    },
    {
      "epoch": 0.7346907590632998,
      "grad_norm": 0.3095923066139221,
      "learning_rate": 3.52508776187768e-05,
      "loss": 0.6334,
      "step": 4808
    },
    {
      "epoch": 0.7348435649616075,
      "grad_norm": 0.26498642563819885,
      "learning_rate": 3.5212847512626736e-05,
      "loss": 0.7783,
      "step": 4809
    },
    {
      "epoch": 0.7349963708599152,
      "grad_norm": 0.27664878964424133,
      "learning_rate": 3.517483354765187e-05,
      "loss": 0.5974,
      "step": 4810
    },
    {
      "epoch": 0.7351491767582229,
      "grad_norm": 0.6068941354751587,
      "learning_rate": 3.5136835733323105e-05,
      "loss": 0.9139,
      "step": 4811
    },
    {
      "epoch": 0.7353019826565306,
      "grad_norm": 0.36670371890068054,
      "learning_rate": 3.509885407910724e-05,
      "loss": 0.55,
      "step": 4812
    },
    {
      "epoch": 0.7354547885548383,
      "grad_norm": 0.30022528767585754,
      "learning_rate": 3.506088859446704e-05,
      "loss": 0.6745,
      "step": 4813
    },
    {
      "epoch": 0.7356075944531459,
      "grad_norm": 0.2669506371021271,
      "learning_rate": 3.5022939288861335e-05,
      "loss": 0.6979,
      "step": 4814
    },
    {
      "epoch": 0.7357604003514535,
      "grad_norm": 0.2814632058143616,
      "learning_rate": 3.4985006171744916e-05,
      "loss": 0.6519,
      "step": 4815
    },
    {
      "epoch": 0.7359132062497612,
      "grad_norm": 0.4638700485229492,
      "learning_rate": 3.4947089252568446e-05,
      "loss": 0.9276,
      "step": 4816
    },
    {
      "epoch": 0.7360660121480689,
      "grad_norm": 0.2916383743286133,
      "learning_rate": 3.490918854077859e-05,
      "loss": 0.8922,
      "step": 4817
    },
    {
      "epoch": 0.7362188180463766,
      "grad_norm": 0.29278457164764404,
      "learning_rate": 3.487130404581806e-05,
      "loss": 0.532,
      "step": 4818
    },
    {
      "epoch": 0.7363716239446843,
      "grad_norm": 0.27625879645347595,
      "learning_rate": 3.483343577712538e-05,
      "loss": 0.7354,
      "step": 4819
    },
    {
      "epoch": 0.7365244298429919,
      "grad_norm": 0.38489770889282227,
      "learning_rate": 3.47955837441352e-05,
      "loss": 0.7309,
      "step": 4820
    },
    {
      "epoch": 0.7366772357412996,
      "grad_norm": 0.30396920442581177,
      "learning_rate": 3.475774795627794e-05,
      "loss": 0.7055,
      "step": 4821
    },
    {
      "epoch": 0.7368300416396073,
      "grad_norm": 0.29432806372642517,
      "learning_rate": 3.4719928422980155e-05,
      "loss": 0.6346,
      "step": 4822
    },
    {
      "epoch": 0.736982847537915,
      "grad_norm": 0.4341113269329071,
      "learning_rate": 3.468212515366419e-05,
      "loss": 0.5119,
      "step": 4823
    },
    {
      "epoch": 0.7371356534362227,
      "grad_norm": 0.2815232276916504,
      "learning_rate": 3.464433815774848e-05,
      "loss": 0.7706,
      "step": 4824
    },
    {
      "epoch": 0.7372884593345304,
      "grad_norm": 0.28113171458244324,
      "learning_rate": 3.460656744464729e-05,
      "loss": 0.8289,
      "step": 4825
    },
    {
      "epoch": 0.737441265232838,
      "grad_norm": 0.4249742925167084,
      "learning_rate": 3.4568813023770905e-05,
      "loss": 0.7503,
      "step": 4826
    },
    {
      "epoch": 0.7375940711311456,
      "grad_norm": 0.285725861787796,
      "learning_rate": 3.4531074904525486e-05,
      "loss": 0.8374,
      "step": 4827
    },
    {
      "epoch": 0.7377468770294533,
      "grad_norm": 0.29470476508140564,
      "learning_rate": 3.44933530963132e-05,
      "loss": 0.6421,
      "step": 4828
    },
    {
      "epoch": 0.737899682927761,
      "grad_norm": 0.2831245958805084,
      "learning_rate": 3.445564760853216e-05,
      "loss": 0.5626,
      "step": 4829
    },
    {
      "epoch": 0.7380524888260687,
      "grad_norm": 0.333756685256958,
      "learning_rate": 3.441795845057627e-05,
      "loss": 0.6658,
      "step": 4830
    },
    {
      "epoch": 0.7382052947243763,
      "grad_norm": 0.25924742221832275,
      "learning_rate": 3.438028563183552e-05,
      "loss": 0.7106,
      "step": 4831
    },
    {
      "epoch": 0.738358100622684,
      "grad_norm": 0.33355987071990967,
      "learning_rate": 3.434262916169577e-05,
      "loss": 0.6727,
      "step": 4832
    },
    {
      "epoch": 0.7385109065209917,
      "grad_norm": 0.856724739074707,
      "learning_rate": 3.430498904953886e-05,
      "loss": 0.7553,
      "step": 4833
    },
    {
      "epoch": 0.7386637124192994,
      "grad_norm": 0.27116596698760986,
      "learning_rate": 3.426736530474247e-05,
      "loss": 0.6955,
      "step": 4834
    },
    {
      "epoch": 0.7388165183176071,
      "grad_norm": 0.31083372235298157,
      "learning_rate": 3.4229757936680195e-05,
      "loss": 0.5857,
      "step": 4835
    },
    {
      "epoch": 0.7389693242159148,
      "grad_norm": 0.29667478799819946,
      "learning_rate": 3.419216695472168e-05,
      "loss": 0.6607,
      "step": 4836
    },
    {
      "epoch": 0.7391221301142225,
      "grad_norm": 0.4294913709163666,
      "learning_rate": 3.415459236823233e-05,
      "loss": 0.4775,
      "step": 4837
    },
    {
      "epoch": 0.7392749360125301,
      "grad_norm": 0.27344828844070435,
      "learning_rate": 3.4117034186573594e-05,
      "loss": 0.6111,
      "step": 4838
    },
    {
      "epoch": 0.7394277419108377,
      "grad_norm": 0.3142082691192627,
      "learning_rate": 3.407949241910272e-05,
      "loss": 0.6906,
      "step": 4839
    },
    {
      "epoch": 0.7395805478091454,
      "grad_norm": 0.2933219373226166,
      "learning_rate": 3.4041967075172995e-05,
      "loss": 0.6802,
      "step": 4840
    },
    {
      "epoch": 0.7397333537074531,
      "grad_norm": 0.30935943126678467,
      "learning_rate": 3.400445816413348e-05,
      "loss": 0.7207,
      "step": 4841
    },
    {
      "epoch": 0.7398861596057608,
      "grad_norm": 0.33251291513442993,
      "learning_rate": 3.396696569532926e-05,
      "loss": 0.7258,
      "step": 4842
    },
    {
      "epoch": 0.7400389655040684,
      "grad_norm": 0.32766956090927124,
      "learning_rate": 3.3929489678101236e-05,
      "loss": 0.6056,
      "step": 4843
    },
    {
      "epoch": 0.7401917714023761,
      "grad_norm": 0.29472458362579346,
      "learning_rate": 3.38920301217862e-05,
      "loss": 0.7408,
      "step": 4844
    },
    {
      "epoch": 0.7403445773006838,
      "grad_norm": 0.3219550549983978,
      "learning_rate": 3.385458703571696e-05,
      "loss": 0.7757,
      "step": 4845
    },
    {
      "epoch": 0.7404973831989915,
      "grad_norm": 0.42171233892440796,
      "learning_rate": 3.381716042922213e-05,
      "loss": 0.5873,
      "step": 4846
    },
    {
      "epoch": 0.7406501890972992,
      "grad_norm": 0.4623895287513733,
      "learning_rate": 3.3779750311626235e-05,
      "loss": 0.708,
      "step": 4847
    },
    {
      "epoch": 0.7408029949956069,
      "grad_norm": 0.3930194675922394,
      "learning_rate": 3.374235669224965e-05,
      "loss": 0.6904,
      "step": 4848
    },
    {
      "epoch": 0.7409558008939146,
      "grad_norm": 0.31731662154197693,
      "learning_rate": 3.37049795804087e-05,
      "loss": 0.8618,
      "step": 4849
    },
    {
      "epoch": 0.7411086067922221,
      "grad_norm": 0.35052576661109924,
      "learning_rate": 3.3667618985415625e-05,
      "loss": 0.8385,
      "step": 4850
    },
    {
      "epoch": 0.7412614126905298,
      "grad_norm": 0.544321596622467,
      "learning_rate": 3.3630274916578483e-05,
      "loss": 0.6843,
      "step": 4851
    },
    {
      "epoch": 0.7414142185888375,
      "grad_norm": 0.2999391555786133,
      "learning_rate": 3.359294738320118e-05,
      "loss": 0.623,
      "step": 4852
    },
    {
      "epoch": 0.7415670244871452,
      "grad_norm": 0.29683953523635864,
      "learning_rate": 3.35556363945836e-05,
      "loss": 0.6822,
      "step": 4853
    },
    {
      "epoch": 0.7417198303854529,
      "grad_norm": 0.43165406584739685,
      "learning_rate": 3.3518341960021504e-05,
      "loss": 0.7974,
      "step": 4854
    },
    {
      "epoch": 0.7418726362837605,
      "grad_norm": 0.3263550102710724,
      "learning_rate": 3.348106408880643e-05,
      "loss": 0.7315,
      "step": 4855
    },
    {
      "epoch": 0.7420254421820682,
      "grad_norm": 0.2833004891872406,
      "learning_rate": 3.344380279022584e-05,
      "loss": 0.5614,
      "step": 4856
    },
    {
      "epoch": 0.7421782480803759,
      "grad_norm": 0.3153781592845917,
      "learning_rate": 3.340655807356313e-05,
      "loss": 0.8439,
      "step": 4857
    },
    {
      "epoch": 0.7423310539786836,
      "grad_norm": 0.28415146470069885,
      "learning_rate": 3.336932994809744e-05,
      "loss": 0.7368,
      "step": 4858
    },
    {
      "epoch": 0.7424838598769913,
      "grad_norm": 0.33800607919692993,
      "learning_rate": 3.333211842310391e-05,
      "loss": 0.6789,
      "step": 4859
    },
    {
      "epoch": 0.742636665775299,
      "grad_norm": 0.30534127354621887,
      "learning_rate": 3.329492350785342e-05,
      "loss": 0.868,
      "step": 4860
    },
    {
      "epoch": 0.7427894716736066,
      "grad_norm": 0.28079915046691895,
      "learning_rate": 3.325774521161282e-05,
      "loss": 0.6768,
      "step": 4861
    },
    {
      "epoch": 0.7429422775719142,
      "grad_norm": 0.36504265666007996,
      "learning_rate": 3.3220583543644724e-05,
      "loss": 0.6346,
      "step": 4862
    },
    {
      "epoch": 0.7430950834702219,
      "grad_norm": 0.3121730387210846,
      "learning_rate": 3.3183438513207676e-05,
      "loss": 0.5665,
      "step": 4863
    },
    {
      "epoch": 0.7432478893685296,
      "grad_norm": 0.28789499402046204,
      "learning_rate": 3.314631012955608e-05,
      "loss": 0.8213,
      "step": 4864
    },
    {
      "epoch": 0.7434006952668373,
      "grad_norm": 0.4110698103904724,
      "learning_rate": 3.310919840194013e-05,
      "loss": 0.6911,
      "step": 4865
    },
    {
      "epoch": 0.743553501165145,
      "grad_norm": 0.4067875146865845,
      "learning_rate": 3.3072103339605866e-05,
      "loss": 0.7366,
      "step": 4866
    },
    {
      "epoch": 0.7437063070634526,
      "grad_norm": 0.27583467960357666,
      "learning_rate": 3.3035024951795246e-05,
      "loss": 0.639,
      "step": 4867
    },
    {
      "epoch": 0.7438591129617603,
      "grad_norm": 0.2784540355205536,
      "learning_rate": 3.2997963247746075e-05,
      "loss": 0.5332,
      "step": 4868
    },
    {
      "epoch": 0.744011918860068,
      "grad_norm": 0.4741950035095215,
      "learning_rate": 3.2960918236691926e-05,
      "loss": 0.6251,
      "step": 4869
    },
    {
      "epoch": 0.7441647247583757,
      "grad_norm": 0.31669479608535767,
      "learning_rate": 3.2923889927862227e-05,
      "loss": 0.7696,
      "step": 4870
    },
    {
      "epoch": 0.7443175306566834,
      "grad_norm": 0.4130287170410156,
      "learning_rate": 3.2886878330482296e-05,
      "loss": 0.9864,
      "step": 4871
    },
    {
      "epoch": 0.7444703365549911,
      "grad_norm": 0.25285977125167847,
      "learning_rate": 3.28498834537733e-05,
      "loss": 0.6078,
      "step": 4872
    },
    {
      "epoch": 0.7446231424532987,
      "grad_norm": 0.25762438774108887,
      "learning_rate": 3.281290530695217e-05,
      "loss": 0.804,
      "step": 4873
    },
    {
      "epoch": 0.7447759483516063,
      "grad_norm": 0.2802187204360962,
      "learning_rate": 3.2775943899231654e-05,
      "loss": 0.6797,
      "step": 4874
    },
    {
      "epoch": 0.744928754249914,
      "grad_norm": 0.3005053997039795,
      "learning_rate": 3.273899923982047e-05,
      "loss": 0.6974,
      "step": 4875
    },
    {
      "epoch": 0.7450815601482217,
      "grad_norm": 0.2399023026227951,
      "learning_rate": 3.270207133792297e-05,
      "loss": 0.6692,
      "step": 4876
    },
    {
      "epoch": 0.7452343660465294,
      "grad_norm": 0.24347561597824097,
      "learning_rate": 3.266516020273952e-05,
      "loss": 0.6747,
      "step": 4877
    },
    {
      "epoch": 0.7453871719448371,
      "grad_norm": 0.31855449080467224,
      "learning_rate": 3.262826584346616e-05,
      "loss": 0.6217,
      "step": 4878
    },
    {
      "epoch": 0.7455399778431447,
      "grad_norm": 0.2576698958873749,
      "learning_rate": 3.259138826929484e-05,
      "loss": 0.9534,
      "step": 4879
    },
    {
      "epoch": 0.7456927837414524,
      "grad_norm": 0.3313029706478119,
      "learning_rate": 3.255452748941327e-05,
      "loss": 0.7103,
      "step": 4880
    },
    {
      "epoch": 0.7458455896397601,
      "grad_norm": 0.44755053520202637,
      "learning_rate": 3.251768351300506e-05,
      "loss": 0.6426,
      "step": 4881
    },
    {
      "epoch": 0.7459983955380678,
      "grad_norm": 0.25972044467926025,
      "learning_rate": 3.248085634924952e-05,
      "loss": 0.683,
      "step": 4882
    },
    {
      "epoch": 0.7461512014363755,
      "grad_norm": 0.27888035774230957,
      "learning_rate": 3.2444046007321836e-05,
      "loss": 0.6486,
      "step": 4883
    },
    {
      "epoch": 0.7463040073346832,
      "grad_norm": 0.3077915906906128,
      "learning_rate": 3.2407252496393006e-05,
      "loss": 0.6959,
      "step": 4884
    },
    {
      "epoch": 0.7464568132329908,
      "grad_norm": 0.28220564126968384,
      "learning_rate": 3.2370475825629844e-05,
      "loss": 0.5924,
      "step": 4885
    },
    {
      "epoch": 0.7466096191312984,
      "grad_norm": 0.3948167860507965,
      "learning_rate": 3.233371600419495e-05,
      "loss": 0.6198,
      "step": 4886
    },
    {
      "epoch": 0.7467624250296061,
      "grad_norm": 0.30583393573760986,
      "learning_rate": 3.229697304124666e-05,
      "loss": 0.8064,
      "step": 4887
    },
    {
      "epoch": 0.7469152309279138,
      "grad_norm": 0.38143858313560486,
      "learning_rate": 3.226024694593922e-05,
      "loss": 0.7402,
      "step": 4888
    },
    {
      "epoch": 0.7470680368262215,
      "grad_norm": 0.23864120244979858,
      "learning_rate": 3.222353772742267e-05,
      "loss": 0.6484,
      "step": 4889
    },
    {
      "epoch": 0.7472208427245292,
      "grad_norm": 0.2785201668739319,
      "learning_rate": 3.2186845394842766e-05,
      "loss": 0.5658,
      "step": 4890
    },
    {
      "epoch": 0.7473736486228368,
      "grad_norm": 0.2768336534500122,
      "learning_rate": 3.215016995734105e-05,
      "loss": 0.6543,
      "step": 4891
    },
    {
      "epoch": 0.7475264545211445,
      "grad_norm": 0.2642340064048767,
      "learning_rate": 3.211351142405494e-05,
      "loss": 0.8071,
      "step": 4892
    },
    {
      "epoch": 0.7476792604194522,
      "grad_norm": 0.30296286940574646,
      "learning_rate": 3.207686980411765e-05,
      "loss": 0.7364,
      "step": 4893
    },
    {
      "epoch": 0.7478320663177599,
      "grad_norm": 0.2418510764837265,
      "learning_rate": 3.204024510665804e-05,
      "loss": 0.7106,
      "step": 4894
    },
    {
      "epoch": 0.7479848722160676,
      "grad_norm": 0.28688812255859375,
      "learning_rate": 3.200363734080093e-05,
      "loss": 0.7859,
      "step": 4895
    },
    {
      "epoch": 0.7481376781143753,
      "grad_norm": 0.29675769805908203,
      "learning_rate": 3.19670465156668e-05,
      "loss": 0.6998,
      "step": 4896
    },
    {
      "epoch": 0.7482904840126829,
      "grad_norm": 0.26952260732650757,
      "learning_rate": 3.19304726403719e-05,
      "loss": 0.6482,
      "step": 4897
    },
    {
      "epoch": 0.7484432899109905,
      "grad_norm": 0.32863569259643555,
      "learning_rate": 3.189391572402836e-05,
      "loss": 0.7665,
      "step": 4898
    },
    {
      "epoch": 0.7485960958092982,
      "grad_norm": 0.2946239113807678,
      "learning_rate": 3.185737577574405e-05,
      "loss": 0.788,
      "step": 4899
    },
    {
      "epoch": 0.7487489017076059,
      "grad_norm": 0.37878304719924927,
      "learning_rate": 3.182085280462256e-05,
      "loss": 0.98,
      "step": 4900
    },
    {
      "epoch": 0.7489017076059136,
      "grad_norm": 0.9184777736663818,
      "learning_rate": 3.178434681976324e-05,
      "loss": 0.8426,
      "step": 4901
    },
    {
      "epoch": 0.7490545135042213,
      "grad_norm": 0.33207786083221436,
      "learning_rate": 3.1747857830261306e-05,
      "loss": 0.8065,
      "step": 4902
    },
    {
      "epoch": 0.7492073194025289,
      "grad_norm": 0.27969595789909363,
      "learning_rate": 3.171138584520769e-05,
      "loss": 0.6431,
      "step": 4903
    },
    {
      "epoch": 0.7493601253008366,
      "grad_norm": 0.39739498496055603,
      "learning_rate": 3.167493087368906e-05,
      "loss": 0.7623,
      "step": 4904
    },
    {
      "epoch": 0.7495129311991443,
      "grad_norm": 0.23574230074882507,
      "learning_rate": 3.163849292478783e-05,
      "loss": 0.5813,
      "step": 4905
    },
    {
      "epoch": 0.749665737097452,
      "grad_norm": 0.3343149721622467,
      "learning_rate": 3.160207200758226e-05,
      "loss": 0.5977,
      "step": 4906
    },
    {
      "epoch": 0.7498185429957597,
      "grad_norm": 0.27852675318717957,
      "learning_rate": 3.156566813114632e-05,
      "loss": 0.8232,
      "step": 4907
    },
    {
      "epoch": 0.7499713488940674,
      "grad_norm": 0.25333601236343384,
      "learning_rate": 3.152928130454972e-05,
      "loss": 0.7445,
      "step": 4908
    },
    {
      "epoch": 0.7501241547923749,
      "grad_norm": 0.30783987045288086,
      "learning_rate": 3.1492911536857886e-05,
      "loss": 0.6641,
      "step": 4909
    },
    {
      "epoch": 0.7502769606906826,
      "grad_norm": 0.26670366525650024,
      "learning_rate": 3.1456558837132065e-05,
      "loss": 0.6984,
      "step": 4910
    },
    {
      "epoch": 0.7504297665889903,
      "grad_norm": 0.2689191401004791,
      "learning_rate": 3.142022321442929e-05,
      "loss": 0.731,
      "step": 4911
    },
    {
      "epoch": 0.750582572487298,
      "grad_norm": 0.28377169370651245,
      "learning_rate": 3.138390467780221e-05,
      "loss": 0.5752,
      "step": 4912
    },
    {
      "epoch": 0.7507353783856057,
      "grad_norm": 0.29742759466171265,
      "learning_rate": 3.134760323629928e-05,
      "loss": 0.7233,
      "step": 4913
    },
    {
      "epoch": 0.7508881842839134,
      "grad_norm": 0.33162474632263184,
      "learning_rate": 3.131131889896475e-05,
      "loss": 0.863,
      "step": 4914
    },
    {
      "epoch": 0.751040990182221,
      "grad_norm": 0.4513492286205292,
      "learning_rate": 3.127505167483848e-05,
      "loss": 0.5617,
      "step": 4915
    },
    {
      "epoch": 0.7511937960805287,
      "grad_norm": 0.325539231300354,
      "learning_rate": 3.1238801572956246e-05,
      "loss": 0.703,
      "step": 4916
    },
    {
      "epoch": 0.7513466019788364,
      "grad_norm": 0.3028418719768524,
      "learning_rate": 3.120256860234936e-05,
      "loss": 0.7182,
      "step": 4917
    },
    {
      "epoch": 0.7514994078771441,
      "grad_norm": 0.33599185943603516,
      "learning_rate": 3.116635277204503e-05,
      "loss": 0.6111,
      "step": 4918
    },
    {
      "epoch": 0.7516522137754518,
      "grad_norm": 0.3110974431037903,
      "learning_rate": 3.1130154091066074e-05,
      "loss": 0.7623,
      "step": 4919
    },
    {
      "epoch": 0.7518050196737595,
      "grad_norm": 0.2833666205406189,
      "learning_rate": 3.109397256843114e-05,
      "loss": 0.7789,
      "step": 4920
    },
    {
      "epoch": 0.751957825572067,
      "grad_norm": 0.9882724285125732,
      "learning_rate": 3.1057808213154535e-05,
      "loss": 0.5325,
      "step": 4921
    },
    {
      "epoch": 0.7521106314703747,
      "grad_norm": 0.2542802691459656,
      "learning_rate": 3.102166103424626e-05,
      "loss": 0.7007,
      "step": 4922
    },
    {
      "epoch": 0.7522634373686824,
      "grad_norm": 0.27196669578552246,
      "learning_rate": 3.0985531040712125e-05,
      "loss": 0.7287,
      "step": 4923
    },
    {
      "epoch": 0.7524162432669901,
      "grad_norm": 0.27083972096443176,
      "learning_rate": 3.0949418241553605e-05,
      "loss": 0.6532,
      "step": 4924
    },
    {
      "epoch": 0.7525690491652978,
      "grad_norm": 0.3268282413482666,
      "learning_rate": 3.091332264576796e-05,
      "loss": 0.7181,
      "step": 4925
    },
    {
      "epoch": 0.7527218550636054,
      "grad_norm": 0.4242473840713501,
      "learning_rate": 3.0877244262347995e-05,
      "loss": 0.7455,
      "step": 4926
    },
    {
      "epoch": 0.7528746609619131,
      "grad_norm": 0.3833047151565552,
      "learning_rate": 3.084118310028238e-05,
      "loss": 0.6763,
      "step": 4927
    },
    {
      "epoch": 0.7530274668602208,
      "grad_norm": 0.47573891282081604,
      "learning_rate": 3.0805139168555485e-05,
      "loss": 0.6871,
      "step": 4928
    },
    {
      "epoch": 0.7531802727585285,
      "grad_norm": 0.339206725358963,
      "learning_rate": 3.076911247614731e-05,
      "loss": 0.765,
      "step": 4929
    },
    {
      "epoch": 0.7533330786568362,
      "grad_norm": 0.2713732421398163,
      "learning_rate": 3.073310303203364e-05,
      "loss": 0.6879,
      "step": 4930
    },
    {
      "epoch": 0.7534858845551439,
      "grad_norm": 0.38381505012512207,
      "learning_rate": 3.069711084518588e-05,
      "loss": 0.8672,
      "step": 4931
    },
    {
      "epoch": 0.7536386904534516,
      "grad_norm": 0.3012462258338928,
      "learning_rate": 3.066113592457124e-05,
      "loss": 0.8056,
      "step": 4932
    },
    {
      "epoch": 0.7537914963517591,
      "grad_norm": 0.295955091714859,
      "learning_rate": 3.0625178279152514e-05,
      "loss": 0.6531,
      "step": 4933
    },
    {
      "epoch": 0.7539443022500668,
      "grad_norm": 0.2634066641330719,
      "learning_rate": 3.058923791788829e-05,
      "loss": 0.6273,
      "step": 4934
    },
    {
      "epoch": 0.7540971081483745,
      "grad_norm": 0.33165842294692993,
      "learning_rate": 3.055331484973276e-05,
      "loss": 0.6211,
      "step": 4935
    },
    {
      "epoch": 0.7542499140466822,
      "grad_norm": 0.37168869376182556,
      "learning_rate": 3.0517409083635906e-05,
      "loss": 0.8095,
      "step": 4936
    },
    {
      "epoch": 0.7544027199449899,
      "grad_norm": 0.30196696519851685,
      "learning_rate": 3.0481520628543303e-05,
      "loss": 0.6351,
      "step": 4937
    },
    {
      "epoch": 0.7545555258432975,
      "grad_norm": 0.2617061138153076,
      "learning_rate": 3.044564949339631e-05,
      "loss": 0.599,
      "step": 4938
    },
    {
      "epoch": 0.7547083317416052,
      "grad_norm": 0.28290948271751404,
      "learning_rate": 3.040979568713189e-05,
      "loss": 0.761,
      "step": 4939
    },
    {
      "epoch": 0.7548611376399129,
      "grad_norm": 0.2870292663574219,
      "learning_rate": 3.037395921868269e-05,
      "loss": 0.7592,
      "step": 4940
    },
    {
      "epoch": 0.7550139435382206,
      "grad_norm": 0.30262330174446106,
      "learning_rate": 3.0338140096977086e-05,
      "loss": 0.5503,
      "step": 4941
    },
    {
      "epoch": 0.7551667494365283,
      "grad_norm": 0.3084609806537628,
      "learning_rate": 3.030233833093915e-05,
      "loss": 0.7879,
      "step": 4942
    },
    {
      "epoch": 0.755319555334836,
      "grad_norm": 0.4206237494945526,
      "learning_rate": 3.0266553929488563e-05,
      "loss": 0.6484,
      "step": 4943
    },
    {
      "epoch": 0.7554723612331437,
      "grad_norm": 0.2730026841163635,
      "learning_rate": 3.0230786901540677e-05,
      "loss": 0.7605,
      "step": 4944
    },
    {
      "epoch": 0.7556251671314512,
      "grad_norm": 0.29598739743232727,
      "learning_rate": 3.0195037256006563e-05,
      "loss": 0.7792,
      "step": 4945
    },
    {
      "epoch": 0.7557779730297589,
      "grad_norm": 0.31024444103240967,
      "learning_rate": 3.0159305001793004e-05,
      "loss": 0.6362,
      "step": 4946
    },
    {
      "epoch": 0.7559307789280666,
      "grad_norm": 0.2978576421737671,
      "learning_rate": 3.012359014780234e-05,
      "loss": 0.5886,
      "step": 4947
    },
    {
      "epoch": 0.7560835848263743,
      "grad_norm": 0.2910394072532654,
      "learning_rate": 3.0087892702932584e-05,
      "loss": 0.575,
      "step": 4948
    },
    {
      "epoch": 0.756236390724682,
      "grad_norm": 0.3117895722389221,
      "learning_rate": 3.0052212676077517e-05,
      "loss": 0.565,
      "step": 4949
    },
    {
      "epoch": 0.7563891966229896,
      "grad_norm": 0.27814364433288574,
      "learning_rate": 3.0016550076126527e-05,
      "loss": 0.5543,
      "step": 4950
    },
    {
      "epoch": 0.7565420025212973,
      "grad_norm": 0.27082252502441406,
      "learning_rate": 2.9980904911964637e-05,
      "loss": 0.5369,
      "step": 4951
    },
    {
      "epoch": 0.756694808419605,
      "grad_norm": 0.2814607322216034,
      "learning_rate": 2.9945277192472486e-05,
      "loss": 0.779,
      "step": 4952
    },
    {
      "epoch": 0.7568476143179127,
      "grad_norm": 0.47501417994499207,
      "learning_rate": 2.9909666926526515e-05,
      "loss": 0.6097,
      "step": 4953
    },
    {
      "epoch": 0.7570004202162204,
      "grad_norm": 0.31489819288253784,
      "learning_rate": 2.987407412299863e-05,
      "loss": 0.7388,
      "step": 4954
    },
    {
      "epoch": 0.7571532261145281,
      "grad_norm": 0.26809048652648926,
      "learning_rate": 2.983849879075652e-05,
      "loss": 0.5423,
      "step": 4955
    },
    {
      "epoch": 0.7573060320128358,
      "grad_norm": 0.30268001556396484,
      "learning_rate": 2.9802940938663526e-05,
      "loss": 0.688,
      "step": 4956
    },
    {
      "epoch": 0.7574588379111433,
      "grad_norm": 0.289115846157074,
      "learning_rate": 2.976740057557854e-05,
      "loss": 0.6581,
      "step": 4957
    },
    {
      "epoch": 0.757611643809451,
      "grad_norm": 0.2887480854988098,
      "learning_rate": 2.9731877710356117e-05,
      "loss": 0.6738,
      "step": 4958
    },
    {
      "epoch": 0.7577644497077587,
      "grad_norm": 0.2813575863838196,
      "learning_rate": 2.9696372351846515e-05,
      "loss": 0.6847,
      "step": 4959
    },
    {
      "epoch": 0.7579172556060664,
      "grad_norm": 0.4002649188041687,
      "learning_rate": 2.9660884508895635e-05,
      "loss": 0.6783,
      "step": 4960
    },
    {
      "epoch": 0.7580700615043741,
      "grad_norm": 0.3163740634918213,
      "learning_rate": 2.9625414190344923e-05,
      "loss": 0.6138,
      "step": 4961
    },
    {
      "epoch": 0.7582228674026817,
      "grad_norm": 0.338833212852478,
      "learning_rate": 2.9589961405031507e-05,
      "loss": 0.5459,
      "step": 4962
    },
    {
      "epoch": 0.7583756733009894,
      "grad_norm": 0.28364452719688416,
      "learning_rate": 2.9554526161788166e-05,
      "loss": 0.6336,
      "step": 4963
    },
    {
      "epoch": 0.7585284791992971,
      "grad_norm": 0.26455238461494446,
      "learning_rate": 2.9519108469443313e-05,
      "loss": 0.6763,
      "step": 4964
    },
    {
      "epoch": 0.7586812850976048,
      "grad_norm": 0.27129480242729187,
      "learning_rate": 2.948370833682096e-05,
      "loss": 0.6584,
      "step": 4965
    },
    {
      "epoch": 0.7588340909959125,
      "grad_norm": 0.3427901864051819,
      "learning_rate": 2.9448325772740713e-05,
      "loss": 0.8716,
      "step": 4966
    },
    {
      "epoch": 0.7589868968942202,
      "grad_norm": 0.3092363476753235,
      "learning_rate": 2.9412960786017906e-05,
      "loss": 0.6354,
      "step": 4967
    },
    {
      "epoch": 0.7591397027925277,
      "grad_norm": 0.27754339575767517,
      "learning_rate": 2.9377613385463366e-05,
      "loss": 0.5946,
      "step": 4968
    },
    {
      "epoch": 0.7592925086908354,
      "grad_norm": 0.28938060998916626,
      "learning_rate": 2.9342283579883644e-05,
      "loss": 0.7985,
      "step": 4969
    },
    {
      "epoch": 0.7594453145891431,
      "grad_norm": 0.33980458974838257,
      "learning_rate": 2.930697137808084e-05,
      "loss": 0.7286,
      "step": 4970
    },
    {
      "epoch": 0.7595981204874508,
      "grad_norm": 0.40130120515823364,
      "learning_rate": 2.927167678885272e-05,
      "loss": 0.8292,
      "step": 4971
    },
    {
      "epoch": 0.7597509263857585,
      "grad_norm": 0.2771167457103729,
      "learning_rate": 2.9236399820992587e-05,
      "loss": 0.6023,
      "step": 4972
    },
    {
      "epoch": 0.7599037322840662,
      "grad_norm": 0.3820517361164093,
      "learning_rate": 2.9201140483289468e-05,
      "loss": 0.6311,
      "step": 4973
    },
    {
      "epoch": 0.7600565381823738,
      "grad_norm": 0.2943771183490753,
      "learning_rate": 2.9165898784527858e-05,
      "loss": 0.513,
      "step": 4974
    },
    {
      "epoch": 0.7602093440806815,
      "grad_norm": 0.38422003388404846,
      "learning_rate": 2.9130674733488006e-05,
      "loss": 0.9081,
      "step": 4975
    },
    {
      "epoch": 0.7603621499789892,
      "grad_norm": 0.6306685209274292,
      "learning_rate": 2.909546833894561e-05,
      "loss": 0.7427,
      "step": 4976
    },
    {
      "epoch": 0.7605149558772969,
      "grad_norm": 0.2573539614677429,
      "learning_rate": 2.9060279609672126e-05,
      "loss": 0.4403,
      "step": 4977
    },
    {
      "epoch": 0.7606677617756046,
      "grad_norm": 0.2672884166240692,
      "learning_rate": 2.902510855443449e-05,
      "loss": 0.5494,
      "step": 4978
    },
    {
      "epoch": 0.7608205676739123,
      "grad_norm": 0.2918144762516022,
      "learning_rate": 2.8989955181995243e-05,
      "loss": 0.5941,
      "step": 4979
    },
    {
      "epoch": 0.7609733735722198,
      "grad_norm": 0.27063295245170593,
      "learning_rate": 2.8954819501112584e-05,
      "loss": 0.6396,
      "step": 4980
    },
    {
      "epoch": 0.7611261794705275,
      "grad_norm": 0.2645648717880249,
      "learning_rate": 2.891970152054031e-05,
      "loss": 0.9103,
      "step": 4981
    },
    {
      "epoch": 0.7612789853688352,
      "grad_norm": 0.31377100944519043,
      "learning_rate": 2.888460124902774e-05,
      "loss": 0.6627,
      "step": 4982
    },
    {
      "epoch": 0.7614317912671429,
      "grad_norm": 0.2497723251581192,
      "learning_rate": 2.8849518695319776e-05,
      "loss": 0.7019,
      "step": 4983
    },
    {
      "epoch": 0.7615845971654506,
      "grad_norm": 0.5569624304771423,
      "learning_rate": 2.8814453868156978e-05,
      "loss": 0.6643,
      "step": 4984
    },
    {
      "epoch": 0.7617374030637583,
      "grad_norm": 0.4809087812900543,
      "learning_rate": 2.8779406776275475e-05,
      "loss": 0.7912,
      "step": 4985
    },
    {
      "epoch": 0.7618902089620659,
      "grad_norm": 0.31673797965049744,
      "learning_rate": 2.8744377428406933e-05,
      "loss": 0.5688,
      "step": 4986
    },
    {
      "epoch": 0.7620430148603736,
      "grad_norm": 0.30070045590400696,
      "learning_rate": 2.870936583327858e-05,
      "loss": 0.6979,
      "step": 4987
    },
    {
      "epoch": 0.7621958207586813,
      "grad_norm": 0.3410513699054718,
      "learning_rate": 2.8674371999613314e-05,
      "loss": 0.6147,
      "step": 4988
    },
    {
      "epoch": 0.762348626656989,
      "grad_norm": 0.3067401051521301,
      "learning_rate": 2.8639395936129553e-05,
      "loss": 0.8445,
      "step": 4989
    },
    {
      "epoch": 0.7625014325552967,
      "grad_norm": 0.26025477051734924,
      "learning_rate": 2.860443765154126e-05,
      "loss": 0.6062,
      "step": 4990
    },
    {
      "epoch": 0.7626542384536044,
      "grad_norm": 0.25668859481811523,
      "learning_rate": 2.8569497154558034e-05,
      "loss": 0.6773,
      "step": 4991
    },
    {
      "epoch": 0.7628070443519119,
      "grad_norm": 0.33201903104782104,
      "learning_rate": 2.8534574453885e-05,
      "loss": 0.5587,
      "step": 4992
    },
    {
      "epoch": 0.7629598502502196,
      "grad_norm": 0.3629027009010315,
      "learning_rate": 2.8499669558222796e-05,
      "loss": 0.6214,
      "step": 4993
    },
    {
      "epoch": 0.7631126561485273,
      "grad_norm": 0.3354741632938385,
      "learning_rate": 2.8464782476267737e-05,
      "loss": 0.736,
      "step": 4994
    },
    {
      "epoch": 0.763265462046835,
      "grad_norm": 0.29513174295425415,
      "learning_rate": 2.8429913216711678e-05,
      "loss": 0.8077,
      "step": 4995
    },
    {
      "epoch": 0.7634182679451427,
      "grad_norm": 0.2917438745498657,
      "learning_rate": 2.839506178824196e-05,
      "loss": 0.7234,
      "step": 4996
    },
    {
      "epoch": 0.7635710738434504,
      "grad_norm": 0.2602333426475525,
      "learning_rate": 2.8360228199541494e-05,
      "loss": 0.5751,
      "step": 4997
    },
    {
      "epoch": 0.763723879741758,
      "grad_norm": 0.3598625659942627,
      "learning_rate": 2.8325412459288814e-05,
      "loss": 0.6023,
      "step": 4998
    },
    {
      "epoch": 0.7638766856400657,
      "grad_norm": 0.30226266384124756,
      "learning_rate": 2.8290614576157992e-05,
      "loss": 0.7538,
      "step": 4999
    },
    {
      "epoch": 0.7640294915383734,
      "grad_norm": 0.2980852425098419,
      "learning_rate": 2.8255834558818607e-05,
      "loss": 0.7214,
      "step": 5000
    },
    {
      "epoch": 0.7641822974366811,
      "grad_norm": 0.30146270990371704,
      "learning_rate": 2.8221072415935766e-05,
      "loss": 0.6857,
      "step": 5001
    },
    {
      "epoch": 0.7643351033349888,
      "grad_norm": 0.29795554280281067,
      "learning_rate": 2.8186328156170217e-05,
      "loss": 0.9127,
      "step": 5002
    },
    {
      "epoch": 0.7644879092332965,
      "grad_norm": 0.3890931010246277,
      "learning_rate": 2.8151601788178207e-05,
      "loss": 0.6883,
      "step": 5003
    },
    {
      "epoch": 0.764640715131604,
      "grad_norm": 0.27632567286491394,
      "learning_rate": 2.8116893320611494e-05,
      "loss": 0.697,
      "step": 5004
    },
    {
      "epoch": 0.7647935210299117,
      "grad_norm": 0.2864340841770172,
      "learning_rate": 2.8082202762117382e-05,
      "loss": 0.5696,
      "step": 5005
    },
    {
      "epoch": 0.7649463269282194,
      "grad_norm": 0.29483935236930847,
      "learning_rate": 2.8047530121338795e-05,
      "loss": 0.7518,
      "step": 5006
    },
    {
      "epoch": 0.7650991328265271,
      "grad_norm": 0.29760611057281494,
      "learning_rate": 2.801287540691404e-05,
      "loss": 0.8129,
      "step": 5007
    },
    {
      "epoch": 0.7652519387248348,
      "grad_norm": 0.2790490984916687,
      "learning_rate": 2.797823862747715e-05,
      "loss": 0.7121,
      "step": 5008
    },
    {
      "epoch": 0.7654047446231425,
      "grad_norm": 0.3155994117259979,
      "learning_rate": 2.7943619791657494e-05,
      "loss": 0.7546,
      "step": 5009
    },
    {
      "epoch": 0.7655575505214501,
      "grad_norm": 0.34615352749824524,
      "learning_rate": 2.7909018908080153e-05,
      "loss": 0.5962,
      "step": 5010
    },
    {
      "epoch": 0.7657103564197578,
      "grad_norm": 0.2994769513607025,
      "learning_rate": 2.7874435985365555e-05,
      "loss": 0.9641,
      "step": 5011
    },
    {
      "epoch": 0.7658631623180655,
      "grad_norm": 0.29071369767189026,
      "learning_rate": 2.7839871032129828e-05,
      "loss": 0.5396,
      "step": 5012
    },
    {
      "epoch": 0.7660159682163732,
      "grad_norm": 0.31707438826560974,
      "learning_rate": 2.7805324056984482e-05,
      "loss": 0.9124,
      "step": 5013
    },
    {
      "epoch": 0.7661687741146809,
      "grad_norm": 0.43815508484840393,
      "learning_rate": 2.777079506853665e-05,
      "loss": 0.5922,
      "step": 5014
    },
    {
      "epoch": 0.7663215800129884,
      "grad_norm": 0.341488242149353,
      "learning_rate": 2.7736284075388884e-05,
      "loss": 0.8211,
      "step": 5015
    },
    {
      "epoch": 0.7664743859112961,
      "grad_norm": 0.4834541082382202,
      "learning_rate": 2.770179108613935e-05,
      "loss": 0.4068,
      "step": 5016
    },
    {
      "epoch": 0.7666271918096038,
      "grad_norm": 0.3897079825401306,
      "learning_rate": 2.7667316109381734e-05,
      "loss": 0.7649,
      "step": 5017
    },
    {
      "epoch": 0.7667799977079115,
      "grad_norm": 0.36329084634780884,
      "learning_rate": 2.763285915370507e-05,
      "loss": 0.7393,
      "step": 5018
    },
    {
      "epoch": 0.7669328036062192,
      "grad_norm": 0.3158819377422333,
      "learning_rate": 2.759842022769408e-05,
      "loss": 0.7657,
      "step": 5019
    },
    {
      "epoch": 0.7670856095045269,
      "grad_norm": 0.38043487071990967,
      "learning_rate": 2.7563999339928938e-05,
      "loss": 0.8129,
      "step": 5020
    },
    {
      "epoch": 0.7672384154028346,
      "grad_norm": 0.2957688271999359,
      "learning_rate": 2.7529596498985334e-05,
      "loss": 0.6722,
      "step": 5021
    },
    {
      "epoch": 0.7673912213011422,
      "grad_norm": 0.2666127681732178,
      "learning_rate": 2.7495211713434443e-05,
      "loss": 0.5102,
      "step": 5022
    },
    {
      "epoch": 0.7675440271994499,
      "grad_norm": 0.2595529854297638,
      "learning_rate": 2.7460844991842893e-05,
      "loss": 0.7852,
      "step": 5023
    },
    {
      "epoch": 0.7676968330977576,
      "grad_norm": 0.33117806911468506,
      "learning_rate": 2.7426496342772934e-05,
      "loss": 0.6446,
      "step": 5024
    },
    {
      "epoch": 0.7678496389960653,
      "grad_norm": 0.8229051232337952,
      "learning_rate": 2.7392165774782175e-05,
      "loss": 0.9172,
      "step": 5025
    },
    {
      "epoch": 0.768002444894373,
      "grad_norm": 0.36411499977111816,
      "learning_rate": 2.7357853296423865e-05,
      "loss": 0.8698,
      "step": 5026
    },
    {
      "epoch": 0.7681552507926805,
      "grad_norm": 0.260728657245636,
      "learning_rate": 2.7323558916246593e-05,
      "loss": 0.733,
      "step": 5027
    },
    {
      "epoch": 0.7683080566909882,
      "grad_norm": 0.3510059416294098,
      "learning_rate": 2.7289282642794588e-05,
      "loss": 0.704,
      "step": 5028
    },
    {
      "epoch": 0.7684608625892959,
      "grad_norm": 0.35938236117362976,
      "learning_rate": 2.725502448460743e-05,
      "loss": 0.6985,
      "step": 5029
    },
    {
      "epoch": 0.7686136684876036,
      "grad_norm": 0.31864234805107117,
      "learning_rate": 2.7220784450220304e-05,
      "loss": 0.6877,
      "step": 5030
    },
    {
      "epoch": 0.7687664743859113,
      "grad_norm": 0.2729928493499756,
      "learning_rate": 2.7186562548163817e-05,
      "loss": 0.7003,
      "step": 5031
    },
    {
      "epoch": 0.768919280284219,
      "grad_norm": 0.33942142128944397,
      "learning_rate": 2.7152358786964026e-05,
      "loss": 0.6741,
      "step": 5032
    },
    {
      "epoch": 0.7690720861825266,
      "grad_norm": 0.32317203283309937,
      "learning_rate": 2.7118173175142537e-05,
      "loss": 0.6225,
      "step": 5033
    },
    {
      "epoch": 0.7692248920808343,
      "grad_norm": 0.3431759178638458,
      "learning_rate": 2.7084005721216456e-05,
      "loss": 0.6183,
      "step": 5034
    },
    {
      "epoch": 0.769377697979142,
      "grad_norm": 0.2686121165752411,
      "learning_rate": 2.7049856433698263e-05,
      "loss": 0.7735,
      "step": 5035
    },
    {
      "epoch": 0.7695305038774497,
      "grad_norm": 0.3046192228794098,
      "learning_rate": 2.701572532109595e-05,
      "loss": 0.8076,
      "step": 5036
    },
    {
      "epoch": 0.7696833097757574,
      "grad_norm": 0.3387291431427002,
      "learning_rate": 2.6981612391913026e-05,
      "loss": 0.7316,
      "step": 5037
    },
    {
      "epoch": 0.7698361156740651,
      "grad_norm": 0.2865094244480133,
      "learning_rate": 2.6947517654648467e-05,
      "loss": 0.5962,
      "step": 5038
    },
    {
      "epoch": 0.7699889215723726,
      "grad_norm": 0.3363531827926636,
      "learning_rate": 2.6913441117796666e-05,
      "loss": 0.7593,
      "step": 5039
    },
    {
      "epoch": 0.7701417274706803,
      "grad_norm": 0.32571524381637573,
      "learning_rate": 2.6879382789847486e-05,
      "loss": 0.9278,
      "step": 5040
    },
    {
      "epoch": 0.770294533368988,
      "grad_norm": 0.3032762408256531,
      "learning_rate": 2.6845342679286278e-05,
      "loss": 0.7615,
      "step": 5041
    },
    {
      "epoch": 0.7704473392672957,
      "grad_norm": 0.3438403606414795,
      "learning_rate": 2.6811320794593896e-05,
      "loss": 0.5469,
      "step": 5042
    },
    {
      "epoch": 0.7706001451656034,
      "grad_norm": 0.28308266401290894,
      "learning_rate": 2.6777317144246572e-05,
      "loss": 0.7271,
      "step": 5043
    },
    {
      "epoch": 0.7707529510639111,
      "grad_norm": 0.24329397082328796,
      "learning_rate": 2.6743331736716017e-05,
      "loss": 0.8853,
      "step": 5044
    },
    {
      "epoch": 0.7709057569622187,
      "grad_norm": 0.2939806580543518,
      "learning_rate": 2.670936458046941e-05,
      "loss": 0.843,
      "step": 5045
    },
    {
      "epoch": 0.7710585628605264,
      "grad_norm": 0.3209003508090973,
      "learning_rate": 2.6675415683969428e-05,
      "loss": 0.8808,
      "step": 5046
    },
    {
      "epoch": 0.7712113687588341,
      "grad_norm": 0.3447398245334625,
      "learning_rate": 2.6641485055674132e-05,
      "loss": 0.6298,
      "step": 5047
    },
    {
      "epoch": 0.7713641746571418,
      "grad_norm": 0.31295421719551086,
      "learning_rate": 2.660757270403701e-05,
      "loss": 0.559,
      "step": 5048
    },
    {
      "epoch": 0.7715169805554495,
      "grad_norm": 0.2551795244216919,
      "learning_rate": 2.6573678637507116e-05,
      "loss": 0.675,
      "step": 5049
    },
    {
      "epoch": 0.7716697864537572,
      "grad_norm": 1.657878041267395,
      "learning_rate": 2.6539802864528784e-05,
      "loss": 0.6751,
      "step": 5050
    },
    {
      "epoch": 0.7718225923520647,
      "grad_norm": 0.2639477252960205,
      "learning_rate": 2.6505945393541932e-05,
      "loss": 0.6769,
      "step": 5051
    },
    {
      "epoch": 0.7719753982503724,
      "grad_norm": 0.27711376547813416,
      "learning_rate": 2.6472106232981897e-05,
      "loss": 0.7162,
      "step": 5052
    },
    {
      "epoch": 0.7721282041486801,
      "grad_norm": 0.2852341830730438,
      "learning_rate": 2.643828539127937e-05,
      "loss": 0.5743,
      "step": 5053
    },
    {
      "epoch": 0.7722810100469878,
      "grad_norm": 0.30288180708885193,
      "learning_rate": 2.6404482876860527e-05,
      "loss": 0.6888,
      "step": 5054
    },
    {
      "epoch": 0.7724338159452955,
      "grad_norm": 0.25058987736701965,
      "learning_rate": 2.6370698698146977e-05,
      "loss": 0.5594,
      "step": 5055
    },
    {
      "epoch": 0.7725866218436032,
      "grad_norm": 0.29127037525177,
      "learning_rate": 2.633693286355583e-05,
      "loss": 0.7225,
      "step": 5056
    },
    {
      "epoch": 0.7727394277419108,
      "grad_norm": 0.35187846422195435,
      "learning_rate": 2.6303185381499507e-05,
      "loss": 0.5605,
      "step": 5057
    },
    {
      "epoch": 0.7728922336402185,
      "grad_norm": 0.36523064970970154,
      "learning_rate": 2.6269456260385893e-05,
      "loss": 0.7734,
      "step": 5058
    },
    {
      "epoch": 0.7730450395385262,
      "grad_norm": 0.34329739212989807,
      "learning_rate": 2.6235745508618338e-05,
      "loss": 0.689,
      "step": 5059
    },
    {
      "epoch": 0.7731978454368339,
      "grad_norm": 0.285354346036911,
      "learning_rate": 2.6202053134595618e-05,
      "loss": 0.6413,
      "step": 5060
    },
    {
      "epoch": 0.7733506513351416,
      "grad_norm": 0.2637817859649658,
      "learning_rate": 2.6168379146711884e-05,
      "loss": 0.8285,
      "step": 5061
    },
    {
      "epoch": 0.7735034572334493,
      "grad_norm": 0.31705915927886963,
      "learning_rate": 2.61347235533567e-05,
      "loss": 0.6822,
      "step": 5062
    },
    {
      "epoch": 0.7736562631317568,
      "grad_norm": 0.3138381540775299,
      "learning_rate": 2.6101086362915127e-05,
      "loss": 0.8156,
      "step": 5063
    },
    {
      "epoch": 0.7738090690300645,
      "grad_norm": 0.3055115342140198,
      "learning_rate": 2.6067467583767535e-05,
      "loss": 0.7352,
      "step": 5064
    },
    {
      "epoch": 0.7739618749283722,
      "grad_norm": 0.298575222492218,
      "learning_rate": 2.603386722428981e-05,
      "loss": 0.7935,
      "step": 5065
    },
    {
      "epoch": 0.7741146808266799,
      "grad_norm": 0.3727077841758728,
      "learning_rate": 2.6000285292853156e-05,
      "loss": 0.6423,
      "step": 5066
    },
    {
      "epoch": 0.7742674867249876,
      "grad_norm": 0.27079665660858154,
      "learning_rate": 2.5966721797824267e-05,
      "loss": 0.7292,
      "step": 5067
    },
    {
      "epoch": 0.7744202926232953,
      "grad_norm": 0.29036736488342285,
      "learning_rate": 2.593317674756517e-05,
      "loss": 0.7094,
      "step": 5068
    },
    {
      "epoch": 0.7745730985216029,
      "grad_norm": 0.2901022732257843,
      "learning_rate": 2.5899650150433375e-05,
      "loss": 0.6526,
      "step": 5069
    },
    {
      "epoch": 0.7747259044199106,
      "grad_norm": 0.3423730432987213,
      "learning_rate": 2.5866142014781726e-05,
      "loss": 0.6695,
      "step": 5070
    },
    {
      "epoch": 0.7748787103182183,
      "grad_norm": 0.3615645170211792,
      "learning_rate": 2.5832652348958475e-05,
      "loss": 0.7929,
      "step": 5071
    },
    {
      "epoch": 0.775031516216526,
      "grad_norm": 0.4355672299861908,
      "learning_rate": 2.5799181161307308e-05,
      "loss": 0.6221,
      "step": 5072
    },
    {
      "epoch": 0.7751843221148337,
      "grad_norm": 0.3562428653240204,
      "learning_rate": 2.5765728460167314e-05,
      "loss": 0.7955,
      "step": 5073
    },
    {
      "epoch": 0.7753371280131413,
      "grad_norm": 0.3044549524784088,
      "learning_rate": 2.5732294253872947e-05,
      "loss": 0.6761,
      "step": 5074
    },
    {
      "epoch": 0.7754899339114489,
      "grad_norm": 0.32677242159843445,
      "learning_rate": 2.5698878550754014e-05,
      "loss": 0.7494,
      "step": 5075
    },
    {
      "epoch": 0.7756427398097566,
      "grad_norm": 0.2794375419616699,
      "learning_rate": 2.566548135913579e-05,
      "loss": 0.7209,
      "step": 5076
    },
    {
      "epoch": 0.7757955457080643,
      "grad_norm": 0.31766951084136963,
      "learning_rate": 2.5632102687338932e-05,
      "loss": 0.812,
      "step": 5077
    },
    {
      "epoch": 0.775948351606372,
      "grad_norm": 0.34559720754623413,
      "learning_rate": 2.559874254367942e-05,
      "loss": 0.674,
      "step": 5078
    },
    {
      "epoch": 0.7761011575046797,
      "grad_norm": 0.29366979002952576,
      "learning_rate": 2.5565400936468643e-05,
      "loss": 0.6706,
      "step": 5079
    },
    {
      "epoch": 0.7762539634029874,
      "grad_norm": 0.2583487331867218,
      "learning_rate": 2.5532077874013392e-05,
      "loss": 0.7567,
      "step": 5080
    },
    {
      "epoch": 0.776406769301295,
      "grad_norm": 0.3665739893913269,
      "learning_rate": 2.549877336461587e-05,
      "loss": 0.6882,
      "step": 5081
    },
    {
      "epoch": 0.7765595751996027,
      "grad_norm": 0.2896324694156647,
      "learning_rate": 2.546548741657355e-05,
      "loss": 0.6605,
      "step": 5082
    },
    {
      "epoch": 0.7767123810979104,
      "grad_norm": 0.27397826313972473,
      "learning_rate": 2.5432220038179412e-05,
      "loss": 0.7353,
      "step": 5083
    },
    {
      "epoch": 0.7768651869962181,
      "grad_norm": 0.2787233293056488,
      "learning_rate": 2.539897123772168e-05,
      "loss": 0.7683,
      "step": 5084
    },
    {
      "epoch": 0.7770179928945258,
      "grad_norm": 0.35348227620124817,
      "learning_rate": 2.536574102348407e-05,
      "loss": 0.616,
      "step": 5085
    },
    {
      "epoch": 0.7771707987928334,
      "grad_norm": 0.384181946516037,
      "learning_rate": 2.5332529403745564e-05,
      "loss": 0.7344,
      "step": 5086
    },
    {
      "epoch": 0.777323604691141,
      "grad_norm": 0.3795936107635498,
      "learning_rate": 2.5299336386780603e-05,
      "loss": 0.5957,
      "step": 5087
    },
    {
      "epoch": 0.7774764105894487,
      "grad_norm": 0.33974671363830566,
      "learning_rate": 2.5266161980858937e-05,
      "loss": 0.6189,
      "step": 5088
    },
    {
      "epoch": 0.7776292164877564,
      "grad_norm": 1.0465130805969238,
      "learning_rate": 2.5233006194245634e-05,
      "loss": 0.8266,
      "step": 5089
    },
    {
      "epoch": 0.7777820223860641,
      "grad_norm": 0.25817668437957764,
      "learning_rate": 2.519986903520124e-05,
      "loss": 0.8808,
      "step": 5090
    },
    {
      "epoch": 0.7779348282843718,
      "grad_norm": 0.3180966079235077,
      "learning_rate": 2.516675051198161e-05,
      "loss": 0.7442,
      "step": 5091
    },
    {
      "epoch": 0.7780876341826795,
      "grad_norm": 0.2853553295135498,
      "learning_rate": 2.513365063283791e-05,
      "loss": 0.7022,
      "step": 5092
    },
    {
      "epoch": 0.7782404400809871,
      "grad_norm": 0.28600025177001953,
      "learning_rate": 2.5100569406016695e-05,
      "loss": 0.631,
      "step": 5093
    },
    {
      "epoch": 0.7783932459792948,
      "grad_norm": 0.3371172547340393,
      "learning_rate": 2.506750683975988e-05,
      "loss": 0.5755,
      "step": 5094
    },
    {
      "epoch": 0.7785460518776025,
      "grad_norm": 0.33092522621154785,
      "learning_rate": 2.5034462942304772e-05,
      "loss": 0.6777,
      "step": 5095
    },
    {
      "epoch": 0.7786988577759102,
      "grad_norm": 0.49018746614456177,
      "learning_rate": 2.5001437721883936e-05,
      "loss": 0.7151,
      "step": 5096
    },
    {
      "epoch": 0.7788516636742179,
      "grad_norm": 0.30702343583106995,
      "learning_rate": 2.4968431186725304e-05,
      "loss": 0.7647,
      "step": 5097
    },
    {
      "epoch": 0.7790044695725254,
      "grad_norm": 0.2928082346916199,
      "learning_rate": 2.4935443345052213e-05,
      "loss": 0.6836,
      "step": 5098
    },
    {
      "epoch": 0.7791572754708331,
      "grad_norm": 0.3457891047000885,
      "learning_rate": 2.4902474205083336e-05,
      "loss": 0.7501,
      "step": 5099
    },
    {
      "epoch": 0.7793100813691408,
      "grad_norm": 0.28967201709747314,
      "learning_rate": 2.486952377503261e-05,
      "loss": 0.7462,
      "step": 5100
    },
    {
      "epoch": 0.7794628872674485,
      "grad_norm": 0.2979332506656647,
      "learning_rate": 2.4836592063109355e-05,
      "loss": 0.7389,
      "step": 5101
    },
    {
      "epoch": 0.7796156931657562,
      "grad_norm": 0.2730976343154907,
      "learning_rate": 2.480367907751827e-05,
      "loss": 0.7138,
      "step": 5102
    },
    {
      "epoch": 0.7797684990640639,
      "grad_norm": 0.2908374071121216,
      "learning_rate": 2.4770784826459303e-05,
      "loss": 0.5659,
      "step": 5103
    },
    {
      "epoch": 0.7799213049623716,
      "grad_norm": 0.2820816934108734,
      "learning_rate": 2.473790931812783e-05,
      "loss": 0.6933,
      "step": 5104
    },
    {
      "epoch": 0.7800741108606792,
      "grad_norm": 0.2908737063407898,
      "learning_rate": 2.470505256071446e-05,
      "loss": 0.7285,
      "step": 5105
    },
    {
      "epoch": 0.7802269167589869,
      "grad_norm": 0.2963566482067108,
      "learning_rate": 2.4672214562405217e-05,
      "loss": 0.7999,
      "step": 5106
    },
    {
      "epoch": 0.7803797226572946,
      "grad_norm": 0.2846260368824005,
      "learning_rate": 2.4639395331381376e-05,
      "loss": 0.6746,
      "step": 5107
    },
    {
      "epoch": 0.7805325285556023,
      "grad_norm": 0.26171863079071045,
      "learning_rate": 2.4606594875819622e-05,
      "loss": 0.4587,
      "step": 5108
    },
    {
      "epoch": 0.78068533445391,
      "grad_norm": 0.26902881264686584,
      "learning_rate": 2.4573813203891883e-05,
      "loss": 0.879,
      "step": 5109
    },
    {
      "epoch": 0.7808381403522175,
      "grad_norm": 0.26719123125076294,
      "learning_rate": 2.4541050323765403e-05,
      "loss": 0.6603,
      "step": 5110
    },
    {
      "epoch": 0.7809909462505252,
      "grad_norm": 0.264125257730484,
      "learning_rate": 2.450830624360282e-05,
      "loss": 0.619,
      "step": 5111
    },
    {
      "epoch": 0.7811437521488329,
      "grad_norm": 0.265664666891098,
      "learning_rate": 2.447558097156204e-05,
      "loss": 0.527,
      "step": 5112
    },
    {
      "epoch": 0.7812965580471406,
      "grad_norm": 0.3076263964176178,
      "learning_rate": 2.4442874515796344e-05,
      "loss": 0.6551,
      "step": 5113
    },
    {
      "epoch": 0.7814493639454483,
      "grad_norm": 0.32857951521873474,
      "learning_rate": 2.4410186884454165e-05,
      "loss": 0.7661,
      "step": 5114
    },
    {
      "epoch": 0.781602169843756,
      "grad_norm": 0.3077498972415924,
      "learning_rate": 2.4377518085679396e-05,
      "loss": 0.7124,
      "step": 5115
    },
    {
      "epoch": 0.7817549757420637,
      "grad_norm": 0.3741486668586731,
      "learning_rate": 2.4344868127611243e-05,
      "loss": 0.7011,
      "step": 5116
    },
    {
      "epoch": 0.7819077816403713,
      "grad_norm": 0.2527276277542114,
      "learning_rate": 2.43122370183841e-05,
      "loss": 0.7102,
      "step": 5117
    },
    {
      "epoch": 0.782060587538679,
      "grad_norm": 0.44322469830513,
      "learning_rate": 2.4279624766127785e-05,
      "loss": 0.7646,
      "step": 5118
    },
    {
      "epoch": 0.7822133934369867,
      "grad_norm": 0.2980830669403076,
      "learning_rate": 2.424703137896731e-05,
      "loss": 0.6688,
      "step": 5119
    },
    {
      "epoch": 0.7823661993352944,
      "grad_norm": 0.25133106112480164,
      "learning_rate": 2.4214456865023117e-05,
      "loss": 0.794,
      "step": 5120
    },
    {
      "epoch": 0.7825190052336021,
      "grad_norm": 0.2756834328174591,
      "learning_rate": 2.4181901232410796e-05,
      "loss": 0.5819,
      "step": 5121
    },
    {
      "epoch": 0.7826718111319096,
      "grad_norm": 0.2971981465816498,
      "learning_rate": 2.414936448924139e-05,
      "loss": 0.6456,
      "step": 5122
    },
    {
      "epoch": 0.7828246170302173,
      "grad_norm": 0.28821277618408203,
      "learning_rate": 2.411684664362107e-05,
      "loss": 0.5152,
      "step": 5123
    },
    {
      "epoch": 0.782977422928525,
      "grad_norm": 0.3240285813808441,
      "learning_rate": 2.4084347703651466e-05,
      "loss": 0.7247,
      "step": 5124
    },
    {
      "epoch": 0.7831302288268327,
      "grad_norm": 0.2902561128139496,
      "learning_rate": 2.405186767742934e-05,
      "loss": 0.5942,
      "step": 5125
    },
    {
      "epoch": 0.7832830347251404,
      "grad_norm": 0.3167160451412201,
      "learning_rate": 2.401940657304689e-05,
      "loss": 0.6878,
      "step": 5126
    },
    {
      "epoch": 0.7834358406234481,
      "grad_norm": 0.2904062867164612,
      "learning_rate": 2.3986964398591483e-05,
      "loss": 0.6876,
      "step": 5127
    },
    {
      "epoch": 0.7835886465217557,
      "grad_norm": 0.2593238353729248,
      "learning_rate": 2.3954541162145804e-05,
      "loss": 0.6787,
      "step": 5128
    },
    {
      "epoch": 0.7837414524200634,
      "grad_norm": 0.265027791261673,
      "learning_rate": 2.392213687178785e-05,
      "loss": 0.6336,
      "step": 5129
    },
    {
      "epoch": 0.7838942583183711,
      "grad_norm": 0.3181680142879486,
      "learning_rate": 2.388975153559091e-05,
      "loss": 0.6047,
      "step": 5130
    },
    {
      "epoch": 0.7840470642166788,
      "grad_norm": 0.3099213242530823,
      "learning_rate": 2.385738516162348e-05,
      "loss": 0.7953,
      "step": 5131
    },
    {
      "epoch": 0.7841998701149865,
      "grad_norm": 0.3505837917327881,
      "learning_rate": 2.3825037757949355e-05,
      "loss": 0.8281,
      "step": 5132
    },
    {
      "epoch": 0.7843526760132941,
      "grad_norm": 0.27586525678634644,
      "learning_rate": 2.3792709332627637e-05,
      "loss": 0.6736,
      "step": 5133
    },
    {
      "epoch": 0.7845054819116017,
      "grad_norm": 0.2906564772129059,
      "learning_rate": 2.3760399893712714e-05,
      "loss": 0.7257,
      "step": 5134
    },
    {
      "epoch": 0.7846582878099094,
      "grad_norm": 0.29521262645721436,
      "learning_rate": 2.372810944925419e-05,
      "loss": 0.7035,
      "step": 5135
    },
    {
      "epoch": 0.7848110937082171,
      "grad_norm": 0.324818879365921,
      "learning_rate": 2.3695838007296913e-05,
      "loss": 0.598,
      "step": 5136
    },
    {
      "epoch": 0.7849638996065248,
      "grad_norm": 0.289086252450943,
      "learning_rate": 2.3663585575881086e-05,
      "loss": 0.7587,
      "step": 5137
    },
    {
      "epoch": 0.7851167055048325,
      "grad_norm": 0.2762129008769989,
      "learning_rate": 2.3631352163042154e-05,
      "loss": 0.557,
      "step": 5138
    },
    {
      "epoch": 0.7852695114031402,
      "grad_norm": 0.27317383885383606,
      "learning_rate": 2.3599137776810775e-05,
      "loss": 0.7014,
      "step": 5139
    },
    {
      "epoch": 0.7854223173014478,
      "grad_norm": 0.3378963768482208,
      "learning_rate": 2.356694242521287e-05,
      "loss": 0.6726,
      "step": 5140
    },
    {
      "epoch": 0.7855751231997555,
      "grad_norm": 0.29100221395492554,
      "learning_rate": 2.353476611626968e-05,
      "loss": 0.5299,
      "step": 5141
    },
    {
      "epoch": 0.7857279290980632,
      "grad_norm": 0.296665757894516,
      "learning_rate": 2.3502608857997622e-05,
      "loss": 0.5991,
      "step": 5142
    },
    {
      "epoch": 0.7858807349963709,
      "grad_norm": 0.29530707001686096,
      "learning_rate": 2.3470470658408427e-05,
      "loss": 0.6371,
      "step": 5143
    },
    {
      "epoch": 0.7860335408946786,
      "grad_norm": 0.26709187030792236,
      "learning_rate": 2.3438351525509085e-05,
      "loss": 0.6762,
      "step": 5144
    },
    {
      "epoch": 0.7861863467929862,
      "grad_norm": 0.31802254915237427,
      "learning_rate": 2.3406251467301788e-05,
      "loss": 0.7362,
      "step": 5145
    },
    {
      "epoch": 0.7863391526912938,
      "grad_norm": 0.3588809072971344,
      "learning_rate": 2.3374170491783953e-05,
      "loss": 0.4949,
      "step": 5146
    },
    {
      "epoch": 0.7864919585896015,
      "grad_norm": 0.2746977210044861,
      "learning_rate": 2.3342108606948343e-05,
      "loss": 0.6827,
      "step": 5147
    },
    {
      "epoch": 0.7866447644879092,
      "grad_norm": 0.2767939269542694,
      "learning_rate": 2.3310065820782935e-05,
      "loss": 0.6995,
      "step": 5148
    },
    {
      "epoch": 0.7867975703862169,
      "grad_norm": 0.2917032241821289,
      "learning_rate": 2.3278042141270806e-05,
      "loss": 0.7076,
      "step": 5149
    },
    {
      "epoch": 0.7869503762845246,
      "grad_norm": 0.5610830783843994,
      "learning_rate": 2.3246037576390466e-05,
      "loss": 0.6843,
      "step": 5150
    },
    {
      "epoch": 0.7871031821828323,
      "grad_norm": 0.2540992200374603,
      "learning_rate": 2.3214052134115572e-05,
      "loss": 0.6724,
      "step": 5151
    },
    {
      "epoch": 0.78725598808114,
      "grad_norm": 0.46241921186447144,
      "learning_rate": 2.3182085822415055e-05,
      "loss": 0.5017,
      "step": 5152
    },
    {
      "epoch": 0.7874087939794476,
      "grad_norm": 0.4499031901359558,
      "learning_rate": 2.315013864925304e-05,
      "loss": 0.5057,
      "step": 5153
    },
    {
      "epoch": 0.7875615998777553,
      "grad_norm": 0.27625760436058044,
      "learning_rate": 2.3118210622588843e-05,
      "loss": 0.6678,
      "step": 5154
    },
    {
      "epoch": 0.787714405776063,
      "grad_norm": 0.3061494827270508,
      "learning_rate": 2.3086301750377136e-05,
      "loss": 0.7484,
      "step": 5155
    },
    {
      "epoch": 0.7878672116743707,
      "grad_norm": 0.25919973850250244,
      "learning_rate": 2.3054412040567684e-05,
      "loss": 0.5866,
      "step": 5156
    },
    {
      "epoch": 0.7880200175726783,
      "grad_norm": 0.3416811525821686,
      "learning_rate": 2.30225415011056e-05,
      "loss": 0.5841,
      "step": 5157
    },
    {
      "epoch": 0.7881728234709859,
      "grad_norm": 0.26105600595474243,
      "learning_rate": 2.2990690139931116e-05,
      "loss": 0.7581,
      "step": 5158
    },
    {
      "epoch": 0.7883256293692936,
      "grad_norm": 0.2879030704498291,
      "learning_rate": 2.295885796497976e-05,
      "loss": 0.9003,
      "step": 5159
    },
    {
      "epoch": 0.7884784352676013,
      "grad_norm": 0.22672039270401,
      "learning_rate": 2.292704498418222e-05,
      "loss": 0.5145,
      "step": 5160
    },
    {
      "epoch": 0.788631241165909,
      "grad_norm": 0.27298426628112793,
      "learning_rate": 2.2895251205464484e-05,
      "loss": 0.8854,
      "step": 5161
    },
    {
      "epoch": 0.7887840470642167,
      "grad_norm": 0.2944872975349426,
      "learning_rate": 2.286347663674765e-05,
      "loss": 0.6355,
      "step": 5162
    },
    {
      "epoch": 0.7889368529625244,
      "grad_norm": 0.28508460521698,
      "learning_rate": 2.2831721285948126e-05,
      "loss": 0.6496,
      "step": 5163
    },
    {
      "epoch": 0.789089658860832,
      "grad_norm": 0.5533873438835144,
      "learning_rate": 2.2799985160977454e-05,
      "loss": 0.7379,
      "step": 5164
    },
    {
      "epoch": 0.7892424647591397,
      "grad_norm": 0.28607064485549927,
      "learning_rate": 2.2768268269742466e-05,
      "loss": 0.7115,
      "step": 5165
    },
    {
      "epoch": 0.7893952706574474,
      "grad_norm": 0.2833400368690491,
      "learning_rate": 2.2736570620145136e-05,
      "loss": 0.7425,
      "step": 5166
    },
    {
      "epoch": 0.7895480765557551,
      "grad_norm": 0.3185187578201294,
      "learning_rate": 2.270489222008265e-05,
      "loss": 0.721,
      "step": 5167
    },
    {
      "epoch": 0.7897008824540628,
      "grad_norm": 0.2721453607082367,
      "learning_rate": 2.267323307744742e-05,
      "loss": 0.6955,
      "step": 5168
    },
    {
      "epoch": 0.7898536883523704,
      "grad_norm": 0.3211742639541626,
      "learning_rate": 2.264159320012711e-05,
      "loss": 0.621,
      "step": 5169
    },
    {
      "epoch": 0.790006494250678,
      "grad_norm": 0.3255116641521454,
      "learning_rate": 2.260997259600448e-05,
      "loss": 0.7441,
      "step": 5170
    },
    {
      "epoch": 0.7901593001489857,
      "grad_norm": 0.3053962290287018,
      "learning_rate": 2.257837127295752e-05,
      "loss": 0.8264,
      "step": 5171
    },
    {
      "epoch": 0.7903121060472934,
      "grad_norm": 0.4836776554584503,
      "learning_rate": 2.2546789238859468e-05,
      "loss": 0.6754,
      "step": 5172
    },
    {
      "epoch": 0.7904649119456011,
      "grad_norm": 0.2527182698249817,
      "learning_rate": 2.2515226501578734e-05,
      "loss": 0.731,
      "step": 5173
    },
    {
      "epoch": 0.7906177178439088,
      "grad_norm": 0.32779669761657715,
      "learning_rate": 2.24836830689789e-05,
      "loss": 0.6997,
      "step": 5174
    },
    {
      "epoch": 0.7907705237422165,
      "grad_norm": 0.2647869884967804,
      "learning_rate": 2.2452158948918712e-05,
      "loss": 0.5513,
      "step": 5175
    },
    {
      "epoch": 0.7909233296405241,
      "grad_norm": 0.3325052857398987,
      "learning_rate": 2.2420654149252153e-05,
      "loss": 0.5717,
      "step": 5176
    },
    {
      "epoch": 0.7910761355388318,
      "grad_norm": 0.27766644954681396,
      "learning_rate": 2.238916867782843e-05,
      "loss": 0.7734,
      "step": 5177
    },
    {
      "epoch": 0.7912289414371395,
      "grad_norm": 0.295060396194458,
      "learning_rate": 2.235770254249182e-05,
      "loss": 0.6441,
      "step": 5178
    },
    {
      "epoch": 0.7913817473354472,
      "grad_norm": 0.5573975443840027,
      "learning_rate": 2.2326255751081892e-05,
      "loss": 0.6681,
      "step": 5179
    },
    {
      "epoch": 0.7915345532337549,
      "grad_norm": 0.3403951823711395,
      "learning_rate": 2.2294828311433346e-05,
      "loss": 0.6454,
      "step": 5180
    },
    {
      "epoch": 0.7916873591320625,
      "grad_norm": 0.25036436319351196,
      "learning_rate": 2.226342023137601e-05,
      "loss": 0.6293,
      "step": 5181
    },
    {
      "epoch": 0.7918401650303701,
      "grad_norm": 0.3675941526889801,
      "learning_rate": 2.2232031518734986e-05,
      "loss": 0.8306,
      "step": 5182
    },
    {
      "epoch": 0.7919929709286778,
      "grad_norm": 0.30091819167137146,
      "learning_rate": 2.2200662181330535e-05,
      "loss": 0.6478,
      "step": 5183
    },
    {
      "epoch": 0.7921457768269855,
      "grad_norm": 0.28308168053627014,
      "learning_rate": 2.2169312226978044e-05,
      "loss": 0.6683,
      "step": 5184
    },
    {
      "epoch": 0.7922985827252932,
      "grad_norm": 0.30706119537353516,
      "learning_rate": 2.2137981663488038e-05,
      "loss": 0.6971,
      "step": 5185
    },
    {
      "epoch": 0.7924513886236009,
      "grad_norm": 0.26795217394828796,
      "learning_rate": 2.2106670498666315e-05,
      "loss": 0.7442,
      "step": 5186
    },
    {
      "epoch": 0.7926041945219086,
      "grad_norm": 0.2965717017650604,
      "learning_rate": 2.207537874031381e-05,
      "loss": 0.521,
      "step": 5187
    },
    {
      "epoch": 0.7927570004202162,
      "grad_norm": 0.29789793491363525,
      "learning_rate": 2.204410639622657e-05,
      "loss": 0.7498,
      "step": 5188
    },
    {
      "epoch": 0.7929098063185239,
      "grad_norm": 0.2972559928894043,
      "learning_rate": 2.2012853474195826e-05,
      "loss": 0.6631,
      "step": 5189
    },
    {
      "epoch": 0.7930626122168316,
      "grad_norm": 0.2753361761569977,
      "learning_rate": 2.1981619982007985e-05,
      "loss": 0.6776,
      "step": 5190
    },
    {
      "epoch": 0.7932154181151393,
      "grad_norm": 1.3614482879638672,
      "learning_rate": 2.195040592744465e-05,
      "loss": 0.5634,
      "step": 5191
    },
    {
      "epoch": 0.7933682240134469,
      "grad_norm": 0.28750181198120117,
      "learning_rate": 2.1919211318282505e-05,
      "loss": 0.5691,
      "step": 5192
    },
    {
      "epoch": 0.7935210299117545,
      "grad_norm": 0.28820011019706726,
      "learning_rate": 2.1888036162293413e-05,
      "loss": 0.6766,
      "step": 5193
    },
    {
      "epoch": 0.7936738358100622,
      "grad_norm": 0.28162091970443726,
      "learning_rate": 2.185688046724441e-05,
      "loss": 0.7707,
      "step": 5194
    },
    {
      "epoch": 0.7938266417083699,
      "grad_norm": 0.3941137492656708,
      "learning_rate": 2.182574424089773e-05,
      "loss": 0.7717,
      "step": 5195
    },
    {
      "epoch": 0.7939794476066776,
      "grad_norm": 0.34859415888786316,
      "learning_rate": 2.1794627491010644e-05,
      "loss": 0.8792,
      "step": 5196
    },
    {
      "epoch": 0.7941322535049853,
      "grad_norm": 0.3332647383213043,
      "learning_rate": 2.1763530225335614e-05,
      "loss": 0.487,
      "step": 5197
    },
    {
      "epoch": 0.794285059403293,
      "grad_norm": 0.2961379885673523,
      "learning_rate": 2.1732452451620333e-05,
      "loss": 0.6603,
      "step": 5198
    },
    {
      "epoch": 0.7944378653016007,
      "grad_norm": 0.27676260471343994,
      "learning_rate": 2.1701394177607494e-05,
      "loss": 0.5361,
      "step": 5199
    },
    {
      "epoch": 0.7945906711999083,
      "grad_norm": 0.25243091583251953,
      "learning_rate": 2.167035541103506e-05,
      "loss": 0.7178,
      "step": 5200
    },
    {
      "epoch": 0.794743477098216,
      "grad_norm": 0.25789448618888855,
      "learning_rate": 2.1639336159636027e-05,
      "loss": 0.67,
      "step": 5201
    },
    {
      "epoch": 0.7948962829965237,
      "grad_norm": 0.2747963070869446,
      "learning_rate": 2.1608336431138655e-05,
      "loss": 0.641,
      "step": 5202
    },
    {
      "epoch": 0.7950490888948314,
      "grad_norm": 0.3560905158519745,
      "learning_rate": 2.1577356233266176e-05,
      "loss": 0.6971,
      "step": 5203
    },
    {
      "epoch": 0.795201894793139,
      "grad_norm": 0.30919522047042847,
      "learning_rate": 2.154639557373711e-05,
      "loss": 0.8019,
      "step": 5204
    },
    {
      "epoch": 0.7953547006914466,
      "grad_norm": 0.31300801038742065,
      "learning_rate": 2.151545446026507e-05,
      "loss": 0.5774,
      "step": 5205
    },
    {
      "epoch": 0.7955075065897543,
      "grad_norm": 0.2874794602394104,
      "learning_rate": 2.1484532900558685e-05,
      "loss": 0.8056,
      "step": 5206
    },
    {
      "epoch": 0.795660312488062,
      "grad_norm": 0.26195093989372253,
      "learning_rate": 2.1453630902321843e-05,
      "loss": 0.7185,
      "step": 5207
    },
    {
      "epoch": 0.7958131183863697,
      "grad_norm": 0.47928446531295776,
      "learning_rate": 2.142274847325353e-05,
      "loss": 0.8464,
      "step": 5208
    },
    {
      "epoch": 0.7959659242846774,
      "grad_norm": 0.32929208874702454,
      "learning_rate": 2.139188562104789e-05,
      "loss": 0.7058,
      "step": 5209
    },
    {
      "epoch": 0.7961187301829851,
      "grad_norm": 0.2897893488407135,
      "learning_rate": 2.1361042353394044e-05,
      "loss": 0.6669,
      "step": 5210
    },
    {
      "epoch": 0.7962715360812928,
      "grad_norm": 0.43749895691871643,
      "learning_rate": 2.1330218677976376e-05,
      "loss": 0.6892,
      "step": 5211
    },
    {
      "epoch": 0.7964243419796004,
      "grad_norm": 0.3396851420402527,
      "learning_rate": 2.1299414602474376e-05,
      "loss": 0.771,
      "step": 5212
    },
    {
      "epoch": 0.7965771478779081,
      "grad_norm": 0.2620655596256256,
      "learning_rate": 2.126863013456257e-05,
      "loss": 0.6952,
      "step": 5213
    },
    {
      "epoch": 0.7967299537762158,
      "grad_norm": 0.34103044867515564,
      "learning_rate": 2.1237865281910708e-05,
      "loss": 0.6764,
      "step": 5214
    },
    {
      "epoch": 0.7968827596745235,
      "grad_norm": 0.26926666498184204,
      "learning_rate": 2.120712005218354e-05,
      "loss": 0.8362,
      "step": 5215
    },
    {
      "epoch": 0.7970355655728311,
      "grad_norm": 0.29229509830474854,
      "learning_rate": 2.1176394453041016e-05,
      "loss": 0.6737,
      "step": 5216
    },
    {
      "epoch": 0.7971883714711387,
      "grad_norm": 0.25633910298347473,
      "learning_rate": 2.1145688492138127e-05,
      "loss": 0.6043,
      "step": 5217
    },
    {
      "epoch": 0.7973411773694464,
      "grad_norm": 0.2882971167564392,
      "learning_rate": 2.1115002177125064e-05,
      "loss": 0.6424,
      "step": 5218
    },
    {
      "epoch": 0.7974939832677541,
      "grad_norm": 0.49982550740242004,
      "learning_rate": 2.1084335515647024e-05,
      "loss": 0.7675,
      "step": 5219
    },
    {
      "epoch": 0.7976467891660618,
      "grad_norm": 0.25070077180862427,
      "learning_rate": 2.1053688515344327e-05,
      "loss": 0.6555,
      "step": 5220
    },
    {
      "epoch": 0.7977995950643695,
      "grad_norm": 0.3176601529121399,
      "learning_rate": 2.1023061183852433e-05,
      "loss": 0.6537,
      "step": 5221
    },
    {
      "epoch": 0.7979524009626772,
      "grad_norm": 0.2853238880634308,
      "learning_rate": 2.0992453528801924e-05,
      "loss": 0.7822,
      "step": 5222
    },
    {
      "epoch": 0.7981052068609849,
      "grad_norm": 0.2929050624370575,
      "learning_rate": 2.0961865557818417e-05,
      "loss": 0.8065,
      "step": 5223
    },
    {
      "epoch": 0.7982580127592925,
      "grad_norm": 0.2644808292388916,
      "learning_rate": 2.093129727852261e-05,
      "loss": 0.6393,
      "step": 5224
    },
    {
      "epoch": 0.7984108186576002,
      "grad_norm": 0.34935396909713745,
      "learning_rate": 2.0900748698530358e-05,
      "loss": 0.5423,
      "step": 5225
    },
    {
      "epoch": 0.7985636245559079,
      "grad_norm": 0.3045203983783722,
      "learning_rate": 2.087021982545263e-05,
      "loss": 0.8789,
      "step": 5226
    },
    {
      "epoch": 0.7987164304542156,
      "grad_norm": 0.3892831802368164,
      "learning_rate": 2.0839710666895386e-05,
      "loss": 0.7262,
      "step": 5227
    },
    {
      "epoch": 0.7988692363525232,
      "grad_norm": 0.39896148443222046,
      "learning_rate": 2.080922123045972e-05,
      "loss": 0.8121,
      "step": 5228
    },
    {
      "epoch": 0.7990220422508308,
      "grad_norm": 0.3847440481185913,
      "learning_rate": 2.0778751523741824e-05,
      "loss": 0.5827,
      "step": 5229
    },
    {
      "epoch": 0.7991748481491385,
      "grad_norm": 0.28219443559646606,
      "learning_rate": 2.0748301554333027e-05,
      "loss": 0.668,
      "step": 5230
    },
    {
      "epoch": 0.7993276540474462,
      "grad_norm": 0.2536992132663727,
      "learning_rate": 2.0717871329819628e-05,
      "loss": 0.5957,
      "step": 5231
    },
    {
      "epoch": 0.7994804599457539,
      "grad_norm": 0.3085779845714569,
      "learning_rate": 2.0687460857783048e-05,
      "loss": 0.7489,
      "step": 5232
    },
    {
      "epoch": 0.7996332658440616,
      "grad_norm": 0.332774817943573,
      "learning_rate": 2.065707014579983e-05,
      "loss": 0.9133,
      "step": 5233
    },
    {
      "epoch": 0.7997860717423693,
      "grad_norm": 0.2687940001487732,
      "learning_rate": 2.062669920144159e-05,
      "loss": 0.6992,
      "step": 5234
    },
    {
      "epoch": 0.799938877640677,
      "grad_norm": 0.3154837489128113,
      "learning_rate": 2.059634803227496e-05,
      "loss": 0.6829,
      "step": 5235
    },
    {
      "epoch": 0.8000916835389846,
      "grad_norm": 0.31086432933807373,
      "learning_rate": 2.0566016645861663e-05,
      "loss": 0.6192,
      "step": 5236
    },
    {
      "epoch": 0.8002444894372923,
      "grad_norm": 1.0907458066940308,
      "learning_rate": 2.053570504975856e-05,
      "loss": 0.638,
      "step": 5237
    },
    {
      "epoch": 0.8003972953356,
      "grad_norm": 0.5163177847862244,
      "learning_rate": 2.050541325151746e-05,
      "loss": 0.9716,
      "step": 5238
    },
    {
      "epoch": 0.8005501012339076,
      "grad_norm": 0.28363659977912903,
      "learning_rate": 2.0475141258685358e-05,
      "loss": 0.6881,
      "step": 5239
    },
    {
      "epoch": 0.8007029071322153,
      "grad_norm": 0.28859302401542664,
      "learning_rate": 2.0444889078804298e-05,
      "loss": 0.7552,
      "step": 5240
    },
    {
      "epoch": 0.8008557130305229,
      "grad_norm": 0.2968814969062805,
      "learning_rate": 2.0414656719411305e-05,
      "loss": 0.5776,
      "step": 5241
    },
    {
      "epoch": 0.8010085189288306,
      "grad_norm": 0.6331593990325928,
      "learning_rate": 2.038444418803851e-05,
      "loss": 0.7135,
      "step": 5242
    },
    {
      "epoch": 0.8011613248271383,
      "grad_norm": 0.3301532566547394,
      "learning_rate": 2.0354251492213138e-05,
      "loss": 0.7518,
      "step": 5243
    },
    {
      "epoch": 0.801314130725446,
      "grad_norm": 0.48997145891189575,
      "learning_rate": 2.0324078639457455e-05,
      "loss": 0.6749,
      "step": 5244
    },
    {
      "epoch": 0.8014669366237537,
      "grad_norm": 0.2547190189361572,
      "learning_rate": 2.029392563728877e-05,
      "loss": 0.8264,
      "step": 5245
    },
    {
      "epoch": 0.8016197425220614,
      "grad_norm": 0.23393574357032776,
      "learning_rate": 2.0263792493219413e-05,
      "loss": 0.7307,
      "step": 5246
    },
    {
      "epoch": 0.801772548420369,
      "grad_norm": 0.4022747874259949,
      "learning_rate": 2.023367921475683e-05,
      "loss": 0.7258,
      "step": 5247
    },
    {
      "epoch": 0.8019253543186767,
      "grad_norm": 0.3829197883605957,
      "learning_rate": 2.0203585809403525e-05,
      "loss": 0.7445,
      "step": 5248
    },
    {
      "epoch": 0.8020781602169844,
      "grad_norm": 0.3364792466163635,
      "learning_rate": 2.017351228465697e-05,
      "loss": 0.577,
      "step": 5249
    },
    {
      "epoch": 0.8022309661152921,
      "grad_norm": 0.28101688623428345,
      "learning_rate": 2.014345864800974e-05,
      "loss": 0.62,
      "step": 5250
    },
    {
      "epoch": 0.8023837720135997,
      "grad_norm": 0.3479030430316925,
      "learning_rate": 2.0113424906949465e-05,
      "loss": 0.48,
      "step": 5251
    },
    {
      "epoch": 0.8025365779119074,
      "grad_norm": 0.26711511611938477,
      "learning_rate": 2.0083411068958756e-05,
      "loss": 0.5663,
      "step": 5252
    },
    {
      "epoch": 0.802689383810215,
      "grad_norm": 0.3207715153694153,
      "learning_rate": 2.0053417141515373e-05,
      "loss": 0.6989,
      "step": 5253
    },
    {
      "epoch": 0.8028421897085227,
      "grad_norm": 0.261491060256958,
      "learning_rate": 2.0023443132092003e-05,
      "loss": 0.5608,
      "step": 5254
    },
    {
      "epoch": 0.8029949956068304,
      "grad_norm": 0.3126250207424164,
      "learning_rate": 1.9993489048156443e-05,
      "loss": 0.4752,
      "step": 5255
    },
    {
      "epoch": 0.8031478015051381,
      "grad_norm": 0.2783001661300659,
      "learning_rate": 1.9963554897171478e-05,
      "loss": 0.6302,
      "step": 5256
    },
    {
      "epoch": 0.8033006074034458,
      "grad_norm": 0.2911037504673004,
      "learning_rate": 1.9933640686594978e-05,
      "loss": 0.5396,
      "step": 5257
    },
    {
      "epoch": 0.8034534133017535,
      "grad_norm": 0.243194118142128,
      "learning_rate": 1.990374642387982e-05,
      "loss": 0.5593,
      "step": 5258
    },
    {
      "epoch": 0.8036062192000611,
      "grad_norm": 0.3205549418926239,
      "learning_rate": 1.9873872116473857e-05,
      "loss": 0.7169,
      "step": 5259
    },
    {
      "epoch": 0.8037590250983688,
      "grad_norm": 0.35936808586120605,
      "learning_rate": 1.9844017771820055e-05,
      "loss": 0.6554,
      "step": 5260
    },
    {
      "epoch": 0.8039118309966765,
      "grad_norm": 0.27080589532852173,
      "learning_rate": 1.981418339735641e-05,
      "loss": 0.6742,
      "step": 5261
    },
    {
      "epoch": 0.8040646368949842,
      "grad_norm": 0.3525456190109253,
      "learning_rate": 1.978436900051588e-05,
      "loss": 0.7487,
      "step": 5262
    },
    {
      "epoch": 0.8042174427932918,
      "grad_norm": 0.2958907186985016,
      "learning_rate": 1.9754574588726426e-05,
      "loss": 0.7423,
      "step": 5263
    },
    {
      "epoch": 0.8043702486915995,
      "grad_norm": 0.387239545583725,
      "learning_rate": 1.9724800169411107e-05,
      "loss": 0.7466,
      "step": 5264
    },
    {
      "epoch": 0.8045230545899071,
      "grad_norm": 0.31217509508132935,
      "learning_rate": 1.9695045749988017e-05,
      "loss": 0.6598,
      "step": 5265
    },
    {
      "epoch": 0.8046758604882148,
      "grad_norm": 0.25408506393432617,
      "learning_rate": 1.9665311337870173e-05,
      "loss": 0.7294,
      "step": 5266
    },
    {
      "epoch": 0.8048286663865225,
      "grad_norm": 0.34589096903800964,
      "learning_rate": 1.963559694046563e-05,
      "loss": 0.8124,
      "step": 5267
    },
    {
      "epoch": 0.8049814722848302,
      "grad_norm": 0.3010726571083069,
      "learning_rate": 1.9605902565177513e-05,
      "loss": 0.6591,
      "step": 5268
    },
    {
      "epoch": 0.8051342781831379,
      "grad_norm": 0.25482556223869324,
      "learning_rate": 1.9576228219403957e-05,
      "loss": 0.662,
      "step": 5269
    },
    {
      "epoch": 0.8052870840814456,
      "grad_norm": 0.3103812038898468,
      "learning_rate": 1.9546573910538036e-05,
      "loss": 0.6193,
      "step": 5270
    },
    {
      "epoch": 0.8054398899797532,
      "grad_norm": 0.3204786479473114,
      "learning_rate": 1.9516939645967857e-05,
      "loss": 0.724,
      "step": 5271
    },
    {
      "epoch": 0.8055926958780609,
      "grad_norm": 0.4453890025615692,
      "learning_rate": 1.9487325433076576e-05,
      "loss": 0.7314,
      "step": 5272
    },
    {
      "epoch": 0.8057455017763686,
      "grad_norm": 0.7212764024734497,
      "learning_rate": 1.945773127924234e-05,
      "loss": 0.8104,
      "step": 5273
    },
    {
      "epoch": 0.8058983076746763,
      "grad_norm": 0.31767502427101135,
      "learning_rate": 1.9428157191838238e-05,
      "loss": 0.5659,
      "step": 5274
    },
    {
      "epoch": 0.8060511135729839,
      "grad_norm": 0.29767361283302307,
      "learning_rate": 1.9398603178232455e-05,
      "loss": 0.7183,
      "step": 5275
    },
    {
      "epoch": 0.8062039194712916,
      "grad_norm": 0.26745566725730896,
      "learning_rate": 1.9369069245788106e-05,
      "loss": 0.6183,
      "step": 5276
    },
    {
      "epoch": 0.8063567253695992,
      "grad_norm": 0.2996903955936432,
      "learning_rate": 1.9339555401863297e-05,
      "loss": 0.5862,
      "step": 5277
    },
    {
      "epoch": 0.8065095312679069,
      "grad_norm": 0.2614487111568451,
      "learning_rate": 1.9310061653811173e-05,
      "loss": 0.7281,
      "step": 5278
    },
    {
      "epoch": 0.8066623371662146,
      "grad_norm": 0.2431805282831192,
      "learning_rate": 1.9280588008979884e-05,
      "loss": 0.5995,
      "step": 5279
    },
    {
      "epoch": 0.8068151430645223,
      "grad_norm": 0.3033202886581421,
      "learning_rate": 1.9251134474712506e-05,
      "loss": 0.6573,
      "step": 5280
    },
    {
      "epoch": 0.80696794896283,
      "grad_norm": 0.29538241028785706,
      "learning_rate": 1.922170105834713e-05,
      "loss": 0.7933,
      "step": 5281
    },
    {
      "epoch": 0.8071207548611377,
      "grad_norm": 0.2647150456905365,
      "learning_rate": 1.9192287767216867e-05,
      "loss": 0.5476,
      "step": 5282
    },
    {
      "epoch": 0.8072735607594453,
      "grad_norm": 0.2977331578731537,
      "learning_rate": 1.9162894608649805e-05,
      "loss": 0.482,
      "step": 5283
    },
    {
      "epoch": 0.807426366657753,
      "grad_norm": 0.28192129731178284,
      "learning_rate": 1.9133521589968985e-05,
      "loss": 0.6165,
      "step": 5284
    },
    {
      "epoch": 0.8075791725560607,
      "grad_norm": 0.3128628730773926,
      "learning_rate": 1.9104168718492423e-05,
      "loss": 0.7441,
      "step": 5285
    },
    {
      "epoch": 0.8077319784543684,
      "grad_norm": 0.35077306628227234,
      "learning_rate": 1.907483600153317e-05,
      "loss": 0.7481,
      "step": 5286
    },
    {
      "epoch": 0.807884784352676,
      "grad_norm": 0.3166959285736084,
      "learning_rate": 1.9045523446399237e-05,
      "loss": 0.7984,
      "step": 5287
    },
    {
      "epoch": 0.8080375902509837,
      "grad_norm": 0.27331990003585815,
      "learning_rate": 1.9016231060393596e-05,
      "loss": 0.6793,
      "step": 5288
    },
    {
      "epoch": 0.8081903961492913,
      "grad_norm": 0.2991531491279602,
      "learning_rate": 1.898695885081416e-05,
      "loss": 0.5654,
      "step": 5289
    },
    {
      "epoch": 0.808343202047599,
      "grad_norm": 0.2798959016799927,
      "learning_rate": 1.8957706824953915e-05,
      "loss": 0.6628,
      "step": 5290
    },
    {
      "epoch": 0.8084960079459067,
      "grad_norm": 0.27082306146621704,
      "learning_rate": 1.8928474990100687e-05,
      "loss": 0.7142,
      "step": 5291
    },
    {
      "epoch": 0.8086488138442144,
      "grad_norm": 0.28114885091781616,
      "learning_rate": 1.889926335353741e-05,
      "loss": 0.716,
      "step": 5292
    },
    {
      "epoch": 0.8088016197425221,
      "grad_norm": 0.3345818519592285,
      "learning_rate": 1.8870071922541877e-05,
      "loss": 0.8301,
      "step": 5293
    },
    {
      "epoch": 0.8089544256408298,
      "grad_norm": 0.3638163208961487,
      "learning_rate": 1.884090070438691e-05,
      "loss": 0.7258,
      "step": 5294
    },
    {
      "epoch": 0.8091072315391374,
      "grad_norm": 0.3682017922401428,
      "learning_rate": 1.881174970634024e-05,
      "loss": 0.6451,
      "step": 5295
    },
    {
      "epoch": 0.8092600374374451,
      "grad_norm": 0.2848983407020569,
      "learning_rate": 1.8782618935664653e-05,
      "loss": 0.8843,
      "step": 5296
    },
    {
      "epoch": 0.8094128433357528,
      "grad_norm": 0.30901724100112915,
      "learning_rate": 1.8753508399617793e-05,
      "loss": 0.638,
      "step": 5297
    },
    {
      "epoch": 0.8095656492340604,
      "grad_norm": 0.3024827837944031,
      "learning_rate": 1.872441810545228e-05,
      "loss": 0.7069,
      "step": 5298
    },
    {
      "epoch": 0.8097184551323681,
      "grad_norm": 0.28565528988838196,
      "learning_rate": 1.8695348060415762e-05,
      "loss": 0.4848,
      "step": 5299
    },
    {
      "epoch": 0.8098712610306757,
      "grad_norm": 0.45431607961654663,
      "learning_rate": 1.866629827175077e-05,
      "loss": 0.6773,
      "step": 5300
    },
    {
      "epoch": 0.8100240669289834,
      "grad_norm": 0.2928932011127472,
      "learning_rate": 1.8637268746694892e-05,
      "loss": 0.5875,
      "step": 5301
    },
    {
      "epoch": 0.8101768728272911,
      "grad_norm": 0.31244757771492004,
      "learning_rate": 1.8608259492480474e-05,
      "loss": 0.6565,
      "step": 5302
    },
    {
      "epoch": 0.8103296787255988,
      "grad_norm": 0.33375710248947144,
      "learning_rate": 1.857927051633498e-05,
      "loss": 0.6444,
      "step": 5303
    },
    {
      "epoch": 0.8104824846239065,
      "grad_norm": 0.2504745423793793,
      "learning_rate": 1.8550301825480763e-05,
      "loss": 0.7504,
      "step": 5304
    },
    {
      "epoch": 0.8106352905222142,
      "grad_norm": 0.3320654034614563,
      "learning_rate": 1.8521353427135168e-05,
      "loss": 0.7599,
      "step": 5305
    },
    {
      "epoch": 0.8107880964205219,
      "grad_norm": 0.32688429951667786,
      "learning_rate": 1.849242532851042e-05,
      "loss": 0.5366,
      "step": 5306
    },
    {
      "epoch": 0.8109409023188295,
      "grad_norm": 0.2687855064868927,
      "learning_rate": 1.846351753681368e-05,
      "loss": 0.6612,
      "step": 5307
    },
    {
      "epoch": 0.8110937082171372,
      "grad_norm": 0.30853578448295593,
      "learning_rate": 1.8434630059247126e-05,
      "loss": 0.6146,
      "step": 5308
    },
    {
      "epoch": 0.8112465141154449,
      "grad_norm": 0.2773032486438751,
      "learning_rate": 1.8405762903007793e-05,
      "loss": 0.7276,
      "step": 5309
    },
    {
      "epoch": 0.8113993200137525,
      "grad_norm": 0.3360896110534668,
      "learning_rate": 1.837691607528774e-05,
      "loss": 0.6333,
      "step": 5310
    },
    {
      "epoch": 0.8115521259120602,
      "grad_norm": 0.26231664419174194,
      "learning_rate": 1.834808958327385e-05,
      "loss": 0.7275,
      "step": 5311
    },
    {
      "epoch": 0.8117049318103678,
      "grad_norm": 0.2589069604873657,
      "learning_rate": 1.831928343414807e-05,
      "loss": 0.751,
      "step": 5312
    },
    {
      "epoch": 0.8118577377086755,
      "grad_norm": 0.3091123700141907,
      "learning_rate": 1.8290497635087146e-05,
      "loss": 0.827,
      "step": 5313
    },
    {
      "epoch": 0.8120105436069832,
      "grad_norm": 0.30030331015586853,
      "learning_rate": 1.8261732193262872e-05,
      "loss": 0.7596,
      "step": 5314
    },
    {
      "epoch": 0.8121633495052909,
      "grad_norm": 0.3518400192260742,
      "learning_rate": 1.8232987115841884e-05,
      "loss": 0.5488,
      "step": 5315
    },
    {
      "epoch": 0.8123161554035986,
      "grad_norm": 0.26434770226478577,
      "learning_rate": 1.8204262409985763e-05,
      "loss": 0.5582,
      "step": 5316
    },
    {
      "epoch": 0.8124689613019063,
      "grad_norm": 0.3209986984729767,
      "learning_rate": 1.817555808285105e-05,
      "loss": 0.6947,
      "step": 5317
    },
    {
      "epoch": 0.812621767200214,
      "grad_norm": 0.26500552892684937,
      "learning_rate": 1.814687414158921e-05,
      "loss": 0.6549,
      "step": 5318
    },
    {
      "epoch": 0.8127745730985216,
      "grad_norm": 0.4316971004009247,
      "learning_rate": 1.8118210593346586e-05,
      "loss": 0.6712,
      "step": 5319
    },
    {
      "epoch": 0.8129273789968293,
      "grad_norm": 0.3341822028160095,
      "learning_rate": 1.808956744526443e-05,
      "loss": 0.7109,
      "step": 5320
    },
    {
      "epoch": 0.813080184895137,
      "grad_norm": 0.26364752650260925,
      "learning_rate": 1.8060944704478965e-05,
      "loss": 0.5573,
      "step": 5321
    },
    {
      "epoch": 0.8132329907934446,
      "grad_norm": 0.3340471088886261,
      "learning_rate": 1.8032342378121347e-05,
      "loss": 0.4768,
      "step": 5322
    },
    {
      "epoch": 0.8133857966917523,
      "grad_norm": 0.31111812591552734,
      "learning_rate": 1.8003760473317555e-05,
      "loss": 0.9573,
      "step": 5323
    },
    {
      "epoch": 0.81353860259006,
      "grad_norm": 0.32566016912460327,
      "learning_rate": 1.7975198997188526e-05,
      "loss": 0.8372,
      "step": 5324
    },
    {
      "epoch": 0.8136914084883676,
      "grad_norm": 0.2471184879541397,
      "learning_rate": 1.7946657956850133e-05,
      "loss": 0.615,
      "step": 5325
    },
    {
      "epoch": 0.8138442143866753,
      "grad_norm": 0.2883491516113281,
      "learning_rate": 1.7918137359413157e-05,
      "loss": 0.6954,
      "step": 5326
    },
    {
      "epoch": 0.813997020284983,
      "grad_norm": 0.2613636553287506,
      "learning_rate": 1.7889637211983246e-05,
      "loss": 0.6137,
      "step": 5327
    },
    {
      "epoch": 0.8141498261832907,
      "grad_norm": 0.3412512242794037,
      "learning_rate": 1.786115752166094e-05,
      "loss": 0.5779,
      "step": 5328
    },
    {
      "epoch": 0.8143026320815984,
      "grad_norm": 0.34528636932373047,
      "learning_rate": 1.7832698295541773e-05,
      "loss": 0.7299,
      "step": 5329
    },
    {
      "epoch": 0.814455437979906,
      "grad_norm": 0.27255862951278687,
      "learning_rate": 1.780425954071606e-05,
      "loss": 0.7939,
      "step": 5330
    },
    {
      "epoch": 0.8146082438782137,
      "grad_norm": 0.29479607939720154,
      "learning_rate": 1.7775841264269145e-05,
      "loss": 0.673,
      "step": 5331
    },
    {
      "epoch": 0.8147610497765214,
      "grad_norm": 0.2675367593765259,
      "learning_rate": 1.7747443473281133e-05,
      "loss": 0.5236,
      "step": 5332
    },
    {
      "epoch": 0.8149138556748291,
      "grad_norm": 0.47365444898605347,
      "learning_rate": 1.771906617482717e-05,
      "loss": 0.5629,
      "step": 5333
    },
    {
      "epoch": 0.8150666615731367,
      "grad_norm": 0.284067839384079,
      "learning_rate": 1.7690709375977154e-05,
      "loss": 0.4462,
      "step": 5334
    },
    {
      "epoch": 0.8152194674714444,
      "grad_norm": 0.28673845529556274,
      "learning_rate": 1.7662373083795968e-05,
      "loss": 0.6392,
      "step": 5335
    },
    {
      "epoch": 0.815372273369752,
      "grad_norm": 0.562435507774353,
      "learning_rate": 1.763405730534342e-05,
      "loss": 0.5871,
      "step": 5336
    },
    {
      "epoch": 0.8155250792680597,
      "grad_norm": 0.3012368679046631,
      "learning_rate": 1.7605762047674046e-05,
      "loss": 0.6446,
      "step": 5337
    },
    {
      "epoch": 0.8156778851663674,
      "grad_norm": 0.28177013993263245,
      "learning_rate": 1.7577487317837414e-05,
      "loss": 0.723,
      "step": 5338
    },
    {
      "epoch": 0.8158306910646751,
      "grad_norm": 0.29954829812049866,
      "learning_rate": 1.754923312287795e-05,
      "loss": 0.7209,
      "step": 5339
    },
    {
      "epoch": 0.8159834969629828,
      "grad_norm": 0.3177793323993683,
      "learning_rate": 1.7520999469834964e-05,
      "loss": 0.6263,
      "step": 5340
    },
    {
      "epoch": 0.8161363028612905,
      "grad_norm": 0.2854011356830597,
      "learning_rate": 1.749278636574262e-05,
      "loss": 0.7782,
      "step": 5341
    },
    {
      "epoch": 0.8162891087595981,
      "grad_norm": 0.24384701251983643,
      "learning_rate": 1.7464593817629926e-05,
      "loss": 0.7041,
      "step": 5342
    },
    {
      "epoch": 0.8164419146579058,
      "grad_norm": 0.4053572118282318,
      "learning_rate": 1.7436421832520866e-05,
      "loss": 0.7957,
      "step": 5343
    },
    {
      "epoch": 0.8165947205562135,
      "grad_norm": 0.2650754749774933,
      "learning_rate": 1.740827041743428e-05,
      "loss": 0.6292,
      "step": 5344
    },
    {
      "epoch": 0.8167475264545212,
      "grad_norm": 0.5751661658287048,
      "learning_rate": 1.7380139579383814e-05,
      "loss": 0.7315,
      "step": 5345
    },
    {
      "epoch": 0.8169003323528288,
      "grad_norm": 0.2932993471622467,
      "learning_rate": 1.7352029325378015e-05,
      "loss": 0.9154,
      "step": 5346
    },
    {
      "epoch": 0.8170531382511365,
      "grad_norm": 0.27605947852134705,
      "learning_rate": 1.7323939662420373e-05,
      "loss": 0.7626,
      "step": 5347
    },
    {
      "epoch": 0.8172059441494441,
      "grad_norm": 0.31729474663734436,
      "learning_rate": 1.7295870597509146e-05,
      "loss": 0.8639,
      "step": 5348
    },
    {
      "epoch": 0.8173587500477518,
      "grad_norm": 0.283658891916275,
      "learning_rate": 1.7267822137637536e-05,
      "loss": 0.6038,
      "step": 5349
    },
    {
      "epoch": 0.8175115559460595,
      "grad_norm": 0.3112010657787323,
      "learning_rate": 1.7239794289793533e-05,
      "loss": 0.7148,
      "step": 5350
    },
    {
      "epoch": 0.8176643618443672,
      "grad_norm": 0.3229861259460449,
      "learning_rate": 1.7211787060960105e-05,
      "loss": 0.7873,
      "step": 5351
    },
    {
      "epoch": 0.8178171677426749,
      "grad_norm": 0.4701400399208069,
      "learning_rate": 1.7183800458114964e-05,
      "loss": 0.856,
      "step": 5352
    },
    {
      "epoch": 0.8179699736409826,
      "grad_norm": 0.3407234251499176,
      "learning_rate": 1.7155834488230782e-05,
      "loss": 0.6922,
      "step": 5353
    },
    {
      "epoch": 0.8181227795392902,
      "grad_norm": 0.3151310086250305,
      "learning_rate": 1.7127889158275024e-05,
      "loss": 0.6667,
      "step": 5354
    },
    {
      "epoch": 0.8182755854375979,
      "grad_norm": 0.35597583651542664,
      "learning_rate": 1.7099964475210017e-05,
      "loss": 0.6749,
      "step": 5355
    },
    {
      "epoch": 0.8184283913359056,
      "grad_norm": 0.30627796053886414,
      "learning_rate": 1.7072060445992967e-05,
      "loss": 0.7082,
      "step": 5356
    },
    {
      "epoch": 0.8185811972342132,
      "grad_norm": 0.30426254868507385,
      "learning_rate": 1.7044177077575962e-05,
      "loss": 0.7114,
      "step": 5357
    },
    {
      "epoch": 0.8187340031325209,
      "grad_norm": 0.27987557649612427,
      "learning_rate": 1.7016314376905894e-05,
      "loss": 0.6147,
      "step": 5358
    },
    {
      "epoch": 0.8188868090308286,
      "grad_norm": 0.31371667981147766,
      "learning_rate": 1.6988472350924488e-05,
      "loss": 0.5975,
      "step": 5359
    },
    {
      "epoch": 0.8190396149291362,
      "grad_norm": 0.39855438470840454,
      "learning_rate": 1.6960651006568372e-05,
      "loss": 0.8116,
      "step": 5360
    },
    {
      "epoch": 0.8191924208274439,
      "grad_norm": 0.4293268322944641,
      "learning_rate": 1.6932850350769037e-05,
      "loss": 0.7199,
      "step": 5361
    },
    {
      "epoch": 0.8193452267257516,
      "grad_norm": 0.3112042546272278,
      "learning_rate": 1.690507039045275e-05,
      "loss": 0.8287,
      "step": 5362
    },
    {
      "epoch": 0.8194980326240593,
      "grad_norm": 0.2758471965789795,
      "learning_rate": 1.687731113254063e-05,
      "loss": 0.6841,
      "step": 5363
    },
    {
      "epoch": 0.819650838522367,
      "grad_norm": 0.2721893787384033,
      "learning_rate": 1.684957258394869e-05,
      "loss": 0.5713,
      "step": 5364
    },
    {
      "epoch": 0.8198036444206747,
      "grad_norm": 0.3106933832168579,
      "learning_rate": 1.6821854751587774e-05,
      "loss": 0.6588,
      "step": 5365
    },
    {
      "epoch": 0.8199564503189823,
      "grad_norm": 0.2738363444805145,
      "learning_rate": 1.6794157642363517e-05,
      "loss": 0.739,
      "step": 5366
    },
    {
      "epoch": 0.82010925621729,
      "grad_norm": 0.26095277070999146,
      "learning_rate": 1.6766481263176448e-05,
      "loss": 0.8577,
      "step": 5367
    },
    {
      "epoch": 0.8202620621155977,
      "grad_norm": 0.4808953106403351,
      "learning_rate": 1.6738825620921894e-05,
      "loss": 0.6906,
      "step": 5368
    },
    {
      "epoch": 0.8204148680139053,
      "grad_norm": 0.25246375799179077,
      "learning_rate": 1.671119072248999e-05,
      "loss": 0.5648,
      "step": 5369
    },
    {
      "epoch": 0.820567673912213,
      "grad_norm": 0.27172037959098816,
      "learning_rate": 1.668357657476578e-05,
      "loss": 0.9843,
      "step": 5370
    },
    {
      "epoch": 0.8207204798105207,
      "grad_norm": 0.5794962644577026,
      "learning_rate": 1.6655983184629108e-05,
      "loss": 0.4935,
      "step": 5371
    },
    {
      "epoch": 0.8208732857088283,
      "grad_norm": 0.2584603726863861,
      "learning_rate": 1.662841055895461e-05,
      "loss": 0.6888,
      "step": 5372
    },
    {
      "epoch": 0.821026091607136,
      "grad_norm": 0.36797964572906494,
      "learning_rate": 1.6600858704611764e-05,
      "loss": 0.77,
      "step": 5373
    },
    {
      "epoch": 0.8211788975054437,
      "grad_norm": 0.2741428315639496,
      "learning_rate": 1.6573327628464897e-05,
      "loss": 0.6751,
      "step": 5374
    },
    {
      "epoch": 0.8213317034037514,
      "grad_norm": 0.4992486536502838,
      "learning_rate": 1.6545817337373172e-05,
      "loss": 0.712,
      "step": 5375
    },
    {
      "epoch": 0.8214845093020591,
      "grad_norm": 0.28491753339767456,
      "learning_rate": 1.6518327838190528e-05,
      "loss": 0.7427,
      "step": 5376
    },
    {
      "epoch": 0.8216373152003668,
      "grad_norm": 0.47695693373680115,
      "learning_rate": 1.64908591377657e-05,
      "loss": 0.9162,
      "step": 5377
    },
    {
      "epoch": 0.8217901210986744,
      "grad_norm": 0.29675573110580444,
      "learning_rate": 1.646341124294234e-05,
      "loss": 0.7819,
      "step": 5378
    },
    {
      "epoch": 0.8219429269969821,
      "grad_norm": 0.2657209634780884,
      "learning_rate": 1.643598416055885e-05,
      "loss": 0.7338,
      "step": 5379
    },
    {
      "epoch": 0.8220957328952898,
      "grad_norm": 0.28576889634132385,
      "learning_rate": 1.640857789744846e-05,
      "loss": 0.7603,
      "step": 5380
    },
    {
      "epoch": 0.8222485387935974,
      "grad_norm": 0.2834707498550415,
      "learning_rate": 1.6381192460439175e-05,
      "loss": 0.732,
      "step": 5381
    },
    {
      "epoch": 0.8224013446919051,
      "grad_norm": 0.23507724702358246,
      "learning_rate": 1.6353827856353864e-05,
      "loss": 0.6541,
      "step": 5382
    },
    {
      "epoch": 0.8225541505902128,
      "grad_norm": 0.30040469765663147,
      "learning_rate": 1.632648409201023e-05,
      "loss": 0.7025,
      "step": 5383
    },
    {
      "epoch": 0.8227069564885204,
      "grad_norm": 0.25531867146492004,
      "learning_rate": 1.62991611742207e-05,
      "loss": 0.5456,
      "step": 5384
    },
    {
      "epoch": 0.8228597623868281,
      "grad_norm": 0.310249388217926,
      "learning_rate": 1.6271859109792543e-05,
      "loss": 0.712,
      "step": 5385
    },
    {
      "epoch": 0.8230125682851358,
      "grad_norm": 0.4035734236240387,
      "learning_rate": 1.6244577905527868e-05,
      "loss": 0.6386,
      "step": 5386
    },
    {
      "epoch": 0.8231653741834435,
      "grad_norm": 0.2613977789878845,
      "learning_rate": 1.6217317568223523e-05,
      "loss": 0.5869,
      "step": 5387
    },
    {
      "epoch": 0.8233181800817512,
      "grad_norm": 0.27511075139045715,
      "learning_rate": 1.6190078104671245e-05,
      "loss": 0.7242,
      "step": 5388
    },
    {
      "epoch": 0.8234709859800589,
      "grad_norm": 0.3719506561756134,
      "learning_rate": 1.616285952165746e-05,
      "loss": 0.637,
      "step": 5389
    },
    {
      "epoch": 0.8236237918783665,
      "grad_norm": 0.2993394434452057,
      "learning_rate": 1.61356618259635e-05,
      "loss": 0.6588,
      "step": 5390
    },
    {
      "epoch": 0.8237765977766742,
      "grad_norm": 0.3319057822227478,
      "learning_rate": 1.6108485024365383e-05,
      "loss": 0.7413,
      "step": 5391
    },
    {
      "epoch": 0.8239294036749819,
      "grad_norm": 0.2575140595436096,
      "learning_rate": 1.6081329123634027e-05,
      "loss": 0.6268,
      "step": 5392
    },
    {
      "epoch": 0.8240822095732895,
      "grad_norm": 0.35643646121025085,
      "learning_rate": 1.605419413053514e-05,
      "loss": 0.7586,
      "step": 5393
    },
    {
      "epoch": 0.8242350154715972,
      "grad_norm": 0.28320401906967163,
      "learning_rate": 1.6027080051829058e-05,
      "loss": 0.5139,
      "step": 5394
    },
    {
      "epoch": 0.8243878213699048,
      "grad_norm": 0.30584996938705444,
      "learning_rate": 1.59999868942711e-05,
      "loss": 0.8343,
      "step": 5395
    },
    {
      "epoch": 0.8245406272682125,
      "grad_norm": 0.3051997125148773,
      "learning_rate": 1.5972914664611306e-05,
      "loss": 0.6096,
      "step": 5396
    },
    {
      "epoch": 0.8246934331665202,
      "grad_norm": 0.27233120799064636,
      "learning_rate": 1.5945863369594503e-05,
      "loss": 0.63,
      "step": 5397
    },
    {
      "epoch": 0.8248462390648279,
      "grad_norm": 0.2944967448711395,
      "learning_rate": 1.5918833015960243e-05,
      "loss": 0.7065,
      "step": 5398
    },
    {
      "epoch": 0.8249990449631356,
      "grad_norm": 0.2862212061882019,
      "learning_rate": 1.5891823610442925e-05,
      "loss": 0.8733,
      "step": 5399
    },
    {
      "epoch": 0.8251518508614433,
      "grad_norm": 0.3013235032558441,
      "learning_rate": 1.5864835159771763e-05,
      "loss": 0.5567,
      "step": 5400
    },
    {
      "epoch": 0.825304656759751,
      "grad_norm": 0.4039291441440582,
      "learning_rate": 1.5837867670670638e-05,
      "loss": 0.8828,
      "step": 5401
    },
    {
      "epoch": 0.8254574626580586,
      "grad_norm": 0.25634995102882385,
      "learning_rate": 1.581092114985834e-05,
      "loss": 0.7524,
      "step": 5402
    },
    {
      "epoch": 0.8256102685563663,
      "grad_norm": 0.301152765750885,
      "learning_rate": 1.5783995604048295e-05,
      "loss": 0.8011,
      "step": 5403
    },
    {
      "epoch": 0.8257630744546739,
      "grad_norm": 0.25691330432891846,
      "learning_rate": 1.5757091039948856e-05,
      "loss": 0.6929,
      "step": 5404
    },
    {
      "epoch": 0.8259158803529816,
      "grad_norm": 0.2751913368701935,
      "learning_rate": 1.573020746426299e-05,
      "loss": 0.7996,
      "step": 5405
    },
    {
      "epoch": 0.8260686862512893,
      "grad_norm": 0.3121042251586914,
      "learning_rate": 1.5703344883688586e-05,
      "loss": 0.7233,
      "step": 5406
    },
    {
      "epoch": 0.826221492149597,
      "grad_norm": 0.30473700165748596,
      "learning_rate": 1.56765033049182e-05,
      "loss": 0.8604,
      "step": 5407
    },
    {
      "epoch": 0.8263742980479046,
      "grad_norm": 0.28437185287475586,
      "learning_rate": 1.5649682734639147e-05,
      "loss": 0.737,
      "step": 5408
    },
    {
      "epoch": 0.8265271039462123,
      "grad_norm": 0.27961575984954834,
      "learning_rate": 1.56228831795336e-05,
      "loss": 0.602,
      "step": 5409
    },
    {
      "epoch": 0.82667990984452,
      "grad_norm": 0.2619366943836212,
      "learning_rate": 1.5596104646278443e-05,
      "loss": 0.5513,
      "step": 5410
    },
    {
      "epoch": 0.8268327157428277,
      "grad_norm": 0.3035143315792084,
      "learning_rate": 1.55693471415453e-05,
      "loss": 0.5348,
      "step": 5411
    },
    {
      "epoch": 0.8269855216411354,
      "grad_norm": 0.28375378251075745,
      "learning_rate": 1.5542610672000568e-05,
      "loss": 0.6483,
      "step": 5412
    },
    {
      "epoch": 0.827138327539443,
      "grad_norm": 0.28063929080963135,
      "learning_rate": 1.5515895244305435e-05,
      "loss": 0.6351,
      "step": 5413
    },
    {
      "epoch": 0.8272911334377507,
      "grad_norm": 0.25390905141830444,
      "learning_rate": 1.5489200865115838e-05,
      "loss": 0.5528,
      "step": 5414
    },
    {
      "epoch": 0.8274439393360584,
      "grad_norm": 0.3002142906188965,
      "learning_rate": 1.546252754108245e-05,
      "loss": 0.7665,
      "step": 5415
    },
    {
      "epoch": 0.827596745234366,
      "grad_norm": 0.25479069352149963,
      "learning_rate": 1.5435875278850664e-05,
      "loss": 0.6322,
      "step": 5416
    },
    {
      "epoch": 0.8277495511326737,
      "grad_norm": 0.32886984944343567,
      "learning_rate": 1.5409244085060704e-05,
      "loss": 0.6251,
      "step": 5417
    },
    {
      "epoch": 0.8279023570309814,
      "grad_norm": 0.3810743987560272,
      "learning_rate": 1.5382633966347527e-05,
      "loss": 0.6871,
      "step": 5418
    },
    {
      "epoch": 0.828055162929289,
      "grad_norm": 0.27458542585372925,
      "learning_rate": 1.5356044929340806e-05,
      "loss": 0.7093,
      "step": 5419
    },
    {
      "epoch": 0.8282079688275967,
      "grad_norm": 0.28523746132850647,
      "learning_rate": 1.5329476980664935e-05,
      "loss": 0.6043,
      "step": 5420
    },
    {
      "epoch": 0.8283607747259044,
      "grad_norm": 0.3243076205253601,
      "learning_rate": 1.530293012693913e-05,
      "loss": 0.6776,
      "step": 5421
    },
    {
      "epoch": 0.8285135806242121,
      "grad_norm": 0.2854920029640198,
      "learning_rate": 1.5276404374777353e-05,
      "loss": 0.6931,
      "step": 5422
    },
    {
      "epoch": 0.8286663865225198,
      "grad_norm": 0.27915722131729126,
      "learning_rate": 1.524989973078822e-05,
      "loss": 0.8883,
      "step": 5423
    },
    {
      "epoch": 0.8288191924208275,
      "grad_norm": 0.29778462648391724,
      "learning_rate": 1.5223416201575137e-05,
      "loss": 0.7446,
      "step": 5424
    },
    {
      "epoch": 0.8289719983191352,
      "grad_norm": 0.26901963353157043,
      "learning_rate": 1.5196953793736301e-05,
      "loss": 0.72,
      "step": 5425
    },
    {
      "epoch": 0.8291248042174428,
      "grad_norm": 0.30687153339385986,
      "learning_rate": 1.5170512513864543e-05,
      "loss": 0.7793,
      "step": 5426
    },
    {
      "epoch": 0.8292776101157505,
      "grad_norm": 0.37296929955482483,
      "learning_rate": 1.5144092368547513e-05,
      "loss": 0.6609,
      "step": 5427
    },
    {
      "epoch": 0.8294304160140581,
      "grad_norm": 0.28542792797088623,
      "learning_rate": 1.511769336436759e-05,
      "loss": 0.7416,
      "step": 5428
    },
    {
      "epoch": 0.8295832219123658,
      "grad_norm": 0.2609799802303314,
      "learning_rate": 1.5091315507901838e-05,
      "loss": 0.8109,
      "step": 5429
    },
    {
      "epoch": 0.8297360278106735,
      "grad_norm": 0.35812559723854065,
      "learning_rate": 1.5064958805722074e-05,
      "loss": 0.8972,
      "step": 5430
    },
    {
      "epoch": 0.8298888337089811,
      "grad_norm": 0.3091167211532593,
      "learning_rate": 1.5038623264394846e-05,
      "loss": 0.6813,
      "step": 5431
    },
    {
      "epoch": 0.8300416396072888,
      "grad_norm": 0.3029838800430298,
      "learning_rate": 1.5012308890481474e-05,
      "loss": 0.8768,
      "step": 5432
    },
    {
      "epoch": 0.8301944455055965,
      "grad_norm": 0.37332555651664734,
      "learning_rate": 1.4986015690537924e-05,
      "loss": 0.7508,
      "step": 5433
    },
    {
      "epoch": 0.8303472514039042,
      "grad_norm": 0.292579710483551,
      "learning_rate": 1.4959743671114924e-05,
      "loss": 0.5804,
      "step": 5434
    },
    {
      "epoch": 0.8305000573022119,
      "grad_norm": 0.28892597556114197,
      "learning_rate": 1.4933492838757933e-05,
      "loss": 0.7084,
      "step": 5435
    },
    {
      "epoch": 0.8306528632005196,
      "grad_norm": 0.41257259249687195,
      "learning_rate": 1.490726320000716e-05,
      "loss": 0.7625,
      "step": 5436
    },
    {
      "epoch": 0.8308056690988272,
      "grad_norm": 0.2837768495082855,
      "learning_rate": 1.4881054761397472e-05,
      "loss": 0.7145,
      "step": 5437
    },
    {
      "epoch": 0.8309584749971349,
      "grad_norm": 0.2908405065536499,
      "learning_rate": 1.4854867529458461e-05,
      "loss": 0.7271,
      "step": 5438
    },
    {
      "epoch": 0.8311112808954426,
      "grad_norm": 0.3113293945789337,
      "learning_rate": 1.4828701510714494e-05,
      "loss": 0.6935,
      "step": 5439
    },
    {
      "epoch": 0.8312640867937502,
      "grad_norm": 0.2733793258666992,
      "learning_rate": 1.480255671168458e-05,
      "loss": 0.6754,
      "step": 5440
    },
    {
      "epoch": 0.8314168926920579,
      "grad_norm": 0.39152440428733826,
      "learning_rate": 1.4776433138882507e-05,
      "loss": 0.6892,
      "step": 5441
    },
    {
      "epoch": 0.8315696985903656,
      "grad_norm": 0.3532632887363434,
      "learning_rate": 1.4750330798816714e-05,
      "loss": 0.6027,
      "step": 5442
    },
    {
      "epoch": 0.8317225044886732,
      "grad_norm": 0.3093455135822296,
      "learning_rate": 1.4724249697990412e-05,
      "loss": 0.7746,
      "step": 5443
    },
    {
      "epoch": 0.8318753103869809,
      "grad_norm": 0.32264313101768494,
      "learning_rate": 1.4698189842901455e-05,
      "loss": 0.7218,
      "step": 5444
    },
    {
      "epoch": 0.8320281162852886,
      "grad_norm": 0.2763960063457489,
      "learning_rate": 1.4672151240042475e-05,
      "loss": 0.7134,
      "step": 5445
    },
    {
      "epoch": 0.8321809221835963,
      "grad_norm": 0.30286088585853577,
      "learning_rate": 1.464613389590076e-05,
      "loss": 0.6322,
      "step": 5446
    },
    {
      "epoch": 0.832333728081904,
      "grad_norm": 0.27298229932785034,
      "learning_rate": 1.4620137816958269e-05,
      "loss": 0.6095,
      "step": 5447
    },
    {
      "epoch": 0.8324865339802117,
      "grad_norm": 0.30183646082878113,
      "learning_rate": 1.4594163009691741e-05,
      "loss": 0.7774,
      "step": 5448
    },
    {
      "epoch": 0.8326393398785193,
      "grad_norm": 0.3693987727165222,
      "learning_rate": 1.4568209480572615e-05,
      "loss": 0.5401,
      "step": 5449
    },
    {
      "epoch": 0.832792145776827,
      "grad_norm": 0.3332158327102661,
      "learning_rate": 1.454227723606696e-05,
      "loss": 0.5118,
      "step": 5450
    },
    {
      "epoch": 0.8329449516751347,
      "grad_norm": 0.2714625895023346,
      "learning_rate": 1.4516366282635552e-05,
      "loss": 0.6193,
      "step": 5451
    },
    {
      "epoch": 0.8330977575734423,
      "grad_norm": 0.2512652277946472,
      "learning_rate": 1.4490476626733907e-05,
      "loss": 0.7225,
      "step": 5452
    },
    {
      "epoch": 0.83325056347175,
      "grad_norm": 0.32431286573410034,
      "learning_rate": 1.446460827481223e-05,
      "loss": 0.762,
      "step": 5453
    },
    {
      "epoch": 0.8334033693700577,
      "grad_norm": 0.26185527443885803,
      "learning_rate": 1.4438761233315445e-05,
      "loss": 0.6317,
      "step": 5454
    },
    {
      "epoch": 0.8335561752683653,
      "grad_norm": 0.2889951169490814,
      "learning_rate": 1.4412935508683024e-05,
      "loss": 0.5021,
      "step": 5455
    },
    {
      "epoch": 0.833708981166673,
      "grad_norm": 0.2649388909339905,
      "learning_rate": 1.4387131107349295e-05,
      "loss": 0.544,
      "step": 5456
    },
    {
      "epoch": 0.8338617870649807,
      "grad_norm": 0.26375481486320496,
      "learning_rate": 1.4361348035743205e-05,
      "loss": 0.7273,
      "step": 5457
    },
    {
      "epoch": 0.8340145929632884,
      "grad_norm": 0.3317602574825287,
      "learning_rate": 1.4335586300288385e-05,
      "loss": 0.7108,
      "step": 5458
    },
    {
      "epoch": 0.8341673988615961,
      "grad_norm": 0.37609806656837463,
      "learning_rate": 1.430984590740313e-05,
      "loss": 0.6881,
      "step": 5459
    },
    {
      "epoch": 0.8343202047599038,
      "grad_norm": 0.245027095079422,
      "learning_rate": 1.4284126863500457e-05,
      "loss": 0.7448,
      "step": 5460
    },
    {
      "epoch": 0.8344730106582114,
      "grad_norm": 0.34146881103515625,
      "learning_rate": 1.4258429174988086e-05,
      "loss": 0.9717,
      "step": 5461
    },
    {
      "epoch": 0.8346258165565191,
      "grad_norm": 0.331993043422699,
      "learning_rate": 1.4232752848268317e-05,
      "loss": 0.6564,
      "step": 5462
    },
    {
      "epoch": 0.8347786224548267,
      "grad_norm": 0.23571527004241943,
      "learning_rate": 1.4207097889738253e-05,
      "loss": 0.6448,
      "step": 5463
    },
    {
      "epoch": 0.8349314283531344,
      "grad_norm": 0.2884713411331177,
      "learning_rate": 1.4181464305789583e-05,
      "loss": 0.6051,
      "step": 5464
    },
    {
      "epoch": 0.8350842342514421,
      "grad_norm": 0.30997729301452637,
      "learning_rate": 1.4155852102808686e-05,
      "loss": 0.7864,
      "step": 5465
    },
    {
      "epoch": 0.8352370401497498,
      "grad_norm": 0.42678937315940857,
      "learning_rate": 1.4130261287176627e-05,
      "loss": 0.7529,
      "step": 5466
    },
    {
      "epoch": 0.8353898460480574,
      "grad_norm": 0.23659348487854004,
      "learning_rate": 1.4104691865269193e-05,
      "loss": 0.5946,
      "step": 5467
    },
    {
      "epoch": 0.8355426519463651,
      "grad_norm": 0.27591216564178467,
      "learning_rate": 1.4079143843456743e-05,
      "loss": 0.6855,
      "step": 5468
    },
    {
      "epoch": 0.8356954578446728,
      "grad_norm": 0.3251248002052307,
      "learning_rate": 1.4053617228104343e-05,
      "loss": 0.7527,
      "step": 5469
    },
    {
      "epoch": 0.8358482637429805,
      "grad_norm": 0.28075751662254333,
      "learning_rate": 1.402811202557176e-05,
      "loss": 0.5165,
      "step": 5470
    },
    {
      "epoch": 0.8360010696412882,
      "grad_norm": 0.2756359279155731,
      "learning_rate": 1.4002628242213422e-05,
      "loss": 0.5895,
      "step": 5471
    },
    {
      "epoch": 0.8361538755395959,
      "grad_norm": 0.30491769313812256,
      "learning_rate": 1.3977165884378362e-05,
      "loss": 0.721,
      "step": 5472
    },
    {
      "epoch": 0.8363066814379035,
      "grad_norm": 0.4730520248413086,
      "learning_rate": 1.3951724958410317e-05,
      "loss": 0.6908,
      "step": 5473
    },
    {
      "epoch": 0.8364594873362112,
      "grad_norm": 0.3533181846141815,
      "learning_rate": 1.3926305470647682e-05,
      "loss": 0.664,
      "step": 5474
    },
    {
      "epoch": 0.8366122932345188,
      "grad_norm": 0.3893337547779083,
      "learning_rate": 1.3900907427423537e-05,
      "loss": 0.8438,
      "step": 5475
    },
    {
      "epoch": 0.8367650991328265,
      "grad_norm": 0.2789744436740875,
      "learning_rate": 1.3875530835065576e-05,
      "loss": 0.6695,
      "step": 5476
    },
    {
      "epoch": 0.8369179050311342,
      "grad_norm": 0.2796769440174103,
      "learning_rate": 1.3850175699896128e-05,
      "loss": 0.8565,
      "step": 5477
    },
    {
      "epoch": 0.8370707109294419,
      "grad_norm": 0.3079114258289337,
      "learning_rate": 1.3824842028232265e-05,
      "loss": 0.7301,
      "step": 5478
    },
    {
      "epoch": 0.8372235168277495,
      "grad_norm": 0.2383667379617691,
      "learning_rate": 1.3799529826385616e-05,
      "loss": 0.5826,
      "step": 5479
    },
    {
      "epoch": 0.8373763227260572,
      "grad_norm": 0.27189722657203674,
      "learning_rate": 1.3774239100662545e-05,
      "loss": 0.7496,
      "step": 5480
    },
    {
      "epoch": 0.8375291286243649,
      "grad_norm": 0.28391632437705994,
      "learning_rate": 1.374896985736398e-05,
      "loss": 0.8122,
      "step": 5481
    },
    {
      "epoch": 0.8376819345226726,
      "grad_norm": 0.4183500409126282,
      "learning_rate": 1.3723722102785575e-05,
      "loss": 0.8417,
      "step": 5482
    },
    {
      "epoch": 0.8378347404209803,
      "grad_norm": 0.2729761302471161,
      "learning_rate": 1.3698495843217574e-05,
      "loss": 0.7305,
      "step": 5483
    },
    {
      "epoch": 0.837987546319288,
      "grad_norm": 0.31188705563545227,
      "learning_rate": 1.3673291084944916e-05,
      "loss": 0.7001,
      "step": 5484
    },
    {
      "epoch": 0.8381403522175956,
      "grad_norm": 0.27931174635887146,
      "learning_rate": 1.3648107834247137e-05,
      "loss": 0.788,
      "step": 5485
    },
    {
      "epoch": 0.8382931581159033,
      "grad_norm": 0.2955465018749237,
      "learning_rate": 1.3622946097398415e-05,
      "loss": 0.6219,
      "step": 5486
    },
    {
      "epoch": 0.8384459640142109,
      "grad_norm": 0.2784363329410553,
      "learning_rate": 1.3597805880667591e-05,
      "loss": 0.7067,
      "step": 5487
    },
    {
      "epoch": 0.8385987699125186,
      "grad_norm": 0.2599412202835083,
      "learning_rate": 1.3572687190318167e-05,
      "loss": 0.6021,
      "step": 5488
    },
    {
      "epoch": 0.8387515758108263,
      "grad_norm": 0.2823382616043091,
      "learning_rate": 1.3547590032608271e-05,
      "loss": 0.7634,
      "step": 5489
    },
    {
      "epoch": 0.838904381709134,
      "grad_norm": 1.0645288228988647,
      "learning_rate": 1.3522514413790577e-05,
      "loss": 1.0043,
      "step": 5490
    },
    {
      "epoch": 0.8390571876074416,
      "grad_norm": 0.29358312487602234,
      "learning_rate": 1.34974603401125e-05,
      "loss": 0.8486,
      "step": 5491
    },
    {
      "epoch": 0.8392099935057493,
      "grad_norm": 0.32983365654945374,
      "learning_rate": 1.3472427817816047e-05,
      "loss": 0.8058,
      "step": 5492
    },
    {
      "epoch": 0.839362799404057,
      "grad_norm": 0.3272798955440521,
      "learning_rate": 1.3447416853137907e-05,
      "loss": 0.6448,
      "step": 5493
    },
    {
      "epoch": 0.8395156053023647,
      "grad_norm": 0.29118168354034424,
      "learning_rate": 1.3422427452309305e-05,
      "loss": 0.6715,
      "step": 5494
    },
    {
      "epoch": 0.8396684112006724,
      "grad_norm": 0.31547603011131287,
      "learning_rate": 1.339745962155613e-05,
      "loss": 0.7579,
      "step": 5495
    },
    {
      "epoch": 0.83982121709898,
      "grad_norm": 0.28353336453437805,
      "learning_rate": 1.337251336709896e-05,
      "loss": 0.6519,
      "step": 5496
    },
    {
      "epoch": 0.8399740229972877,
      "grad_norm": 0.2982615530490875,
      "learning_rate": 1.334758869515288e-05,
      "loss": 0.7229,
      "step": 5497
    },
    {
      "epoch": 0.8401268288955954,
      "grad_norm": 0.307255357503891,
      "learning_rate": 1.332268561192771e-05,
      "loss": 0.893,
      "step": 5498
    },
    {
      "epoch": 0.840279634793903,
      "grad_norm": 0.37311118841171265,
      "learning_rate": 1.3297804123627822e-05,
      "loss": 0.7039,
      "step": 5499
    },
    {
      "epoch": 0.8404324406922107,
      "grad_norm": 0.3093119263648987,
      "learning_rate": 1.3272944236452256e-05,
      "loss": 0.6056,
      "step": 5500
    },
    {
      "epoch": 0.8405852465905184,
      "grad_norm": 0.26752883195877075,
      "learning_rate": 1.3248105956594592e-05,
      "loss": 0.5392,
      "step": 5501
    },
    {
      "epoch": 0.840738052488826,
      "grad_norm": 0.3182859420776367,
      "learning_rate": 1.3223289290243147e-05,
      "loss": 0.5405,
      "step": 5502
    },
    {
      "epoch": 0.8408908583871337,
      "grad_norm": 0.2866218686103821,
      "learning_rate": 1.319849424358075e-05,
      "loss": 0.6926,
      "step": 5503
    },
    {
      "epoch": 0.8410436642854414,
      "grad_norm": 0.2911316454410553,
      "learning_rate": 1.3173720822784852e-05,
      "loss": 0.7432,
      "step": 5504
    },
    {
      "epoch": 0.8411964701837491,
      "grad_norm": 0.370292067527771,
      "learning_rate": 1.3148969034027569e-05,
      "loss": 0.7433,
      "step": 5505
    },
    {
      "epoch": 0.8413492760820568,
      "grad_norm": 0.3643721640110016,
      "learning_rate": 1.3124238883475626e-05,
      "loss": 0.6179,
      "step": 5506
    },
    {
      "epoch": 0.8415020819803645,
      "grad_norm": 0.4829062819480896,
      "learning_rate": 1.3099530377290314e-05,
      "loss": 0.7343,
      "step": 5507
    },
    {
      "epoch": 0.8416548878786722,
      "grad_norm": 0.3859752416610718,
      "learning_rate": 1.3074843521627522e-05,
      "loss": 0.5284,
      "step": 5508
    },
    {
      "epoch": 0.8418076937769798,
      "grad_norm": 0.26677191257476807,
      "learning_rate": 1.3050178322637784e-05,
      "loss": 0.5418,
      "step": 5509
    },
    {
      "epoch": 0.8419604996752875,
      "grad_norm": 0.33165523409843445,
      "learning_rate": 1.3025534786466275e-05,
      "loss": 0.7229,
      "step": 5510
    },
    {
      "epoch": 0.8421133055735951,
      "grad_norm": 0.3077392876148224,
      "learning_rate": 1.3000912919252683e-05,
      "loss": 0.7913,
      "step": 5511
    },
    {
      "epoch": 0.8422661114719028,
      "grad_norm": 0.31485605239868164,
      "learning_rate": 1.2976312727131323e-05,
      "loss": 0.8463,
      "step": 5512
    },
    {
      "epoch": 0.8424189173702105,
      "grad_norm": 0.27010369300842285,
      "learning_rate": 1.2951734216231148e-05,
      "loss": 0.5581,
      "step": 5513
    },
    {
      "epoch": 0.8425717232685181,
      "grad_norm": 0.2709108293056488,
      "learning_rate": 1.2927177392675715e-05,
      "loss": 0.6104,
      "step": 5514
    },
    {
      "epoch": 0.8427245291668258,
      "grad_norm": 0.2696745991706848,
      "learning_rate": 1.290264226258312e-05,
      "loss": 0.5627,
      "step": 5515
    },
    {
      "epoch": 0.8428773350651335,
      "grad_norm": 0.3353211283683777,
      "learning_rate": 1.2878128832066073e-05,
      "loss": 0.6656,
      "step": 5516
    },
    {
      "epoch": 0.8430301409634412,
      "grad_norm": 0.2736285924911499,
      "learning_rate": 1.285363710723192e-05,
      "loss": 0.6254,
      "step": 5517
    },
    {
      "epoch": 0.8431829468617489,
      "grad_norm": 0.319490909576416,
      "learning_rate": 1.2829167094182537e-05,
      "loss": 0.5857,
      "step": 5518
    },
    {
      "epoch": 0.8433357527600566,
      "grad_norm": 0.30264532566070557,
      "learning_rate": 1.2804718799014459e-05,
      "loss": 0.7828,
      "step": 5519
    },
    {
      "epoch": 0.8434885586583643,
      "grad_norm": 0.5836649537086487,
      "learning_rate": 1.2780292227818735e-05,
      "loss": 0.6535,
      "step": 5520
    },
    {
      "epoch": 0.8436413645566719,
      "grad_norm": 0.27647146582603455,
      "learning_rate": 1.2755887386681076e-05,
      "loss": 0.6675,
      "step": 5521
    },
    {
      "epoch": 0.8437941704549795,
      "grad_norm": 0.28818479180336,
      "learning_rate": 1.2731504281681705e-05,
      "loss": 0.6968,
      "step": 5522
    },
    {
      "epoch": 0.8439469763532872,
      "grad_norm": 0.33592307567596436,
      "learning_rate": 1.2707142918895498e-05,
      "loss": 0.6931,
      "step": 5523
    },
    {
      "epoch": 0.8440997822515949,
      "grad_norm": 0.2817316949367523,
      "learning_rate": 1.268280330439191e-05,
      "loss": 0.6263,
      "step": 5524
    },
    {
      "epoch": 0.8442525881499026,
      "grad_norm": 0.4386361539363861,
      "learning_rate": 1.2658485444234869e-05,
      "loss": 0.4877,
      "step": 5525
    },
    {
      "epoch": 0.8444053940482102,
      "grad_norm": 0.32742446660995483,
      "learning_rate": 1.2634189344483028e-05,
      "loss": 0.8693,
      "step": 5526
    },
    {
      "epoch": 0.8445581999465179,
      "grad_norm": 0.30600279569625854,
      "learning_rate": 1.2609915011189533e-05,
      "loss": 0.5613,
      "step": 5527
    },
    {
      "epoch": 0.8447110058448256,
      "grad_norm": 0.3841206133365631,
      "learning_rate": 1.2585662450402158e-05,
      "loss": 0.684,
      "step": 5528
    },
    {
      "epoch": 0.8448638117431333,
      "grad_norm": 0.29146626591682434,
      "learning_rate": 1.2561431668163204e-05,
      "loss": 0.5726,
      "step": 5529
    },
    {
      "epoch": 0.845016617641441,
      "grad_norm": 0.4497874677181244,
      "learning_rate": 1.2537222670509563e-05,
      "loss": 0.8621,
      "step": 5530
    },
    {
      "epoch": 0.8451694235397487,
      "grad_norm": 0.31416016817092896,
      "learning_rate": 1.25130354634727e-05,
      "loss": 0.602,
      "step": 5531
    },
    {
      "epoch": 0.8453222294380563,
      "grad_norm": 0.41826075315475464,
      "learning_rate": 1.2488870053078682e-05,
      "loss": 0.5775,
      "step": 5532
    },
    {
      "epoch": 0.845475035336364,
      "grad_norm": 0.2680056393146515,
      "learning_rate": 1.2464726445348106e-05,
      "loss": 0.6435,
      "step": 5533
    },
    {
      "epoch": 0.8456278412346716,
      "grad_norm": 0.3037991225719452,
      "learning_rate": 1.2440604646296117e-05,
      "loss": 0.5587,
      "step": 5534
    },
    {
      "epoch": 0.8457806471329793,
      "grad_norm": 0.3998739421367645,
      "learning_rate": 1.2416504661932516e-05,
      "loss": 0.5474,
      "step": 5535
    },
    {
      "epoch": 0.845933453031287,
      "grad_norm": 0.35582205653190613,
      "learning_rate": 1.2392426498261556e-05,
      "loss": 0.6253,
      "step": 5536
    },
    {
      "epoch": 0.8460862589295947,
      "grad_norm": 0.3369934856891632,
      "learning_rate": 1.236837016128215e-05,
      "loss": 0.7949,
      "step": 5537
    },
    {
      "epoch": 0.8462390648279023,
      "grad_norm": 0.25886452198028564,
      "learning_rate": 1.2344335656987704e-05,
      "loss": 0.8035,
      "step": 5538
    },
    {
      "epoch": 0.84639187072621,
      "grad_norm": 0.4257791340351105,
      "learning_rate": 1.232032299136624e-05,
      "loss": 0.7621,
      "step": 5539
    },
    {
      "epoch": 0.8465446766245177,
      "grad_norm": 0.2683560848236084,
      "learning_rate": 1.2296332170400281e-05,
      "loss": 0.8101,
      "step": 5540
    },
    {
      "epoch": 0.8466974825228254,
      "grad_norm": 0.30207762122154236,
      "learning_rate": 1.2272363200066983e-05,
      "loss": 0.5819,
      "step": 5541
    },
    {
      "epoch": 0.8468502884211331,
      "grad_norm": 0.4459848403930664,
      "learning_rate": 1.2248416086337977e-05,
      "loss": 0.7585,
      "step": 5542
    },
    {
      "epoch": 0.8470030943194408,
      "grad_norm": 0.3064769506454468,
      "learning_rate": 1.222449083517948e-05,
      "loss": 0.6841,
      "step": 5543
    },
    {
      "epoch": 0.8471559002177484,
      "grad_norm": 0.4430690407752991,
      "learning_rate": 1.2200587452552281e-05,
      "loss": 0.5349,
      "step": 5544
    },
    {
      "epoch": 0.8473087061160561,
      "grad_norm": 0.27452754974365234,
      "learning_rate": 1.2176705944411726e-05,
      "loss": 0.8026,
      "step": 5545
    },
    {
      "epoch": 0.8474615120143637,
      "grad_norm": 0.25186803936958313,
      "learning_rate": 1.2152846316707678e-05,
      "loss": 0.7354,
      "step": 5546
    },
    {
      "epoch": 0.8476143179126714,
      "grad_norm": 0.2482818365097046,
      "learning_rate": 1.2129008575384537e-05,
      "loss": 0.7683,
      "step": 5547
    },
    {
      "epoch": 0.8477671238109791,
      "grad_norm": 0.25365763902664185,
      "learning_rate": 1.2105192726381298e-05,
      "loss": 0.6313,
      "step": 5548
    },
    {
      "epoch": 0.8479199297092868,
      "grad_norm": 0.2693358361721039,
      "learning_rate": 1.2081398775631502e-05,
      "loss": 0.6479,
      "step": 5549
    },
    {
      "epoch": 0.8480727356075944,
      "grad_norm": 0.35001295804977417,
      "learning_rate": 1.2057626729063198e-05,
      "loss": 0.8097,
      "step": 5550
    },
    {
      "epoch": 0.8482255415059021,
      "grad_norm": 0.35767117142677307,
      "learning_rate": 1.2033876592598959e-05,
      "loss": 0.7211,
      "step": 5551
    },
    {
      "epoch": 0.8483783474042098,
      "grad_norm": 0.3254198431968689,
      "learning_rate": 1.201014837215595e-05,
      "loss": 0.8238,
      "step": 5552
    },
    {
      "epoch": 0.8485311533025175,
      "grad_norm": 0.2950455844402313,
      "learning_rate": 1.1986442073645899e-05,
      "loss": 0.6653,
      "step": 5553
    },
    {
      "epoch": 0.8486839592008252,
      "grad_norm": 0.3335111439228058,
      "learning_rate": 1.196275770297497e-05,
      "loss": 0.7169,
      "step": 5554
    },
    {
      "epoch": 0.8488367650991329,
      "grad_norm": 0.3324906826019287,
      "learning_rate": 1.1939095266043976e-05,
      "loss": 0.6226,
      "step": 5555
    },
    {
      "epoch": 0.8489895709974405,
      "grad_norm": 0.25888141989707947,
      "learning_rate": 1.1915454768748191e-05,
      "loss": 0.8281,
      "step": 5556
    },
    {
      "epoch": 0.8491423768957482,
      "grad_norm": 0.26205694675445557,
      "learning_rate": 1.1891836216977426e-05,
      "loss": 0.5808,
      "step": 5557
    },
    {
      "epoch": 0.8492951827940558,
      "grad_norm": 0.37880903482437134,
      "learning_rate": 1.1868239616616073e-05,
      "loss": 0.8139,
      "step": 5558
    },
    {
      "epoch": 0.8494479886923635,
      "grad_norm": 0.2531372010707855,
      "learning_rate": 1.1844664973543029e-05,
      "loss": 0.7087,
      "step": 5559
    },
    {
      "epoch": 0.8496007945906712,
      "grad_norm": 0.2695479989051819,
      "learning_rate": 1.182111229363172e-05,
      "loss": 0.711,
      "step": 5560
    },
    {
      "epoch": 0.8497536004889789,
      "grad_norm": 0.3131905198097229,
      "learning_rate": 1.1797581582750062e-05,
      "loss": 0.4314,
      "step": 5561
    },
    {
      "epoch": 0.8499064063872865,
      "grad_norm": 0.3199729919433594,
      "learning_rate": 1.1774072846760565e-05,
      "loss": 0.6371,
      "step": 5562
    },
    {
      "epoch": 0.8500592122855942,
      "grad_norm": 0.3494158685207367,
      "learning_rate": 1.1750586091520244e-05,
      "loss": 0.8639,
      "step": 5563
    },
    {
      "epoch": 0.8502120181839019,
      "grad_norm": 0.30111271142959595,
      "learning_rate": 1.1727121322880607e-05,
      "loss": 0.6583,
      "step": 5564
    },
    {
      "epoch": 0.8503648240822096,
      "grad_norm": 0.6418349146842957,
      "learning_rate": 1.1703678546687701e-05,
      "loss": 0.6721,
      "step": 5565
    },
    {
      "epoch": 0.8505176299805173,
      "grad_norm": 0.2809236943721771,
      "learning_rate": 1.1680257768782098e-05,
      "loss": 0.7419,
      "step": 5566
    },
    {
      "epoch": 0.850670435878825,
      "grad_norm": 0.2570629417896271,
      "learning_rate": 1.1656858994998909e-05,
      "loss": 0.5666,
      "step": 5567
    },
    {
      "epoch": 0.8508232417771326,
      "grad_norm": 0.28726086020469666,
      "learning_rate": 1.1633482231167736e-05,
      "loss": 0.64,
      "step": 5568
    },
    {
      "epoch": 0.8509760476754402,
      "grad_norm": 0.4716130495071411,
      "learning_rate": 1.1610127483112665e-05,
      "loss": 0.728,
      "step": 5569
    },
    {
      "epoch": 0.8511288535737479,
      "grad_norm": 0.25644704699516296,
      "learning_rate": 1.1586794756652374e-05,
      "loss": 0.9191,
      "step": 5570
    },
    {
      "epoch": 0.8512816594720556,
      "grad_norm": 0.27911999821662903,
      "learning_rate": 1.1563484057600028e-05,
      "loss": 0.5204,
      "step": 5571
    },
    {
      "epoch": 0.8514344653703633,
      "grad_norm": 0.3576193153858185,
      "learning_rate": 1.1540195391763265e-05,
      "loss": 0.712,
      "step": 5572
    },
    {
      "epoch": 0.851587271268671,
      "grad_norm": 0.27247634530067444,
      "learning_rate": 1.1516928764944257e-05,
      "loss": 0.7406,
      "step": 5573
    },
    {
      "epoch": 0.8517400771669786,
      "grad_norm": 0.2926981747150421,
      "learning_rate": 1.1493684182939712e-05,
      "loss": 0.6532,
      "step": 5574
    },
    {
      "epoch": 0.8518928830652863,
      "grad_norm": 0.2958250939846039,
      "learning_rate": 1.1470461651540787e-05,
      "loss": 0.7098,
      "step": 5575
    },
    {
      "epoch": 0.852045688963594,
      "grad_norm": 0.3859553337097168,
      "learning_rate": 1.144726117653322e-05,
      "loss": 0.5666,
      "step": 5576
    },
    {
      "epoch": 0.8521984948619017,
      "grad_norm": 0.24316290020942688,
      "learning_rate": 1.1424082763697186e-05,
      "loss": 0.69,
      "step": 5577
    },
    {
      "epoch": 0.8523513007602094,
      "grad_norm": 0.29982128739356995,
      "learning_rate": 1.1400926418807423e-05,
      "loss": 0.8659,
      "step": 5578
    },
    {
      "epoch": 0.8525041066585171,
      "grad_norm": 0.307070255279541,
      "learning_rate": 1.1377792147633092e-05,
      "loss": 0.8201,
      "step": 5579
    },
    {
      "epoch": 0.8526569125568247,
      "grad_norm": 0.24706871807575226,
      "learning_rate": 1.1354679955937963e-05,
      "loss": 0.7248,
      "step": 5580
    },
    {
      "epoch": 0.8528097184551323,
      "grad_norm": 0.45683878660202026,
      "learning_rate": 1.1331589849480207e-05,
      "loss": 0.9072,
      "step": 5581
    },
    {
      "epoch": 0.85296252435344,
      "grad_norm": 0.3122950494289398,
      "learning_rate": 1.1308521834012509e-05,
      "loss": 0.7126,
      "step": 5582
    },
    {
      "epoch": 0.8531153302517477,
      "grad_norm": 0.24352186918258667,
      "learning_rate": 1.1285475915282106e-05,
      "loss": 0.5917,
      "step": 5583
    },
    {
      "epoch": 0.8532681361500554,
      "grad_norm": 0.26959019899368286,
      "learning_rate": 1.1262452099030684e-05,
      "loss": 0.7065,
      "step": 5584
    },
    {
      "epoch": 0.853420942048363,
      "grad_norm": 0.3406347632408142,
      "learning_rate": 1.1239450390994487e-05,
      "loss": 0.6882,
      "step": 5585
    },
    {
      "epoch": 0.8535737479466707,
      "grad_norm": 0.2712723910808563,
      "learning_rate": 1.1216470796904099e-05,
      "loss": 0.7421,
      "step": 5586
    },
    {
      "epoch": 0.8537265538449784,
      "grad_norm": 0.26714420318603516,
      "learning_rate": 1.119351332248474e-05,
      "loss": 0.7186,
      "step": 5587
    },
    {
      "epoch": 0.8538793597432861,
      "grad_norm": 0.255027711391449,
      "learning_rate": 1.1170577973456097e-05,
      "loss": 0.5784,
      "step": 5588
    },
    {
      "epoch": 0.8540321656415938,
      "grad_norm": 0.6876900792121887,
      "learning_rate": 1.1147664755532272e-05,
      "loss": 0.6206,
      "step": 5589
    },
    {
      "epoch": 0.8541849715399015,
      "grad_norm": 0.31581729650497437,
      "learning_rate": 1.1124773674421951e-05,
      "loss": 0.5449,
      "step": 5590
    },
    {
      "epoch": 0.8543377774382092,
      "grad_norm": 0.291388601064682,
      "learning_rate": 1.1101904735828206e-05,
      "loss": 0.6831,
      "step": 5591
    },
    {
      "epoch": 0.8544905833365168,
      "grad_norm": 0.2843266725540161,
      "learning_rate": 1.1079057945448678e-05,
      "loss": 0.6111,
      "step": 5592
    },
    {
      "epoch": 0.8546433892348244,
      "grad_norm": 0.4129765033721924,
      "learning_rate": 1.1056233308975428e-05,
      "loss": 0.6729,
      "step": 5593
    },
    {
      "epoch": 0.8547961951331321,
      "grad_norm": 0.29213792085647583,
      "learning_rate": 1.1033430832095049e-05,
      "loss": 0.796,
      "step": 5594
    },
    {
      "epoch": 0.8549490010314398,
      "grad_norm": 0.277227520942688,
      "learning_rate": 1.1010650520488564e-05,
      "loss": 0.4999,
      "step": 5595
    },
    {
      "epoch": 0.8551018069297475,
      "grad_norm": 0.2723330855369568,
      "learning_rate": 1.09878923798315e-05,
      "loss": 0.7722,
      "step": 5596
    },
    {
      "epoch": 0.8552546128280551,
      "grad_norm": 0.25330761075019836,
      "learning_rate": 1.0965156415793843e-05,
      "loss": 0.6138,
      "step": 5597
    },
    {
      "epoch": 0.8554074187263628,
      "grad_norm": 0.28613653779029846,
      "learning_rate": 1.0942442634040118e-05,
      "loss": 0.8158,
      "step": 5598
    },
    {
      "epoch": 0.8555602246246705,
      "grad_norm": 0.2570474445819855,
      "learning_rate": 1.0919751040229231e-05,
      "loss": 0.6388,
      "step": 5599
    },
    {
      "epoch": 0.8557130305229782,
      "grad_norm": 0.260789155960083,
      "learning_rate": 1.0897081640014594e-05,
      "loss": 0.8127,
      "step": 5600
    },
    {
      "epoch": 0.8558658364212859,
      "grad_norm": 0.30257099866867065,
      "learning_rate": 1.0874434439044122e-05,
      "loss": 0.697,
      "step": 5601
    },
    {
      "epoch": 0.8560186423195936,
      "grad_norm": 0.28331151604652405,
      "learning_rate": 1.085180944296018e-05,
      "loss": 0.646,
      "step": 5602
    },
    {
      "epoch": 0.8561714482179013,
      "grad_norm": 0.31558719277381897,
      "learning_rate": 1.0829206657399581e-05,
      "loss": 0.7261,
      "step": 5603
    },
    {
      "epoch": 0.8563242541162089,
      "grad_norm": 0.30309322476387024,
      "learning_rate": 1.080662608799361e-05,
      "loss": 0.6796,
      "step": 5604
    },
    {
      "epoch": 0.8564770600145165,
      "grad_norm": 0.2481728047132492,
      "learning_rate": 1.0784067740368032e-05,
      "loss": 0.5802,
      "step": 5605
    },
    {
      "epoch": 0.8566298659128242,
      "grad_norm": 0.2914709448814392,
      "learning_rate": 1.0761531620143106e-05,
      "loss": 0.7447,
      "step": 5606
    },
    {
      "epoch": 0.8567826718111319,
      "grad_norm": 0.32431426644325256,
      "learning_rate": 1.0739017732933476e-05,
      "loss": 0.5631,
      "step": 5607
    },
    {
      "epoch": 0.8569354777094396,
      "grad_norm": 0.24270617961883545,
      "learning_rate": 1.0716526084348277e-05,
      "loss": 0.5381,
      "step": 5608
    },
    {
      "epoch": 0.8570882836077472,
      "grad_norm": 0.27154895663261414,
      "learning_rate": 1.069405667999115e-05,
      "loss": 0.6087,
      "step": 5609
    },
    {
      "epoch": 0.8572410895060549,
      "grad_norm": 0.3157510459423065,
      "learning_rate": 1.0671609525460158e-05,
      "loss": 0.7442,
      "step": 5610
    },
    {
      "epoch": 0.8573938954043626,
      "grad_norm": 0.2895173728466034,
      "learning_rate": 1.0649184626347807e-05,
      "loss": 0.7309,
      "step": 5611
    },
    {
      "epoch": 0.8575467013026703,
      "grad_norm": 0.2819909155368805,
      "learning_rate": 1.0626781988241064e-05,
      "loss": 0.6276,
      "step": 5612
    },
    {
      "epoch": 0.857699507200978,
      "grad_norm": 0.3299407660961151,
      "learning_rate": 1.0604401616721371e-05,
      "loss": 0.8517,
      "step": 5613
    },
    {
      "epoch": 0.8578523130992857,
      "grad_norm": 0.3332931697368622,
      "learning_rate": 1.0582043517364604e-05,
      "loss": 0.7648,
      "step": 5614
    },
    {
      "epoch": 0.8580051189975934,
      "grad_norm": 0.3062601089477539,
      "learning_rate": 1.0559707695741083e-05,
      "loss": 0.7078,
      "step": 5615
    },
    {
      "epoch": 0.858157924895901,
      "grad_norm": 0.23787756264209747,
      "learning_rate": 1.0537394157415637e-05,
      "loss": 0.6386,
      "step": 5616
    },
    {
      "epoch": 0.8583107307942086,
      "grad_norm": 0.2560744285583496,
      "learning_rate": 1.0515102907947461e-05,
      "loss": 0.7632,
      "step": 5617
    },
    {
      "epoch": 0.8584635366925163,
      "grad_norm": 0.3579595685005188,
      "learning_rate": 1.0492833952890225e-05,
      "loss": 0.7741,
      "step": 5618
    },
    {
      "epoch": 0.858616342590824,
      "grad_norm": 0.2760065793991089,
      "learning_rate": 1.0470587297792056e-05,
      "loss": 0.691,
      "step": 5619
    },
    {
      "epoch": 0.8587691484891317,
      "grad_norm": 0.3856504559516907,
      "learning_rate": 1.0448362948195567e-05,
      "loss": 0.679,
      "step": 5620
    },
    {
      "epoch": 0.8589219543874393,
      "grad_norm": 0.3312358558177948,
      "learning_rate": 1.0426160909637694e-05,
      "loss": 0.8009,
      "step": 5621
    },
    {
      "epoch": 0.859074760285747,
      "grad_norm": 0.3192104995250702,
      "learning_rate": 1.0403981187649936e-05,
      "loss": 0.6937,
      "step": 5622
    },
    {
      "epoch": 0.8592275661840547,
      "grad_norm": 0.2923565208911896,
      "learning_rate": 1.038182378775816e-05,
      "loss": 0.8538,
      "step": 5623
    },
    {
      "epoch": 0.8593803720823624,
      "grad_norm": 0.2979848384857178,
      "learning_rate": 1.0359688715482741e-05,
      "loss": 0.6309,
      "step": 5624
    },
    {
      "epoch": 0.8595331779806701,
      "grad_norm": 0.41364747285842896,
      "learning_rate": 1.033757597633841e-05,
      "loss": 0.6971,
      "step": 5625
    },
    {
      "epoch": 0.8596859838789778,
      "grad_norm": 0.2538832724094391,
      "learning_rate": 1.031548557583436e-05,
      "loss": 0.5638,
      "step": 5626
    },
    {
      "epoch": 0.8598387897772855,
      "grad_norm": 0.32028117775917053,
      "learning_rate": 1.0293417519474268e-05,
      "loss": 0.8821,
      "step": 5627
    },
    {
      "epoch": 0.859991595675593,
      "grad_norm": 0.417066365480423,
      "learning_rate": 1.0271371812756158e-05,
      "loss": 0.7425,
      "step": 5628
    },
    {
      "epoch": 0.8601444015739007,
      "grad_norm": 0.36033546924591064,
      "learning_rate": 1.024934846117257e-05,
      "loss": 0.692,
      "step": 5629
    },
    {
      "epoch": 0.8602972074722084,
      "grad_norm": 0.2695680260658264,
      "learning_rate": 1.0227347470210413e-05,
      "loss": 0.6444,
      "step": 5630
    },
    {
      "epoch": 0.8604500133705161,
      "grad_norm": 0.28025510907173157,
      "learning_rate": 1.0205368845351082e-05,
      "loss": 0.7348,
      "step": 5631
    },
    {
      "epoch": 0.8606028192688238,
      "grad_norm": 0.27903392910957336,
      "learning_rate": 1.0183412592070319e-05,
      "loss": 0.5588,
      "step": 5632
    },
    {
      "epoch": 0.8607556251671314,
      "grad_norm": 0.29430249333381653,
      "learning_rate": 1.016147871583839e-05,
      "loss": 0.7455,
      "step": 5633
    },
    {
      "epoch": 0.8609084310654391,
      "grad_norm": 0.27932068705558777,
      "learning_rate": 1.0139567222119906e-05,
      "loss": 0.5564,
      "step": 5634
    },
    {
      "epoch": 0.8610612369637468,
      "grad_norm": 0.3375709354877472,
      "learning_rate": 1.0117678116373929e-05,
      "loss": 0.6618,
      "step": 5635
    },
    {
      "epoch": 0.8612140428620545,
      "grad_norm": 0.2798751890659332,
      "learning_rate": 1.0095811404053946e-05,
      "loss": 0.7448,
      "step": 5636
    },
    {
      "epoch": 0.8613668487603622,
      "grad_norm": 0.27346816658973694,
      "learning_rate": 1.0073967090607894e-05,
      "loss": 0.7263,
      "step": 5637
    },
    {
      "epoch": 0.8615196546586699,
      "grad_norm": 0.29813331365585327,
      "learning_rate": 1.005214518147809e-05,
      "loss": 0.6409,
      "step": 5638
    },
    {
      "epoch": 0.8616724605569775,
      "grad_norm": 0.29515641927719116,
      "learning_rate": 1.0030345682101239e-05,
      "loss": 0.7254,
      "step": 5639
    },
    {
      "epoch": 0.8618252664552851,
      "grad_norm": 0.38635021448135376,
      "learning_rate": 1.0008568597908542e-05,
      "loss": 0.6273,
      "step": 5640
    },
    {
      "epoch": 0.8619780723535928,
      "grad_norm": 0.26836127042770386,
      "learning_rate": 9.986813934325589e-06,
      "loss": 0.6707,
      "step": 5641
    },
    {
      "epoch": 0.8621308782519005,
      "grad_norm": 0.2930348515510559,
      "learning_rate": 9.965081696772349e-06,
      "loss": 0.6082,
      "step": 5642
    },
    {
      "epoch": 0.8622836841502082,
      "grad_norm": 0.37345796823501587,
      "learning_rate": 9.94337189066321e-06,
      "loss": 0.6485,
      "step": 5643
    },
    {
      "epoch": 0.8624364900485159,
      "grad_norm": 0.2520149052143097,
      "learning_rate": 9.921684521407004e-06,
      "loss": 0.5862,
      "step": 5644
    },
    {
      "epoch": 0.8625892959468235,
      "grad_norm": 0.36799752712249756,
      "learning_rate": 9.900019594406984e-06,
      "loss": 0.6129,
      "step": 5645
    },
    {
      "epoch": 0.8627421018451312,
      "grad_norm": 0.30241623520851135,
      "learning_rate": 9.878377115060755e-06,
      "loss": 0.8082,
      "step": 5646
    },
    {
      "epoch": 0.8628949077434389,
      "grad_norm": 0.309946745634079,
      "learning_rate": 9.85675708876035e-06,
      "loss": 0.5611,
      "step": 5647
    },
    {
      "epoch": 0.8630477136417466,
      "grad_norm": 0.27384522557258606,
      "learning_rate": 9.835159520892235e-06,
      "loss": 0.5449,
      "step": 5648
    },
    {
      "epoch": 0.8632005195400543,
      "grad_norm": 0.32065561413764954,
      "learning_rate": 9.813584416837273e-06,
      "loss": 0.7328,
      "step": 5649
    },
    {
      "epoch": 0.863353325438362,
      "grad_norm": 0.31820061802864075,
      "learning_rate": 9.79203178197069e-06,
      "loss": 0.7254,
      "step": 5650
    },
    {
      "epoch": 0.8635061313366696,
      "grad_norm": 0.37443435192108154,
      "learning_rate": 9.770501621662176e-06,
      "loss": 0.6916,
      "step": 5651
    },
    {
      "epoch": 0.8636589372349772,
      "grad_norm": 0.5556673407554626,
      "learning_rate": 9.748993941275775e-06,
      "loss": 0.716,
      "step": 5652
    },
    {
      "epoch": 0.8638117431332849,
      "grad_norm": 0.30924415588378906,
      "learning_rate": 9.727508746169934e-06,
      "loss": 0.7234,
      "step": 5653
    },
    {
      "epoch": 0.8639645490315926,
      "grad_norm": 0.2775317430496216,
      "learning_rate": 9.706046041697513e-06,
      "loss": 0.4973,
      "step": 5654
    },
    {
      "epoch": 0.8641173549299003,
      "grad_norm": 0.26694798469543457,
      "learning_rate": 9.684605833205796e-06,
      "loss": 0.6978,
      "step": 5655
    },
    {
      "epoch": 0.864270160828208,
      "grad_norm": 0.2846663296222687,
      "learning_rate": 9.663188126036393e-06,
      "loss": 0.8492,
      "step": 5656
    },
    {
      "epoch": 0.8644229667265156,
      "grad_norm": 0.442914217710495,
      "learning_rate": 9.64179292552535e-06,
      "loss": 0.8636,
      "step": 5657
    },
    {
      "epoch": 0.8645757726248233,
      "grad_norm": 0.4035290777683258,
      "learning_rate": 9.620420237003114e-06,
      "loss": 0.6299,
      "step": 5658
    },
    {
      "epoch": 0.864728578523131,
      "grad_norm": 0.26495277881622314,
      "learning_rate": 9.599070065794525e-06,
      "loss": 0.7732,
      "step": 5659
    },
    {
      "epoch": 0.8648813844214387,
      "grad_norm": 0.30105409026145935,
      "learning_rate": 9.577742417218782e-06,
      "loss": 0.6551,
      "step": 5660
    },
    {
      "epoch": 0.8650341903197464,
      "grad_norm": 0.2992299199104309,
      "learning_rate": 9.55643729658946e-06,
      "loss": 0.7077,
      "step": 5661
    },
    {
      "epoch": 0.8651869962180541,
      "grad_norm": 0.3086511492729187,
      "learning_rate": 9.535154709214589e-06,
      "loss": 0.6582,
      "step": 5662
    },
    {
      "epoch": 0.8653398021163617,
      "grad_norm": 0.30483368039131165,
      "learning_rate": 9.51389466039656e-06,
      "loss": 0.7493,
      "step": 5663
    },
    {
      "epoch": 0.8654926080146693,
      "grad_norm": 0.2668604850769043,
      "learning_rate": 9.492657155432105e-06,
      "loss": 0.7509,
      "step": 5664
    },
    {
      "epoch": 0.865645413912977,
      "grad_norm": 0.2963505685329437,
      "learning_rate": 9.471442199612367e-06,
      "loss": 0.7943,
      "step": 5665
    },
    {
      "epoch": 0.8657982198112847,
      "grad_norm": 0.31135594844818115,
      "learning_rate": 9.45024979822291e-06,
      "loss": 0.7238,
      "step": 5666
    },
    {
      "epoch": 0.8659510257095924,
      "grad_norm": 0.3372398018836975,
      "learning_rate": 9.429079956543596e-06,
      "loss": 0.7797,
      "step": 5667
    },
    {
      "epoch": 0.8661038316079,
      "grad_norm": 0.3138117492198944,
      "learning_rate": 9.407932679848751e-06,
      "loss": 0.6864,
      "step": 5668
    },
    {
      "epoch": 0.8662566375062077,
      "grad_norm": 0.27608153223991394,
      "learning_rate": 9.386807973407007e-06,
      "loss": 0.709,
      "step": 5669
    },
    {
      "epoch": 0.8664094434045154,
      "grad_norm": 0.27074432373046875,
      "learning_rate": 9.365705842481454e-06,
      "loss": 0.6483,
      "step": 5670
    },
    {
      "epoch": 0.8665622493028231,
      "grad_norm": 0.28649184107780457,
      "learning_rate": 9.34462629232946e-06,
      "loss": 0.7414,
      "step": 5671
    },
    {
      "epoch": 0.8667150552011308,
      "grad_norm": 0.33052703738212585,
      "learning_rate": 9.323569328202853e-06,
      "loss": 0.7705,
      "step": 5672
    },
    {
      "epoch": 0.8668678610994385,
      "grad_norm": 0.32664650678634644,
      "learning_rate": 9.302534955347796e-06,
      "loss": 0.7885,
      "step": 5673
    },
    {
      "epoch": 0.8670206669977462,
      "grad_norm": 0.25715407729148865,
      "learning_rate": 9.281523179004803e-06,
      "loss": 0.6727,
      "step": 5674
    },
    {
      "epoch": 0.8671734728960538,
      "grad_norm": 0.27501925826072693,
      "learning_rate": 9.260534004408795e-06,
      "loss": 0.5609,
      "step": 5675
    },
    {
      "epoch": 0.8673262787943614,
      "grad_norm": 0.2649621367454529,
      "learning_rate": 9.239567436789053e-06,
      "loss": 0.757,
      "step": 5676
    },
    {
      "epoch": 0.8674790846926691,
      "grad_norm": 0.28358912467956543,
      "learning_rate": 9.218623481369249e-06,
      "loss": 0.6585,
      "step": 5677
    },
    {
      "epoch": 0.8676318905909768,
      "grad_norm": 0.330975741147995,
      "learning_rate": 9.197702143367327e-06,
      "loss": 0.7913,
      "step": 5678
    },
    {
      "epoch": 0.8677846964892845,
      "grad_norm": 0.2609249949455261,
      "learning_rate": 9.176803427995706e-06,
      "loss": 0.6313,
      "step": 5679
    },
    {
      "epoch": 0.8679375023875922,
      "grad_norm": 0.27459460496902466,
      "learning_rate": 9.155927340461112e-06,
      "loss": 0.6604,
      "step": 5680
    },
    {
      "epoch": 0.8680903082858998,
      "grad_norm": 0.2390647977590561,
      "learning_rate": 9.135073885964695e-06,
      "loss": 0.6527,
      "step": 5681
    },
    {
      "epoch": 0.8682431141842075,
      "grad_norm": 0.28790050745010376,
      "learning_rate": 9.114243069701844e-06,
      "loss": 0.8257,
      "step": 5682
    },
    {
      "epoch": 0.8683959200825152,
      "grad_norm": 0.3109422028064728,
      "learning_rate": 9.093434896862408e-06,
      "loss": 0.4674,
      "step": 5683
    },
    {
      "epoch": 0.8685487259808229,
      "grad_norm": 0.30797600746154785,
      "learning_rate": 9.072649372630592e-06,
      "loss": 0.6488,
      "step": 5684
    },
    {
      "epoch": 0.8687015318791306,
      "grad_norm": 0.45591118931770325,
      "learning_rate": 9.051886502184903e-06,
      "loss": 0.6504,
      "step": 5685
    },
    {
      "epoch": 0.8688543377774383,
      "grad_norm": 0.2956595718860626,
      "learning_rate": 9.031146290698279e-06,
      "loss": 0.7148,
      "step": 5686
    },
    {
      "epoch": 0.8690071436757458,
      "grad_norm": 0.30931755900382996,
      "learning_rate": 9.010428743337906e-06,
      "loss": 0.7228,
      "step": 5687
    },
    {
      "epoch": 0.8691599495740535,
      "grad_norm": 0.27179425954818726,
      "learning_rate": 8.98973386526546e-06,
      "loss": 0.7834,
      "step": 5688
    },
    {
      "epoch": 0.8693127554723612,
      "grad_norm": 0.5840659737586975,
      "learning_rate": 8.969061661636824e-06,
      "loss": 0.6617,
      "step": 5689
    },
    {
      "epoch": 0.8694655613706689,
      "grad_norm": 0.3197341561317444,
      "learning_rate": 8.94841213760237e-06,
      "loss": 0.671,
      "step": 5690
    },
    {
      "epoch": 0.8696183672689766,
      "grad_norm": 0.29900041222572327,
      "learning_rate": 8.927785298306712e-06,
      "loss": 0.593,
      "step": 5691
    },
    {
      "epoch": 0.8697711731672843,
      "grad_norm": 0.31964540481567383,
      "learning_rate": 8.907181148888854e-06,
      "loss": 0.6972,
      "step": 5692
    },
    {
      "epoch": 0.8699239790655919,
      "grad_norm": 0.2736988067626953,
      "learning_rate": 8.886599694482155e-06,
      "loss": 0.735,
      "step": 5693
    },
    {
      "epoch": 0.8700767849638996,
      "grad_norm": 0.26970410346984863,
      "learning_rate": 8.866040940214338e-06,
      "loss": 0.6944,
      "step": 5694
    },
    {
      "epoch": 0.8702295908622073,
      "grad_norm": 0.30608078837394714,
      "learning_rate": 8.845504891207412e-06,
      "loss": 0.8553,
      "step": 5695
    },
    {
      "epoch": 0.870382396760515,
      "grad_norm": 0.3669775128364563,
      "learning_rate": 8.824991552577755e-06,
      "loss": 0.7968,
      "step": 5696
    },
    {
      "epoch": 0.8705352026588227,
      "grad_norm": 0.3871442675590515,
      "learning_rate": 8.80450092943611e-06,
      "loss": 0.8196,
      "step": 5697
    },
    {
      "epoch": 0.8706880085571304,
      "grad_norm": 0.2657804489135742,
      "learning_rate": 8.784033026887551e-06,
      "loss": 0.8516,
      "step": 5698
    },
    {
      "epoch": 0.8708408144554379,
      "grad_norm": 0.42050185799598694,
      "learning_rate": 8.763587850031484e-06,
      "loss": 0.7629,
      "step": 5699
    },
    {
      "epoch": 0.8709936203537456,
      "grad_norm": 0.3306422233581543,
      "learning_rate": 8.743165403961617e-06,
      "loss": 0.7971,
      "step": 5700
    },
    {
      "epoch": 0.8711464262520533,
      "grad_norm": 0.294408917427063,
      "learning_rate": 8.722765693766066e-06,
      "loss": 0.7246,
      "step": 5701
    },
    {
      "epoch": 0.871299232150361,
      "grad_norm": 0.3002341091632843,
      "learning_rate": 8.702388724527255e-06,
      "loss": 0.6067,
      "step": 5702
    },
    {
      "epoch": 0.8714520380486687,
      "grad_norm": 0.34488150477409363,
      "learning_rate": 8.682034501321912e-06,
      "loss": 0.7983,
      "step": 5703
    },
    {
      "epoch": 0.8716048439469763,
      "grad_norm": 0.3561033010482788,
      "learning_rate": 8.661703029221114e-06,
      "loss": 0.7648,
      "step": 5704
    },
    {
      "epoch": 0.871757649845284,
      "grad_norm": 0.30296874046325684,
      "learning_rate": 8.641394313290308e-06,
      "loss": 0.7938,
      "step": 5705
    },
    {
      "epoch": 0.8719104557435917,
      "grad_norm": 0.30546966195106506,
      "learning_rate": 8.621108358589202e-06,
      "loss": 0.7538,
      "step": 5706
    },
    {
      "epoch": 0.8720632616418994,
      "grad_norm": 0.3180548846721649,
      "learning_rate": 8.600845170171911e-06,
      "loss": 0.9197,
      "step": 5707
    },
    {
      "epoch": 0.8722160675402071,
      "grad_norm": 0.25248074531555176,
      "learning_rate": 8.580604753086807e-06,
      "loss": 0.5223,
      "step": 5708
    },
    {
      "epoch": 0.8723688734385148,
      "grad_norm": 0.272348016500473,
      "learning_rate": 8.560387112376645e-06,
      "loss": 0.657,
      "step": 5709
    },
    {
      "epoch": 0.8725216793368225,
      "grad_norm": 0.3801282048225403,
      "learning_rate": 8.54019225307845e-06,
      "loss": 0.731,
      "step": 5710
    },
    {
      "epoch": 0.87267448523513,
      "grad_norm": 0.613310694694519,
      "learning_rate": 8.52002018022362e-06,
      "loss": 0.6538,
      "step": 5711
    },
    {
      "epoch": 0.8728272911334377,
      "grad_norm": 0.2751515805721283,
      "learning_rate": 8.49987089883788e-06,
      "loss": 0.4931,
      "step": 5712
    },
    {
      "epoch": 0.8729800970317454,
      "grad_norm": 0.32077914476394653,
      "learning_rate": 8.479744413941215e-06,
      "loss": 0.7055,
      "step": 5713
    },
    {
      "epoch": 0.8731329029300531,
      "grad_norm": 0.3849603831768036,
      "learning_rate": 8.459640730547979e-06,
      "loss": 0.6672,
      "step": 5714
    },
    {
      "epoch": 0.8732857088283608,
      "grad_norm": 0.39358845353126526,
      "learning_rate": 8.439559853666846e-06,
      "loss": 0.6651,
      "step": 5715
    },
    {
      "epoch": 0.8734385147266684,
      "grad_norm": 0.47464704513549805,
      "learning_rate": 8.41950178830081e-06,
      "loss": 0.6592,
      "step": 5716
    },
    {
      "epoch": 0.8735913206249761,
      "grad_norm": 0.26846784353256226,
      "learning_rate": 8.399466539447154e-06,
      "loss": 0.6931,
      "step": 5717
    },
    {
      "epoch": 0.8737441265232838,
      "grad_norm": 0.2942967414855957,
      "learning_rate": 8.379454112097473e-06,
      "loss": 0.6378,
      "step": 5718
    },
    {
      "epoch": 0.8738969324215915,
      "grad_norm": 0.2899715304374695,
      "learning_rate": 8.359464511237713e-06,
      "loss": 0.8007,
      "step": 5719
    },
    {
      "epoch": 0.8740497383198992,
      "grad_norm": 0.26803848147392273,
      "learning_rate": 8.339497741848146e-06,
      "loss": 0.6989,
      "step": 5720
    },
    {
      "epoch": 0.8742025442182069,
      "grad_norm": 0.34943777322769165,
      "learning_rate": 8.31955380890329e-06,
      "loss": 0.7332,
      "step": 5721
    },
    {
      "epoch": 0.8743553501165146,
      "grad_norm": 0.3253035843372345,
      "learning_rate": 8.299632717371997e-06,
      "loss": 0.4714,
      "step": 5722
    },
    {
      "epoch": 0.8745081560148221,
      "grad_norm": 0.24931125342845917,
      "learning_rate": 8.279734472217471e-06,
      "loss": 0.7059,
      "step": 5723
    },
    {
      "epoch": 0.8746609619131298,
      "grad_norm": 0.2940715253353119,
      "learning_rate": 8.259859078397158e-06,
      "loss": 0.7114,
      "step": 5724
    },
    {
      "epoch": 0.8748137678114375,
      "grad_norm": 0.2790750563144684,
      "learning_rate": 8.240006540862887e-06,
      "loss": 0.9123,
      "step": 5725
    },
    {
      "epoch": 0.8749665737097452,
      "grad_norm": 0.5216521620750427,
      "learning_rate": 8.220176864560724e-06,
      "loss": 0.6532,
      "step": 5726
    },
    {
      "epoch": 0.8751193796080529,
      "grad_norm": 0.3397212326526642,
      "learning_rate": 8.200370054431072e-06,
      "loss": 0.6039,
      "step": 5727
    },
    {
      "epoch": 0.8752721855063605,
      "grad_norm": 0.3261764943599701,
      "learning_rate": 8.180586115408628e-06,
      "loss": 0.6529,
      "step": 5728
    },
    {
      "epoch": 0.8754249914046682,
      "grad_norm": 0.27617859840393066,
      "learning_rate": 8.160825052422417e-06,
      "loss": 0.5977,
      "step": 5729
    },
    {
      "epoch": 0.8755777973029759,
      "grad_norm": 0.24808910489082336,
      "learning_rate": 8.14108687039572e-06,
      "loss": 0.7398,
      "step": 5730
    },
    {
      "epoch": 0.8757306032012836,
      "grad_norm": 0.32295671105384827,
      "learning_rate": 8.121371574246128e-06,
      "loss": 0.7373,
      "step": 5731
    },
    {
      "epoch": 0.8758834090995913,
      "grad_norm": 0.4352364242076874,
      "learning_rate": 8.101679168885546e-06,
      "loss": 0.732,
      "step": 5732
    },
    {
      "epoch": 0.876036214997899,
      "grad_norm": 0.291965126991272,
      "learning_rate": 8.082009659220213e-06,
      "loss": 0.6596,
      "step": 5733
    },
    {
      "epoch": 0.8761890208962065,
      "grad_norm": 0.27537909150123596,
      "learning_rate": 8.06236305015059e-06,
      "loss": 0.7323,
      "step": 5734
    },
    {
      "epoch": 0.8763418267945142,
      "grad_norm": 0.2971029281616211,
      "learning_rate": 8.042739346571437e-06,
      "loss": 0.5912,
      "step": 5735
    },
    {
      "epoch": 0.8764946326928219,
      "grad_norm": 0.3006308674812317,
      "learning_rate": 8.023138553371878e-06,
      "loss": 0.7751,
      "step": 5736
    },
    {
      "epoch": 0.8766474385911296,
      "grad_norm": 0.3444878160953522,
      "learning_rate": 8.003560675435285e-06,
      "loss": 0.813,
      "step": 5737
    },
    {
      "epoch": 0.8768002444894373,
      "grad_norm": 0.28715288639068604,
      "learning_rate": 7.984005717639309e-06,
      "loss": 0.6781,
      "step": 5738
    },
    {
      "epoch": 0.876953050387745,
      "grad_norm": 0.25705134868621826,
      "learning_rate": 7.964473684855888e-06,
      "loss": 0.7919,
      "step": 5739
    },
    {
      "epoch": 0.8771058562860526,
      "grad_norm": 0.3298662304878235,
      "learning_rate": 7.944964581951275e-06,
      "loss": 0.6425,
      "step": 5740
    },
    {
      "epoch": 0.8772586621843603,
      "grad_norm": 0.27563560009002686,
      "learning_rate": 7.925478413786026e-06,
      "loss": 0.9351,
      "step": 5741
    },
    {
      "epoch": 0.877411468082668,
      "grad_norm": 0.29507142305374146,
      "learning_rate": 7.906015185214933e-06,
      "loss": 0.6913,
      "step": 5742
    },
    {
      "epoch": 0.8775642739809757,
      "grad_norm": 0.3378017246723175,
      "learning_rate": 7.886574901087074e-06,
      "loss": 0.6415,
      "step": 5743
    },
    {
      "epoch": 0.8777170798792834,
      "grad_norm": 0.36379972100257874,
      "learning_rate": 7.867157566245874e-06,
      "loss": 0.6531,
      "step": 5744
    },
    {
      "epoch": 0.8778698857775911,
      "grad_norm": 0.2869352102279663,
      "learning_rate": 7.847763185528967e-06,
      "loss": 0.5903,
      "step": 5745
    },
    {
      "epoch": 0.8780226916758986,
      "grad_norm": 0.26820555329322815,
      "learning_rate": 7.828391763768317e-06,
      "loss": 0.7291,
      "step": 5746
    },
    {
      "epoch": 0.8781754975742063,
      "grad_norm": 0.26429784297943115,
      "learning_rate": 7.809043305790165e-06,
      "loss": 0.724,
      "step": 5747
    },
    {
      "epoch": 0.878328303472514,
      "grad_norm": 0.29767340421676636,
      "learning_rate": 7.789717816414999e-06,
      "loss": 0.7074,
      "step": 5748
    },
    {
      "epoch": 0.8784811093708217,
      "grad_norm": 0.3220751881599426,
      "learning_rate": 7.77041530045759e-06,
      "loss": 0.6294,
      "step": 5749
    },
    {
      "epoch": 0.8786339152691294,
      "grad_norm": 0.2906290292739868,
      "learning_rate": 7.751135762727003e-06,
      "loss": 0.661,
      "step": 5750
    },
    {
      "epoch": 0.8787867211674371,
      "grad_norm": 0.37977883219718933,
      "learning_rate": 7.731879208026605e-06,
      "loss": 0.8768,
      "step": 5751
    },
    {
      "epoch": 0.8789395270657447,
      "grad_norm": 0.2759582996368408,
      "learning_rate": 7.71264564115397e-06,
      "loss": 0.652,
      "step": 5752
    },
    {
      "epoch": 0.8790923329640524,
      "grad_norm": 0.37194663286209106,
      "learning_rate": 7.693435066900989e-06,
      "loss": 0.4973,
      "step": 5753
    },
    {
      "epoch": 0.8792451388623601,
      "grad_norm": 0.2985452115535736,
      "learning_rate": 7.674247490053809e-06,
      "loss": 0.5931,
      "step": 5754
    },
    {
      "epoch": 0.8793979447606678,
      "grad_norm": 0.28206849098205566,
      "learning_rate": 7.655082915392887e-06,
      "loss": 0.6726,
      "step": 5755
    },
    {
      "epoch": 0.8795507506589755,
      "grad_norm": 0.35902372002601624,
      "learning_rate": 7.635941347692876e-06,
      "loss": 0.8268,
      "step": 5756
    },
    {
      "epoch": 0.8797035565572832,
      "grad_norm": 0.35157230496406555,
      "learning_rate": 7.616822791722744e-06,
      "loss": 0.7652,
      "step": 5757
    },
    {
      "epoch": 0.8798563624555907,
      "grad_norm": 0.28466176986694336,
      "learning_rate": 7.597727252245723e-06,
      "loss": 0.6142,
      "step": 5758
    },
    {
      "epoch": 0.8800091683538984,
      "grad_norm": 0.2658984661102295,
      "learning_rate": 7.5786547340193304e-06,
      "loss": 0.5487,
      "step": 5759
    },
    {
      "epoch": 0.8801619742522061,
      "grad_norm": 0.2549937069416046,
      "learning_rate": 7.559605241795309e-06,
      "loss": 0.477,
      "step": 5760
    },
    {
      "epoch": 0.8803147801505138,
      "grad_norm": 0.30298152565956116,
      "learning_rate": 7.5405787803196516e-06,
      "loss": 0.5568,
      "step": 5761
    },
    {
      "epoch": 0.8804675860488215,
      "grad_norm": 0.26043254137039185,
      "learning_rate": 7.5215753543326776e-06,
      "loss": 0.7528,
      "step": 5762
    },
    {
      "epoch": 0.8806203919471292,
      "grad_norm": 0.309759259223938,
      "learning_rate": 7.50259496856891e-06,
      "loss": 0.6305,
      "step": 5763
    },
    {
      "epoch": 0.8807731978454368,
      "grad_norm": 0.7723713517189026,
      "learning_rate": 7.483637627757167e-06,
      "loss": 0.7753,
      "step": 5764
    },
    {
      "epoch": 0.8809260037437445,
      "grad_norm": 0.30090397596359253,
      "learning_rate": 7.464703336620493e-06,
      "loss": 0.6874,
      "step": 5765
    },
    {
      "epoch": 0.8810788096420522,
      "grad_norm": 0.26742488145828247,
      "learning_rate": 7.445792099876236e-06,
      "loss": 0.7572,
      "step": 5766
    },
    {
      "epoch": 0.8812316155403599,
      "grad_norm": 0.3087722659111023,
      "learning_rate": 7.426903922235939e-06,
      "loss": 0.654,
      "step": 5767
    },
    {
      "epoch": 0.8813844214386676,
      "grad_norm": 0.5429712533950806,
      "learning_rate": 7.40803880840546e-06,
      "loss": 0.6168,
      "step": 5768
    },
    {
      "epoch": 0.8815372273369753,
      "grad_norm": 0.2850368022918701,
      "learning_rate": 7.3891967630848716e-06,
      "loss": 0.9643,
      "step": 5769
    },
    {
      "epoch": 0.8816900332352828,
      "grad_norm": 0.2823317050933838,
      "learning_rate": 7.3703777909684965e-06,
      "loss": 0.6667,
      "step": 5770
    },
    {
      "epoch": 0.8818428391335905,
      "grad_norm": 0.2532433271408081,
      "learning_rate": 7.351581896744941e-06,
      "loss": 0.5592,
      "step": 5771
    },
    {
      "epoch": 0.8819956450318982,
      "grad_norm": 0.2902817130088806,
      "learning_rate": 7.332809085097047e-06,
      "loss": 0.5788,
      "step": 5772
    },
    {
      "epoch": 0.8821484509302059,
      "grad_norm": 0.30547183752059937,
      "learning_rate": 7.31405936070193e-06,
      "loss": 0.665,
      "step": 5773
    },
    {
      "epoch": 0.8823012568285136,
      "grad_norm": 0.28098851442337036,
      "learning_rate": 7.2953327282308525e-06,
      "loss": 0.6169,
      "step": 5774
    },
    {
      "epoch": 0.8824540627268213,
      "grad_norm": 0.26863008737564087,
      "learning_rate": 7.276629192349449e-06,
      "loss": 0.6753,
      "step": 5775
    },
    {
      "epoch": 0.8826068686251289,
      "grad_norm": 0.3257790207862854,
      "learning_rate": 7.257948757717559e-06,
      "loss": 0.5925,
      "step": 5776
    },
    {
      "epoch": 0.8827596745234366,
      "grad_norm": 0.273481547832489,
      "learning_rate": 7.239291428989214e-06,
      "loss": 0.9063,
      "step": 5777
    },
    {
      "epoch": 0.8829124804217443,
      "grad_norm": 0.27130621671676636,
      "learning_rate": 7.220657210812775e-06,
      "loss": 0.7669,
      "step": 5778
    },
    {
      "epoch": 0.883065286320052,
      "grad_norm": 0.4380197823047638,
      "learning_rate": 7.202046107830762e-06,
      "loss": 0.6786,
      "step": 5779
    },
    {
      "epoch": 0.8832180922183597,
      "grad_norm": 0.2692827582359314,
      "learning_rate": 7.183458124679998e-06,
      "loss": 0.6413,
      "step": 5780
    },
    {
      "epoch": 0.8833708981166674,
      "grad_norm": 0.2981157898902893,
      "learning_rate": 7.1648932659915124e-06,
      "loss": 0.5292,
      "step": 5781
    },
    {
      "epoch": 0.8835237040149749,
      "grad_norm": 0.2957375645637512,
      "learning_rate": 7.146351536390605e-06,
      "loss": 0.7469,
      "step": 5782
    },
    {
      "epoch": 0.8836765099132826,
      "grad_norm": 0.3381440341472626,
      "learning_rate": 7.127832940496771e-06,
      "loss": 0.906,
      "step": 5783
    },
    {
      "epoch": 0.8838293158115903,
      "grad_norm": 0.30220258235931396,
      "learning_rate": 7.1093374829237615e-06,
      "loss": 0.9091,
      "step": 5784
    },
    {
      "epoch": 0.883982121709898,
      "grad_norm": 0.24618121981620789,
      "learning_rate": 7.090865168279559e-06,
      "loss": 0.6997,
      "step": 5785
    },
    {
      "epoch": 0.8841349276082057,
      "grad_norm": 0.22514882683753967,
      "learning_rate": 7.072416001166426e-06,
      "loss": 0.529,
      "step": 5786
    },
    {
      "epoch": 0.8842877335065134,
      "grad_norm": 0.27179720997810364,
      "learning_rate": 7.053989986180776e-06,
      "loss": 0.7022,
      "step": 5787
    },
    {
      "epoch": 0.884440539404821,
      "grad_norm": 0.32559579610824585,
      "learning_rate": 7.035587127913301e-06,
      "loss": 0.8342,
      "step": 5788
    },
    {
      "epoch": 0.8845933453031287,
      "grad_norm": 0.25409793853759766,
      "learning_rate": 7.017207430948936e-06,
      "loss": 0.658,
      "step": 5789
    },
    {
      "epoch": 0.8847461512014364,
      "grad_norm": 0.2802891731262207,
      "learning_rate": 6.998850899866827e-06,
      "loss": 0.6106,
      "step": 5790
    },
    {
      "epoch": 0.8848989570997441,
      "grad_norm": 0.2987188994884491,
      "learning_rate": 6.9805175392403385e-06,
      "loss": 0.7882,
      "step": 5791
    },
    {
      "epoch": 0.8850517629980518,
      "grad_norm": 0.26601770520210266,
      "learning_rate": 6.962207353637063e-06,
      "loss": 0.5598,
      "step": 5792
    },
    {
      "epoch": 0.8852045688963593,
      "grad_norm": 0.2836983799934387,
      "learning_rate": 6.943920347618849e-06,
      "loss": 0.6301,
      "step": 5793
    },
    {
      "epoch": 0.885357374794667,
      "grad_norm": 0.3127320110797882,
      "learning_rate": 6.925656525741753e-06,
      "loss": 0.6502,
      "step": 5794
    },
    {
      "epoch": 0.8855101806929747,
      "grad_norm": 0.29738175868988037,
      "learning_rate": 6.907415892556046e-06,
      "loss": 0.763,
      "step": 5795
    },
    {
      "epoch": 0.8856629865912824,
      "grad_norm": 0.27413830161094666,
      "learning_rate": 6.8891984526062155e-06,
      "loss": 0.8139,
      "step": 5796
    },
    {
      "epoch": 0.8858157924895901,
      "grad_norm": 0.29911452531814575,
      "learning_rate": 6.871004210430998e-06,
      "loss": 0.7271,
      "step": 5797
    },
    {
      "epoch": 0.8859685983878978,
      "grad_norm": 0.2909379303455353,
      "learning_rate": 6.852833170563344e-06,
      "loss": 0.6116,
      "step": 5798
    },
    {
      "epoch": 0.8861214042862054,
      "grad_norm": 0.2798643708229065,
      "learning_rate": 6.834685337530411e-06,
      "loss": 0.6803,
      "step": 5799
    },
    {
      "epoch": 0.8862742101845131,
      "grad_norm": 0.33216592669487,
      "learning_rate": 6.816560715853548e-06,
      "loss": 0.8831,
      "step": 5800
    },
    {
      "epoch": 0.8864270160828208,
      "grad_norm": 0.2739565074443817,
      "learning_rate": 6.7984593100484e-06,
      "loss": 0.7547,
      "step": 5801
    },
    {
      "epoch": 0.8865798219811285,
      "grad_norm": 0.27872902154922485,
      "learning_rate": 6.780381124624746e-06,
      "loss": 0.71,
      "step": 5802
    },
    {
      "epoch": 0.8867326278794362,
      "grad_norm": 0.30003616213798523,
      "learning_rate": 6.7623261640866185e-06,
      "loss": 0.7366,
      "step": 5803
    },
    {
      "epoch": 0.8868854337777439,
      "grad_norm": 0.36598697304725647,
      "learning_rate": 6.744294432932296e-06,
      "loss": 0.7659,
      "step": 5804
    },
    {
      "epoch": 0.8870382396760514,
      "grad_norm": 0.2758978307247162,
      "learning_rate": 6.726285935654197e-06,
      "loss": 0.5918,
      "step": 5805
    },
    {
      "epoch": 0.8871910455743591,
      "grad_norm": 0.2967734634876251,
      "learning_rate": 6.708300676738977e-06,
      "loss": 0.7183,
      "step": 5806
    },
    {
      "epoch": 0.8873438514726668,
      "grad_norm": 0.3790004551410675,
      "learning_rate": 6.690338660667527e-06,
      "loss": 0.5838,
      "step": 5807
    },
    {
      "epoch": 0.8874966573709745,
      "grad_norm": 0.27603670954704285,
      "learning_rate": 6.672399891914982e-06,
      "loss": 0.741,
      "step": 5808
    },
    {
      "epoch": 0.8876494632692822,
      "grad_norm": 0.2567933201789856,
      "learning_rate": 6.654484374950543e-06,
      "loss": 0.6312,
      "step": 5809
    },
    {
      "epoch": 0.8878022691675899,
      "grad_norm": 0.3038698732852936,
      "learning_rate": 6.6365921142377606e-06,
      "loss": 0.8309,
      "step": 5810
    },
    {
      "epoch": 0.8879550750658975,
      "grad_norm": 0.29641303420066833,
      "learning_rate": 6.618723114234337e-06,
      "loss": 0.7406,
      "step": 5811
    },
    {
      "epoch": 0.8881078809642052,
      "grad_norm": 0.28365176916122437,
      "learning_rate": 6.600877379392212e-06,
      "loss": 0.5779,
      "step": 5812
    },
    {
      "epoch": 0.8882606868625129,
      "grad_norm": 0.32412979006767273,
      "learning_rate": 6.58305491415746e-06,
      "loss": 0.7036,
      "step": 5813
    },
    {
      "epoch": 0.8884134927608206,
      "grad_norm": 0.2406160682439804,
      "learning_rate": 6.565255722970398e-06,
      "loss": 0.6547,
      "step": 5814
    },
    {
      "epoch": 0.8885662986591283,
      "grad_norm": 0.3018895983695984,
      "learning_rate": 6.547479810265578e-06,
      "loss": 0.7239,
      "step": 5815
    },
    {
      "epoch": 0.888719104557436,
      "grad_norm": 0.30829906463623047,
      "learning_rate": 6.529727180471679e-06,
      "loss": 0.6411,
      "step": 5816
    },
    {
      "epoch": 0.8888719104557435,
      "grad_norm": 0.4289684295654297,
      "learning_rate": 6.511997838011663e-06,
      "loss": 0.8104,
      "step": 5817
    },
    {
      "epoch": 0.8890247163540512,
      "grad_norm": 0.29046669602394104,
      "learning_rate": 6.494291787302609e-06,
      "loss": 0.7391,
      "step": 5818
    },
    {
      "epoch": 0.8891775222523589,
      "grad_norm": 0.25001123547554016,
      "learning_rate": 6.476609032755854e-06,
      "loss": 0.6197,
      "step": 5819
    },
    {
      "epoch": 0.8893303281506666,
      "grad_norm": 0.4297448694705963,
      "learning_rate": 6.458949578776885e-06,
      "loss": 0.8025,
      "step": 5820
    },
    {
      "epoch": 0.8894831340489743,
      "grad_norm": 0.4375990927219391,
      "learning_rate": 6.44131342976545e-06,
      "loss": 0.7262,
      "step": 5821
    },
    {
      "epoch": 0.889635939947282,
      "grad_norm": 0.3232177793979645,
      "learning_rate": 6.4237005901154114e-06,
      "loss": 0.716,
      "step": 5822
    },
    {
      "epoch": 0.8897887458455896,
      "grad_norm": 0.26701030135154724,
      "learning_rate": 6.406111064214848e-06,
      "loss": 0.7209,
      "step": 5823
    },
    {
      "epoch": 0.8899415517438973,
      "grad_norm": 0.24602992832660675,
      "learning_rate": 6.388544856446066e-06,
      "loss": 0.6626,
      "step": 5824
    },
    {
      "epoch": 0.890094357642205,
      "grad_norm": 0.3176684081554413,
      "learning_rate": 6.371001971185553e-06,
      "loss": 0.7256,
      "step": 5825
    },
    {
      "epoch": 0.8902471635405127,
      "grad_norm": 0.2829485535621643,
      "learning_rate": 6.3534824128039575e-06,
      "loss": 0.7708,
      "step": 5826
    },
    {
      "epoch": 0.8903999694388204,
      "grad_norm": 0.34358328580856323,
      "learning_rate": 6.335986185666109e-06,
      "loss": 1.0291,
      "step": 5827
    },
    {
      "epoch": 0.8905527753371281,
      "grad_norm": 0.2644045054912567,
      "learning_rate": 6.318513294131067e-06,
      "loss": 0.7311,
      "step": 5828
    },
    {
      "epoch": 0.8907055812354356,
      "grad_norm": 0.3157048523426056,
      "learning_rate": 6.30106374255206e-06,
      "loss": 0.6677,
      "step": 5829
    },
    {
      "epoch": 0.8908583871337433,
      "grad_norm": 0.31687742471694946,
      "learning_rate": 6.283637535276498e-06,
      "loss": 0.6844,
      "step": 5830
    },
    {
      "epoch": 0.891011193032051,
      "grad_norm": 0.2959498465061188,
      "learning_rate": 6.266234676645943e-06,
      "loss": 0.692,
      "step": 5831
    },
    {
      "epoch": 0.8911639989303587,
      "grad_norm": 0.4458772838115692,
      "learning_rate": 6.248855170996204e-06,
      "loss": 0.8595,
      "step": 5832
    },
    {
      "epoch": 0.8913168048286664,
      "grad_norm": 0.27008432149887085,
      "learning_rate": 6.231499022657239e-06,
      "loss": 0.6832,
      "step": 5833
    },
    {
      "epoch": 0.8914696107269741,
      "grad_norm": 1.1612355709075928,
      "learning_rate": 6.214166235953178e-06,
      "loss": 0.906,
      "step": 5834
    },
    {
      "epoch": 0.8916224166252817,
      "grad_norm": 0.2569592297077179,
      "learning_rate": 6.196856815202323e-06,
      "loss": 0.7152,
      "step": 5835
    },
    {
      "epoch": 0.8917752225235894,
      "grad_norm": 0.3061888813972473,
      "learning_rate": 6.17957076471718e-06,
      "loss": 0.704,
      "step": 5836
    },
    {
      "epoch": 0.8919280284218971,
      "grad_norm": 0.2537577748298645,
      "learning_rate": 6.1623080888044475e-06,
      "loss": 0.6497,
      "step": 5837
    },
    {
      "epoch": 0.8920808343202048,
      "grad_norm": 0.3566432595252991,
      "learning_rate": 6.145068791764952e-06,
      "loss": 0.7981,
      "step": 5838
    },
    {
      "epoch": 0.8922336402185125,
      "grad_norm": 0.299544095993042,
      "learning_rate": 6.127852877893736e-06,
      "loss": 0.8284,
      "step": 5839
    },
    {
      "epoch": 0.8923864461168202,
      "grad_norm": 0.3752456605434418,
      "learning_rate": 6.11066035147998e-06,
      "loss": 0.5426,
      "step": 5840
    },
    {
      "epoch": 0.8925392520151277,
      "grad_norm": 0.2913227677345276,
      "learning_rate": 6.093491216807068e-06,
      "loss": 0.7273,
      "step": 5841
    },
    {
      "epoch": 0.8926920579134354,
      "grad_norm": 0.26798391342163086,
      "learning_rate": 6.076345478152534e-06,
      "loss": 0.7619,
      "step": 5842
    },
    {
      "epoch": 0.8928448638117431,
      "grad_norm": 0.278884619474411,
      "learning_rate": 6.059223139788128e-06,
      "loss": 0.7944,
      "step": 5843
    },
    {
      "epoch": 0.8929976697100508,
      "grad_norm": 0.3024114966392517,
      "learning_rate": 6.042124205979704e-06,
      "loss": 0.8972,
      "step": 5844
    },
    {
      "epoch": 0.8931504756083585,
      "grad_norm": 0.318230003118515,
      "learning_rate": 6.025048680987322e-06,
      "loss": 0.6585,
      "step": 5845
    },
    {
      "epoch": 0.8933032815066662,
      "grad_norm": 0.27753233909606934,
      "learning_rate": 6.007996569065222e-06,
      "loss": 0.7879,
      "step": 5846
    },
    {
      "epoch": 0.8934560874049738,
      "grad_norm": 0.30329933762550354,
      "learning_rate": 5.990967874461784e-06,
      "loss": 0.6417,
      "step": 5847
    },
    {
      "epoch": 0.8936088933032815,
      "grad_norm": 0.3251444101333618,
      "learning_rate": 5.973962601419569e-06,
      "loss": 0.8238,
      "step": 5848
    },
    {
      "epoch": 0.8937616992015892,
      "grad_norm": 0.3880283236503601,
      "learning_rate": 5.956980754175289e-06,
      "loss": 0.6176,
      "step": 5849
    },
    {
      "epoch": 0.8939145050998969,
      "grad_norm": 0.2770722508430481,
      "learning_rate": 5.940022336959828e-06,
      "loss": 0.6808,
      "step": 5850
    },
    {
      "epoch": 0.8940673109982046,
      "grad_norm": 0.2863628566265106,
      "learning_rate": 5.923087353998246e-06,
      "loss": 0.8039,
      "step": 5851
    },
    {
      "epoch": 0.8942201168965122,
      "grad_norm": 0.2710552513599396,
      "learning_rate": 5.9061758095097505e-06,
      "loss": 0.6318,
      "step": 5852
    },
    {
      "epoch": 0.8943729227948198,
      "grad_norm": 0.2887222170829773,
      "learning_rate": 5.889287707707702e-06,
      "loss": 0.6414,
      "step": 5853
    },
    {
      "epoch": 0.8945257286931275,
      "grad_norm": 0.27116432785987854,
      "learning_rate": 5.872423052799636e-06,
      "loss": 0.5471,
      "step": 5854
    },
    {
      "epoch": 0.8946785345914352,
      "grad_norm": 0.2812448740005493,
      "learning_rate": 5.855581848987224e-06,
      "loss": 0.3823,
      "step": 5855
    },
    {
      "epoch": 0.8948313404897429,
      "grad_norm": 0.4182233512401581,
      "learning_rate": 5.838764100466343e-06,
      "loss": 0.6723,
      "step": 5856
    },
    {
      "epoch": 0.8949841463880506,
      "grad_norm": 0.3221946358680725,
      "learning_rate": 5.821969811426953e-06,
      "loss": 0.7409,
      "step": 5857
    },
    {
      "epoch": 0.8951369522863583,
      "grad_norm": 0.2302420288324356,
      "learning_rate": 5.80519898605324e-06,
      "loss": 0.701,
      "step": 5858
    },
    {
      "epoch": 0.8952897581846659,
      "grad_norm": 0.33879178762435913,
      "learning_rate": 5.788451628523505e-06,
      "loss": 0.5567,
      "step": 5859
    },
    {
      "epoch": 0.8954425640829736,
      "grad_norm": 0.28119370341300964,
      "learning_rate": 5.771727743010213e-06,
      "loss": 0.8658,
      "step": 5860
    },
    {
      "epoch": 0.8955953699812813,
      "grad_norm": 0.2939225733280182,
      "learning_rate": 5.755027333679974e-06,
      "loss": 0.6298,
      "step": 5861
    },
    {
      "epoch": 0.895748175879589,
      "grad_norm": 0.27552029490470886,
      "learning_rate": 5.738350404693571e-06,
      "loss": 0.74,
      "step": 5862
    },
    {
      "epoch": 0.8959009817778967,
      "grad_norm": 0.4174579381942749,
      "learning_rate": 5.7216969602058915e-06,
      "loss": 0.6768,
      "step": 5863
    },
    {
      "epoch": 0.8960537876762042,
      "grad_norm": 0.37528759241104126,
      "learning_rate": 5.705067004366027e-06,
      "loss": 0.8695,
      "step": 5864
    },
    {
      "epoch": 0.8962065935745119,
      "grad_norm": 0.27183693647384644,
      "learning_rate": 5.6884605413172085e-06,
      "loss": 0.8489,
      "step": 5865
    },
    {
      "epoch": 0.8963593994728196,
      "grad_norm": 0.27479737997055054,
      "learning_rate": 5.671877575196749e-06,
      "loss": 0.5745,
      "step": 5866
    },
    {
      "epoch": 0.8965122053711273,
      "grad_norm": 0.6669013500213623,
      "learning_rate": 5.655318110136165e-06,
      "loss": 0.5943,
      "step": 5867
    },
    {
      "epoch": 0.896665011269435,
      "grad_norm": 0.2586415410041809,
      "learning_rate": 5.638782150261135e-06,
      "loss": 0.5973,
      "step": 5868
    },
    {
      "epoch": 0.8968178171677427,
      "grad_norm": 0.2983604073524475,
      "learning_rate": 5.6222696996914625e-06,
      "loss": 0.6632,
      "step": 5869
    },
    {
      "epoch": 0.8969706230660504,
      "grad_norm": 0.4398852586746216,
      "learning_rate": 5.605780762541036e-06,
      "loss": 0.9462,
      "step": 5870
    },
    {
      "epoch": 0.897123428964358,
      "grad_norm": 0.26358821988105774,
      "learning_rate": 5.589315342917967e-06,
      "loss": 0.715,
      "step": 5871
    },
    {
      "epoch": 0.8972762348626657,
      "grad_norm": 0.3196551501750946,
      "learning_rate": 5.572873444924487e-06,
      "loss": 0.7284,
      "step": 5872
    },
    {
      "epoch": 0.8974290407609734,
      "grad_norm": 0.3146829307079315,
      "learning_rate": 5.55645507265693e-06,
      "loss": 0.73,
      "step": 5873
    },
    {
      "epoch": 0.8975818466592811,
      "grad_norm": 0.31993281841278076,
      "learning_rate": 5.5400602302058236e-06,
      "loss": 0.6271,
      "step": 5874
    },
    {
      "epoch": 0.8977346525575888,
      "grad_norm": 0.5897267460823059,
      "learning_rate": 5.523688921655779e-06,
      "loss": 0.8675,
      "step": 5875
    },
    {
      "epoch": 0.8978874584558963,
      "grad_norm": 0.31031474471092224,
      "learning_rate": 5.507341151085599e-06,
      "loss": 0.7015,
      "step": 5876
    },
    {
      "epoch": 0.898040264354204,
      "grad_norm": 0.22663110494613647,
      "learning_rate": 5.49101692256816e-06,
      "loss": 0.7444,
      "step": 5877
    },
    {
      "epoch": 0.8981930702525117,
      "grad_norm": 0.33932214975357056,
      "learning_rate": 5.47471624017053e-06,
      "loss": 0.6108,
      "step": 5878
    },
    {
      "epoch": 0.8983458761508194,
      "grad_norm": 0.26433759927749634,
      "learning_rate": 5.458439107953894e-06,
      "loss": 0.6342,
      "step": 5879
    },
    {
      "epoch": 0.8984986820491271,
      "grad_norm": 0.2902994751930237,
      "learning_rate": 5.44218552997352e-06,
      "loss": 0.6325,
      "step": 5880
    },
    {
      "epoch": 0.8986514879474348,
      "grad_norm": 0.2531551718711853,
      "learning_rate": 5.425955510278891e-06,
      "loss": 0.6177,
      "step": 5881
    },
    {
      "epoch": 0.8988042938457425,
      "grad_norm": 0.293772429227829,
      "learning_rate": 5.409749052913582e-06,
      "loss": 0.6913,
      "step": 5882
    },
    {
      "epoch": 0.8989570997440501,
      "grad_norm": 0.33307167887687683,
      "learning_rate": 5.393566161915276e-06,
      "loss": 0.7166,
      "step": 5883
    },
    {
      "epoch": 0.8991099056423578,
      "grad_norm": 0.2943355441093445,
      "learning_rate": 5.377406841315802e-06,
      "loss": 0.7835,
      "step": 5884
    },
    {
      "epoch": 0.8992627115406655,
      "grad_norm": 0.3272358477115631,
      "learning_rate": 5.36127109514113e-06,
      "loss": 0.7343,
      "step": 5885
    },
    {
      "epoch": 0.8994155174389732,
      "grad_norm": 0.29728636145591736,
      "learning_rate": 5.345158927411354e-06,
      "loss": 0.8396,
      "step": 5886
    },
    {
      "epoch": 0.8995683233372809,
      "grad_norm": 0.2658395767211914,
      "learning_rate": 5.329070342140685e-06,
      "loss": 0.8047,
      "step": 5887
    },
    {
      "epoch": 0.8997211292355884,
      "grad_norm": 0.3411000967025757,
      "learning_rate": 5.313005343337429e-06,
      "loss": 0.8666,
      "step": 5888
    },
    {
      "epoch": 0.8998739351338961,
      "grad_norm": 0.3442920744419098,
      "learning_rate": 5.296963935004062e-06,
      "loss": 0.5833,
      "step": 5889
    },
    {
      "epoch": 0.9000267410322038,
      "grad_norm": 0.24363353848457336,
      "learning_rate": 5.280946121137187e-06,
      "loss": 0.5621,
      "step": 5890
    },
    {
      "epoch": 0.9001795469305115,
      "grad_norm": 0.31754767894744873,
      "learning_rate": 5.2649519057274886e-06,
      "loss": 0.6101,
      "step": 5891
    },
    {
      "epoch": 0.9003323528288192,
      "grad_norm": 0.26689276099205017,
      "learning_rate": 5.2489812927597915e-06,
      "loss": 0.716,
      "step": 5892
    },
    {
      "epoch": 0.9004851587271269,
      "grad_norm": 0.25092315673828125,
      "learning_rate": 5.2330342862130455e-06,
      "loss": 0.7321,
      "step": 5893
    },
    {
      "epoch": 0.9006379646254346,
      "grad_norm": 0.318852037191391,
      "learning_rate": 5.217110890060295e-06,
      "loss": 0.724,
      "step": 5894
    },
    {
      "epoch": 0.9007907705237422,
      "grad_norm": 0.28129202127456665,
      "learning_rate": 5.201211108268755e-06,
      "loss": 0.6249,
      "step": 5895
    },
    {
      "epoch": 0.9009435764220499,
      "grad_norm": 0.31783658266067505,
      "learning_rate": 5.185334944799691e-06,
      "loss": 0.689,
      "step": 5896
    },
    {
      "epoch": 0.9010963823203576,
      "grad_norm": 0.27820682525634766,
      "learning_rate": 5.169482403608528e-06,
      "loss": 0.6808,
      "step": 5897
    },
    {
      "epoch": 0.9012491882186653,
      "grad_norm": 0.2727433443069458,
      "learning_rate": 5.153653488644794e-06,
      "loss": 0.8703,
      "step": 5898
    },
    {
      "epoch": 0.9014019941169729,
      "grad_norm": 0.2737170457839966,
      "learning_rate": 5.137848203852125e-06,
      "loss": 0.6923,
      "step": 5899
    },
    {
      "epoch": 0.9015548000152805,
      "grad_norm": 0.24887260794639587,
      "learning_rate": 5.1220665531682925e-06,
      "loss": 0.7023,
      "step": 5900
    },
    {
      "epoch": 0.9017076059135882,
      "grad_norm": 0.300696462392807,
      "learning_rate": 5.106308540525162e-06,
      "loss": 0.6112,
      "step": 5901
    },
    {
      "epoch": 0.9018604118118959,
      "grad_norm": 0.3953166604042053,
      "learning_rate": 5.090574169848672e-06,
      "loss": 0.6249,
      "step": 5902
    },
    {
      "epoch": 0.9020132177102036,
      "grad_norm": 0.25591349601745605,
      "learning_rate": 5.07486344505893e-06,
      "loss": 0.5919,
      "step": 5903
    },
    {
      "epoch": 0.9021660236085113,
      "grad_norm": 0.23894433677196503,
      "learning_rate": 5.0591763700701625e-06,
      "loss": 0.816,
      "step": 5904
    },
    {
      "epoch": 0.902318829506819,
      "grad_norm": 0.2608323097229004,
      "learning_rate": 5.043512948790641e-06,
      "loss": 0.6331,
      "step": 5905
    },
    {
      "epoch": 0.9024716354051266,
      "grad_norm": 0.3080928325653076,
      "learning_rate": 5.027873185122767e-06,
      "loss": 0.7063,
      "step": 5906
    },
    {
      "epoch": 0.9026244413034343,
      "grad_norm": 0.45490792393684387,
      "learning_rate": 5.012257082963067e-06,
      "loss": 0.771,
      "step": 5907
    },
    {
      "epoch": 0.902777247201742,
      "grad_norm": 0.3092617690563202,
      "learning_rate": 4.996664646202176e-06,
      "loss": 0.6215,
      "step": 5908
    },
    {
      "epoch": 0.9029300531000497,
      "grad_norm": 0.2615375816822052,
      "learning_rate": 4.981095878724817e-06,
      "loss": 0.5978,
      "step": 5909
    },
    {
      "epoch": 0.9030828589983574,
      "grad_norm": 0.30354490876197815,
      "learning_rate": 4.965550784409789e-06,
      "loss": 0.643,
      "step": 5910
    },
    {
      "epoch": 0.903235664896665,
      "grad_norm": 0.3309997022151947,
      "learning_rate": 4.950029367130049e-06,
      "loss": 0.6145,
      "step": 5911
    },
    {
      "epoch": 0.9033884707949726,
      "grad_norm": 0.2688743472099304,
      "learning_rate": 4.934531630752615e-06,
      "loss": 0.769,
      "step": 5912
    },
    {
      "epoch": 0.9035412766932803,
      "grad_norm": 0.3107849657535553,
      "learning_rate": 4.919057579138631e-06,
      "loss": 0.8814,
      "step": 5913
    },
    {
      "epoch": 0.903694082591588,
      "grad_norm": 0.3090243637561798,
      "learning_rate": 4.903607216143303e-06,
      "loss": 0.7884,
      "step": 5914
    },
    {
      "epoch": 0.9038468884898957,
      "grad_norm": 0.35385674238204956,
      "learning_rate": 4.888180545615995e-06,
      "loss": 0.8163,
      "step": 5915
    },
    {
      "epoch": 0.9039996943882034,
      "grad_norm": 0.35798507928848267,
      "learning_rate": 4.872777571400089e-06,
      "loss": 0.6307,
      "step": 5916
    },
    {
      "epoch": 0.9041525002865111,
      "grad_norm": 0.3256695568561554,
      "learning_rate": 4.8573982973331486e-06,
      "loss": 0.9048,
      "step": 5917
    },
    {
      "epoch": 0.9043053061848187,
      "grad_norm": 0.4998894929885864,
      "learning_rate": 4.842042727246776e-06,
      "loss": 0.5458,
      "step": 5918
    },
    {
      "epoch": 0.9044581120831264,
      "grad_norm": 0.2989078164100647,
      "learning_rate": 4.826710864966666e-06,
      "loss": 0.7098,
      "step": 5919
    },
    {
      "epoch": 0.9046109179814341,
      "grad_norm": 0.2614652216434479,
      "learning_rate": 4.811402714312629e-06,
      "loss": 0.6025,
      "step": 5920
    },
    {
      "epoch": 0.9047637238797418,
      "grad_norm": 0.32760369777679443,
      "learning_rate": 4.796118279098593e-06,
      "loss": 0.7237,
      "step": 5921
    },
    {
      "epoch": 0.9049165297780495,
      "grad_norm": 0.2693394720554352,
      "learning_rate": 4.780857563132513e-06,
      "loss": 0.6758,
      "step": 5922
    },
    {
      "epoch": 0.9050693356763571,
      "grad_norm": 0.28089210391044617,
      "learning_rate": 4.7656205702164665e-06,
      "loss": 0.5614,
      "step": 5923
    },
    {
      "epoch": 0.9052221415746647,
      "grad_norm": 0.3213806450366974,
      "learning_rate": 4.750407304146642e-06,
      "loss": 0.815,
      "step": 5924
    },
    {
      "epoch": 0.9053749474729724,
      "grad_norm": 0.40935635566711426,
      "learning_rate": 4.735217768713296e-06,
      "loss": 0.7063,
      "step": 5925
    },
    {
      "epoch": 0.9055277533712801,
      "grad_norm": 0.3049255609512329,
      "learning_rate": 4.720051967700767e-06,
      "loss": 0.7845,
      "step": 5926
    },
    {
      "epoch": 0.9056805592695878,
      "grad_norm": 0.2561148703098297,
      "learning_rate": 4.704909904887478e-06,
      "loss": 0.6889,
      "step": 5927
    },
    {
      "epoch": 0.9058333651678955,
      "grad_norm": 0.3279137909412384,
      "learning_rate": 4.689791584045955e-06,
      "loss": 0.6985,
      "step": 5928
    },
    {
      "epoch": 0.9059861710662032,
      "grad_norm": 0.32065799832344055,
      "learning_rate": 4.6746970089428185e-06,
      "loss": 0.6057,
      "step": 5929
    },
    {
      "epoch": 0.9061389769645108,
      "grad_norm": 0.267499178647995,
      "learning_rate": 4.659626183338728e-06,
      "loss": 0.6368,
      "step": 5930
    },
    {
      "epoch": 0.9062917828628185,
      "grad_norm": 0.34940314292907715,
      "learning_rate": 4.644579110988456e-06,
      "loss": 0.861,
      "step": 5931
    },
    {
      "epoch": 0.9064445887611262,
      "grad_norm": 0.35976719856262207,
      "learning_rate": 4.629555795640872e-06,
      "loss": 0.6298,
      "step": 5932
    },
    {
      "epoch": 0.9065973946594339,
      "grad_norm": 0.3161333501338959,
      "learning_rate": 4.614556241038892e-06,
      "loss": 0.8894,
      "step": 5933
    },
    {
      "epoch": 0.9067502005577416,
      "grad_norm": 0.3225654363632202,
      "learning_rate": 4.599580450919538e-06,
      "loss": 0.5241,
      "step": 5934
    },
    {
      "epoch": 0.9069030064560492,
      "grad_norm": 0.3067881762981415,
      "learning_rate": 4.584628429013904e-06,
      "loss": 0.6733,
      "step": 5935
    },
    {
      "epoch": 0.9070558123543568,
      "grad_norm": 0.3025769591331482,
      "learning_rate": 4.569700179047165e-06,
      "loss": 0.7682,
      "step": 5936
    },
    {
      "epoch": 0.9072086182526645,
      "grad_norm": 0.279767781496048,
      "learning_rate": 4.5547957047385345e-06,
      "loss": 0.6389,
      "step": 5937
    },
    {
      "epoch": 0.9073614241509722,
      "grad_norm": 0.31643933057785034,
      "learning_rate": 4.539915009801376e-06,
      "loss": 0.5944,
      "step": 5938
    },
    {
      "epoch": 0.9075142300492799,
      "grad_norm": 0.3155369460582733,
      "learning_rate": 4.525058097943092e-06,
      "loss": 0.8315,
      "step": 5939
    },
    {
      "epoch": 0.9076670359475876,
      "grad_norm": 0.3218875527381897,
      "learning_rate": 4.51022497286514e-06,
      "loss": 0.6969,
      "step": 5940
    },
    {
      "epoch": 0.9078198418458953,
      "grad_norm": 0.27116522192955017,
      "learning_rate": 4.495415638263057e-06,
      "loss": 0.6819,
      "step": 5941
    },
    {
      "epoch": 0.9079726477442029,
      "grad_norm": 0.27992215752601624,
      "learning_rate": 4.480630097826477e-06,
      "loss": 0.7677,
      "step": 5942
    },
    {
      "epoch": 0.9081254536425106,
      "grad_norm": 0.3074599504470825,
      "learning_rate": 4.465868355239111e-06,
      "loss": 0.5406,
      "step": 5943
    },
    {
      "epoch": 0.9082782595408183,
      "grad_norm": 0.3808681070804596,
      "learning_rate": 4.451130414178706e-06,
      "loss": 0.8077,
      "step": 5944
    },
    {
      "epoch": 0.908431065439126,
      "grad_norm": 0.35826560854911804,
      "learning_rate": 4.4364162783170906e-06,
      "loss": 0.7406,
      "step": 5945
    },
    {
      "epoch": 0.9085838713374337,
      "grad_norm": 0.2683902680873871,
      "learning_rate": 4.421725951320177e-06,
      "loss": 0.5532,
      "step": 5946
    },
    {
      "epoch": 0.9087366772357413,
      "grad_norm": 0.2759500741958618,
      "learning_rate": 4.407059436847938e-06,
      "loss": 0.6203,
      "step": 5947
    },
    {
      "epoch": 0.9088894831340489,
      "grad_norm": 0.2844933867454529,
      "learning_rate": 4.392416738554417e-06,
      "loss": 0.6726,
      "step": 5948
    },
    {
      "epoch": 0.9090422890323566,
      "grad_norm": 0.3025042414665222,
      "learning_rate": 4.377797860087696e-06,
      "loss": 0.8128,
      "step": 5949
    },
    {
      "epoch": 0.9091950949306643,
      "grad_norm": 0.31045666337013245,
      "learning_rate": 4.363202805089972e-06,
      "loss": 0.7232,
      "step": 5950
    },
    {
      "epoch": 0.909347900828972,
      "grad_norm": 0.2824687659740448,
      "learning_rate": 4.348631577197459e-06,
      "loss": 0.7842,
      "step": 5951
    },
    {
      "epoch": 0.9095007067272797,
      "grad_norm": 0.3096737861633301,
      "learning_rate": 4.334084180040488e-06,
      "loss": 0.582,
      "step": 5952
    },
    {
      "epoch": 0.9096535126255874,
      "grad_norm": 0.2907276749610901,
      "learning_rate": 4.319560617243379e-06,
      "loss": 0.6811,
      "step": 5953
    },
    {
      "epoch": 0.909806318523895,
      "grad_norm": 0.28352272510528564,
      "learning_rate": 4.305060892424595e-06,
      "loss": 0.6629,
      "step": 5954
    },
    {
      "epoch": 0.9099591244222027,
      "grad_norm": 0.2939473092556,
      "learning_rate": 4.290585009196591e-06,
      "loss": 0.755,
      "step": 5955
    },
    {
      "epoch": 0.9101119303205104,
      "grad_norm": 0.28185558319091797,
      "learning_rate": 4.276132971165936e-06,
      "loss": 0.7723,
      "step": 5956
    },
    {
      "epoch": 0.9102647362188181,
      "grad_norm": 0.2669297754764557,
      "learning_rate": 4.261704781933218e-06,
      "loss": 0.7339,
      "step": 5957
    },
    {
      "epoch": 0.9104175421171257,
      "grad_norm": 0.28795671463012695,
      "learning_rate": 4.247300445093094e-06,
      "loss": 0.595,
      "step": 5958
    },
    {
      "epoch": 0.9105703480154334,
      "grad_norm": 0.265563428401947,
      "learning_rate": 4.232919964234294e-06,
      "loss": 0.6395,
      "step": 5959
    },
    {
      "epoch": 0.910723153913741,
      "grad_norm": 0.2824648916721344,
      "learning_rate": 4.218563342939586e-06,
      "loss": 0.7736,
      "step": 5960
    },
    {
      "epoch": 0.9108759598120487,
      "grad_norm": 0.28865858912467957,
      "learning_rate": 4.20423058478584e-06,
      "loss": 0.689,
      "step": 5961
    },
    {
      "epoch": 0.9110287657103564,
      "grad_norm": 0.25682947039604187,
      "learning_rate": 4.1899216933438904e-06,
      "loss": 0.625,
      "step": 5962
    },
    {
      "epoch": 0.9111815716086641,
      "grad_norm": 0.2924043834209442,
      "learning_rate": 4.1756366721786845e-06,
      "loss": 0.5827,
      "step": 5963
    },
    {
      "epoch": 0.9113343775069718,
      "grad_norm": 0.3125070333480835,
      "learning_rate": 4.161375524849253e-06,
      "loss": 0.7185,
      "step": 5964
    },
    {
      "epoch": 0.9114871834052795,
      "grad_norm": 0.3678252696990967,
      "learning_rate": 4.147138254908589e-06,
      "loss": 0.6279,
      "step": 5965
    },
    {
      "epoch": 0.9116399893035871,
      "grad_norm": 0.2505943477153778,
      "learning_rate": 4.132924865903842e-06,
      "loss": 0.5451,
      "step": 5966
    },
    {
      "epoch": 0.9117927952018948,
      "grad_norm": 0.2890109419822693,
      "learning_rate": 4.118735361376125e-06,
      "loss": 0.6725,
      "step": 5967
    },
    {
      "epoch": 0.9119456011002025,
      "grad_norm": 0.37433120608329773,
      "learning_rate": 4.104569744860642e-06,
      "loss": 0.8314,
      "step": 5968
    },
    {
      "epoch": 0.9120984069985102,
      "grad_norm": 0.39062103629112244,
      "learning_rate": 4.0904280198866274e-06,
      "loss": 0.5721,
      "step": 5969
    },
    {
      "epoch": 0.9122512128968178,
      "grad_norm": 0.2963562607765198,
      "learning_rate": 4.0763101899774056e-06,
      "loss": 0.7728,
      "step": 5970
    },
    {
      "epoch": 0.9124040187951254,
      "grad_norm": 0.35083603858947754,
      "learning_rate": 4.062216258650264e-06,
      "loss": 0.5537,
      "step": 5971
    },
    {
      "epoch": 0.9125568246934331,
      "grad_norm": 0.2698793113231659,
      "learning_rate": 4.048146229416639e-06,
      "loss": 0.7664,
      "step": 5972
    },
    {
      "epoch": 0.9127096305917408,
      "grad_norm": 0.25656360387802124,
      "learning_rate": 4.034100105781924e-06,
      "loss": 0.5758,
      "step": 5973
    },
    {
      "epoch": 0.9128624364900485,
      "grad_norm": 0.2732607126235962,
      "learning_rate": 4.020077891245622e-06,
      "loss": 0.8021,
      "step": 5974
    },
    {
      "epoch": 0.9130152423883562,
      "grad_norm": 0.2684570550918579,
      "learning_rate": 4.006079589301237e-06,
      "loss": 0.7484,
      "step": 5975
    },
    {
      "epoch": 0.9131680482866639,
      "grad_norm": 0.26051005721092224,
      "learning_rate": 3.992105203436303e-06,
      "loss": 0.8032,
      "step": 5976
    },
    {
      "epoch": 0.9133208541849716,
      "grad_norm": 0.2778724730014801,
      "learning_rate": 3.9781547371324555e-06,
      "loss": 0.5917,
      "step": 5977
    },
    {
      "epoch": 0.9134736600832792,
      "grad_norm": 0.30622413754463196,
      "learning_rate": 3.964228193865327e-06,
      "loss": 0.6081,
      "step": 5978
    },
    {
      "epoch": 0.9136264659815869,
      "grad_norm": 0.47084107995033264,
      "learning_rate": 3.950325577104597e-06,
      "loss": 0.5655,
      "step": 5979
    },
    {
      "epoch": 0.9137792718798946,
      "grad_norm": 0.3108175992965698,
      "learning_rate": 3.936446890313983e-06,
      "loss": 0.7595,
      "step": 5980
    },
    {
      "epoch": 0.9139320777782023,
      "grad_norm": 0.32348400354385376,
      "learning_rate": 3.9225921369512305e-06,
      "loss": 0.6049,
      "step": 5981
    },
    {
      "epoch": 0.9140848836765099,
      "grad_norm": 0.2693932354450226,
      "learning_rate": 3.90876132046818e-06,
      "loss": 0.559,
      "step": 5982
    },
    {
      "epoch": 0.9142376895748175,
      "grad_norm": 0.26965370774269104,
      "learning_rate": 3.894954444310617e-06,
      "loss": 0.5038,
      "step": 5983
    },
    {
      "epoch": 0.9143904954731252,
      "grad_norm": 0.370766818523407,
      "learning_rate": 3.881171511918424e-06,
      "loss": 0.888,
      "step": 5984
    },
    {
      "epoch": 0.9145433013714329,
      "grad_norm": 0.3055818974971771,
      "learning_rate": 3.8674125267255e-06,
      "loss": 0.6979,
      "step": 5985
    },
    {
      "epoch": 0.9146961072697406,
      "grad_norm": 0.3472610116004944,
      "learning_rate": 3.85367749215979e-06,
      "loss": 0.7484,
      "step": 5986
    },
    {
      "epoch": 0.9148489131680483,
      "grad_norm": 0.30086827278137207,
      "learning_rate": 3.83996641164327e-06,
      "loss": 0.7992,
      "step": 5987
    },
    {
      "epoch": 0.915001719066356,
      "grad_norm": 0.328401118516922,
      "learning_rate": 3.826279288591905e-06,
      "loss": 0.6501,
      "step": 5988
    },
    {
      "epoch": 0.9151545249646637,
      "grad_norm": 0.26785561442375183,
      "learning_rate": 3.812616126415769e-06,
      "loss": 0.7008,
      "step": 5989
    },
    {
      "epoch": 0.9153073308629713,
      "grad_norm": 0.2655331492424011,
      "learning_rate": 3.7989769285188823e-06,
      "loss": 0.7452,
      "step": 5990
    },
    {
      "epoch": 0.915460136761279,
      "grad_norm": 0.28456825017929077,
      "learning_rate": 3.7853616982993833e-06,
      "loss": 0.7024,
      "step": 5991
    },
    {
      "epoch": 0.9156129426595867,
      "grad_norm": 0.25299546122550964,
      "learning_rate": 3.771770439149347e-06,
      "loss": 0.6077,
      "step": 5992
    },
    {
      "epoch": 0.9157657485578944,
      "grad_norm": 0.33184099197387695,
      "learning_rate": 3.7582031544549643e-06,
      "loss": 0.6939,
      "step": 5993
    },
    {
      "epoch": 0.915918554456202,
      "grad_norm": 0.26723524928092957,
      "learning_rate": 3.744659847596366e-06,
      "loss": 0.6467,
      "step": 5994
    },
    {
      "epoch": 0.9160713603545096,
      "grad_norm": 0.2578485310077667,
      "learning_rate": 3.7311405219477846e-06,
      "loss": 0.6241,
      "step": 5995
    },
    {
      "epoch": 0.9162241662528173,
      "grad_norm": 0.26527437567710876,
      "learning_rate": 3.7176451808774603e-06,
      "loss": 0.9047,
      "step": 5996
    },
    {
      "epoch": 0.916376972151125,
      "grad_norm": 0.3009488582611084,
      "learning_rate": 3.704173827747592e-06,
      "loss": 0.7634,
      "step": 5997
    },
    {
      "epoch": 0.9165297780494327,
      "grad_norm": 0.3305479884147644,
      "learning_rate": 3.6907264659144846e-06,
      "loss": 0.7967,
      "step": 5998
    },
    {
      "epoch": 0.9166825839477404,
      "grad_norm": 0.4715300500392914,
      "learning_rate": 3.677303098728435e-06,
      "loss": 0.8379,
      "step": 5999
    },
    {
      "epoch": 0.9168353898460481,
      "grad_norm": 0.36732909083366394,
      "learning_rate": 3.66390372953378e-06,
      "loss": 0.7329,
      "step": 6000
    },
    {
      "epoch": 0.9169881957443557,
      "grad_norm": 0.3098233640193939,
      "learning_rate": 3.650528361668837e-06,
      "loss": 0.7784,
      "step": 6001
    },
    {
      "epoch": 0.9171410016426634,
      "grad_norm": 0.38401779532432556,
      "learning_rate": 3.6371769984659633e-06,
      "loss": 0.6848,
      "step": 6002
    },
    {
      "epoch": 0.9172938075409711,
      "grad_norm": 0.26952889561653137,
      "learning_rate": 3.6238496432515647e-06,
      "loss": 0.7228,
      "step": 6003
    },
    {
      "epoch": 0.9174466134392788,
      "grad_norm": 0.2722468376159668,
      "learning_rate": 3.610546299345996e-06,
      "loss": 0.7185,
      "step": 6004
    },
    {
      "epoch": 0.9175994193375865,
      "grad_norm": 0.32538071274757385,
      "learning_rate": 3.5972669700637173e-06,
      "loss": 0.7636,
      "step": 6005
    },
    {
      "epoch": 0.9177522252358941,
      "grad_norm": 0.2902810573577881,
      "learning_rate": 3.584011658713138e-06,
      "loss": 0.7217,
      "step": 6006
    },
    {
      "epoch": 0.9179050311342017,
      "grad_norm": 0.3095985949039459,
      "learning_rate": 3.5707803685967268e-06,
      "loss": 0.6745,
      "step": 6007
    },
    {
      "epoch": 0.9180578370325094,
      "grad_norm": 0.3057194948196411,
      "learning_rate": 3.557573103010925e-06,
      "loss": 0.8661,
      "step": 6008
    },
    {
      "epoch": 0.9182106429308171,
      "grad_norm": 0.2756378948688507,
      "learning_rate": 3.5443898652462336e-06,
      "loss": 0.7208,
      "step": 6009
    },
    {
      "epoch": 0.9183634488291248,
      "grad_norm": 0.2705722749233246,
      "learning_rate": 3.5312306585871147e-06,
      "loss": 0.7542,
      "step": 6010
    },
    {
      "epoch": 0.9185162547274325,
      "grad_norm": 0.3743616044521332,
      "learning_rate": 3.518095486312112e-06,
      "loss": 0.7122,
      "step": 6011
    },
    {
      "epoch": 0.9186690606257402,
      "grad_norm": 0.27353158593177795,
      "learning_rate": 3.5049843516937187e-06,
      "loss": 0.8823,
      "step": 6012
    },
    {
      "epoch": 0.9188218665240478,
      "grad_norm": 0.27861884236335754,
      "learning_rate": 3.491897257998478e-06,
      "loss": 0.5775,
      "step": 6013
    },
    {
      "epoch": 0.9189746724223555,
      "grad_norm": 0.3004818558692932,
      "learning_rate": 3.4788342084869364e-06,
      "loss": 0.5588,
      "step": 6014
    },
    {
      "epoch": 0.9191274783206632,
      "grad_norm": 0.25704312324523926,
      "learning_rate": 3.4657952064136025e-06,
      "loss": 0.7754,
      "step": 6015
    },
    {
      "epoch": 0.9192802842189709,
      "grad_norm": 0.3294251263141632,
      "learning_rate": 3.452780255027066e-06,
      "loss": 0.5161,
      "step": 6016
    },
    {
      "epoch": 0.9194330901172785,
      "grad_norm": 0.32235532999038696,
      "learning_rate": 3.4397893575699e-06,
      "loss": 0.8189,
      "step": 6017
    },
    {
      "epoch": 0.9195858960155862,
      "grad_norm": 0.2824956774711609,
      "learning_rate": 3.4268225172786605e-06,
      "loss": 0.8571,
      "step": 6018
    },
    {
      "epoch": 0.9197387019138938,
      "grad_norm": 0.33709898591041565,
      "learning_rate": 3.4138797373839292e-06,
      "loss": 0.8621,
      "step": 6019
    },
    {
      "epoch": 0.9198915078122015,
      "grad_norm": 0.26414015889167786,
      "learning_rate": 3.400961021110294e-06,
      "loss": 0.6458,
      "step": 6020
    },
    {
      "epoch": 0.9200443137105092,
      "grad_norm": 0.28258901834487915,
      "learning_rate": 3.388066371676346e-06,
      "loss": 0.6819,
      "step": 6021
    },
    {
      "epoch": 0.9201971196088169,
      "grad_norm": 0.2308105230331421,
      "learning_rate": 3.375195792294694e-06,
      "loss": 0.8169,
      "step": 6022
    },
    {
      "epoch": 0.9203499255071246,
      "grad_norm": 0.2916742265224457,
      "learning_rate": 3.3623492861718954e-06,
      "loss": 0.6812,
      "step": 6023
    },
    {
      "epoch": 0.9205027314054323,
      "grad_norm": 0.39090749621391296,
      "learning_rate": 3.349526856508567e-06,
      "loss": 0.7291,
      "step": 6024
    },
    {
      "epoch": 0.92065553730374,
      "grad_norm": 0.293911337852478,
      "learning_rate": 3.3367285064993315e-06,
      "loss": 0.6763,
      "step": 6025
    },
    {
      "epoch": 0.9208083432020476,
      "grad_norm": 0.3265632688999176,
      "learning_rate": 3.3239542393327717e-06,
      "loss": 0.708,
      "step": 6026
    },
    {
      "epoch": 0.9209611491003553,
      "grad_norm": 0.2923089563846588,
      "learning_rate": 3.311204058191486e-06,
      "loss": 0.8443,
      "step": 6027
    },
    {
      "epoch": 0.921113954998663,
      "grad_norm": 0.5083439350128174,
      "learning_rate": 3.2984779662520895e-06,
      "loss": 0.7357,
      "step": 6028
    },
    {
      "epoch": 0.9212667608969706,
      "grad_norm": 0.41934359073638916,
      "learning_rate": 3.2857759666851563e-06,
      "loss": 0.6068,
      "step": 6029
    },
    {
      "epoch": 0.9214195667952783,
      "grad_norm": 0.3560185730457306,
      "learning_rate": 3.2730980626553e-06,
      "loss": 0.8213,
      "step": 6030
    },
    {
      "epoch": 0.9215723726935859,
      "grad_norm": 0.26341885328292847,
      "learning_rate": 3.260444257321127e-06,
      "loss": 0.7186,
      "step": 6031
    },
    {
      "epoch": 0.9217251785918936,
      "grad_norm": 0.26726219058036804,
      "learning_rate": 3.2478145538352044e-06,
      "loss": 0.7076,
      "step": 6032
    },
    {
      "epoch": 0.9218779844902013,
      "grad_norm": 0.30211687088012695,
      "learning_rate": 3.2352089553441266e-06,
      "loss": 0.6466,
      "step": 6033
    },
    {
      "epoch": 0.922030790388509,
      "grad_norm": 0.28752097487449646,
      "learning_rate": 3.222627464988459e-06,
      "loss": 0.7951,
      "step": 6034
    },
    {
      "epoch": 0.9221835962868167,
      "grad_norm": 0.27376800775527954,
      "learning_rate": 3.210070085902794e-06,
      "loss": 0.6021,
      "step": 6035
    },
    {
      "epoch": 0.9223364021851244,
      "grad_norm": 0.27677032351493835,
      "learning_rate": 3.1975368212156965e-06,
      "loss": 0.5663,
      "step": 6036
    },
    {
      "epoch": 0.922489208083432,
      "grad_norm": 0.294606477022171,
      "learning_rate": 3.1850276740497007e-06,
      "loss": 0.7105,
      "step": 6037
    },
    {
      "epoch": 0.9226420139817397,
      "grad_norm": 0.288740873336792,
      "learning_rate": 3.1725426475213817e-06,
      "loss": 0.6642,
      "step": 6038
    },
    {
      "epoch": 0.9227948198800474,
      "grad_norm": 0.2658214569091797,
      "learning_rate": 3.1600817447412613e-06,
      "loss": 0.6853,
      "step": 6039
    },
    {
      "epoch": 0.9229476257783551,
      "grad_norm": 0.3108449876308441,
      "learning_rate": 3.1476449688138896e-06,
      "loss": 0.6996,
      "step": 6040
    },
    {
      "epoch": 0.9231004316766627,
      "grad_norm": 0.320084810256958,
      "learning_rate": 3.1352323228377556e-06,
      "loss": 0.4263,
      "step": 6041
    },
    {
      "epoch": 0.9232532375749704,
      "grad_norm": 0.2856956124305725,
      "learning_rate": 3.1228438099053956e-06,
      "loss": 0.6659,
      "step": 6042
    },
    {
      "epoch": 0.923406043473278,
      "grad_norm": 0.3506624400615692,
      "learning_rate": 3.110479433103286e-06,
      "loss": 0.6637,
      "step": 6043
    },
    {
      "epoch": 0.9235588493715857,
      "grad_norm": 0.2929691970348358,
      "learning_rate": 3.0981391955119065e-06,
      "loss": 0.5573,
      "step": 6044
    },
    {
      "epoch": 0.9237116552698934,
      "grad_norm": 0.28566694259643555,
      "learning_rate": 3.0858231002057313e-06,
      "loss": 0.7845,
      "step": 6045
    },
    {
      "epoch": 0.9238644611682011,
      "grad_norm": 0.2971111536026001,
      "learning_rate": 3.073531150253217e-06,
      "loss": 0.6768,
      "step": 6046
    },
    {
      "epoch": 0.9240172670665088,
      "grad_norm": 0.2790727913379669,
      "learning_rate": 3.0612633487167807e-06,
      "loss": 0.7252,
      "step": 6047
    },
    {
      "epoch": 0.9241700729648165,
      "grad_norm": 0.2813704013824463,
      "learning_rate": 3.0490196986528664e-06,
      "loss": 0.594,
      "step": 6048
    },
    {
      "epoch": 0.9243228788631241,
      "grad_norm": 0.31998634338378906,
      "learning_rate": 3.0368002031118446e-06,
      "loss": 0.5821,
      "step": 6049
    },
    {
      "epoch": 0.9244756847614318,
      "grad_norm": 0.27246496081352234,
      "learning_rate": 3.0246048651381367e-06,
      "loss": 0.7559,
      "step": 6050
    },
    {
      "epoch": 0.9246284906597395,
      "grad_norm": 0.2767251431941986,
      "learning_rate": 3.0124336877700775e-06,
      "loss": 0.5597,
      "step": 6051
    },
    {
      "epoch": 0.9247812965580472,
      "grad_norm": 0.3764009475708008,
      "learning_rate": 3.0002866740400427e-06,
      "loss": 0.6798,
      "step": 6052
    },
    {
      "epoch": 0.9249341024563548,
      "grad_norm": 0.30215245485305786,
      "learning_rate": 2.988163826974344e-06,
      "loss": 0.6884,
      "step": 6053
    },
    {
      "epoch": 0.9250869083546625,
      "grad_norm": 0.4687015414237976,
      "learning_rate": 2.9760651495932766e-06,
      "loss": 0.6384,
      "step": 6054
    },
    {
      "epoch": 0.9252397142529701,
      "grad_norm": 0.33076927065849304,
      "learning_rate": 2.96399064491113e-06,
      "loss": 0.7587,
      "step": 6055
    },
    {
      "epoch": 0.9253925201512778,
      "grad_norm": 0.26885855197906494,
      "learning_rate": 2.9519403159361746e-06,
      "loss": 0.7553,
      "step": 6056
    },
    {
      "epoch": 0.9255453260495855,
      "grad_norm": 0.2613165080547333,
      "learning_rate": 2.939914165670665e-06,
      "loss": 0.7376,
      "step": 6057
    },
    {
      "epoch": 0.9256981319478932,
      "grad_norm": 0.37895190715789795,
      "learning_rate": 2.9279121971107716e-06,
      "loss": 0.4247,
      "step": 6058
    },
    {
      "epoch": 0.9258509378462009,
      "grad_norm": 0.3495396673679352,
      "learning_rate": 2.9159344132467014e-06,
      "loss": 0.7048,
      "step": 6059
    },
    {
      "epoch": 0.9260037437445086,
      "grad_norm": 0.3031073808670044,
      "learning_rate": 2.903980817062646e-06,
      "loss": 0.7942,
      "step": 6060
    },
    {
      "epoch": 0.9261565496428162,
      "grad_norm": 0.3974023759365082,
      "learning_rate": 2.8920514115367113e-06,
      "loss": 0.6494,
      "step": 6061
    },
    {
      "epoch": 0.9263093555411239,
      "grad_norm": 0.35635390877723694,
      "learning_rate": 2.8801461996410207e-06,
      "loss": 0.9328,
      "step": 6062
    },
    {
      "epoch": 0.9264621614394316,
      "grad_norm": 0.31114310026168823,
      "learning_rate": 2.8682651843416563e-06,
      "loss": 0.8144,
      "step": 6063
    },
    {
      "epoch": 0.9266149673377392,
      "grad_norm": 0.2945016622543335,
      "learning_rate": 2.8564083685986843e-06,
      "loss": 0.5831,
      "step": 6064
    },
    {
      "epoch": 0.9267677732360469,
      "grad_norm": 0.29071712493896484,
      "learning_rate": 2.844575755366108e-06,
      "loss": 0.7598,
      "step": 6065
    },
    {
      "epoch": 0.9269205791343545,
      "grad_norm": 0.3301604986190796,
      "learning_rate": 2.832767347591936e-06,
      "loss": 0.7241,
      "step": 6066
    },
    {
      "epoch": 0.9270733850326622,
      "grad_norm": 0.34881338477134705,
      "learning_rate": 2.8209831482181483e-06,
      "loss": 0.9521,
      "step": 6067
    },
    {
      "epoch": 0.9272261909309699,
      "grad_norm": 0.27328556776046753,
      "learning_rate": 2.8092231601806517e-06,
      "loss": 0.6417,
      "step": 6068
    },
    {
      "epoch": 0.9273789968292776,
      "grad_norm": 0.3333223760128021,
      "learning_rate": 2.797487386409359e-06,
      "loss": 0.8499,
      "step": 6069
    },
    {
      "epoch": 0.9275318027275853,
      "grad_norm": 0.3463587462902069,
      "learning_rate": 2.785775829828152e-06,
      "loss": 0.7211,
      "step": 6070
    },
    {
      "epoch": 0.927684608625893,
      "grad_norm": 0.35262081027030945,
      "learning_rate": 2.7740884933548538e-06,
      "loss": 1.0996,
      "step": 6071
    },
    {
      "epoch": 0.9278374145242007,
      "grad_norm": 0.311922162771225,
      "learning_rate": 2.762425379901268e-06,
      "loss": 0.865,
      "step": 6072
    },
    {
      "epoch": 0.9279902204225083,
      "grad_norm": 0.26930543780326843,
      "learning_rate": 2.7507864923731584e-06,
      "loss": 0.5989,
      "step": 6073
    },
    {
      "epoch": 0.928143026320816,
      "grad_norm": 0.3048146069049835,
      "learning_rate": 2.739171833670262e-06,
      "loss": 0.637,
      "step": 6074
    },
    {
      "epoch": 0.9282958322191237,
      "grad_norm": 0.28027278184890747,
      "learning_rate": 2.727581406686286e-06,
      "loss": 0.7139,
      "step": 6075
    },
    {
      "epoch": 0.9284486381174313,
      "grad_norm": 0.28340592980384827,
      "learning_rate": 2.7160152143088535e-06,
      "loss": 0.7486,
      "step": 6076
    },
    {
      "epoch": 0.928601444015739,
      "grad_norm": 0.4058891236782074,
      "learning_rate": 2.7044732594196152e-06,
      "loss": 0.8938,
      "step": 6077
    },
    {
      "epoch": 0.9287542499140466,
      "grad_norm": 0.28652113676071167,
      "learning_rate": 2.692955544894149e-06,
      "loss": 0.5256,
      "step": 6078
    },
    {
      "epoch": 0.9289070558123543,
      "grad_norm": 1.982176661491394,
      "learning_rate": 2.6814620736019813e-06,
      "loss": 0.7787,
      "step": 6079
    },
    {
      "epoch": 0.929059861710662,
      "grad_norm": 0.26007115840911865,
      "learning_rate": 2.6699928484066217e-06,
      "loss": 0.6662,
      "step": 6080
    },
    {
      "epoch": 0.9292126676089697,
      "grad_norm": 0.2802926003932953,
      "learning_rate": 2.65854787216554e-06,
      "loss": 0.8371,
      "step": 6081
    },
    {
      "epoch": 0.9293654735072774,
      "grad_norm": 0.26776376366615295,
      "learning_rate": 2.647127147730133e-06,
      "loss": 0.6102,
      "step": 6082
    },
    {
      "epoch": 0.9295182794055851,
      "grad_norm": 0.3064311146736145,
      "learning_rate": 2.6357306779458133e-06,
      "loss": 0.6569,
      "step": 6083
    },
    {
      "epoch": 0.9296710853038928,
      "grad_norm": 0.3474329113960266,
      "learning_rate": 2.624358465651877e-06,
      "loss": 0.7091,
      "step": 6084
    },
    {
      "epoch": 0.9298238912022004,
      "grad_norm": 0.28256288170814514,
      "learning_rate": 2.613010513681646e-06,
      "loss": 0.6321,
      "step": 6085
    },
    {
      "epoch": 0.9299766971005081,
      "grad_norm": 0.2918296754360199,
      "learning_rate": 2.6016868248623482e-06,
      "loss": 0.6811,
      "step": 6086
    },
    {
      "epoch": 0.9301295029988158,
      "grad_norm": 0.29972580075263977,
      "learning_rate": 2.590387402015193e-06,
      "loss": 0.7267,
      "step": 6087
    },
    {
      "epoch": 0.9302823088971234,
      "grad_norm": 0.3669845461845398,
      "learning_rate": 2.5791122479553507e-06,
      "loss": 0.6745,
      "step": 6088
    },
    {
      "epoch": 0.9304351147954311,
      "grad_norm": 0.31043675541877747,
      "learning_rate": 2.567861365491908e-06,
      "loss": 0.7701,
      "step": 6089
    },
    {
      "epoch": 0.9305879206937387,
      "grad_norm": 0.30671799182891846,
      "learning_rate": 2.5566347574279337e-06,
      "loss": 0.5834,
      "step": 6090
    },
    {
      "epoch": 0.9307407265920464,
      "grad_norm": 0.42947250604629517,
      "learning_rate": 2.5454324265604456e-06,
      "loss": 0.7175,
      "step": 6091
    },
    {
      "epoch": 0.9308935324903541,
      "grad_norm": 0.3376956582069397,
      "learning_rate": 2.5342543756804226e-06,
      "loss": 0.4594,
      "step": 6092
    },
    {
      "epoch": 0.9310463383886618,
      "grad_norm": 0.2570996880531311,
      "learning_rate": 2.5231006075727592e-06,
      "loss": 0.8667,
      "step": 6093
    },
    {
      "epoch": 0.9311991442869695,
      "grad_norm": 0.28008586168289185,
      "learning_rate": 2.5119711250163325e-06,
      "loss": 0.8293,
      "step": 6094
    },
    {
      "epoch": 0.9313519501852772,
      "grad_norm": 1.2276524305343628,
      "learning_rate": 2.5008659307839577e-06,
      "loss": 0.9234,
      "step": 6095
    },
    {
      "epoch": 0.9315047560835849,
      "grad_norm": 0.3057681620121002,
      "learning_rate": 2.489785027642422e-06,
      "loss": 0.5919,
      "step": 6096
    },
    {
      "epoch": 0.9316575619818925,
      "grad_norm": 0.2838587760925293,
      "learning_rate": 2.478728418352416e-06,
      "loss": 0.6622,
      "step": 6097
    },
    {
      "epoch": 0.9318103678802002,
      "grad_norm": 0.2826572358608246,
      "learning_rate": 2.4676961056686045e-06,
      "loss": 0.7228,
      "step": 6098
    },
    {
      "epoch": 0.9319631737785079,
      "grad_norm": 0.30671226978302,
      "learning_rate": 2.4566880923395985e-06,
      "loss": 0.6242,
      "step": 6099
    },
    {
      "epoch": 0.9321159796768155,
      "grad_norm": 0.31722599267959595,
      "learning_rate": 2.4457043811079495e-06,
      "loss": 0.6481,
      "step": 6100
    },
    {
      "epoch": 0.9322687855751232,
      "grad_norm": 0.32529863715171814,
      "learning_rate": 2.434744974710168e-06,
      "loss": 0.7273,
      "step": 6101
    },
    {
      "epoch": 0.9324215914734308,
      "grad_norm": 0.3722645044326782,
      "learning_rate": 2.4238098758766816e-06,
      "loss": 0.6426,
      "step": 6102
    },
    {
      "epoch": 0.9325743973717385,
      "grad_norm": 0.3125240206718445,
      "learning_rate": 2.412899087331888e-06,
      "loss": 0.7518,
      "step": 6103
    },
    {
      "epoch": 0.9327272032700462,
      "grad_norm": 0.26976755261421204,
      "learning_rate": 2.4020126117941134e-06,
      "loss": 0.6159,
      "step": 6104
    },
    {
      "epoch": 0.9328800091683539,
      "grad_norm": 0.30735623836517334,
      "learning_rate": 2.3911504519756435e-06,
      "loss": 0.6861,
      "step": 6105
    },
    {
      "epoch": 0.9330328150666616,
      "grad_norm": 0.2629193663597107,
      "learning_rate": 2.380312610582691e-06,
      "loss": 0.7826,
      "step": 6106
    },
    {
      "epoch": 0.9331856209649693,
      "grad_norm": 0.30757153034210205,
      "learning_rate": 2.3694990903153857e-06,
      "loss": 0.7173,
      "step": 6107
    },
    {
      "epoch": 0.933338426863277,
      "grad_norm": 0.44342416524887085,
      "learning_rate": 2.358709893867861e-06,
      "loss": 0.6669,
      "step": 6108
    },
    {
      "epoch": 0.9334912327615846,
      "grad_norm": 0.28203803300857544,
      "learning_rate": 2.3479450239281443e-06,
      "loss": 0.5641,
      "step": 6109
    },
    {
      "epoch": 0.9336440386598923,
      "grad_norm": 0.284018337726593,
      "learning_rate": 2.3372044831782125e-06,
      "loss": 0.6156,
      "step": 6110
    },
    {
      "epoch": 0.9337968445582,
      "grad_norm": 0.34571659564971924,
      "learning_rate": 2.3264882742939697e-06,
      "loss": 0.6791,
      "step": 6111
    },
    {
      "epoch": 0.9339496504565076,
      "grad_norm": 0.253670334815979,
      "learning_rate": 2.3157963999452804e-06,
      "loss": 0.8092,
      "step": 6112
    },
    {
      "epoch": 0.9341024563548153,
      "grad_norm": 0.3017883598804474,
      "learning_rate": 2.3051288627959357e-06,
      "loss": 0.5998,
      "step": 6113
    },
    {
      "epoch": 0.9342552622531229,
      "grad_norm": 0.2832029163837433,
      "learning_rate": 2.294485665503665e-06,
      "loss": 0.7049,
      "step": 6114
    },
    {
      "epoch": 0.9344080681514306,
      "grad_norm": 0.30555370450019836,
      "learning_rate": 2.2838668107201143e-06,
      "loss": 0.8306,
      "step": 6115
    },
    {
      "epoch": 0.9345608740497383,
      "grad_norm": 0.28199902176856995,
      "learning_rate": 2.2732723010909007e-06,
      "loss": 0.6949,
      "step": 6116
    },
    {
      "epoch": 0.934713679948046,
      "grad_norm": 0.29065364599227905,
      "learning_rate": 2.262702139255557e-06,
      "loss": 0.8368,
      "step": 6117
    },
    {
      "epoch": 0.9348664858463537,
      "grad_norm": 0.5368300676345825,
      "learning_rate": 2.252156327847543e-06,
      "loss": 0.6388,
      "step": 6118
    },
    {
      "epoch": 0.9350192917446614,
      "grad_norm": 0.3096945583820343,
      "learning_rate": 2.2416348694942467e-06,
      "loss": 0.755,
      "step": 6119
    },
    {
      "epoch": 0.935172097642969,
      "grad_norm": 0.3204090893268585,
      "learning_rate": 2.2311377668170265e-06,
      "loss": 0.8538,
      "step": 6120
    },
    {
      "epoch": 0.9353249035412767,
      "grad_norm": 0.30497118830680847,
      "learning_rate": 2.2206650224311344e-06,
      "loss": 0.6324,
      "step": 6121
    },
    {
      "epoch": 0.9354777094395844,
      "grad_norm": 0.3680538237094879,
      "learning_rate": 2.2102166389457614e-06,
      "loss": 0.7133,
      "step": 6122
    },
    {
      "epoch": 0.935630515337892,
      "grad_norm": 0.24908339977264404,
      "learning_rate": 2.1997926189640584e-06,
      "loss": 0.6931,
      "step": 6123
    },
    {
      "epoch": 0.9357833212361997,
      "grad_norm": 0.28190726041793823,
      "learning_rate": 2.189392965083059e-06,
      "loss": 0.7671,
      "step": 6124
    },
    {
      "epoch": 0.9359361271345074,
      "grad_norm": 0.4661271274089813,
      "learning_rate": 2.179017679893747e-06,
      "loss": 0.6609,
      "step": 6125
    },
    {
      "epoch": 0.936088933032815,
      "grad_norm": 0.29947930574417114,
      "learning_rate": 2.168666765981053e-06,
      "loss": 0.638,
      "step": 6126
    },
    {
      "epoch": 0.9362417389311227,
      "grad_norm": 0.2782745063304901,
      "learning_rate": 2.1583402259238163e-06,
      "loss": 0.7907,
      "step": 6127
    },
    {
      "epoch": 0.9363945448294304,
      "grad_norm": 0.38503745198249817,
      "learning_rate": 2.1480380622948105e-06,
      "loss": 0.7776,
      "step": 6128
    },
    {
      "epoch": 0.9365473507277381,
      "grad_norm": 0.3161745071411133,
      "learning_rate": 2.1377602776607165e-06,
      "loss": 0.5808,
      "step": 6129
    },
    {
      "epoch": 0.9367001566260458,
      "grad_norm": 0.3757038712501526,
      "learning_rate": 2.1275068745821748e-06,
      "loss": 0.5165,
      "step": 6130
    },
    {
      "epoch": 0.9368529625243535,
      "grad_norm": 0.32424384355545044,
      "learning_rate": 2.1172778556137307e-06,
      "loss": 0.5642,
      "step": 6131
    },
    {
      "epoch": 0.9370057684226611,
      "grad_norm": 0.3121247887611389,
      "learning_rate": 2.107073223303857e-06,
      "loss": 0.5107,
      "step": 6132
    },
    {
      "epoch": 0.9371585743209688,
      "grad_norm": 0.48844408988952637,
      "learning_rate": 2.0968929801949533e-06,
      "loss": 0.5614,
      "step": 6133
    },
    {
      "epoch": 0.9373113802192765,
      "grad_norm": 0.30841362476348877,
      "learning_rate": 2.086737128823335e-06,
      "loss": 0.8262,
      "step": 6134
    },
    {
      "epoch": 0.9374641861175841,
      "grad_norm": 0.34776777029037476,
      "learning_rate": 2.0766056717192674e-06,
      "loss": 0.8351,
      "step": 6135
    },
    {
      "epoch": 0.9376169920158918,
      "grad_norm": 0.2539510130882263,
      "learning_rate": 2.0664986114068974e-06,
      "loss": 0.542,
      "step": 6136
    },
    {
      "epoch": 0.9377697979141995,
      "grad_norm": 0.2508013844490051,
      "learning_rate": 2.0564159504043112e-06,
      "loss": 0.8223,
      "step": 6137
    },
    {
      "epoch": 0.9379226038125071,
      "grad_norm": 0.27509501576423645,
      "learning_rate": 2.046357691223544e-06,
      "loss": 0.8044,
      "step": 6138
    },
    {
      "epoch": 0.9380754097108148,
      "grad_norm": 0.29400357604026794,
      "learning_rate": 2.036323836370502e-06,
      "loss": 0.6546,
      "step": 6139
    },
    {
      "epoch": 0.9382282156091225,
      "grad_norm": 0.2571744918823242,
      "learning_rate": 2.0263143883450406e-06,
      "loss": 0.7295,
      "step": 6140
    },
    {
      "epoch": 0.9383810215074302,
      "grad_norm": 0.264504998922348,
      "learning_rate": 2.016329349640944e-06,
      "loss": 0.821,
      "step": 6141
    },
    {
      "epoch": 0.9385338274057379,
      "grad_norm": 0.29346346855163574,
      "learning_rate": 2.006368722745888e-06,
      "loss": 0.7786,
      "step": 6142
    },
    {
      "epoch": 0.9386866333040456,
      "grad_norm": 0.2435157150030136,
      "learning_rate": 1.996432510141477e-06,
      "loss": 0.681,
      "step": 6143
    },
    {
      "epoch": 0.9388394392023532,
      "grad_norm": 0.30916231870651245,
      "learning_rate": 1.9865207143032525e-06,
      "loss": 0.7043,
      "step": 6144
    },
    {
      "epoch": 0.9389922451006609,
      "grad_norm": 0.32135826349258423,
      "learning_rate": 1.9766333377006398e-06,
      "loss": 0.6599,
      "step": 6145
    },
    {
      "epoch": 0.9391450509989686,
      "grad_norm": 0.31256183981895447,
      "learning_rate": 1.9667703827969897e-06,
      "loss": 0.8267,
      "step": 6146
    },
    {
      "epoch": 0.9392978568972762,
      "grad_norm": 0.32118478417396545,
      "learning_rate": 1.9569318520495817e-06,
      "loss": 0.6224,
      "step": 6147
    },
    {
      "epoch": 0.9394506627955839,
      "grad_norm": 0.2773655652999878,
      "learning_rate": 1.94711774790961e-06,
      "loss": 0.7065,
      "step": 6148
    },
    {
      "epoch": 0.9396034686938916,
      "grad_norm": 0.3196839690208435,
      "learning_rate": 1.9373280728221863e-06,
      "loss": 0.6748,
      "step": 6149
    },
    {
      "epoch": 0.9397562745921992,
      "grad_norm": 0.2970350682735443,
      "learning_rate": 1.9275628292262926e-06,
      "loss": 0.954,
      "step": 6150
    },
    {
      "epoch": 0.9399090804905069,
      "grad_norm": 0.33877527713775635,
      "learning_rate": 1.9178220195548824e-06,
      "loss": 0.6657,
      "step": 6151
    },
    {
      "epoch": 0.9400618863888146,
      "grad_norm": 0.31130823493003845,
      "learning_rate": 1.9081056462347924e-06,
      "loss": 0.5888,
      "step": 6152
    },
    {
      "epoch": 0.9402146922871223,
      "grad_norm": 0.3352448344230652,
      "learning_rate": 1.898413711686764e-06,
      "loss": 0.5394,
      "step": 6153
    },
    {
      "epoch": 0.94036749818543,
      "grad_norm": 0.27089211344718933,
      "learning_rate": 1.8887462183254878e-06,
      "loss": 0.7617,
      "step": 6154
    },
    {
      "epoch": 0.9405203040837377,
      "grad_norm": 0.4040941894054413,
      "learning_rate": 1.879103168559504e-06,
      "loss": 0.6088,
      "step": 6155
    },
    {
      "epoch": 0.9406731099820453,
      "grad_norm": 0.2687217593193054,
      "learning_rate": 1.869484564791335e-06,
      "loss": 0.7469,
      "step": 6156
    },
    {
      "epoch": 0.940825915880353,
      "grad_norm": 0.31856074929237366,
      "learning_rate": 1.8598904094173308e-06,
      "loss": 0.4672,
      "step": 6157
    },
    {
      "epoch": 0.9409787217786607,
      "grad_norm": 0.30012932419776917,
      "learning_rate": 1.8503207048278348e-06,
      "loss": 0.7236,
      "step": 6158
    },
    {
      "epoch": 0.9411315276769683,
      "grad_norm": 0.27605703473091125,
      "learning_rate": 1.8407754534070398e-06,
      "loss": 0.6358,
      "step": 6159
    },
    {
      "epoch": 0.941284333575276,
      "grad_norm": 0.2866106629371643,
      "learning_rate": 1.831254657533077e-06,
      "loss": 0.5922,
      "step": 6160
    },
    {
      "epoch": 0.9414371394735837,
      "grad_norm": 0.3067355155944824,
      "learning_rate": 1.8217583195779485e-06,
      "loss": 0.7558,
      "step": 6161
    },
    {
      "epoch": 0.9415899453718913,
      "grad_norm": 0.23551535606384277,
      "learning_rate": 1.812286441907618e-06,
      "loss": 0.6481,
      "step": 6162
    },
    {
      "epoch": 0.941742751270199,
      "grad_norm": 0.508856475353241,
      "learning_rate": 1.8028390268818973e-06,
      "loss": 0.6483,
      "step": 6163
    },
    {
      "epoch": 0.9418955571685067,
      "grad_norm": 0.2898612916469574,
      "learning_rate": 1.7934160768545372e-06,
      "loss": 0.5378,
      "step": 6164
    },
    {
      "epoch": 0.9420483630668144,
      "grad_norm": 0.28692492842674255,
      "learning_rate": 1.7840175941732041e-06,
      "loss": 0.5875,
      "step": 6165
    },
    {
      "epoch": 0.9422011689651221,
      "grad_norm": 0.2957552671432495,
      "learning_rate": 1.774643581179436e-06,
      "loss": 0.5997,
      "step": 6166
    },
    {
      "epoch": 0.9423539748634298,
      "grad_norm": 0.27881738543510437,
      "learning_rate": 1.7652940402086872e-06,
      "loss": 0.7217,
      "step": 6167
    },
    {
      "epoch": 0.9425067807617374,
      "grad_norm": 0.42624831199645996,
      "learning_rate": 1.7559689735903273e-06,
      "loss": 0.7888,
      "step": 6168
    },
    {
      "epoch": 0.9426595866600451,
      "grad_norm": 0.25755804777145386,
      "learning_rate": 1.7466683836476093e-06,
      "loss": 0.966,
      "step": 6169
    },
    {
      "epoch": 0.9428123925583528,
      "grad_norm": 0.26354870200157166,
      "learning_rate": 1.737392272697702e-06,
      "loss": 0.76,
      "step": 6170
    },
    {
      "epoch": 0.9429651984566604,
      "grad_norm": 0.28261834383010864,
      "learning_rate": 1.728140643051679e-06,
      "loss": 0.8941,
      "step": 6171
    },
    {
      "epoch": 0.9431180043549681,
      "grad_norm": 0.25513985753059387,
      "learning_rate": 1.7189134970144848e-06,
      "loss": 0.8031,
      "step": 6172
    },
    {
      "epoch": 0.9432708102532757,
      "grad_norm": 0.3026675283908844,
      "learning_rate": 1.7097108368849923e-06,
      "loss": 0.5943,
      "step": 6173
    },
    {
      "epoch": 0.9434236161515834,
      "grad_norm": 0.37587878108024597,
      "learning_rate": 1.7005326649559893e-06,
      "loss": 0.7718,
      "step": 6174
    },
    {
      "epoch": 0.9435764220498911,
      "grad_norm": 0.3272812068462372,
      "learning_rate": 1.6913789835141135e-06,
      "loss": 0.6088,
      "step": 6175
    },
    {
      "epoch": 0.9437292279481988,
      "grad_norm": 0.28328678011894226,
      "learning_rate": 1.6822497948399407e-06,
      "loss": 0.6142,
      "step": 6176
    },
    {
      "epoch": 0.9438820338465065,
      "grad_norm": 0.4213845133781433,
      "learning_rate": 1.6731451012079292e-06,
      "loss": 0.8319,
      "step": 6177
    },
    {
      "epoch": 0.9440348397448142,
      "grad_norm": 0.2808535695075989,
      "learning_rate": 1.664064904886431e-06,
      "loss": 0.6973,
      "step": 6178
    },
    {
      "epoch": 0.9441876456431219,
      "grad_norm": 0.2822289764881134,
      "learning_rate": 1.6550092081377034e-06,
      "loss": 0.6103,
      "step": 6179
    },
    {
      "epoch": 0.9443404515414295,
      "grad_norm": 0.28763696551322937,
      "learning_rate": 1.645978013217908e-06,
      "loss": 0.8216,
      "step": 6180
    },
    {
      "epoch": 0.9444932574397372,
      "grad_norm": 0.28879836201667786,
      "learning_rate": 1.6369713223770788e-06,
      "loss": 0.6061,
      "step": 6181
    },
    {
      "epoch": 0.9446460633380448,
      "grad_norm": 0.39242953062057495,
      "learning_rate": 1.627989137859165e-06,
      "loss": 0.7673,
      "step": 6182
    },
    {
      "epoch": 0.9447988692363525,
      "grad_norm": 0.29807108640670776,
      "learning_rate": 1.6190314619019876e-06,
      "loss": 0.7077,
      "step": 6183
    },
    {
      "epoch": 0.9449516751346602,
      "grad_norm": 0.2821442484855652,
      "learning_rate": 1.6100982967373058e-06,
      "loss": 0.759,
      "step": 6184
    },
    {
      "epoch": 0.9451044810329678,
      "grad_norm": 0.31379956007003784,
      "learning_rate": 1.6011896445907171e-06,
      "loss": 0.6476,
      "step": 6185
    },
    {
      "epoch": 0.9452572869312755,
      "grad_norm": 0.276607871055603,
      "learning_rate": 1.592305507681735e-06,
      "loss": 0.8936,
      "step": 6186
    },
    {
      "epoch": 0.9454100928295832,
      "grad_norm": 0.468855619430542,
      "learning_rate": 1.583445888223778e-06,
      "loss": 0.6336,
      "step": 6187
    },
    {
      "epoch": 0.9455628987278909,
      "grad_norm": 0.2827998995780945,
      "learning_rate": 1.574610788424158e-06,
      "loss": 0.6883,
      "step": 6188
    },
    {
      "epoch": 0.9457157046261986,
      "grad_norm": 0.25603756308555603,
      "learning_rate": 1.5658002104840586e-06,
      "loss": 0.6325,
      "step": 6189
    },
    {
      "epoch": 0.9458685105245063,
      "grad_norm": 0.27887246012687683,
      "learning_rate": 1.5570141565985353e-06,
      "loss": 0.6751,
      "step": 6190
    },
    {
      "epoch": 0.946021316422814,
      "grad_norm": 1.5862551927566528,
      "learning_rate": 1.5482526289565924e-06,
      "loss": 0.6279,
      "step": 6191
    },
    {
      "epoch": 0.9461741223211216,
      "grad_norm": 0.28105485439300537,
      "learning_rate": 1.539515629741084e-06,
      "loss": 0.7632,
      "step": 6192
    },
    {
      "epoch": 0.9463269282194293,
      "grad_norm": 0.4909925162792206,
      "learning_rate": 1.5308031611287466e-06,
      "loss": 0.8598,
      "step": 6193
    },
    {
      "epoch": 0.9464797341177369,
      "grad_norm": 0.26141437888145447,
      "learning_rate": 1.5221152252902215e-06,
      "loss": 0.675,
      "step": 6194
    },
    {
      "epoch": 0.9466325400160446,
      "grad_norm": 0.37421944737434387,
      "learning_rate": 1.5134518243900552e-06,
      "loss": 0.859,
      "step": 6195
    },
    {
      "epoch": 0.9467853459143523,
      "grad_norm": 0.2938391864299774,
      "learning_rate": 1.5048129605866433e-06,
      "loss": 0.7082,
      "step": 6196
    },
    {
      "epoch": 0.94693815181266,
      "grad_norm": 0.23865161836147308,
      "learning_rate": 1.4961986360322867e-06,
      "loss": 0.6739,
      "step": 6197
    },
    {
      "epoch": 0.9470909577109676,
      "grad_norm": 0.29308661818504333,
      "learning_rate": 1.487608852873168e-06,
      "loss": 0.6938,
      "step": 6198
    },
    {
      "epoch": 0.9472437636092753,
      "grad_norm": 0.3156602084636688,
      "learning_rate": 1.4790436132493757e-06,
      "loss": 0.7344,
      "step": 6199
    },
    {
      "epoch": 0.947396569507583,
      "grad_norm": 0.27409300208091736,
      "learning_rate": 1.4705029192948584e-06,
      "loss": 0.8084,
      "step": 6200
    },
    {
      "epoch": 0.9475493754058907,
      "grad_norm": 0.29559117555618286,
      "learning_rate": 1.4619867731374581e-06,
      "loss": 0.6225,
      "step": 6201
    },
    {
      "epoch": 0.9477021813041984,
      "grad_norm": 0.24822796881198883,
      "learning_rate": 1.4534951768989002e-06,
      "loss": 0.6979,
      "step": 6202
    },
    {
      "epoch": 0.947854987202506,
      "grad_norm": 0.2914453446865082,
      "learning_rate": 1.4450281326947922e-06,
      "loss": 0.7604,
      "step": 6203
    },
    {
      "epoch": 0.9480077931008137,
      "grad_norm": 0.25838714838027954,
      "learning_rate": 1.4365856426346248e-06,
      "loss": 0.6905,
      "step": 6204
    },
    {
      "epoch": 0.9481605989991214,
      "grad_norm": 0.2767367362976074,
      "learning_rate": 1.4281677088217925e-06,
      "loss": 0.6708,
      "step": 6205
    },
    {
      "epoch": 0.948313404897429,
      "grad_norm": 0.2727099657058716,
      "learning_rate": 1.4197743333535407e-06,
      "loss": 0.7106,
      "step": 6206
    },
    {
      "epoch": 0.9484662107957367,
      "grad_norm": 0.3296019434928894,
      "learning_rate": 1.4114055183209961e-06,
      "loss": 0.644,
      "step": 6207
    },
    {
      "epoch": 0.9486190166940444,
      "grad_norm": 0.5572933554649353,
      "learning_rate": 1.4030612658091913e-06,
      "loss": 0.7392,
      "step": 6208
    },
    {
      "epoch": 0.948771822592352,
      "grad_norm": 0.3326074481010437,
      "learning_rate": 1.3947415778970296e-06,
      "loss": 0.7055,
      "step": 6209
    },
    {
      "epoch": 0.9489246284906597,
      "grad_norm": 0.35185036063194275,
      "learning_rate": 1.3864464566572865e-06,
      "loss": 0.7567,
      "step": 6210
    },
    {
      "epoch": 0.9490774343889674,
      "grad_norm": 0.2703647017478943,
      "learning_rate": 1.37817590415662e-06,
      "loss": 0.7198,
      "step": 6211
    },
    {
      "epoch": 0.9492302402872751,
      "grad_norm": 0.2500843405723572,
      "learning_rate": 1.3699299224555707e-06,
      "loss": 0.6154,
      "step": 6212
    },
    {
      "epoch": 0.9493830461855828,
      "grad_norm": 0.24339242279529572,
      "learning_rate": 1.3617085136085617e-06,
      "loss": 0.6167,
      "step": 6213
    },
    {
      "epoch": 0.9495358520838905,
      "grad_norm": 0.24041394889354706,
      "learning_rate": 1.3535116796638768e-06,
      "loss": 0.7468,
      "step": 6214
    },
    {
      "epoch": 0.9496886579821981,
      "grad_norm": 0.2889711558818817,
      "learning_rate": 1.345339422663705e-06,
      "loss": 0.6913,
      "step": 6215
    },
    {
      "epoch": 0.9498414638805058,
      "grad_norm": 0.3345462381839752,
      "learning_rate": 1.337191744644084e-06,
      "loss": 0.9103,
      "step": 6216
    },
    {
      "epoch": 0.9499942697788135,
      "grad_norm": 0.29227215051651,
      "learning_rate": 1.3290686476349234e-06,
      "loss": 0.6628,
      "step": 6217
    },
    {
      "epoch": 0.9501470756771211,
      "grad_norm": 0.28349965810775757,
      "learning_rate": 1.3209701336600488e-06,
      "loss": 0.6475,
      "step": 6218
    },
    {
      "epoch": 0.9502998815754288,
      "grad_norm": 0.2800476551055908,
      "learning_rate": 1.3128962047371463e-06,
      "loss": 0.9071,
      "step": 6219
    },
    {
      "epoch": 0.9504526874737365,
      "grad_norm": 0.27852532267570496,
      "learning_rate": 1.30484686287774e-06,
      "loss": 0.6444,
      "step": 6220
    },
    {
      "epoch": 0.9506054933720441,
      "grad_norm": 0.3461797535419464,
      "learning_rate": 1.296822110087259e-06,
      "loss": 0.7275,
      "step": 6221
    },
    {
      "epoch": 0.9507582992703518,
      "grad_norm": 0.2584891617298126,
      "learning_rate": 1.2888219483650043e-06,
      "loss": 0.6953,
      "step": 6222
    },
    {
      "epoch": 0.9509111051686595,
      "grad_norm": 0.25109121203422546,
      "learning_rate": 1.2808463797041703e-06,
      "loss": 0.5831,
      "step": 6223
    },
    {
      "epoch": 0.9510639110669672,
      "grad_norm": 0.2807595729827881,
      "learning_rate": 1.2728954060917898e-06,
      "loss": 0.6815,
      "step": 6224
    },
    {
      "epoch": 0.9512167169652749,
      "grad_norm": 0.3147951662540436,
      "learning_rate": 1.264969029508778e-06,
      "loss": 0.578,
      "step": 6225
    },
    {
      "epoch": 0.9513695228635826,
      "grad_norm": 0.2849219739437103,
      "learning_rate": 1.257067251929911e-06,
      "loss": 0.7381,
      "step": 6226
    },
    {
      "epoch": 0.9515223287618902,
      "grad_norm": 0.2966640293598175,
      "learning_rate": 1.2491900753238806e-06,
      "loss": 0.5892,
      "step": 6227
    },
    {
      "epoch": 0.9516751346601979,
      "grad_norm": 0.3076305687427521,
      "learning_rate": 1.2413375016532058e-06,
      "loss": 0.8709,
      "step": 6228
    },
    {
      "epoch": 0.9518279405585055,
      "grad_norm": 0.2656288743019104,
      "learning_rate": 1.2335095328742885e-06,
      "loss": 0.7306,
      "step": 6229
    },
    {
      "epoch": 0.9519807464568132,
      "grad_norm": 0.293072909116745,
      "learning_rate": 1.2257061709373907e-06,
      "loss": 0.7178,
      "step": 6230
    },
    {
      "epoch": 0.9521335523551209,
      "grad_norm": 0.3086521327495575,
      "learning_rate": 1.2179274177866796e-06,
      "loss": 0.6118,
      "step": 6231
    },
    {
      "epoch": 0.9522863582534286,
      "grad_norm": 0.31312182545661926,
      "learning_rate": 1.210173275360138e-06,
      "loss": 0.7736,
      "step": 6232
    },
    {
      "epoch": 0.9524391641517362,
      "grad_norm": 0.30142563581466675,
      "learning_rate": 1.2024437455896653e-06,
      "loss": 0.6248,
      "step": 6233
    },
    {
      "epoch": 0.9525919700500439,
      "grad_norm": 0.2886991798877716,
      "learning_rate": 1.19473883040101e-06,
      "loss": 0.7147,
      "step": 6234
    },
    {
      "epoch": 0.9527447759483516,
      "grad_norm": 0.27349576354026794,
      "learning_rate": 1.1870585317137583e-06,
      "loss": 0.9949,
      "step": 6235
    },
    {
      "epoch": 0.9528975818466593,
      "grad_norm": 0.26117628812789917,
      "learning_rate": 1.1794028514414356e-06,
      "loss": 0.749,
      "step": 6236
    },
    {
      "epoch": 0.953050387744967,
      "grad_norm": 0.31577005982398987,
      "learning_rate": 1.1717717914913496e-06,
      "loss": 0.7419,
      "step": 6237
    },
    {
      "epoch": 0.9532031936432747,
      "grad_norm": 0.29771387577056885,
      "learning_rate": 1.1641653537647456e-06,
      "loss": 0.6722,
      "step": 6238
    },
    {
      "epoch": 0.9533559995415823,
      "grad_norm": 0.27761638164520264,
      "learning_rate": 1.156583540156686e-06,
      "loss": 0.5968,
      "step": 6239
    },
    {
      "epoch": 0.95350880543989,
      "grad_norm": 0.2751348316669464,
      "learning_rate": 1.1490263525561373e-06,
      "loss": 0.6508,
      "step": 6240
    },
    {
      "epoch": 0.9536616113381976,
      "grad_norm": 0.245052307844162,
      "learning_rate": 1.1414937928458824e-06,
      "loss": 0.6157,
      "step": 6241
    },
    {
      "epoch": 0.9538144172365053,
      "grad_norm": 0.32120779156684875,
      "learning_rate": 1.133985862902598e-06,
      "loss": 0.7439,
      "step": 6242
    },
    {
      "epoch": 0.953967223134813,
      "grad_norm": 0.3647390305995941,
      "learning_rate": 1.1265025645968318e-06,
      "loss": 0.72,
      "step": 6243
    },
    {
      "epoch": 0.9541200290331207,
      "grad_norm": 0.30320584774017334,
      "learning_rate": 1.119043899792993e-06,
      "loss": 0.7782,
      "step": 6244
    },
    {
      "epoch": 0.9542728349314283,
      "grad_norm": 0.2579381763935089,
      "learning_rate": 1.1116098703493394e-06,
      "loss": 0.612,
      "step": 6245
    },
    {
      "epoch": 0.954425640829736,
      "grad_norm": 0.32816165685653687,
      "learning_rate": 1.1042004781179893e-06,
      "loss": 0.7938,
      "step": 6246
    },
    {
      "epoch": 0.9545784467280437,
      "grad_norm": 0.4004554748535156,
      "learning_rate": 1.096815724944922e-06,
      "loss": 0.836,
      "step": 6247
    },
    {
      "epoch": 0.9547312526263514,
      "grad_norm": 0.2599641978740692,
      "learning_rate": 1.0894556126700094e-06,
      "loss": 0.6191,
      "step": 6248
    },
    {
      "epoch": 0.9548840585246591,
      "grad_norm": 0.2757795453071594,
      "learning_rate": 1.0821201431269523e-06,
      "loss": 0.5368,
      "step": 6249
    },
    {
      "epoch": 0.9550368644229668,
      "grad_norm": 0.2588087022304535,
      "learning_rate": 1.0748093181433216e-06,
      "loss": 0.7106,
      "step": 6250
    },
    {
      "epoch": 0.9551896703212744,
      "grad_norm": 0.26044732332229614,
      "learning_rate": 1.0675231395405495e-06,
      "loss": 0.6474,
      "step": 6251
    },
    {
      "epoch": 0.9553424762195821,
      "grad_norm": 0.30654361844062805,
      "learning_rate": 1.0602616091339168e-06,
      "loss": 0.6999,
      "step": 6252
    },
    {
      "epoch": 0.9554952821178897,
      "grad_norm": 0.2802150845527649,
      "learning_rate": 1.0530247287325768e-06,
      "loss": 0.6875,
      "step": 6253
    },
    {
      "epoch": 0.9556480880161974,
      "grad_norm": 0.2953641712665558,
      "learning_rate": 1.0458125001395536e-06,
      "loss": 0.7029,
      "step": 6254
    },
    {
      "epoch": 0.9558008939145051,
      "grad_norm": 0.268854022026062,
      "learning_rate": 1.038624925151699e-06,
      "loss": 0.8357,
      "step": 6255
    },
    {
      "epoch": 0.9559536998128128,
      "grad_norm": 0.26503345370292664,
      "learning_rate": 1.0314620055597246e-06,
      "loss": 0.5589,
      "step": 6256
    },
    {
      "epoch": 0.9561065057111204,
      "grad_norm": 0.23971620202064514,
      "learning_rate": 1.0243237431482366e-06,
      "loss": 0.6528,
      "step": 6257
    },
    {
      "epoch": 0.9562593116094281,
      "grad_norm": 0.464169442653656,
      "learning_rate": 1.0172101396956567e-06,
      "loss": 0.8958,
      "step": 6258
    },
    {
      "epoch": 0.9564121175077358,
      "grad_norm": 0.3739687204360962,
      "learning_rate": 1.0101211969742896e-06,
      "loss": 0.8565,
      "step": 6259
    },
    {
      "epoch": 0.9565649234060435,
      "grad_norm": 0.3403286635875702,
      "learning_rate": 1.0030569167502778e-06,
      "loss": 0.5678,
      "step": 6260
    },
    {
      "epoch": 0.9567177293043512,
      "grad_norm": 0.2906002700328827,
      "learning_rate": 9.96017300783636e-07,
      "loss": 0.7359,
      "step": 6261
    },
    {
      "epoch": 0.9568705352026589,
      "grad_norm": 0.33831942081451416,
      "learning_rate": 9.890023508282166e-07,
      "loss": 0.7405,
      "step": 6262
    },
    {
      "epoch": 0.9570233411009665,
      "grad_norm": 0.2358943223953247,
      "learning_rate": 9.820120686317435e-07,
      "loss": 0.5598,
      "step": 6263
    },
    {
      "epoch": 0.9571761469992742,
      "grad_norm": 0.3636802136898041,
      "learning_rate": 9.750464559357686e-07,
      "loss": 0.8462,
      "step": 6264
    },
    {
      "epoch": 0.9573289528975818,
      "grad_norm": 0.27122461795806885,
      "learning_rate": 9.681055144757367e-07,
      "loss": 0.6452,
      "step": 6265
    },
    {
      "epoch": 0.9574817587958895,
      "grad_norm": 0.31347087025642395,
      "learning_rate": 9.611892459809201e-07,
      "loss": 0.566,
      "step": 6266
    },
    {
      "epoch": 0.9576345646941972,
      "grad_norm": 0.47410255670547485,
      "learning_rate": 9.542976521744518e-07,
      "loss": 0.6469,
      "step": 6267
    },
    {
      "epoch": 0.9577873705925048,
      "grad_norm": 0.2801438868045807,
      "learning_rate": 9.474307347733025e-07,
      "loss": 0.7822,
      "step": 6268
    },
    {
      "epoch": 0.9579401764908125,
      "grad_norm": 0.23874764144420624,
      "learning_rate": 9.405884954883148e-07,
      "loss": 0.5762,
      "step": 6269
    },
    {
      "epoch": 0.9580929823891202,
      "grad_norm": 0.5731150507926941,
      "learning_rate": 9.337709360241809e-07,
      "loss": 0.6621,
      "step": 6270
    },
    {
      "epoch": 0.9582457882874279,
      "grad_norm": 0.3229494094848633,
      "learning_rate": 9.269780580794307e-07,
      "loss": 0.5108,
      "step": 6271
    },
    {
      "epoch": 0.9583985941857356,
      "grad_norm": 0.27924874424934387,
      "learning_rate": 9.20209863346444e-07,
      "loss": 0.5841,
      "step": 6272
    },
    {
      "epoch": 0.9585514000840433,
      "grad_norm": 0.2501339018344879,
      "learning_rate": 9.134663535114829e-07,
      "loss": 0.7157,
      "step": 6273
    },
    {
      "epoch": 0.958704205982351,
      "grad_norm": 0.38578376173973083,
      "learning_rate": 9.067475302546147e-07,
      "loss": 0.782,
      "step": 6274
    },
    {
      "epoch": 0.9588570118806586,
      "grad_norm": 0.2761724293231964,
      "learning_rate": 9.000533952497892e-07,
      "loss": 0.9587,
      "step": 6275
    },
    {
      "epoch": 0.9590098177789663,
      "grad_norm": 0.3226098120212555,
      "learning_rate": 8.933839501647945e-07,
      "loss": 0.7157,
      "step": 6276
    },
    {
      "epoch": 0.9591626236772739,
      "grad_norm": 0.28342607617378235,
      "learning_rate": 8.86739196661257e-07,
      "loss": 0.6777,
      "step": 6277
    },
    {
      "epoch": 0.9593154295755816,
      "grad_norm": 0.27868959307670593,
      "learning_rate": 8.801191363946748e-07,
      "loss": 0.7985,
      "step": 6278
    },
    {
      "epoch": 0.9594682354738893,
      "grad_norm": 0.29065823554992676,
      "learning_rate": 8.735237710143618e-07,
      "loss": 0.6976,
      "step": 6279
    },
    {
      "epoch": 0.959621041372197,
      "grad_norm": 0.32618793845176697,
      "learning_rate": 8.669531021635258e-07,
      "loss": 0.6947,
      "step": 6280
    },
    {
      "epoch": 0.9597738472705046,
      "grad_norm": 0.4600655436515808,
      "learning_rate": 8.604071314791684e-07,
      "loss": 0.724,
      "step": 6281
    },
    {
      "epoch": 0.9599266531688123,
      "grad_norm": 0.27274805307388306,
      "learning_rate": 8.53885860592174e-07,
      "loss": 0.7377,
      "step": 6282
    },
    {
      "epoch": 0.96007945906712,
      "grad_norm": 0.36312827467918396,
      "learning_rate": 8.47389291127254e-07,
      "loss": 0.7004,
      "step": 6283
    },
    {
      "epoch": 0.9602322649654277,
      "grad_norm": 0.286912739276886,
      "learning_rate": 8.409174247029916e-07,
      "loss": 0.6783,
      "step": 6284
    },
    {
      "epoch": 0.9603850708637354,
      "grad_norm": 0.3135521709918976,
      "learning_rate": 8.344702629317857e-07,
      "loss": 0.7232,
      "step": 6285
    },
    {
      "epoch": 0.960537876762043,
      "grad_norm": 0.2688337564468384,
      "learning_rate": 8.28047807419885e-07,
      "loss": 0.612,
      "step": 6286
    },
    {
      "epoch": 0.9606906826603507,
      "grad_norm": 0.31565752625465393,
      "learning_rate": 8.216500597674093e-07,
      "loss": 0.8034,
      "step": 6287
    },
    {
      "epoch": 0.9608434885586583,
      "grad_norm": 0.257478266954422,
      "learning_rate": 8.152770215682836e-07,
      "loss": 0.6048,
      "step": 6288
    },
    {
      "epoch": 0.960996294456966,
      "grad_norm": 0.2403869926929474,
      "learning_rate": 8.089286944103158e-07,
      "loss": 0.6884,
      "step": 6289
    },
    {
      "epoch": 0.9611491003552737,
      "grad_norm": 0.4128204584121704,
      "learning_rate": 8.026050798751294e-07,
      "loss": 0.8223,
      "step": 6290
    },
    {
      "epoch": 0.9613019062535814,
      "grad_norm": 0.2880534529685974,
      "learning_rate": 7.963061795381976e-07,
      "loss": 0.687,
      "step": 6291
    },
    {
      "epoch": 0.961454712151889,
      "grad_norm": 0.31626081466674805,
      "learning_rate": 7.900319949688428e-07,
      "loss": 0.4903,
      "step": 6292
    },
    {
      "epoch": 0.9616075180501967,
      "grad_norm": 0.23981697857379913,
      "learning_rate": 7.837825277302258e-07,
      "loss": 0.7501,
      "step": 6293
    },
    {
      "epoch": 0.9617603239485044,
      "grad_norm": 0.34683600068092346,
      "learning_rate": 7.775577793793454e-07,
      "loss": 0.8155,
      "step": 6294
    },
    {
      "epoch": 0.9619131298468121,
      "grad_norm": 0.291105717420578,
      "learning_rate": 7.7135775146705e-07,
      "loss": 0.707,
      "step": 6295
    },
    {
      "epoch": 0.9620659357451198,
      "grad_norm": 0.38474923372268677,
      "learning_rate": 7.651824455380153e-07,
      "loss": 0.8183,
      "step": 6296
    },
    {
      "epoch": 0.9622187416434275,
      "grad_norm": 0.2873779833316803,
      "learning_rate": 7.590318631307769e-07,
      "loss": 0.5921,
      "step": 6297
    },
    {
      "epoch": 0.9623715475417352,
      "grad_norm": 0.3031384348869324,
      "learning_rate": 7.529060057776982e-07,
      "loss": 0.8043,
      "step": 6298
    },
    {
      "epoch": 0.9625243534400428,
      "grad_norm": 0.27383852005004883,
      "learning_rate": 7.468048750049694e-07,
      "loss": 0.5811,
      "step": 6299
    },
    {
      "epoch": 0.9626771593383504,
      "grad_norm": 0.2807175815105438,
      "learning_rate": 7.407284723326635e-07,
      "loss": 0.6625,
      "step": 6300
    },
    {
      "epoch": 0.9628299652366581,
      "grad_norm": 0.31852301955223083,
      "learning_rate": 7.346767992746584e-07,
      "loss": 0.6023,
      "step": 6301
    },
    {
      "epoch": 0.9629827711349658,
      "grad_norm": 0.27150818705558777,
      "learning_rate": 7.286498573386591e-07,
      "loss": 0.7008,
      "step": 6302
    },
    {
      "epoch": 0.9631355770332735,
      "grad_norm": 0.3553084433078766,
      "learning_rate": 7.226476480262423e-07,
      "loss": 0.842,
      "step": 6303
    },
    {
      "epoch": 0.9632883829315811,
      "grad_norm": 0.2782277762889862,
      "learning_rate": 7.166701728328118e-07,
      "loss": 0.7429,
      "step": 6304
    },
    {
      "epoch": 0.9634411888298888,
      "grad_norm": 0.3005227744579315,
      "learning_rate": 7.107174332475986e-07,
      "loss": 0.6681,
      "step": 6305
    },
    {
      "epoch": 0.9635939947281965,
      "grad_norm": 0.2721962332725525,
      "learning_rate": 7.047894307536718e-07,
      "loss": 0.6964,
      "step": 6306
    },
    {
      "epoch": 0.9637468006265042,
      "grad_norm": 0.2653356194496155,
      "learning_rate": 6.9888616682795e-07,
      "loss": 0.7553,
      "step": 6307
    },
    {
      "epoch": 0.9638996065248119,
      "grad_norm": 0.39718544483184814,
      "learning_rate": 6.930076429411902e-07,
      "loss": 0.5233,
      "step": 6308
    },
    {
      "epoch": 0.9640524124231196,
      "grad_norm": 0.2432132214307785,
      "learning_rate": 6.871538605579653e-07,
      "loss": 0.9123,
      "step": 6309
    },
    {
      "epoch": 0.9642052183214272,
      "grad_norm": 0.2763080596923828,
      "learning_rate": 6.813248211366973e-07,
      "loss": 0.6741,
      "step": 6310
    },
    {
      "epoch": 0.9643580242197349,
      "grad_norm": 0.3247620165348053,
      "learning_rate": 6.755205261296471e-07,
      "loss": 0.8244,
      "step": 6311
    },
    {
      "epoch": 0.9645108301180425,
      "grad_norm": 0.2837117910385132,
      "learning_rate": 6.697409769829132e-07,
      "loss": 0.5683,
      "step": 6312
    },
    {
      "epoch": 0.9646636360163502,
      "grad_norm": 0.2593044638633728,
      "learning_rate": 6.639861751363996e-07,
      "loss": 0.6678,
      "step": 6313
    },
    {
      "epoch": 0.9648164419146579,
      "grad_norm": 0.2585027813911438,
      "learning_rate": 6.582561220238814e-07,
      "loss": 0.7211,
      "step": 6314
    },
    {
      "epoch": 0.9649692478129656,
      "grad_norm": 0.27481648325920105,
      "learning_rate": 6.525508190729501e-07,
      "loss": 0.6615,
      "step": 6315
    },
    {
      "epoch": 0.9651220537112732,
      "grad_norm": 0.28525200486183167,
      "learning_rate": 6.468702677050464e-07,
      "loss": 0.6796,
      "step": 6316
    },
    {
      "epoch": 0.9652748596095809,
      "grad_norm": 0.39430657029151917,
      "learning_rate": 6.41214469335405e-07,
      "loss": 0.7448,
      "step": 6317
    },
    {
      "epoch": 0.9654276655078886,
      "grad_norm": 0.31355559825897217,
      "learning_rate": 6.35583425373143e-07,
      "loss": 0.8168,
      "step": 6318
    },
    {
      "epoch": 0.9655804714061963,
      "grad_norm": 0.2974868416786194,
      "learning_rate": 6.299771372211937e-07,
      "loss": 0.6698,
      "step": 6319
    },
    {
      "epoch": 0.965733277304504,
      "grad_norm": 0.2556328773498535,
      "learning_rate": 6.243956062762956e-07,
      "loss": 0.6631,
      "step": 6320
    },
    {
      "epoch": 0.9658860832028117,
      "grad_norm": 0.3320145308971405,
      "learning_rate": 6.188388339290474e-07,
      "loss": 0.6777,
      "step": 6321
    },
    {
      "epoch": 0.9660388891011193,
      "grad_norm": 0.3326362073421478,
      "learning_rate": 6.133068215638749e-07,
      "loss": 0.7277,
      "step": 6322
    },
    {
      "epoch": 0.966191694999427,
      "grad_norm": 0.3622719347476959,
      "learning_rate": 6.077995705590311e-07,
      "loss": 0.8761,
      "step": 6323
    },
    {
      "epoch": 0.9663445008977346,
      "grad_norm": 0.32251179218292236,
      "learning_rate": 6.023170822866075e-07,
      "loss": 0.7356,
      "step": 6324
    },
    {
      "epoch": 0.9664973067960423,
      "grad_norm": 0.2953559458255768,
      "learning_rate": 5.968593581125004e-07,
      "loss": 0.6952,
      "step": 6325
    },
    {
      "epoch": 0.96665011269435,
      "grad_norm": 0.29557231068611145,
      "learning_rate": 5.914263993964886e-07,
      "loss": 0.5888,
      "step": 6326
    },
    {
      "epoch": 0.9668029185926577,
      "grad_norm": 0.3225257992744446,
      "learning_rate": 5.860182074921117e-07,
      "loss": 0.743,
      "step": 6327
    },
    {
      "epoch": 0.9669557244909653,
      "grad_norm": 0.283965528011322,
      "learning_rate": 5.806347837468029e-07,
      "loss": 0.8457,
      "step": 6328
    },
    {
      "epoch": 0.967108530389273,
      "grad_norm": 0.3201238811016083,
      "learning_rate": 5.752761295017895e-07,
      "loss": 0.6668,
      "step": 6329
    },
    {
      "epoch": 0.9672613362875807,
      "grad_norm": 0.2973778247833252,
      "learning_rate": 5.699422460921255e-07,
      "loss": 0.8359,
      "step": 6330
    },
    {
      "epoch": 0.9674141421858884,
      "grad_norm": 0.24163559079170227,
      "learning_rate": 5.646331348467149e-07,
      "loss": 0.708,
      "step": 6331
    },
    {
      "epoch": 0.9675669480841961,
      "grad_norm": 0.2467086911201477,
      "learning_rate": 5.593487970882771e-07,
      "loss": 0.7813,
      "step": 6332
    },
    {
      "epoch": 0.9677197539825038,
      "grad_norm": 0.2962093651294708,
      "learning_rate": 5.540892341333592e-07,
      "loss": 0.6654,
      "step": 6333
    },
    {
      "epoch": 0.9678725598808114,
      "grad_norm": 0.28548145294189453,
      "learning_rate": 5.488544472923241e-07,
      "loss": 0.5871,
      "step": 6334
    },
    {
      "epoch": 0.9680253657791191,
      "grad_norm": 0.29998165369033813,
      "learning_rate": 5.436444378693951e-07,
      "loss": 0.705,
      "step": 6335
    },
    {
      "epoch": 0.9681781716774267,
      "grad_norm": 0.29790687561035156,
      "learning_rate": 5.384592071625894e-07,
      "loss": 1.0276,
      "step": 6336
    },
    {
      "epoch": 0.9683309775757344,
      "grad_norm": 0.2657981812953949,
      "learning_rate": 5.332987564637737e-07,
      "loss": 0.7289,
      "step": 6337
    },
    {
      "epoch": 0.9684837834740421,
      "grad_norm": 0.3056153655052185,
      "learning_rate": 5.281630870586196e-07,
      "loss": 0.7339,
      "step": 6338
    },
    {
      "epoch": 0.9686365893723498,
      "grad_norm": 0.2564420700073242,
      "learning_rate": 5.230522002266481e-07,
      "loss": 0.8466,
      "step": 6339
    },
    {
      "epoch": 0.9687893952706574,
      "grad_norm": 0.27843570709228516,
      "learning_rate": 5.179660972411848e-07,
      "loss": 0.6875,
      "step": 6340
    },
    {
      "epoch": 0.9689422011689651,
      "grad_norm": 0.35951921343803406,
      "learning_rate": 5.129047793693831e-07,
      "loss": 0.6167,
      "step": 6341
    },
    {
      "epoch": 0.9690950070672728,
      "grad_norm": 0.2623217701911926,
      "learning_rate": 5.078682478722451e-07,
      "loss": 0.5105,
      "step": 6342
    },
    {
      "epoch": 0.9692478129655805,
      "grad_norm": 0.2455863207578659,
      "learning_rate": 5.028565040045674e-07,
      "loss": 0.5531,
      "step": 6343
    },
    {
      "epoch": 0.9694006188638882,
      "grad_norm": 0.28337204456329346,
      "learning_rate": 4.978695490149953e-07,
      "loss": 0.7003,
      "step": 6344
    },
    {
      "epoch": 0.9695534247621959,
      "grad_norm": 0.29444316029548645,
      "learning_rate": 4.929073841459686e-07,
      "loss": 0.7188,
      "step": 6345
    },
    {
      "epoch": 0.9697062306605035,
      "grad_norm": 0.26820212602615356,
      "learning_rate": 4.879700106337981e-07,
      "loss": 0.6843,
      "step": 6346
    },
    {
      "epoch": 0.9698590365588111,
      "grad_norm": 0.2960645854473114,
      "learning_rate": 4.830574297085555e-07,
      "loss": 0.67,
      "step": 6347
    },
    {
      "epoch": 0.9700118424571188,
      "grad_norm": 0.25855451822280884,
      "learning_rate": 4.78169642594195e-07,
      "loss": 0.5746,
      "step": 6348
    },
    {
      "epoch": 0.9701646483554265,
      "grad_norm": 0.2733268141746521,
      "learning_rate": 4.733066505084427e-07,
      "loss": 0.6331,
      "step": 6349
    },
    {
      "epoch": 0.9703174542537342,
      "grad_norm": 0.38543620705604553,
      "learning_rate": 4.68468454662907e-07,
      "loss": 0.8998,
      "step": 6350
    },
    {
      "epoch": 0.9704702601520419,
      "grad_norm": 0.2834281325340271,
      "learning_rate": 4.636550562629571e-07,
      "loss": 0.6405,
      "step": 6351
    },
    {
      "epoch": 0.9706230660503495,
      "grad_norm": 0.3168293833732605,
      "learning_rate": 4.588664565078116e-07,
      "loss": 1.0936,
      "step": 6352
    },
    {
      "epoch": 0.9707758719486572,
      "grad_norm": 0.32848724722862244,
      "learning_rate": 4.54102656590516e-07,
      "loss": 0.742,
      "step": 6353
    },
    {
      "epoch": 0.9709286778469649,
      "grad_norm": 0.3079994022846222,
      "learning_rate": 4.493636576979321e-07,
      "loss": 0.5539,
      "step": 6354
    },
    {
      "epoch": 0.9710814837452726,
      "grad_norm": 0.3098090887069702,
      "learning_rate": 4.446494610107488e-07,
      "loss": 0.5675,
      "step": 6355
    },
    {
      "epoch": 0.9712342896435803,
      "grad_norm": 0.2650286555290222,
      "learning_rate": 4.399600677034488e-07,
      "loss": 0.6844,
      "step": 6356
    },
    {
      "epoch": 0.971387095541888,
      "grad_norm": 0.269327312707901,
      "learning_rate": 4.352954789443753e-07,
      "loss": 0.7365,
      "step": 6357
    },
    {
      "epoch": 0.9715399014401956,
      "grad_norm": 0.25867950916290283,
      "learning_rate": 4.3065569589565425e-07,
      "loss": 0.6923,
      "step": 6358
    },
    {
      "epoch": 0.9716927073385032,
      "grad_norm": 0.259370893239975,
      "learning_rate": 4.260407197132721e-07,
      "loss": 0.5959,
      "step": 6359
    },
    {
      "epoch": 0.9718455132368109,
      "grad_norm": 0.3415398895740509,
      "learning_rate": 4.2145055154697575e-07,
      "loss": 0.8221,
      "step": 6360
    },
    {
      "epoch": 0.9719983191351186,
      "grad_norm": 0.29200610518455505,
      "learning_rate": 4.16885192540406e-07,
      "loss": 0.61,
      "step": 6361
    },
    {
      "epoch": 0.9721511250334263,
      "grad_norm": 0.28360190987586975,
      "learning_rate": 4.1234464383095304e-07,
      "loss": 0.67,
      "step": 6362
    },
    {
      "epoch": 0.972303930931734,
      "grad_norm": 0.2954583764076233,
      "learning_rate": 4.078289065498786e-07,
      "loss": 0.7833,
      "step": 6363
    },
    {
      "epoch": 0.9724567368300416,
      "grad_norm": 0.4153759479522705,
      "learning_rate": 4.0333798182222716e-07,
      "loss": 0.6991,
      "step": 6364
    },
    {
      "epoch": 0.9726095427283493,
      "grad_norm": 0.3050250709056854,
      "learning_rate": 3.988718707668815e-07,
      "loss": 0.7836,
      "step": 6365
    },
    {
      "epoch": 0.972762348626657,
      "grad_norm": 0.28519800305366516,
      "learning_rate": 3.944305744965293e-07,
      "loss": 0.8232,
      "step": 6366
    },
    {
      "epoch": 0.9729151545249647,
      "grad_norm": 0.2635805606842041,
      "learning_rate": 3.900140941176855e-07,
      "loss": 0.7258,
      "step": 6367
    },
    {
      "epoch": 0.9730679604232724,
      "grad_norm": 0.3375990390777588,
      "learning_rate": 3.8562243073068107e-07,
      "loss": 0.8841,
      "step": 6368
    },
    {
      "epoch": 0.97322076632158,
      "grad_norm": 0.25449639558792114,
      "learning_rate": 3.812555854296629e-07,
      "loss": 0.759,
      "step": 6369
    },
    {
      "epoch": 0.9733735722198877,
      "grad_norm": 0.2887367308139801,
      "learning_rate": 3.769135593025941e-07,
      "loss": 0.7091,
      "step": 6370
    },
    {
      "epoch": 0.9735263781181953,
      "grad_norm": 0.28508010506629944,
      "learning_rate": 3.725963534312427e-07,
      "loss": 0.7535,
      "step": 6371
    },
    {
      "epoch": 0.973679184016503,
      "grad_norm": 0.32648003101348877,
      "learning_rate": 3.6830396889122597e-07,
      "loss": 0.8433,
      "step": 6372
    },
    {
      "epoch": 0.9738319899148107,
      "grad_norm": 0.35597503185272217,
      "learning_rate": 3.6403640675193307e-07,
      "loss": 0.5688,
      "step": 6373
    },
    {
      "epoch": 0.9739847958131184,
      "grad_norm": 0.42400041222572327,
      "learning_rate": 3.597936680766023e-07,
      "loss": 0.7087,
      "step": 6374
    },
    {
      "epoch": 0.974137601711426,
      "grad_norm": 0.33522462844848633,
      "learning_rate": 3.5557575392226595e-07,
      "loss": 0.7737,
      "step": 6375
    },
    {
      "epoch": 0.9742904076097337,
      "grad_norm": 0.32269105315208435,
      "learning_rate": 3.513826653398056e-07,
      "loss": 0.5236,
      "step": 6376
    },
    {
      "epoch": 0.9744432135080414,
      "grad_norm": 0.27982404828071594,
      "learning_rate": 3.4721440337387445e-07,
      "loss": 0.5924,
      "step": 6377
    },
    {
      "epoch": 0.9745960194063491,
      "grad_norm": 0.35985851287841797,
      "learning_rate": 3.430709690629641e-07,
      "loss": 0.7741,
      "step": 6378
    },
    {
      "epoch": 0.9747488253046568,
      "grad_norm": 0.3565606474876404,
      "learning_rate": 3.3895236343937097e-07,
      "loss": 0.705,
      "step": 6379
    },
    {
      "epoch": 0.9749016312029645,
      "grad_norm": 0.2602279484272003,
      "learning_rate": 3.348585875292298e-07,
      "loss": 0.6135,
      "step": 6380
    },
    {
      "epoch": 0.9750544371012722,
      "grad_norm": 0.3423800766468048,
      "learning_rate": 3.307896423524581e-07,
      "loss": 1.0398,
      "step": 6381
    },
    {
      "epoch": 0.9752072429995798,
      "grad_norm": 0.2850226163864136,
      "learning_rate": 3.267455289227894e-07,
      "loss": 0.7439,
      "step": 6382
    },
    {
      "epoch": 0.9753600488978874,
      "grad_norm": 0.3209698498249054,
      "learning_rate": 3.227262482477955e-07,
      "loss": 0.6061,
      "step": 6383
    },
    {
      "epoch": 0.9755128547961951,
      "grad_norm": 0.35197779536247253,
      "learning_rate": 3.187318013288421e-07,
      "loss": 0.684,
      "step": 6384
    },
    {
      "epoch": 0.9756656606945028,
      "grad_norm": 0.3414282500743866,
      "learning_rate": 3.147621891611108e-07,
      "loss": 0.7432,
      "step": 6385
    },
    {
      "epoch": 0.9758184665928105,
      "grad_norm": 0.28099194169044495,
      "learning_rate": 3.1081741273358835e-07,
      "loss": 0.6576,
      "step": 6386
    },
    {
      "epoch": 0.9759712724911181,
      "grad_norm": 0.2589315176010132,
      "learning_rate": 3.0689747302911074e-07,
      "loss": 0.8223,
      "step": 6387
    },
    {
      "epoch": 0.9761240783894258,
      "grad_norm": 0.3601363003253937,
      "learning_rate": 3.0300237102426355e-07,
      "loss": 0.8068,
      "step": 6388
    },
    {
      "epoch": 0.9762768842877335,
      "grad_norm": 0.3021461069583893,
      "learning_rate": 2.9913210768950374e-07,
      "loss": 0.6874,
      "step": 6389
    },
    {
      "epoch": 0.9764296901860412,
      "grad_norm": 0.2406938225030899,
      "learning_rate": 2.952866839890711e-07,
      "loss": 0.4772,
      "step": 6390
    },
    {
      "epoch": 0.9765824960843489,
      "grad_norm": 0.26343291997909546,
      "learning_rate": 2.9146610088099933e-07,
      "loss": 0.6531,
      "step": 6391
    },
    {
      "epoch": 0.9767353019826566,
      "grad_norm": 0.30205318331718445,
      "learning_rate": 2.8767035931718256e-07,
      "loss": 0.7255,
      "step": 6392
    },
    {
      "epoch": 0.9768881078809643,
      "grad_norm": 0.3833494186401367,
      "learning_rate": 2.838994602432865e-07,
      "loss": 0.498,
      "step": 6393
    },
    {
      "epoch": 0.9770409137792718,
      "grad_norm": 0.41331061720848083,
      "learning_rate": 2.8015340459879304e-07,
      "loss": 0.6305,
      "step": 6394
    },
    {
      "epoch": 0.9771937196775795,
      "grad_norm": 0.32242459058761597,
      "learning_rate": 2.764321933170111e-07,
      "loss": 0.7668,
      "step": 6395
    },
    {
      "epoch": 0.9773465255758872,
      "grad_norm": 0.2696183919906616,
      "learning_rate": 2.727358273250324e-07,
      "loss": 0.7781,
      "step": 6396
    },
    {
      "epoch": 0.9774993314741949,
      "grad_norm": 0.3143042325973511,
      "learning_rate": 2.690643075437982e-07,
      "loss": 0.5344,
      "step": 6397
    },
    {
      "epoch": 0.9776521373725026,
      "grad_norm": 0.2705305218696594,
      "learning_rate": 2.654176348880322e-07,
      "loss": 0.6465,
      "step": 6398
    },
    {
      "epoch": 0.9778049432708102,
      "grad_norm": 0.2748562693595886,
      "learning_rate": 2.617958102662521e-07,
      "loss": 0.6275,
      "step": 6399
    },
    {
      "epoch": 0.9779577491691179,
      "grad_norm": 0.27969980239868164,
      "learning_rate": 2.581988345808251e-07,
      "loss": 0.8317,
      "step": 6400
    },
    {
      "epoch": 0.9781105550674256,
      "grad_norm": 0.3142959773540497,
      "learning_rate": 2.5462670872790085e-07,
      "loss": 0.856,
      "step": 6401
    },
    {
      "epoch": 0.9782633609657333,
      "grad_norm": 0.37164634466171265,
      "learning_rate": 2.510794335974453e-07,
      "loss": 0.5511,
      "step": 6402
    },
    {
      "epoch": 0.978416166864041,
      "grad_norm": 0.28552374243736267,
      "learning_rate": 2.475570100732405e-07,
      "loss": 0.648,
      "step": 6403
    },
    {
      "epoch": 0.9785689727623487,
      "grad_norm": 0.3474300503730774,
      "learning_rate": 2.44059439032851e-07,
      "loss": 0.711,
      "step": 6404
    },
    {
      "epoch": 0.9787217786606563,
      "grad_norm": 0.2783917188644409,
      "learning_rate": 2.405867213476798e-07,
      "loss": 0.578,
      "step": 6405
    },
    {
      "epoch": 0.9788745845589639,
      "grad_norm": 0.32661694288253784,
      "learning_rate": 2.3713885788291258e-07,
      "loss": 0.5063,
      "step": 6406
    },
    {
      "epoch": 0.9790273904572716,
      "grad_norm": 0.2747705280780792,
      "learning_rate": 2.3371584949757331e-07,
      "loss": 0.6251,
      "step": 6407
    },
    {
      "epoch": 0.9791801963555793,
      "grad_norm": 0.2549538016319275,
      "learning_rate": 2.303176970444687e-07,
      "loss": 0.8576,
      "step": 6408
    },
    {
      "epoch": 0.979333002253887,
      "grad_norm": 0.35974299907684326,
      "learning_rate": 2.2694440137022155e-07,
      "loss": 0.8619,
      "step": 6409
    },
    {
      "epoch": 0.9794858081521947,
      "grad_norm": 0.26300784945487976,
      "learning_rate": 2.2359596331524847e-07,
      "loss": 0.6533,
      "step": 6410
    },
    {
      "epoch": 0.9796386140505023,
      "grad_norm": 0.24799039959907532,
      "learning_rate": 2.2027238371380431e-07,
      "loss": 0.5962,
      "step": 6411
    },
    {
      "epoch": 0.97979141994881,
      "grad_norm": 0.27073585987091064,
      "learning_rate": 2.1697366339391568e-07,
      "loss": 0.7697,
      "step": 6412
    },
    {
      "epoch": 0.9799442258471177,
      "grad_norm": 0.24513952434062958,
      "learning_rate": 2.136998031774362e-07,
      "loss": 0.5483,
      "step": 6413
    },
    {
      "epoch": 0.9800970317454254,
      "grad_norm": 0.30561357736587524,
      "learning_rate": 2.1045080388001348e-07,
      "loss": 0.7539,
      "step": 6414
    },
    {
      "epoch": 0.9802498376437331,
      "grad_norm": 0.4293336570262909,
      "learning_rate": 2.072266663111222e-07,
      "loss": 0.6453,
      "step": 6415
    },
    {
      "epoch": 0.9804026435420408,
      "grad_norm": 0.3037967383861542,
      "learning_rate": 2.040273912740198e-07,
      "loss": 0.6495,
      "step": 6416
    },
    {
      "epoch": 0.9805554494403484,
      "grad_norm": 0.24957461655139923,
      "learning_rate": 2.0085297956577987e-07,
      "loss": 0.5915,
      "step": 6417
    },
    {
      "epoch": 0.980708255338656,
      "grad_norm": 0.2533586621284485,
      "learning_rate": 1.977034319772919e-07,
      "loss": 0.5522,
      "step": 6418
    },
    {
      "epoch": 0.9808610612369637,
      "grad_norm": 0.36297425627708435,
      "learning_rate": 1.9457874929321718e-07,
      "loss": 0.8412,
      "step": 6419
    },
    {
      "epoch": 0.9810138671352714,
      "grad_norm": 0.2810041308403015,
      "learning_rate": 1.9147893229206626e-07,
      "loss": 0.6873,
      "step": 6420
    },
    {
      "epoch": 0.9811666730335791,
      "grad_norm": 0.2846044898033142,
      "learning_rate": 1.884039817461103e-07,
      "loss": 0.7492,
      "step": 6421
    },
    {
      "epoch": 0.9813194789318868,
      "grad_norm": 0.2690313756465912,
      "learning_rate": 1.8535389842146978e-07,
      "loss": 0.7325,
      "step": 6422
    },
    {
      "epoch": 0.9814722848301944,
      "grad_norm": 0.3626621663570404,
      "learning_rate": 1.8232868307802574e-07,
      "loss": 0.6484,
      "step": 6423
    },
    {
      "epoch": 0.9816250907285021,
      "grad_norm": 0.2992866337299347,
      "learning_rate": 1.7932833646950865e-07,
      "loss": 0.6384,
      "step": 6424
    },
    {
      "epoch": 0.9817778966268098,
      "grad_norm": 0.25797179341316223,
      "learning_rate": 1.763528593434094e-07,
      "loss": 0.6844,
      "step": 6425
    },
    {
      "epoch": 0.9819307025251175,
      "grad_norm": 1.2198379039764404,
      "learning_rate": 1.7340225244105722e-07,
      "loss": 0.5816,
      "step": 6426
    },
    {
      "epoch": 0.9820835084234252,
      "grad_norm": 0.27036669850349426,
      "learning_rate": 1.7047651649756414e-07,
      "loss": 0.8355,
      "step": 6427
    },
    {
      "epoch": 0.9822363143217329,
      "grad_norm": 0.3024647831916809,
      "learning_rate": 1.6757565224184702e-07,
      "loss": 0.5679,
      "step": 6428
    },
    {
      "epoch": 0.9823891202200405,
      "grad_norm": 0.3333212733268738,
      "learning_rate": 1.6469966039664996e-07,
      "loss": 0.5932,
      "step": 6429
    },
    {
      "epoch": 0.9825419261183481,
      "grad_norm": 0.32781001925468445,
      "learning_rate": 1.6184854167847764e-07,
      "loss": 0.8297,
      "step": 6430
    },
    {
      "epoch": 0.9826947320166558,
      "grad_norm": 0.2812676429748535,
      "learning_rate": 1.5902229679768398e-07,
      "loss": 0.5423,
      "step": 6431
    },
    {
      "epoch": 0.9828475379149635,
      "grad_norm": 0.39646944403648376,
      "learning_rate": 1.562209264583836e-07,
      "loss": 0.6035,
      "step": 6432
    },
    {
      "epoch": 0.9830003438132712,
      "grad_norm": 0.32472583651542664,
      "learning_rate": 1.5344443135854037e-07,
      "loss": 0.6549,
      "step": 6433
    },
    {
      "epoch": 0.9831531497115789,
      "grad_norm": 0.2801920771598816,
      "learning_rate": 1.5069281218987873e-07,
      "loss": 0.7028,
      "step": 6434
    },
    {
      "epoch": 0.9833059556098865,
      "grad_norm": 0.2855030596256256,
      "learning_rate": 1.4796606963793924e-07,
      "loss": 0.6612,
      "step": 6435
    },
    {
      "epoch": 0.9834587615081942,
      "grad_norm": 0.260616272687912,
      "learning_rate": 1.4526420438207845e-07,
      "loss": 0.6242,
      "step": 6436
    },
    {
      "epoch": 0.9836115674065019,
      "grad_norm": 0.2544775605201721,
      "learning_rate": 1.4258721709542456e-07,
      "loss": 0.6208,
      "step": 6437
    },
    {
      "epoch": 0.9837643733048096,
      "grad_norm": 0.29562172293663025,
      "learning_rate": 1.3993510844494406e-07,
      "loss": 0.6581,
      "step": 6438
    },
    {
      "epoch": 0.9839171792031173,
      "grad_norm": 0.3022526204586029,
      "learning_rate": 1.373078790913862e-07,
      "loss": 0.5913,
      "step": 6439
    },
    {
      "epoch": 0.984069985101425,
      "grad_norm": 0.28804531693458557,
      "learning_rate": 1.3470552968929405e-07,
      "loss": 0.587,
      "step": 6440
    },
    {
      "epoch": 0.9842227909997326,
      "grad_norm": 0.29857340455055237,
      "learning_rate": 1.3212806088702678e-07,
      "loss": 0.7566,
      "step": 6441
    },
    {
      "epoch": 0.9843755968980402,
      "grad_norm": 0.25568607449531555,
      "learning_rate": 1.2957547332673736e-07,
      "loss": 0.5728,
      "step": 6442
    },
    {
      "epoch": 0.9845284027963479,
      "grad_norm": 0.2965342700481415,
      "learning_rate": 1.2704776764438374e-07,
      "loss": 0.7538,
      "step": 6443
    },
    {
      "epoch": 0.9846812086946556,
      "grad_norm": 0.30493324995040894,
      "learning_rate": 1.2454494446971777e-07,
      "loss": 0.9407,
      "step": 6444
    },
    {
      "epoch": 0.9848340145929633,
      "grad_norm": 0.3716253936290741,
      "learning_rate": 1.2206700442629616e-07,
      "loss": 0.7945,
      "step": 6445
    },
    {
      "epoch": 0.984986820491271,
      "grad_norm": 0.310250848531723,
      "learning_rate": 1.1961394813149173e-07,
      "loss": 0.7811,
      "step": 6446
    },
    {
      "epoch": 0.9851396263895786,
      "grad_norm": 0.40353232622146606,
      "learning_rate": 1.171857761964379e-07,
      "loss": 0.7651,
      "step": 6447
    },
    {
      "epoch": 0.9852924322878863,
      "grad_norm": 0.4181149899959564,
      "learning_rate": 1.1478248922611734e-07,
      "loss": 0.6196,
      "step": 6448
    },
    {
      "epoch": 0.985445238186194,
      "grad_norm": 0.2948823869228363,
      "learning_rate": 1.1240408781927336e-07,
      "loss": 0.6426,
      "step": 6449
    },
    {
      "epoch": 0.9855980440845017,
      "grad_norm": 0.2704068124294281,
      "learning_rate": 1.100505725684764e-07,
      "loss": 0.6998,
      "step": 6450
    },
    {
      "epoch": 0.9857508499828094,
      "grad_norm": 0.30879390239715576,
      "learning_rate": 1.0772194406007962e-07,
      "loss": 0.5366,
      "step": 6451
    },
    {
      "epoch": 0.9859036558811171,
      "grad_norm": 0.3139268159866333,
      "learning_rate": 1.0541820287423009e-07,
      "loss": 0.6669,
      "step": 6452
    },
    {
      "epoch": 0.9860564617794246,
      "grad_norm": 0.26715102791786194,
      "learning_rate": 1.0313934958490201e-07,
      "loss": 0.5881,
      "step": 6453
    },
    {
      "epoch": 0.9862092676777323,
      "grad_norm": 0.2639731764793396,
      "learning_rate": 1.0088538475985232e-07,
      "loss": 0.7364,
      "step": 6454
    },
    {
      "epoch": 0.98636207357604,
      "grad_norm": 0.293069452047348,
      "learning_rate": 9.865630896062073e-08,
      "loss": 0.6151,
      "step": 6455
    },
    {
      "epoch": 0.9865148794743477,
      "grad_norm": 0.27687135338783264,
      "learning_rate": 9.645212274257409e-08,
      "loss": 0.5804,
      "step": 6456
    },
    {
      "epoch": 0.9866676853726554,
      "grad_norm": 0.251794695854187,
      "learning_rate": 9.427282665487314e-08,
      "loss": 0.7532,
      "step": 6457
    },
    {
      "epoch": 0.986820491270963,
      "grad_norm": 0.4326778054237366,
      "learning_rate": 9.211842124046132e-08,
      "loss": 0.7069,
      "step": 6458
    },
    {
      "epoch": 0.9869732971692707,
      "grad_norm": 0.2878977358341217,
      "learning_rate": 8.99889070360982e-08,
      "loss": 0.6604,
      "step": 6459
    },
    {
      "epoch": 0.9871261030675784,
      "grad_norm": 0.40638232231140137,
      "learning_rate": 8.788428457232601e-08,
      "loss": 0.7524,
      "step": 6460
    },
    {
      "epoch": 0.9872789089658861,
      "grad_norm": 0.6233261823654175,
      "learning_rate": 8.58045543735031e-08,
      "loss": 0.5427,
      "step": 6461
    },
    {
      "epoch": 0.9874317148641938,
      "grad_norm": 0.3426532745361328,
      "learning_rate": 8.374971695775946e-08,
      "loss": 0.6793,
      "step": 6462
    },
    {
      "epoch": 0.9875845207625015,
      "grad_norm": 0.3367740213871002,
      "learning_rate": 8.171977283706333e-08,
      "loss": 0.6241,
      "step": 6463
    },
    {
      "epoch": 0.9877373266608092,
      "grad_norm": 0.30172792077064514,
      "learning_rate": 7.971472251714352e-08,
      "loss": 0.7202,
      "step": 6464
    },
    {
      "epoch": 0.9878901325591167,
      "grad_norm": 0.27091774344444275,
      "learning_rate": 7.773456649754485e-08,
      "loss": 0.5109,
      "step": 6465
    },
    {
      "epoch": 0.9880429384574244,
      "grad_norm": 0.30555298924446106,
      "learning_rate": 7.577930527160604e-08,
      "loss": 0.6339,
      "step": 6466
    },
    {
      "epoch": 0.9881957443557321,
      "grad_norm": 0.34500110149383545,
      "learning_rate": 7.384893932645965e-08,
      "loss": 0.9091,
      "step": 6467
    },
    {
      "epoch": 0.9883485502540398,
      "grad_norm": 0.28037339448928833,
      "learning_rate": 7.194346914305427e-08,
      "loss": 0.7315,
      "step": 6468
    },
    {
      "epoch": 0.9885013561523475,
      "grad_norm": 0.3640865385532379,
      "learning_rate": 7.00628951961102e-08,
      "loss": 0.6326,
      "step": 6469
    },
    {
      "epoch": 0.9886541620506551,
      "grad_norm": 0.399759441614151,
      "learning_rate": 6.820721795416373e-08,
      "loss": 0.7696,
      "step": 6470
    },
    {
      "epoch": 0.9888069679489628,
      "grad_norm": 0.3892582356929779,
      "learning_rate": 6.637643787953395e-08,
      "loss": 0.6551,
      "step": 6471
    },
    {
      "epoch": 0.9889597738472705,
      "grad_norm": 0.25890570878982544,
      "learning_rate": 6.45705554283449e-08,
      "loss": 0.7363,
      "step": 6472
    },
    {
      "epoch": 0.9891125797455782,
      "grad_norm": 0.2983168065547943,
      "learning_rate": 6.278957105052552e-08,
      "loss": 0.5663,
      "step": 6473
    },
    {
      "epoch": 0.9892653856438859,
      "grad_norm": 0.27841824293136597,
      "learning_rate": 6.103348518978758e-08,
      "loss": 0.6055,
      "step": 6474
    },
    {
      "epoch": 0.9894181915421936,
      "grad_norm": 0.3425733745098114,
      "learning_rate": 5.9302298283636645e-08,
      "loss": 0.8392,
      "step": 6475
    },
    {
      "epoch": 0.9895709974405013,
      "grad_norm": 0.27444878220558167,
      "learning_rate": 5.7596010763394384e-08,
      "loss": 0.889,
      "step": 6476
    },
    {
      "epoch": 0.9897238033388088,
      "grad_norm": 0.2911641001701355,
      "learning_rate": 5.591462305416517e-08,
      "loss": 0.7882,
      "step": 6477
    },
    {
      "epoch": 0.9898766092371165,
      "grad_norm": 0.2616746127605438,
      "learning_rate": 5.4258135574858373e-08,
      "loss": 0.6209,
      "step": 6478
    },
    {
      "epoch": 0.9900294151354242,
      "grad_norm": 0.3651047348976135,
      "learning_rate": 5.262654873816608e-08,
      "loss": 0.6955,
      "step": 6479
    },
    {
      "epoch": 0.9901822210337319,
      "grad_norm": 0.3302326202392578,
      "learning_rate": 5.1019862950585364e-08,
      "loss": 0.765,
      "step": 6480
    },
    {
      "epoch": 0.9903350269320396,
      "grad_norm": 1.1409937143325806,
      "learning_rate": 4.9438078612407124e-08,
      "loss": 0.7911,
      "step": 6481
    },
    {
      "epoch": 0.9904878328303472,
      "grad_norm": 0.2777949571609497,
      "learning_rate": 4.7881196117727237e-08,
      "loss": 0.746,
      "step": 6482
    },
    {
      "epoch": 0.9906406387286549,
      "grad_norm": 0.2922649681568146,
      "learning_rate": 4.634921585442431e-08,
      "loss": 0.5891,
      "step": 6483
    },
    {
      "epoch": 0.9907934446269626,
      "grad_norm": 0.38926783204078674,
      "learning_rate": 4.484213820417082e-08,
      "loss": 0.6729,
      "step": 6484
    },
    {
      "epoch": 0.9909462505252703,
      "grad_norm": 0.29072585701942444,
      "learning_rate": 4.335996354245531e-08,
      "loss": 0.6274,
      "step": 6485
    },
    {
      "epoch": 0.991099056423578,
      "grad_norm": 0.2570950388908386,
      "learning_rate": 4.190269223854904e-08,
      "loss": 0.6373,
      "step": 6486
    },
    {
      "epoch": 0.9912518623218857,
      "grad_norm": 0.29220324754714966,
      "learning_rate": 4.047032465550604e-08,
      "loss": 0.6388,
      "step": 6487
    },
    {
      "epoch": 0.9914046682201934,
      "grad_norm": 0.3268110752105713,
      "learning_rate": 3.906286115020752e-08,
      "loss": 0.811,
      "step": 6488
    },
    {
      "epoch": 0.9915574741185009,
      "grad_norm": 0.402190625667572,
      "learning_rate": 3.7680302073295204e-08,
      "loss": 0.7488,
      "step": 6489
    },
    {
      "epoch": 0.9917102800168086,
      "grad_norm": 0.3102878928184509,
      "learning_rate": 3.632264776922689e-08,
      "loss": 0.5882,
      "step": 6490
    },
    {
      "epoch": 0.9918630859151163,
      "grad_norm": 0.3195332884788513,
      "learning_rate": 3.4989898576254234e-08,
      "loss": 0.6679,
      "step": 6491
    },
    {
      "epoch": 0.992015891813424,
      "grad_norm": 0.3438659608364105,
      "learning_rate": 3.3682054826411627e-08,
      "loss": 0.6346,
      "step": 6492
    },
    {
      "epoch": 0.9921686977117317,
      "grad_norm": 0.3308936655521393,
      "learning_rate": 3.239911684554953e-08,
      "loss": 0.6171,
      "step": 6493
    },
    {
      "epoch": 0.9923215036100393,
      "grad_norm": 0.25023937225341797,
      "learning_rate": 3.114108495329004e-08,
      "loss": 0.6195,
      "step": 6494
    },
    {
      "epoch": 0.992474309508347,
      "grad_norm": 0.3215552568435669,
      "learning_rate": 2.9907959463071346e-08,
      "loss": 0.8299,
      "step": 6495
    },
    {
      "epoch": 0.9926271154066547,
      "grad_norm": 0.38360846042633057,
      "learning_rate": 2.8699740682103237e-08,
      "loss": 0.7618,
      "step": 6496
    },
    {
      "epoch": 0.9927799213049624,
      "grad_norm": 0.299980491399765,
      "learning_rate": 2.7516428911422698e-08,
      "loss": 0.6017,
      "step": 6497
    },
    {
      "epoch": 0.9929327272032701,
      "grad_norm": 0.2955044209957123,
      "learning_rate": 2.6358024445816142e-08,
      "loss": 0.5831,
      "step": 6498
    },
    {
      "epoch": 0.9930855331015778,
      "grad_norm": 0.4438436031341553,
      "learning_rate": 2.5224527573919353e-08,
      "loss": 0.7005,
      "step": 6499
    },
    {
      "epoch": 0.9932383389998855,
      "grad_norm": 0.34785547852516174,
      "learning_rate": 2.4115938578117558e-08,
      "loss": 0.779,
      "step": 6500
    },
    {
      "epoch": 0.993391144898193,
      "grad_norm": 0.2642604410648346,
      "learning_rate": 2.3032257734600937e-08,
      "loss": 0.715,
      "step": 6501
    },
    {
      "epoch": 0.9935439507965007,
      "grad_norm": 0.2867738604545593,
      "learning_rate": 2.1973485313364627e-08,
      "loss": 0.6634,
      "step": 6502
    },
    {
      "epoch": 0.9936967566948084,
      "grad_norm": 0.27846238017082214,
      "learning_rate": 2.0939621578197623e-08,
      "loss": 0.7115,
      "step": 6503
    },
    {
      "epoch": 0.9938495625931161,
      "grad_norm": 0.5877290368080139,
      "learning_rate": 1.993066678668276e-08,
      "loss": 0.7385,
      "step": 6504
    },
    {
      "epoch": 0.9940023684914238,
      "grad_norm": 0.25964123010635376,
      "learning_rate": 1.894662119017454e-08,
      "loss": 0.5934,
      "step": 6505
    },
    {
      "epoch": 0.9941551743897314,
      "grad_norm": 0.33881714940071106,
      "learning_rate": 1.7987485033854613e-08,
      "loss": 0.6664,
      "step": 6506
    },
    {
      "epoch": 0.9943079802880391,
      "grad_norm": 0.25839629769325256,
      "learning_rate": 1.7053258556676277e-08,
      "loss": 0.6436,
      "step": 6507
    },
    {
      "epoch": 0.9944607861863468,
      "grad_norm": 0.39556682109832764,
      "learning_rate": 1.6143941991397792e-08,
      "loss": 0.7926,
      "step": 6508
    },
    {
      "epoch": 0.9946135920846545,
      "grad_norm": 0.3205921947956085,
      "learning_rate": 1.525953556457127e-08,
      "loss": 0.8031,
      "step": 6509
    },
    {
      "epoch": 0.9947663979829622,
      "grad_norm": 0.2670949399471283,
      "learning_rate": 1.440003949653157e-08,
      "loss": 0.696,
      "step": 6510
    },
    {
      "epoch": 0.9949192038812699,
      "grad_norm": 0.2751535177230835,
      "learning_rate": 1.3565454001429611e-08,
      "loss": 0.6453,
      "step": 6511
    },
    {
      "epoch": 0.9950720097795774,
      "grad_norm": 0.3137910068035126,
      "learning_rate": 1.2755779287176862e-08,
      "loss": 0.6896,
      "step": 6512
    },
    {
      "epoch": 0.9952248156778851,
      "grad_norm": 0.3138625919818878,
      "learning_rate": 1.1971015555500841e-08,
      "loss": 0.6058,
      "step": 6513
    },
    {
      "epoch": 0.9953776215761928,
      "grad_norm": 0.25915777683258057,
      "learning_rate": 1.1211163001922931e-08,
      "loss": 0.6876,
      "step": 6514
    },
    {
      "epoch": 0.9955304274745005,
      "grad_norm": 0.29875871539115906,
      "learning_rate": 1.0476221815758358e-08,
      "loss": 0.6711,
      "step": 6515
    },
    {
      "epoch": 0.9956832333728082,
      "grad_norm": 0.24008925259113312,
      "learning_rate": 9.766192180105105e-09,
      "loss": 0.6005,
      "step": 6516
    },
    {
      "epoch": 0.9958360392711159,
      "grad_norm": 0.28519225120544434,
      "learning_rate": 9.081074271855005e-09,
      "loss": 0.6034,
      "step": 6517
    },
    {
      "epoch": 0.9959888451694235,
      "grad_norm": 0.5138064622879028,
      "learning_rate": 8.420868261715953e-09,
      "loss": 0.7087,
      "step": 6518
    },
    {
      "epoch": 0.9961416510677312,
      "grad_norm": 0.47081300616264343,
      "learning_rate": 7.785574314156385e-09,
      "loss": 0.5475,
      "step": 6519
    },
    {
      "epoch": 0.9962944569660389,
      "grad_norm": 0.28254806995391846,
      "learning_rate": 7.175192587471902e-09,
      "loss": 0.6454,
      "step": 6520
    },
    {
      "epoch": 0.9964472628643466,
      "grad_norm": 0.3041219115257263,
      "learning_rate": 6.589723233718648e-09,
      "loss": 0.7802,
      "step": 6521
    },
    {
      "epoch": 0.9966000687626543,
      "grad_norm": 0.32065922021865845,
      "learning_rate": 6.029166398768826e-09,
      "loss": 0.8051,
      "step": 6522
    },
    {
      "epoch": 0.996752874660962,
      "grad_norm": 0.3174595534801483,
      "learning_rate": 5.493522222277392e-09,
      "loss": 0.7947,
      "step": 6523
    },
    {
      "epoch": 0.9969056805592695,
      "grad_norm": 0.2868291139602661,
      "learning_rate": 4.9827908376931524e-09,
      "loss": 0.8688,
      "step": 6524
    },
    {
      "epoch": 0.9970584864575772,
      "grad_norm": 0.3602704405784607,
      "learning_rate": 4.49697237226987e-09,
      "loss": 0.6016,
      "step": 6525
    },
    {
      "epoch": 0.9972112923558849,
      "grad_norm": 0.2539635896682739,
      "learning_rate": 4.036066947032957e-09,
      "loss": 0.7062,
      "step": 6526
    },
    {
      "epoch": 0.9973640982541926,
      "grad_norm": 0.3819586932659149,
      "learning_rate": 3.6000746768238834e-09,
      "loss": 0.8573,
      "step": 6527
    },
    {
      "epoch": 0.9975169041525003,
      "grad_norm": 0.292914479970932,
      "learning_rate": 3.1889956702557675e-09,
      "loss": 0.716,
      "step": 6528
    },
    {
      "epoch": 0.997669710050808,
      "grad_norm": 0.33907845616340637,
      "learning_rate": 2.8028300297577857e-09,
      "loss": 0.6172,
      "step": 6529
    },
    {
      "epoch": 0.9978225159491156,
      "grad_norm": 0.30105409026145935,
      "learning_rate": 2.4415778515418654e-09,
      "loss": 0.858,
      "step": 6530
    },
    {
      "epoch": 0.9979753218474233,
      "grad_norm": 0.2698521018028259,
      "learning_rate": 2.105239225591582e-09,
      "loss": 0.5617,
      "step": 6531
    },
    {
      "epoch": 0.998128127745731,
      "grad_norm": 0.34229230880737305,
      "learning_rate": 1.7938142357176724e-09,
      "loss": 0.7707,
      "step": 6532
    },
    {
      "epoch": 0.9982809336440387,
      "grad_norm": 0.33994314074516296,
      "learning_rate": 1.5073029595025213e-09,
      "loss": 0.559,
      "step": 6533
    },
    {
      "epoch": 0.9984337395423464,
      "grad_norm": 0.3268454372882843,
      "learning_rate": 1.245705468333469e-09,
      "loss": 0.8513,
      "step": 6534
    },
    {
      "epoch": 0.9985865454406541,
      "grad_norm": 0.2963615953922272,
      "learning_rate": 1.0090218273806073e-09,
      "loss": 0.7742,
      "step": 6535
    },
    {
      "epoch": 0.9987393513389616,
      "grad_norm": 0.2639820873737335,
      "learning_rate": 7.972520956189833e-10,
      "loss": 0.5563,
      "step": 6536
    },
    {
      "epoch": 0.9988921572372693,
      "grad_norm": 0.4614093005657196,
      "learning_rate": 6.10396325806395e-10,
      "loss": 0.8817,
      "step": 6537
    },
    {
      "epoch": 0.999044963135577,
      "grad_norm": 0.29073458909988403,
      "learning_rate": 4.4845456448339154e-10,
      "loss": 0.6519,
      "step": 6538
    },
    {
      "epoch": 0.9991977690338847,
      "grad_norm": 0.28678426146507263,
      "learning_rate": 3.1142685201768217e-10,
      "loss": 0.6484,
      "step": 6539
    },
    {
      "epoch": 0.9993505749321924,
      "grad_norm": 0.3102395534515381,
      "learning_rate": 1.993132225375227e-10,
      "loss": 0.7115,
      "step": 6540
    },
    {
      "epoch": 0.9995033808305,
      "grad_norm": 0.30270108580589294,
      "learning_rate": 1.1211370396502218e-10,
      "loss": 0.7306,
      "step": 6541
    },
    {
      "epoch": 0.9996561867288077,
      "grad_norm": 0.29784873127937317,
      "learning_rate": 4.9828318049449654e-11,
      "loss": 0.7267,
      "step": 6542
    },
    {
      "epoch": 0.9998089926271154,
      "grad_norm": 0.2554897665977478,
      "learning_rate": 1.245708028951853e-11,
      "loss": 0.6481,
      "step": 6543
    },
    {
      "epoch": 0.9999617985254231,
      "grad_norm": 0.5652004480361938,
      "learning_rate": 0.0,
      "loss": 0.7754,
      "step": 6544
    }
  ],
  "logging_steps": 1,
  "max_steps": 6544,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.300239644555477e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}