{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.9991899554475496, "eval_steps": 500, "global_step": 3702, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 33.316567074748235, "learning_rate": 2.688172043010753e-08, "loss": 1.6612, "step": 1 }, { "epoch": 0.0, "grad_norm": 31.980110590569833, "learning_rate": 5.376344086021506e-08, "loss": 1.6515, "step": 2 }, { "epoch": 0.0, "grad_norm": 33.117438713997785, "learning_rate": 8.064516129032259e-08, "loss": 1.6539, "step": 3 }, { "epoch": 0.0, "grad_norm": 36.626797991999304, "learning_rate": 1.0752688172043012e-07, "loss": 1.6692, "step": 4 }, { "epoch": 0.0, "grad_norm": 31.787166496256624, "learning_rate": 1.3440860215053764e-07, "loss": 1.6603, "step": 5 }, { "epoch": 0.0, "grad_norm": 34.42111228601516, "learning_rate": 1.6129032258064518e-07, "loss": 1.6906, "step": 6 }, { "epoch": 0.0, "grad_norm": 33.994178212797685, "learning_rate": 1.881720430107527e-07, "loss": 1.6012, "step": 7 }, { "epoch": 0.0, "grad_norm": 31.663937822004556, "learning_rate": 2.1505376344086024e-07, "loss": 1.59, "step": 8 }, { "epoch": 0.0, "grad_norm": 36.22600796728921, "learning_rate": 2.4193548387096775e-07, "loss": 1.7013, "step": 9 }, { "epoch": 0.01, "grad_norm": 33.56970537484393, "learning_rate": 2.688172043010753e-07, "loss": 1.6689, "step": 10 }, { "epoch": 0.01, "grad_norm": 36.08877900878925, "learning_rate": 2.956989247311828e-07, "loss": 1.6624, "step": 11 }, { "epoch": 0.01, "grad_norm": 35.119916918201156, "learning_rate": 3.2258064516129035e-07, "loss": 1.6348, "step": 12 }, { "epoch": 0.01, "grad_norm": 33.46913669890192, "learning_rate": 3.4946236559139783e-07, "loss": 1.5936, "step": 13 }, { "epoch": 0.01, "grad_norm": 27.672120188002385, "learning_rate": 3.763440860215054e-07, "loss": 1.5626, "step": 14 }, { "epoch": 0.01, "grad_norm": 23.733566814473278, "learning_rate": 4.032258064516129e-07, "loss": 1.5614, "step": 15 }, { "epoch": 0.01, "grad_norm": 23.24932146808129, "learning_rate": 4.301075268817205e-07, "loss": 1.544, "step": 16 }, { "epoch": 0.01, "grad_norm": 19.296627011785894, "learning_rate": 4.56989247311828e-07, "loss": 1.5665, "step": 17 }, { "epoch": 0.01, "grad_norm": 18.841920437790964, "learning_rate": 4.838709677419355e-07, "loss": 1.5304, "step": 18 }, { "epoch": 0.01, "grad_norm": 10.52949245893451, "learning_rate": 5.10752688172043e-07, "loss": 1.3625, "step": 19 }, { "epoch": 0.01, "grad_norm": 10.712493184060385, "learning_rate": 5.376344086021506e-07, "loss": 1.3516, "step": 20 }, { "epoch": 0.01, "grad_norm": 10.872328766124275, "learning_rate": 5.645161290322581e-07, "loss": 1.3628, "step": 21 }, { "epoch": 0.01, "grad_norm": 9.488253779364236, "learning_rate": 5.913978494623656e-07, "loss": 1.3267, "step": 22 }, { "epoch": 0.01, "grad_norm": 9.562223413373314, "learning_rate": 6.182795698924732e-07, "loss": 1.3566, "step": 23 }, { "epoch": 0.01, "grad_norm": 10.056490027443989, "learning_rate": 6.451612903225807e-07, "loss": 1.3543, "step": 24 }, { "epoch": 0.01, "grad_norm": 10.321030423714994, "learning_rate": 6.720430107526882e-07, "loss": 1.2683, "step": 25 }, { "epoch": 0.01, "grad_norm": 9.469640956178132, "learning_rate": 6.989247311827957e-07, "loss": 1.2258, "step": 26 }, { "epoch": 0.01, "grad_norm": 7.9517963669206555, "learning_rate": 7.258064516129033e-07, "loss": 1.219, "step": 27 }, { "epoch": 0.02, "grad_norm": 5.308924533436733, "learning_rate": 7.526881720430108e-07, "loss": 1.2129, "step": 28 }, { "epoch": 0.02, "grad_norm": 3.934112558638316, "learning_rate": 7.795698924731184e-07, "loss": 1.1509, "step": 29 }, { "epoch": 0.02, "grad_norm": 3.600129100969449, "learning_rate": 8.064516129032258e-07, "loss": 1.2003, "step": 30 }, { "epoch": 0.02, "grad_norm": 3.1952192025345654, "learning_rate": 8.333333333333333e-07, "loss": 1.1278, "step": 31 }, { "epoch": 0.02, "grad_norm": 3.12285707027288, "learning_rate": 8.60215053763441e-07, "loss": 1.1344, "step": 32 }, { "epoch": 0.02, "grad_norm": 2.9467673015189835, "learning_rate": 8.870967741935485e-07, "loss": 1.1364, "step": 33 }, { "epoch": 0.02, "grad_norm": 2.460729618610968, "learning_rate": 9.13978494623656e-07, "loss": 1.1115, "step": 34 }, { "epoch": 0.02, "grad_norm": 3.023601724608506, "learning_rate": 9.408602150537635e-07, "loss": 1.084, "step": 35 }, { "epoch": 0.02, "grad_norm": 2.7882481101476166, "learning_rate": 9.67741935483871e-07, "loss": 1.0858, "step": 36 }, { "epoch": 0.02, "grad_norm": 2.7530256496406627, "learning_rate": 9.946236559139785e-07, "loss": 1.0487, "step": 37 }, { "epoch": 0.02, "grad_norm": 2.562289856232147, "learning_rate": 1.021505376344086e-06, "loss": 1.0983, "step": 38 }, { "epoch": 0.02, "grad_norm": 2.4010845077571994, "learning_rate": 1.0483870967741936e-06, "loss": 1.061, "step": 39 }, { "epoch": 0.02, "grad_norm": 2.3795829755598525, "learning_rate": 1.0752688172043011e-06, "loss": 1.0822, "step": 40 }, { "epoch": 0.02, "grad_norm": 2.2872595652300065, "learning_rate": 1.1021505376344087e-06, "loss": 1.047, "step": 41 }, { "epoch": 0.02, "grad_norm": 2.1173270554016046, "learning_rate": 1.1290322580645162e-06, "loss": 1.0336, "step": 42 }, { "epoch": 0.02, "grad_norm": 2.071765831109401, "learning_rate": 1.1559139784946237e-06, "loss": 1.0095, "step": 43 }, { "epoch": 0.02, "grad_norm": 2.014806046153581, "learning_rate": 1.1827956989247313e-06, "loss": 0.9934, "step": 44 }, { "epoch": 0.02, "grad_norm": 1.9854406570065062, "learning_rate": 1.2096774193548388e-06, "loss": 1.0311, "step": 45 }, { "epoch": 0.02, "grad_norm": 1.9637035893969708, "learning_rate": 1.2365591397849463e-06, "loss": 1.0088, "step": 46 }, { "epoch": 0.03, "grad_norm": 1.8649655809552, "learning_rate": 1.2634408602150539e-06, "loss": 0.9933, "step": 47 }, { "epoch": 0.03, "grad_norm": 2.0282744824651027, "learning_rate": 1.2903225806451614e-06, "loss": 0.9861, "step": 48 }, { "epoch": 0.03, "grad_norm": 2.1056042025752846, "learning_rate": 1.317204301075269e-06, "loss": 1.0233, "step": 49 }, { "epoch": 0.03, "grad_norm": 1.8708929873461766, "learning_rate": 1.3440860215053765e-06, "loss": 0.9937, "step": 50 }, { "epoch": 0.03, "grad_norm": 1.9598046834539296, "learning_rate": 1.3709677419354838e-06, "loss": 0.9913, "step": 51 }, { "epoch": 0.03, "grad_norm": 1.8721816995449694, "learning_rate": 1.3978494623655913e-06, "loss": 0.9929, "step": 52 }, { "epoch": 0.03, "grad_norm": 1.9031810612692708, "learning_rate": 1.424731182795699e-06, "loss": 0.9939, "step": 53 }, { "epoch": 0.03, "grad_norm": 1.880058850954062, "learning_rate": 1.4516129032258066e-06, "loss": 0.976, "step": 54 }, { "epoch": 0.03, "grad_norm": 1.8460057480433814, "learning_rate": 1.4784946236559141e-06, "loss": 0.9522, "step": 55 }, { "epoch": 0.03, "grad_norm": 1.9149828318328113, "learning_rate": 1.5053763440860217e-06, "loss": 0.9995, "step": 56 }, { "epoch": 0.03, "grad_norm": 1.8305236499414903, "learning_rate": 1.5322580645161292e-06, "loss": 0.9785, "step": 57 }, { "epoch": 0.03, "grad_norm": 1.8270058759553203, "learning_rate": 1.5591397849462367e-06, "loss": 0.9548, "step": 58 }, { "epoch": 0.03, "grad_norm": 1.8909943429025797, "learning_rate": 1.5860215053763443e-06, "loss": 1.0282, "step": 59 }, { "epoch": 0.03, "grad_norm": 1.8944126114347248, "learning_rate": 1.6129032258064516e-06, "loss": 0.9696, "step": 60 }, { "epoch": 0.03, "grad_norm": 1.8413718475767964, "learning_rate": 1.6397849462365591e-06, "loss": 0.9668, "step": 61 }, { "epoch": 0.03, "grad_norm": 1.79286868071972, "learning_rate": 1.6666666666666667e-06, "loss": 0.9458, "step": 62 }, { "epoch": 0.03, "grad_norm": 2.0608479577206205, "learning_rate": 1.6935483870967742e-06, "loss": 0.9858, "step": 63 }, { "epoch": 0.03, "grad_norm": 1.7986821456335191, "learning_rate": 1.720430107526882e-06, "loss": 0.9376, "step": 64 }, { "epoch": 0.04, "grad_norm": 1.7527072520567788, "learning_rate": 1.7473118279569895e-06, "loss": 0.9281, "step": 65 }, { "epoch": 0.04, "grad_norm": 1.8037245838042075, "learning_rate": 1.774193548387097e-06, "loss": 0.9456, "step": 66 }, { "epoch": 0.04, "grad_norm": 1.8081472552271722, "learning_rate": 1.8010752688172045e-06, "loss": 0.9356, "step": 67 }, { "epoch": 0.04, "grad_norm": 1.6986988999434052, "learning_rate": 1.827956989247312e-06, "loss": 0.944, "step": 68 }, { "epoch": 0.04, "grad_norm": 1.8061440254388728, "learning_rate": 1.8548387096774196e-06, "loss": 0.9449, "step": 69 }, { "epoch": 0.04, "grad_norm": 2.0189572704109136, "learning_rate": 1.881720430107527e-06, "loss": 0.9385, "step": 70 }, { "epoch": 0.04, "grad_norm": 1.8622318389169208, "learning_rate": 1.9086021505376345e-06, "loss": 0.9712, "step": 71 }, { "epoch": 0.04, "grad_norm": 1.7773050472268217, "learning_rate": 1.935483870967742e-06, "loss": 0.9481, "step": 72 }, { "epoch": 0.04, "grad_norm": 1.6984030481497467, "learning_rate": 1.9623655913978495e-06, "loss": 0.9058, "step": 73 }, { "epoch": 0.04, "grad_norm": 1.7861428890319955, "learning_rate": 1.989247311827957e-06, "loss": 0.9086, "step": 74 }, { "epoch": 0.04, "grad_norm": 1.7538037385081537, "learning_rate": 2.0161290322580646e-06, "loss": 0.9453, "step": 75 }, { "epoch": 0.04, "grad_norm": 1.8178786766811956, "learning_rate": 2.043010752688172e-06, "loss": 0.9139, "step": 76 }, { "epoch": 0.04, "grad_norm": 1.7066246852556968, "learning_rate": 2.0698924731182797e-06, "loss": 0.9307, "step": 77 }, { "epoch": 0.04, "grad_norm": 1.743445461257098, "learning_rate": 2.096774193548387e-06, "loss": 0.9322, "step": 78 }, { "epoch": 0.04, "grad_norm": 1.766659803266471, "learning_rate": 2.1236559139784947e-06, "loss": 0.9368, "step": 79 }, { "epoch": 0.04, "grad_norm": 1.8475132519525201, "learning_rate": 2.1505376344086023e-06, "loss": 0.9324, "step": 80 }, { "epoch": 0.04, "grad_norm": 1.7312991272779612, "learning_rate": 2.17741935483871e-06, "loss": 0.9306, "step": 81 }, { "epoch": 0.04, "grad_norm": 1.7573367494724814, "learning_rate": 2.2043010752688173e-06, "loss": 0.9102, "step": 82 }, { "epoch": 0.04, "grad_norm": 1.6630045740530859, "learning_rate": 2.231182795698925e-06, "loss": 0.8997, "step": 83 }, { "epoch": 0.05, "grad_norm": 1.844344077839761, "learning_rate": 2.2580645161290324e-06, "loss": 0.9302, "step": 84 }, { "epoch": 0.05, "grad_norm": 1.8154387651724273, "learning_rate": 2.28494623655914e-06, "loss": 0.9212, "step": 85 }, { "epoch": 0.05, "grad_norm": 1.805785867801324, "learning_rate": 2.3118279569892475e-06, "loss": 0.8967, "step": 86 }, { "epoch": 0.05, "grad_norm": 1.7875515799839696, "learning_rate": 2.338709677419355e-06, "loss": 0.9378, "step": 87 }, { "epoch": 0.05, "grad_norm": 1.7961284209132538, "learning_rate": 2.3655913978494625e-06, "loss": 0.9128, "step": 88 }, { "epoch": 0.05, "grad_norm": 1.7940374994480033, "learning_rate": 2.39247311827957e-06, "loss": 0.9087, "step": 89 }, { "epoch": 0.05, "grad_norm": 1.7368013840333223, "learning_rate": 2.4193548387096776e-06, "loss": 0.9225, "step": 90 }, { "epoch": 0.05, "grad_norm": 1.7898605854588252, "learning_rate": 2.446236559139785e-06, "loss": 0.9052, "step": 91 }, { "epoch": 0.05, "grad_norm": 1.7049357837688788, "learning_rate": 2.4731182795698927e-06, "loss": 0.9077, "step": 92 }, { "epoch": 0.05, "grad_norm": 1.7323350809066655, "learning_rate": 2.5e-06, "loss": 0.8944, "step": 93 }, { "epoch": 0.05, "grad_norm": 1.815325460656344, "learning_rate": 2.5268817204301077e-06, "loss": 0.9157, "step": 94 }, { "epoch": 0.05, "grad_norm": 1.7893529300412028, "learning_rate": 2.5537634408602153e-06, "loss": 0.89, "step": 95 }, { "epoch": 0.05, "grad_norm": 1.7353749759903345, "learning_rate": 2.580645161290323e-06, "loss": 0.9118, "step": 96 }, { "epoch": 0.05, "grad_norm": 1.6830893674134135, "learning_rate": 2.6075268817204303e-06, "loss": 0.8663, "step": 97 }, { "epoch": 0.05, "grad_norm": 1.9067265186746236, "learning_rate": 2.634408602150538e-06, "loss": 0.9013, "step": 98 }, { "epoch": 0.05, "grad_norm": 1.878302655192332, "learning_rate": 2.6612903225806454e-06, "loss": 0.9217, "step": 99 }, { "epoch": 0.05, "grad_norm": 1.7532931436899426, "learning_rate": 2.688172043010753e-06, "loss": 0.886, "step": 100 }, { "epoch": 0.05, "grad_norm": 1.863951041707985, "learning_rate": 2.71505376344086e-06, "loss": 0.9234, "step": 101 }, { "epoch": 0.06, "grad_norm": 1.770327650866675, "learning_rate": 2.7419354838709676e-06, "loss": 0.8964, "step": 102 }, { "epoch": 0.06, "grad_norm": 1.8007540634948822, "learning_rate": 2.768817204301075e-06, "loss": 0.9015, "step": 103 }, { "epoch": 0.06, "grad_norm": 1.78903281126814, "learning_rate": 2.7956989247311827e-06, "loss": 0.888, "step": 104 }, { "epoch": 0.06, "grad_norm": 1.860709044110902, "learning_rate": 2.822580645161291e-06, "loss": 0.9122, "step": 105 }, { "epoch": 0.06, "grad_norm": 1.7975894765605227, "learning_rate": 2.849462365591398e-06, "loss": 0.9042, "step": 106 }, { "epoch": 0.06, "grad_norm": 1.7503582413947056, "learning_rate": 2.8763440860215057e-06, "loss": 0.9097, "step": 107 }, { "epoch": 0.06, "grad_norm": 1.8855537681080181, "learning_rate": 2.903225806451613e-06, "loss": 0.9074, "step": 108 }, { "epoch": 0.06, "grad_norm": 1.9352328739460873, "learning_rate": 2.9301075268817207e-06, "loss": 0.9021, "step": 109 }, { "epoch": 0.06, "grad_norm": 1.8289570270906401, "learning_rate": 2.9569892473118283e-06, "loss": 0.8941, "step": 110 }, { "epoch": 0.06, "grad_norm": 1.7584503974426828, "learning_rate": 2.983870967741936e-06, "loss": 0.869, "step": 111 }, { "epoch": 0.06, "grad_norm": 1.8344964136701984, "learning_rate": 3.0107526881720433e-06, "loss": 0.918, "step": 112 }, { "epoch": 0.06, "grad_norm": 1.9637032997147765, "learning_rate": 3.037634408602151e-06, "loss": 0.8787, "step": 113 }, { "epoch": 0.06, "grad_norm": 1.8431584118102178, "learning_rate": 3.0645161290322584e-06, "loss": 0.9001, "step": 114 }, { "epoch": 0.06, "grad_norm": 1.8064474419272367, "learning_rate": 3.091397849462366e-06, "loss": 0.9013, "step": 115 }, { "epoch": 0.06, "grad_norm": 1.902578046024261, "learning_rate": 3.1182795698924735e-06, "loss": 0.8637, "step": 116 }, { "epoch": 0.06, "grad_norm": 1.8078640360395348, "learning_rate": 3.145161290322581e-06, "loss": 0.9066, "step": 117 }, { "epoch": 0.06, "grad_norm": 2.0248804706901553, "learning_rate": 3.1720430107526885e-06, "loss": 0.8748, "step": 118 }, { "epoch": 0.06, "grad_norm": 1.787145953750176, "learning_rate": 3.198924731182796e-06, "loss": 0.8748, "step": 119 }, { "epoch": 0.06, "grad_norm": 1.856866804769782, "learning_rate": 3.225806451612903e-06, "loss": 0.8772, "step": 120 }, { "epoch": 0.07, "grad_norm": 1.8315278425030803, "learning_rate": 3.2526881720430107e-06, "loss": 0.8683, "step": 121 }, { "epoch": 0.07, "grad_norm": 3.9772734155686065, "learning_rate": 3.2795698924731183e-06, "loss": 0.9286, "step": 122 }, { "epoch": 0.07, "grad_norm": 1.7791254728107881, "learning_rate": 3.306451612903226e-06, "loss": 0.8788, "step": 123 }, { "epoch": 0.07, "grad_norm": 1.9305697726606612, "learning_rate": 3.3333333333333333e-06, "loss": 0.8886, "step": 124 }, { "epoch": 0.07, "grad_norm": 1.7976738335449705, "learning_rate": 3.360215053763441e-06, "loss": 0.8809, "step": 125 }, { "epoch": 0.07, "grad_norm": 1.879059730466772, "learning_rate": 3.3870967741935484e-06, "loss": 0.8797, "step": 126 }, { "epoch": 0.07, "grad_norm": 1.8668284369314867, "learning_rate": 3.413978494623656e-06, "loss": 0.8833, "step": 127 }, { "epoch": 0.07, "grad_norm": 1.8860297057540394, "learning_rate": 3.440860215053764e-06, "loss": 0.8809, "step": 128 }, { "epoch": 0.07, "grad_norm": 1.846837882958692, "learning_rate": 3.4677419354838714e-06, "loss": 0.8746, "step": 129 }, { "epoch": 0.07, "grad_norm": 1.901851876301591, "learning_rate": 3.494623655913979e-06, "loss": 0.873, "step": 130 }, { "epoch": 0.07, "grad_norm": 1.8534773789605765, "learning_rate": 3.5215053763440865e-06, "loss": 0.8593, "step": 131 }, { "epoch": 0.07, "grad_norm": 1.9090162565419244, "learning_rate": 3.548387096774194e-06, "loss": 0.8973, "step": 132 }, { "epoch": 0.07, "grad_norm": 1.8685428630910401, "learning_rate": 3.5752688172043015e-06, "loss": 0.88, "step": 133 }, { "epoch": 0.07, "grad_norm": 1.9269189690214545, "learning_rate": 3.602150537634409e-06, "loss": 0.8924, "step": 134 }, { "epoch": 0.07, "grad_norm": 1.8876074940273573, "learning_rate": 3.6290322580645166e-06, "loss": 0.8444, "step": 135 }, { "epoch": 0.07, "grad_norm": 1.8380402414102346, "learning_rate": 3.655913978494624e-06, "loss": 0.8793, "step": 136 }, { "epoch": 0.07, "grad_norm": 1.8550544472572061, "learning_rate": 3.6827956989247317e-06, "loss": 0.8693, "step": 137 }, { "epoch": 0.07, "grad_norm": 1.8740799219290236, "learning_rate": 3.7096774193548392e-06, "loss": 0.8789, "step": 138 }, { "epoch": 0.08, "grad_norm": 1.9758342643029574, "learning_rate": 3.7365591397849468e-06, "loss": 0.891, "step": 139 }, { "epoch": 0.08, "grad_norm": 1.9893805667595184, "learning_rate": 3.763440860215054e-06, "loss": 0.8871, "step": 140 }, { "epoch": 0.08, "grad_norm": 1.8405964122691059, "learning_rate": 3.7903225806451614e-06, "loss": 0.8721, "step": 141 }, { "epoch": 0.08, "grad_norm": 1.8787170326665776, "learning_rate": 3.817204301075269e-06, "loss": 0.8365, "step": 142 }, { "epoch": 0.08, "grad_norm": 1.8869318809930997, "learning_rate": 3.8440860215053765e-06, "loss": 0.8517, "step": 143 }, { "epoch": 0.08, "grad_norm": 1.9174624944899452, "learning_rate": 3.870967741935484e-06, "loss": 0.8405, "step": 144 }, { "epoch": 0.08, "grad_norm": 1.8583235111930767, "learning_rate": 3.8978494623655915e-06, "loss": 0.84, "step": 145 }, { "epoch": 0.08, "grad_norm": 1.8618711325809767, "learning_rate": 3.924731182795699e-06, "loss": 0.8597, "step": 146 }, { "epoch": 0.08, "grad_norm": 1.9817634077682016, "learning_rate": 3.951612903225807e-06, "loss": 0.8684, "step": 147 }, { "epoch": 0.08, "grad_norm": 1.9349299598699123, "learning_rate": 3.978494623655914e-06, "loss": 0.8709, "step": 148 }, { "epoch": 0.08, "grad_norm": 1.8479182853281413, "learning_rate": 4.005376344086022e-06, "loss": 0.8682, "step": 149 }, { "epoch": 0.08, "grad_norm": 1.8312231897616595, "learning_rate": 4.032258064516129e-06, "loss": 0.8504, "step": 150 }, { "epoch": 0.08, "grad_norm": 1.9961123413007091, "learning_rate": 4.059139784946237e-06, "loss": 0.8513, "step": 151 }, { "epoch": 0.08, "grad_norm": 1.8271438242220583, "learning_rate": 4.086021505376344e-06, "loss": 0.8786, "step": 152 }, { "epoch": 0.08, "grad_norm": 1.8777118307497216, "learning_rate": 4.112903225806452e-06, "loss": 0.8322, "step": 153 }, { "epoch": 0.08, "grad_norm": 1.8537349212603724, "learning_rate": 4.139784946236559e-06, "loss": 0.8407, "step": 154 }, { "epoch": 0.08, "grad_norm": 1.9653869778388608, "learning_rate": 4.166666666666667e-06, "loss": 0.8626, "step": 155 }, { "epoch": 0.08, "grad_norm": 1.9551493508352102, "learning_rate": 4.193548387096774e-06, "loss": 0.8692, "step": 156 }, { "epoch": 0.08, "grad_norm": 1.9115681147792138, "learning_rate": 4.220430107526882e-06, "loss": 0.8569, "step": 157 }, { "epoch": 0.09, "grad_norm": 1.969818004100738, "learning_rate": 4.2473118279569895e-06, "loss": 0.8605, "step": 158 }, { "epoch": 0.09, "grad_norm": 1.8156404678151865, "learning_rate": 4.274193548387097e-06, "loss": 0.7983, "step": 159 }, { "epoch": 0.09, "grad_norm": 2.0305758085623915, "learning_rate": 4.3010752688172045e-06, "loss": 0.8534, "step": 160 }, { "epoch": 0.09, "grad_norm": 1.8907121514522187, "learning_rate": 4.327956989247312e-06, "loss": 0.8529, "step": 161 }, { "epoch": 0.09, "grad_norm": 1.9320502276118958, "learning_rate": 4.35483870967742e-06, "loss": 0.8358, "step": 162 }, { "epoch": 0.09, "grad_norm": 2.125608001621172, "learning_rate": 4.381720430107527e-06, "loss": 0.837, "step": 163 }, { "epoch": 0.09, "grad_norm": 2.0092826287294683, "learning_rate": 4.408602150537635e-06, "loss": 0.831, "step": 164 }, { "epoch": 0.09, "grad_norm": 1.9607580788817132, "learning_rate": 4.435483870967742e-06, "loss": 0.8334, "step": 165 }, { "epoch": 0.09, "grad_norm": 2.0553681741592724, "learning_rate": 4.46236559139785e-06, "loss": 0.8118, "step": 166 }, { "epoch": 0.09, "grad_norm": 2.0255937291801795, "learning_rate": 4.489247311827957e-06, "loss": 0.8447, "step": 167 }, { "epoch": 0.09, "grad_norm": 2.0701090678366474, "learning_rate": 4.516129032258065e-06, "loss": 0.8232, "step": 168 }, { "epoch": 0.09, "grad_norm": 1.9162542429747143, "learning_rate": 4.543010752688172e-06, "loss": 0.8261, "step": 169 }, { "epoch": 0.09, "grad_norm": 1.815777793494547, "learning_rate": 4.56989247311828e-06, "loss": 0.838, "step": 170 }, { "epoch": 0.09, "grad_norm": 2.102947422828333, "learning_rate": 4.596774193548387e-06, "loss": 0.8519, "step": 171 }, { "epoch": 0.09, "grad_norm": 2.0577745518606387, "learning_rate": 4.623655913978495e-06, "loss": 0.8375, "step": 172 }, { "epoch": 0.09, "grad_norm": 2.0180171212458933, "learning_rate": 4.6505376344086025e-06, "loss": 0.8332, "step": 173 }, { "epoch": 0.09, "grad_norm": 1.8793711189690858, "learning_rate": 4.67741935483871e-06, "loss": 0.837, "step": 174 }, { "epoch": 0.09, "grad_norm": 1.8737300966766057, "learning_rate": 4.7043010752688175e-06, "loss": 0.817, "step": 175 }, { "epoch": 0.1, "grad_norm": 1.991193399603255, "learning_rate": 4.731182795698925e-06, "loss": 0.8787, "step": 176 }, { "epoch": 0.1, "grad_norm": 1.9956715275032013, "learning_rate": 4.758064516129033e-06, "loss": 0.8301, "step": 177 }, { "epoch": 0.1, "grad_norm": 1.894554592586964, "learning_rate": 4.78494623655914e-06, "loss": 0.8247, "step": 178 }, { "epoch": 0.1, "grad_norm": 1.9513414732558496, "learning_rate": 4.811827956989248e-06, "loss": 0.8308, "step": 179 }, { "epoch": 0.1, "grad_norm": 1.8931093659472684, "learning_rate": 4.838709677419355e-06, "loss": 0.8277, "step": 180 }, { "epoch": 0.1, "grad_norm": 1.9343084234845418, "learning_rate": 4.865591397849463e-06, "loss": 0.8605, "step": 181 }, { "epoch": 0.1, "grad_norm": 1.9245033373547629, "learning_rate": 4.89247311827957e-06, "loss": 0.8093, "step": 182 }, { "epoch": 0.1, "grad_norm": 1.9211117543952956, "learning_rate": 4.919354838709678e-06, "loss": 0.8399, "step": 183 }, { "epoch": 0.1, "grad_norm": 1.9051441341893183, "learning_rate": 4.946236559139785e-06, "loss": 0.8348, "step": 184 }, { "epoch": 0.1, "grad_norm": 1.9482581019442875, "learning_rate": 4.973118279569893e-06, "loss": 0.8168, "step": 185 }, { "epoch": 0.1, "grad_norm": 2.0104420464142043, "learning_rate": 5e-06, "loss": 0.8505, "step": 186 }, { "epoch": 0.1, "grad_norm": 1.8797686903998179, "learning_rate": 4.999999002042615e-06, "loss": 0.8105, "step": 187 }, { "epoch": 0.1, "grad_norm": 1.9401287251819193, "learning_rate": 4.999996008171254e-06, "loss": 0.8019, "step": 188 }, { "epoch": 0.1, "grad_norm": 2.012457859066416, "learning_rate": 4.9999910183883085e-06, "loss": 0.8175, "step": 189 }, { "epoch": 0.1, "grad_norm": 1.8816702153995835, "learning_rate": 4.999984032697762e-06, "loss": 0.8342, "step": 190 }, { "epoch": 0.1, "grad_norm": 1.9316359436524568, "learning_rate": 4.999975051105191e-06, "loss": 0.8354, "step": 191 }, { "epoch": 0.1, "grad_norm": 1.9182288151850746, "learning_rate": 4.999964073617768e-06, "loss": 0.8089, "step": 192 }, { "epoch": 0.1, "grad_norm": 1.8695391170527185, "learning_rate": 4.999951100244255e-06, "loss": 0.8267, "step": 193 }, { "epoch": 0.1, "grad_norm": 1.956105738790641, "learning_rate": 4.999936130995011e-06, "loss": 0.8281, "step": 194 }, { "epoch": 0.11, "grad_norm": 1.897965336309057, "learning_rate": 4.999919165881985e-06, "loss": 0.7897, "step": 195 }, { "epoch": 0.11, "grad_norm": 1.906384126032916, "learning_rate": 4.999900204918724e-06, "loss": 0.8216, "step": 196 }, { "epoch": 0.11, "grad_norm": 1.8963280763288448, "learning_rate": 4.999879248120363e-06, "loss": 0.781, "step": 197 }, { "epoch": 0.11, "grad_norm": 1.9650690425416506, "learning_rate": 4.999856295503635e-06, "loss": 0.8072, "step": 198 }, { "epoch": 0.11, "grad_norm": 2.0216889787436974, "learning_rate": 4.999831347086864e-06, "loss": 0.8201, "step": 199 }, { "epoch": 0.11, "grad_norm": 1.8457974032911406, "learning_rate": 4.999804402889969e-06, "loss": 0.8022, "step": 200 }, { "epoch": 0.11, "grad_norm": 1.8756929488316012, "learning_rate": 4.9997754629344596e-06, "loss": 0.8346, "step": 201 }, { "epoch": 0.11, "grad_norm": 2.0234374436446743, "learning_rate": 4.999744527243441e-06, "loss": 0.8238, "step": 202 }, { "epoch": 0.11, "grad_norm": 1.9109053427972582, "learning_rate": 4.999711595841612e-06, "loss": 0.7676, "step": 203 }, { "epoch": 0.11, "grad_norm": 1.9754904811578242, "learning_rate": 4.999676668755263e-06, "loss": 0.781, "step": 204 }, { "epoch": 0.11, "grad_norm": 1.9057273712946252, "learning_rate": 4.99963974601228e-06, "loss": 0.8074, "step": 205 }, { "epoch": 0.11, "grad_norm": 1.9511596471787325, "learning_rate": 4.999600827642139e-06, "loss": 0.8059, "step": 206 }, { "epoch": 0.11, "grad_norm": 1.984825988731519, "learning_rate": 4.999559913675912e-06, "loss": 0.8179, "step": 207 }, { "epoch": 0.11, "grad_norm": 1.9927143154167044, "learning_rate": 4.9995170041462635e-06, "loss": 0.7922, "step": 208 }, { "epoch": 0.11, "grad_norm": 1.851979376543888, "learning_rate": 4.999472099087451e-06, "loss": 0.7932, "step": 209 }, { "epoch": 0.11, "grad_norm": 1.8797278127437524, "learning_rate": 4.999425198535325e-06, "loss": 0.7747, "step": 210 }, { "epoch": 0.11, "grad_norm": 1.993343282904433, "learning_rate": 4.9993763025273286e-06, "loss": 0.7977, "step": 211 }, { "epoch": 0.11, "grad_norm": 2.017693531936902, "learning_rate": 4.9993254111025e-06, "loss": 0.7713, "step": 212 }, { "epoch": 0.12, "grad_norm": 1.9539404756432692, "learning_rate": 4.999272524301469e-06, "loss": 0.7858, "step": 213 }, { "epoch": 0.12, "grad_norm": 1.8869092376373888, "learning_rate": 4.999217642166456e-06, "loss": 0.7983, "step": 214 }, { "epoch": 0.12, "grad_norm": 1.819369987549526, "learning_rate": 4.999160764741281e-06, "loss": 0.773, "step": 215 }, { "epoch": 0.12, "grad_norm": 1.8236301436479825, "learning_rate": 4.9991018920713505e-06, "loss": 0.7848, "step": 216 }, { "epoch": 0.12, "grad_norm": 1.8804086118755707, "learning_rate": 4.999041024203668e-06, "loss": 0.7681, "step": 217 }, { "epoch": 0.12, "grad_norm": 1.8997390300973922, "learning_rate": 4.998978161186827e-06, "loss": 0.7769, "step": 218 }, { "epoch": 0.12, "grad_norm": 1.9642761173180865, "learning_rate": 4.9989133030710154e-06, "loss": 0.7801, "step": 219 }, { "epoch": 0.12, "grad_norm": 1.9076462101378708, "learning_rate": 4.998846449908014e-06, "loss": 0.8175, "step": 220 }, { "epoch": 0.12, "grad_norm": 1.9690754457770134, "learning_rate": 4.998777601751196e-06, "loss": 0.7954, "step": 221 }, { "epoch": 0.12, "grad_norm": 1.8067913032565632, "learning_rate": 4.9987067586555275e-06, "loss": 0.766, "step": 222 }, { "epoch": 0.12, "grad_norm": 1.8068036306287154, "learning_rate": 4.998633920677567e-06, "loss": 0.7737, "step": 223 }, { "epoch": 0.12, "grad_norm": 1.8359136585988247, "learning_rate": 4.998559087875466e-06, "loss": 0.7887, "step": 224 }, { "epoch": 0.12, "grad_norm": 1.7832840103200143, "learning_rate": 4.998482260308969e-06, "loss": 0.7679, "step": 225 }, { "epoch": 0.12, "grad_norm": 1.8651835924748998, "learning_rate": 4.998403438039412e-06, "loss": 0.7801, "step": 226 }, { "epoch": 0.12, "grad_norm": 1.7511585569396315, "learning_rate": 4.998322621129724e-06, "loss": 0.7534, "step": 227 }, { "epoch": 0.12, "grad_norm": 1.7670872781618208, "learning_rate": 4.998239809644427e-06, "loss": 0.7896, "step": 228 }, { "epoch": 0.12, "grad_norm": 1.7550360567207788, "learning_rate": 4.998155003649632e-06, "loss": 0.8092, "step": 229 }, { "epoch": 0.12, "grad_norm": 1.843135195889086, "learning_rate": 4.9980682032130496e-06, "loss": 0.7813, "step": 230 }, { "epoch": 0.12, "grad_norm": 1.8379741860701668, "learning_rate": 4.9979794084039755e-06, "loss": 0.7451, "step": 231 }, { "epoch": 0.13, "grad_norm": 1.8211401226319122, "learning_rate": 4.997888619293302e-06, "loss": 0.7761, "step": 232 }, { "epoch": 0.13, "grad_norm": 1.7175949260496974, "learning_rate": 4.997795835953511e-06, "loss": 0.7596, "step": 233 }, { "epoch": 0.13, "grad_norm": 1.8689695786535683, "learning_rate": 4.997701058458677e-06, "loss": 0.7992, "step": 234 }, { "epoch": 0.13, "grad_norm": 1.831760347793965, "learning_rate": 4.9976042868844675e-06, "loss": 0.7448, "step": 235 }, { "epoch": 0.13, "grad_norm": 1.72888982927639, "learning_rate": 4.997505521308144e-06, "loss": 0.7343, "step": 236 }, { "epoch": 0.13, "grad_norm": 1.6756792232063396, "learning_rate": 4.997404761808554e-06, "loss": 0.7582, "step": 237 }, { "epoch": 0.13, "grad_norm": 1.7489576891135408, "learning_rate": 4.997302008466143e-06, "loss": 0.7477, "step": 238 }, { "epoch": 0.13, "grad_norm": 1.6963742206958685, "learning_rate": 4.997197261362944e-06, "loss": 0.7898, "step": 239 }, { "epoch": 0.13, "grad_norm": 1.801641199161574, "learning_rate": 4.9970905205825845e-06, "loss": 0.7543, "step": 240 }, { "epoch": 0.13, "grad_norm": 1.6272031743713797, "learning_rate": 4.996981786210283e-06, "loss": 0.7712, "step": 241 }, { "epoch": 0.13, "grad_norm": 1.703509359871784, "learning_rate": 4.996871058332849e-06, "loss": 0.7368, "step": 242 }, { "epoch": 0.13, "grad_norm": 1.6693340068805866, "learning_rate": 4.996758337038683e-06, "loss": 0.7663, "step": 243 }, { "epoch": 0.13, "grad_norm": 1.735837647741616, "learning_rate": 4.996643622417779e-06, "loss": 0.7828, "step": 244 }, { "epoch": 0.13, "grad_norm": 1.7440723194906, "learning_rate": 4.996526914561721e-06, "loss": 0.788, "step": 245 }, { "epoch": 0.13, "grad_norm": 1.743969049672581, "learning_rate": 4.996408213563684e-06, "loss": 0.7636, "step": 246 }, { "epoch": 0.13, "grad_norm": 1.6506411208979208, "learning_rate": 4.996287519518436e-06, "loss": 0.7528, "step": 247 }, { "epoch": 0.13, "grad_norm": 1.6013744178523366, "learning_rate": 4.996164832522333e-06, "loss": 0.7401, "step": 248 }, { "epoch": 0.13, "grad_norm": 1.725555897637127, "learning_rate": 4.996040152673326e-06, "loss": 0.7775, "step": 249 }, { "epoch": 0.14, "grad_norm": 1.6615810007301208, "learning_rate": 4.995913480070954e-06, "loss": 0.7562, "step": 250 }, { "epoch": 0.14, "grad_norm": 1.6555126721494287, "learning_rate": 4.995784814816349e-06, "loss": 0.7708, "step": 251 }, { "epoch": 0.14, "grad_norm": 1.593832921980404, "learning_rate": 4.995654157012233e-06, "loss": 0.7536, "step": 252 }, { "epoch": 0.14, "grad_norm": 1.6663604339749598, "learning_rate": 4.995521506762917e-06, "loss": 0.7642, "step": 253 }, { "epoch": 0.14, "grad_norm": 1.5715954625078343, "learning_rate": 4.995386864174306e-06, "loss": 0.751, "step": 254 }, { "epoch": 0.14, "grad_norm": 1.6994811719954899, "learning_rate": 4.995250229353895e-06, "loss": 0.7784, "step": 255 }, { "epoch": 0.14, "grad_norm": 1.6272330042651204, "learning_rate": 4.995111602410766e-06, "loss": 0.7719, "step": 256 }, { "epoch": 0.14, "grad_norm": 1.702326352683978, "learning_rate": 4.994970983455596e-06, "loss": 0.7429, "step": 257 }, { "epoch": 0.14, "grad_norm": 1.6205322305467758, "learning_rate": 4.99482837260065e-06, "loss": 0.7621, "step": 258 }, { "epoch": 0.14, "grad_norm": 1.6183518604970204, "learning_rate": 4.994683769959782e-06, "loss": 0.7694, "step": 259 }, { "epoch": 0.14, "grad_norm": 1.6023845740500755, "learning_rate": 4.994537175648441e-06, "loss": 0.7801, "step": 260 }, { "epoch": 0.14, "grad_norm": 1.651146993894283, "learning_rate": 4.99438858978366e-06, "loss": 0.745, "step": 261 }, { "epoch": 0.14, "grad_norm": 1.6467301125125537, "learning_rate": 4.9942380124840656e-06, "loss": 0.7369, "step": 262 }, { "epoch": 0.14, "grad_norm": 1.6734795975123398, "learning_rate": 4.994085443869874e-06, "loss": 0.7652, "step": 263 }, { "epoch": 0.14, "grad_norm": 1.5210713041450081, "learning_rate": 4.993930884062892e-06, "loss": 0.7637, "step": 264 }, { "epoch": 0.14, "grad_norm": 1.6304383507880529, "learning_rate": 4.993774333186513e-06, "loss": 0.7786, "step": 265 }, { "epoch": 0.14, "grad_norm": 1.531011023471268, "learning_rate": 4.993615791365722e-06, "loss": 0.7448, "step": 266 }, { "epoch": 0.14, "grad_norm": 1.596268386959725, "learning_rate": 4.993455258727094e-06, "loss": 0.7663, "step": 267 }, { "epoch": 0.14, "grad_norm": 1.5507937235475824, "learning_rate": 4.993292735398793e-06, "loss": 0.7505, "step": 268 }, { "epoch": 0.15, "grad_norm": 1.6174753667907542, "learning_rate": 4.9931282215105714e-06, "loss": 0.7481, "step": 269 }, { "epoch": 0.15, "grad_norm": 1.6107359634400829, "learning_rate": 4.992961717193773e-06, "loss": 0.7621, "step": 270 }, { "epoch": 0.15, "grad_norm": 1.6248457274403711, "learning_rate": 4.992793222581327e-06, "loss": 0.7382, "step": 271 }, { "epoch": 0.15, "grad_norm": 1.560586985326968, "learning_rate": 4.992622737807754e-06, "loss": 0.7587, "step": 272 }, { "epoch": 0.15, "grad_norm": 1.5986099928144866, "learning_rate": 4.9924502630091655e-06, "loss": 0.7652, "step": 273 }, { "epoch": 0.15, "grad_norm": 1.591471160128473, "learning_rate": 4.9922757983232575e-06, "loss": 0.7067, "step": 274 }, { "epoch": 0.15, "grad_norm": 1.6158382115880472, "learning_rate": 4.9920993438893175e-06, "loss": 0.7612, "step": 275 }, { "epoch": 0.15, "grad_norm": 1.572008820948022, "learning_rate": 4.99192089984822e-06, "loss": 0.7643, "step": 276 }, { "epoch": 0.15, "grad_norm": 1.5485262545616667, "learning_rate": 4.991740466342428e-06, "loss": 0.7591, "step": 277 }, { "epoch": 0.15, "grad_norm": 1.5572489837682697, "learning_rate": 4.991558043515996e-06, "loss": 0.7322, "step": 278 }, { "epoch": 0.15, "grad_norm": 1.6391762430616337, "learning_rate": 4.9913736315145614e-06, "loss": 0.7682, "step": 279 }, { "epoch": 0.15, "grad_norm": 1.568929524034426, "learning_rate": 4.991187230485355e-06, "loss": 0.765, "step": 280 }, { "epoch": 0.15, "grad_norm": 1.617763964618128, "learning_rate": 4.9909988405771905e-06, "loss": 0.7702, "step": 281 }, { "epoch": 0.15, "grad_norm": 1.5373713511251714, "learning_rate": 4.990808461940474e-06, "loss": 0.7281, "step": 282 }, { "epoch": 0.15, "grad_norm": 1.5685350319488212, "learning_rate": 4.990616094727196e-06, "loss": 0.7526, "step": 283 }, { "epoch": 0.15, "grad_norm": 1.5546482832397555, "learning_rate": 4.9904217390909365e-06, "loss": 0.7537, "step": 284 }, { "epoch": 0.15, "grad_norm": 1.4958480089133763, "learning_rate": 4.990225395186862e-06, "loss": 0.7547, "step": 285 }, { "epoch": 0.15, "grad_norm": 1.5436384149180422, "learning_rate": 4.9900270631717276e-06, "loss": 0.7516, "step": 286 }, { "epoch": 0.15, "grad_norm": 1.5367620654838186, "learning_rate": 4.989826743203875e-06, "loss": 0.7498, "step": 287 }, { "epoch": 0.16, "grad_norm": 1.655999305921837, "learning_rate": 4.9896244354432314e-06, "loss": 0.7417, "step": 288 }, { "epoch": 0.16, "grad_norm": 1.6363930944639844, "learning_rate": 4.989420140051313e-06, "loss": 0.7324, "step": 289 }, { "epoch": 0.16, "grad_norm": 1.6079578442333788, "learning_rate": 4.989213857191223e-06, "loss": 0.7703, "step": 290 }, { "epoch": 0.16, "grad_norm": 1.6215864093910637, "learning_rate": 4.98900558702765e-06, "loss": 0.7745, "step": 291 }, { "epoch": 0.16, "grad_norm": 1.5824309661494913, "learning_rate": 4.9887953297268685e-06, "loss": 0.7505, "step": 292 }, { "epoch": 0.16, "grad_norm": 1.5927073696106975, "learning_rate": 4.988583085456744e-06, "loss": 0.7796, "step": 293 }, { "epoch": 0.16, "grad_norm": 1.5777608330073847, "learning_rate": 4.9883688543867225e-06, "loss": 0.7766, "step": 294 }, { "epoch": 0.16, "grad_norm": 1.60681417587963, "learning_rate": 4.9881526366878395e-06, "loss": 0.7745, "step": 295 }, { "epoch": 0.16, "grad_norm": 1.5806211063527582, "learning_rate": 4.987934432532716e-06, "loss": 0.7682, "step": 296 }, { "epoch": 0.16, "grad_norm": 1.5402155179491328, "learning_rate": 4.987714242095558e-06, "loss": 0.7659, "step": 297 }, { "epoch": 0.16, "grad_norm": 1.5463137149676476, "learning_rate": 4.987492065552159e-06, "loss": 0.761, "step": 298 }, { "epoch": 0.16, "grad_norm": 1.5941294286794239, "learning_rate": 4.987267903079897e-06, "loss": 0.7394, "step": 299 }, { "epoch": 0.16, "grad_norm": 1.6488370348716668, "learning_rate": 4.9870417548577355e-06, "loss": 0.786, "step": 300 }, { "epoch": 0.16, "grad_norm": 1.5695944200391263, "learning_rate": 4.986813621066223e-06, "loss": 0.7481, "step": 301 }, { "epoch": 0.16, "grad_norm": 1.6267090905111357, "learning_rate": 4.986583501887495e-06, "loss": 0.7656, "step": 302 }, { "epoch": 0.16, "grad_norm": 1.5855497244513388, "learning_rate": 4.9863513975052696e-06, "loss": 0.7646, "step": 303 }, { "epoch": 0.16, "grad_norm": 1.5613325485351794, "learning_rate": 4.986117308104852e-06, "loss": 0.7522, "step": 304 }, { "epoch": 0.16, "grad_norm": 1.5833355489399696, "learning_rate": 4.98588123387313e-06, "loss": 0.7408, "step": 305 }, { "epoch": 0.17, "grad_norm": 1.5587991131038397, "learning_rate": 4.985643174998578e-06, "loss": 0.7423, "step": 306 }, { "epoch": 0.17, "grad_norm": 1.7136697716176872, "learning_rate": 4.985403131671254e-06, "loss": 0.7755, "step": 307 }, { "epoch": 0.17, "grad_norm": 1.5354621287917718, "learning_rate": 4.9851611040828005e-06, "loss": 0.7528, "step": 308 }, { "epoch": 0.17, "grad_norm": 1.5745278548948236, "learning_rate": 4.984917092426445e-06, "loss": 0.7473, "step": 309 }, { "epoch": 0.17, "grad_norm": 1.6535409533879866, "learning_rate": 4.984671096896996e-06, "loss": 0.7568, "step": 310 }, { "epoch": 0.17, "grad_norm": 1.611975395928841, "learning_rate": 4.9844231176908485e-06, "loss": 0.7593, "step": 311 }, { "epoch": 0.17, "grad_norm": 1.6501011882608942, "learning_rate": 4.984173155005982e-06, "loss": 0.7225, "step": 312 }, { "epoch": 0.17, "grad_norm": 1.6085633060853612, "learning_rate": 4.983921209041958e-06, "loss": 0.7416, "step": 313 }, { "epoch": 0.17, "grad_norm": 1.5977063305259172, "learning_rate": 4.98366727999992e-06, "loss": 0.7661, "step": 314 }, { "epoch": 0.17, "grad_norm": 1.6413896084231991, "learning_rate": 4.983411368082597e-06, "loss": 0.7325, "step": 315 }, { "epoch": 0.17, "grad_norm": 1.577613265617774, "learning_rate": 4.983153473494301e-06, "loss": 0.7428, "step": 316 }, { "epoch": 0.17, "grad_norm": 1.5985146895278637, "learning_rate": 4.982893596440925e-06, "loss": 0.7286, "step": 317 }, { "epoch": 0.17, "grad_norm": 1.6174297424204684, "learning_rate": 4.982631737129948e-06, "loss": 0.7361, "step": 318 }, { "epoch": 0.17, "grad_norm": 1.5497686210718917, "learning_rate": 4.982367895770428e-06, "loss": 0.7572, "step": 319 }, { "epoch": 0.17, "grad_norm": 1.5897537228792304, "learning_rate": 4.982102072573008e-06, "loss": 0.7373, "step": 320 }, { "epoch": 0.17, "grad_norm": 1.7222572842063568, "learning_rate": 4.98183426774991e-06, "loss": 0.7725, "step": 321 }, { "epoch": 0.17, "grad_norm": 1.5266617680472014, "learning_rate": 4.981564481514942e-06, "loss": 0.7628, "step": 322 }, { "epoch": 0.17, "grad_norm": 1.603072780556317, "learning_rate": 4.981292714083492e-06, "loss": 0.762, "step": 323 }, { "epoch": 0.17, "grad_norm": 1.6061068580861793, "learning_rate": 4.981018965672529e-06, "loss": 0.7185, "step": 324 }, { "epoch": 0.18, "grad_norm": 1.6195648607903905, "learning_rate": 4.980743236500607e-06, "loss": 0.7315, "step": 325 }, { "epoch": 0.18, "grad_norm": 1.5819895661575079, "learning_rate": 4.9804655267878555e-06, "loss": 0.73, "step": 326 }, { "epoch": 0.18, "grad_norm": 1.5244137361397427, "learning_rate": 4.98018583675599e-06, "loss": 0.7367, "step": 327 }, { "epoch": 0.18, "grad_norm": 1.5443830140649504, "learning_rate": 4.979904166628306e-06, "loss": 0.7289, "step": 328 }, { "epoch": 0.18, "grad_norm": 1.7960737736277728, "learning_rate": 4.9796205166296775e-06, "loss": 0.7459, "step": 329 }, { "epoch": 0.18, "grad_norm": 1.5490124049023746, "learning_rate": 4.979334886986562e-06, "loss": 0.7454, "step": 330 }, { "epoch": 0.18, "grad_norm": 1.4597326683812653, "learning_rate": 4.9790472779269975e-06, "loss": 0.7123, "step": 331 }, { "epoch": 0.18, "grad_norm": 1.5728549785320205, "learning_rate": 4.9787576896806e-06, "loss": 0.7701, "step": 332 }, { "epoch": 0.18, "grad_norm": 1.644991519640754, "learning_rate": 4.978466122478567e-06, "loss": 0.7521, "step": 333 }, { "epoch": 0.18, "grad_norm": 1.5798225069402587, "learning_rate": 4.978172576553676e-06, "loss": 0.7219, "step": 334 }, { "epoch": 0.18, "grad_norm": 1.5636314789328185, "learning_rate": 4.977877052140285e-06, "loss": 0.734, "step": 335 }, { "epoch": 0.18, "grad_norm": 1.552513874501997, "learning_rate": 4.97757954947433e-06, "loss": 0.7154, "step": 336 }, { "epoch": 0.18, "grad_norm": 1.5181092078519283, "learning_rate": 4.977280068793325e-06, "loss": 0.741, "step": 337 }, { "epoch": 0.18, "grad_norm": 1.61021495306524, "learning_rate": 4.976978610336368e-06, "loss": 0.7368, "step": 338 }, { "epoch": 0.18, "grad_norm": 1.599035334635175, "learning_rate": 4.976675174344132e-06, "loss": 0.7566, "step": 339 }, { "epoch": 0.18, "grad_norm": 1.6341371284116266, "learning_rate": 4.97636976105887e-06, "loss": 0.756, "step": 340 }, { "epoch": 0.18, "grad_norm": 1.6042689921919182, "learning_rate": 4.976062370724412e-06, "loss": 0.7587, "step": 341 }, { "epoch": 0.18, "grad_norm": 1.5573887724760496, "learning_rate": 4.975753003586172e-06, "loss": 0.7168, "step": 342 }, { "epoch": 0.19, "grad_norm": 1.5469915046572957, "learning_rate": 4.975441659891135e-06, "loss": 0.752, "step": 343 }, { "epoch": 0.19, "grad_norm": 1.530337637487127, "learning_rate": 4.975128339887867e-06, "loss": 0.7483, "step": 344 }, { "epoch": 0.19, "grad_norm": 1.5176232259694864, "learning_rate": 4.974813043826513e-06, "loss": 0.765, "step": 345 }, { "epoch": 0.19, "grad_norm": 1.47183891236816, "learning_rate": 4.974495771958795e-06, "loss": 0.7116, "step": 346 }, { "epoch": 0.19, "grad_norm": 1.4868792084806968, "learning_rate": 4.974176524538011e-06, "loss": 0.713, "step": 347 }, { "epoch": 0.19, "grad_norm": 1.5118345294901254, "learning_rate": 4.973855301819039e-06, "loss": 0.7162, "step": 348 }, { "epoch": 0.19, "grad_norm": 1.6271239703463907, "learning_rate": 4.97353210405833e-06, "loss": 0.7767, "step": 349 }, { "epoch": 0.19, "grad_norm": 1.6700934274384638, "learning_rate": 4.973206931513915e-06, "loss": 0.729, "step": 350 }, { "epoch": 0.19, "grad_norm": 1.5987940174203137, "learning_rate": 4.972879784445402e-06, "loss": 0.7378, "step": 351 }, { "epoch": 0.19, "grad_norm": 1.5508205367647467, "learning_rate": 4.9725506631139716e-06, "loss": 0.7756, "step": 352 }, { "epoch": 0.19, "grad_norm": 1.5388793162125067, "learning_rate": 4.972219567782386e-06, "loss": 0.7257, "step": 353 }, { "epoch": 0.19, "grad_norm": 1.6331347326363583, "learning_rate": 4.971886498714978e-06, "loss": 0.7266, "step": 354 }, { "epoch": 0.19, "grad_norm": 1.635086941729678, "learning_rate": 4.971551456177658e-06, "loss": 0.7377, "step": 355 }, { "epoch": 0.19, "grad_norm": 1.579286313648976, "learning_rate": 4.971214440437915e-06, "loss": 0.7422, "step": 356 }, { "epoch": 0.19, "grad_norm": 1.5139867162133138, "learning_rate": 4.97087545176481e-06, "loss": 0.7495, "step": 357 }, { "epoch": 0.19, "grad_norm": 1.5796287476457336, "learning_rate": 4.9705344904289795e-06, "loss": 0.7136, "step": 358 }, { "epoch": 0.19, "grad_norm": 1.6102149462229982, "learning_rate": 4.970191556702636e-06, "loss": 0.7417, "step": 359 }, { "epoch": 0.19, "grad_norm": 1.6117958987504424, "learning_rate": 4.9698466508595655e-06, "loss": 0.7097, "step": 360 }, { "epoch": 0.19, "grad_norm": 1.4414732264368804, "learning_rate": 4.9694997731751295e-06, "loss": 0.723, "step": 361 }, { "epoch": 0.2, "grad_norm": 1.6152988941873876, "learning_rate": 4.9691509239262625e-06, "loss": 0.7113, "step": 362 }, { "epoch": 0.2, "grad_norm": 1.5411882978050158, "learning_rate": 4.9688001033914756e-06, "loss": 0.7281, "step": 363 }, { "epoch": 0.2, "grad_norm": 1.5453839564668752, "learning_rate": 4.9684473118508505e-06, "loss": 0.7541, "step": 364 }, { "epoch": 0.2, "grad_norm": 1.5403871866113104, "learning_rate": 4.968092549586044e-06, "loss": 0.7306, "step": 365 }, { "epoch": 0.2, "grad_norm": 1.4865699996023756, "learning_rate": 4.967735816880286e-06, "loss": 0.7227, "step": 366 }, { "epoch": 0.2, "grad_norm": 1.6430409688228755, "learning_rate": 4.967377114018381e-06, "loss": 0.7546, "step": 367 }, { "epoch": 0.2, "grad_norm": 1.5603151213363429, "learning_rate": 4.9670164412867044e-06, "loss": 0.7531, "step": 368 }, { "epoch": 0.2, "grad_norm": 1.556262073949824, "learning_rate": 4.966653798973205e-06, "loss": 0.7607, "step": 369 }, { "epoch": 0.2, "grad_norm": 1.4399284001520942, "learning_rate": 4.966289187367403e-06, "loss": 0.7206, "step": 370 }, { "epoch": 0.2, "grad_norm": 1.555573902748247, "learning_rate": 4.965922606760395e-06, "loss": 0.7721, "step": 371 }, { "epoch": 0.2, "grad_norm": 1.5414205788850177, "learning_rate": 4.965554057444842e-06, "loss": 0.718, "step": 372 }, { "epoch": 0.2, "grad_norm": 1.6203498190806032, "learning_rate": 4.965183539714986e-06, "loss": 0.7514, "step": 373 }, { "epoch": 0.2, "grad_norm": 1.6359233963898678, "learning_rate": 4.964811053866631e-06, "loss": 0.7392, "step": 374 }, { "epoch": 0.2, "grad_norm": 1.4996075824909532, "learning_rate": 4.964436600197161e-06, "loss": 0.7038, "step": 375 }, { "epoch": 0.2, "grad_norm": 1.616018583549427, "learning_rate": 4.9640601790055245e-06, "loss": 0.7284, "step": 376 }, { "epoch": 0.2, "grad_norm": 1.5779537564567072, "learning_rate": 4.963681790592245e-06, "loss": 0.7058, "step": 377 }, { "epoch": 0.2, "grad_norm": 1.564949107255089, "learning_rate": 4.963301435259413e-06, "loss": 0.7304, "step": 378 }, { "epoch": 0.2, "grad_norm": 1.6485631976053579, "learning_rate": 4.962919113310694e-06, "loss": 0.7048, "step": 379 }, { "epoch": 0.21, "grad_norm": 1.583422560088658, "learning_rate": 4.9625348250513174e-06, "loss": 0.7424, "step": 380 }, { "epoch": 0.21, "grad_norm": 1.5649579971680316, "learning_rate": 4.962148570788088e-06, "loss": 0.7277, "step": 381 }, { "epoch": 0.21, "grad_norm": 1.531529722017428, "learning_rate": 4.961760350829378e-06, "loss": 0.7156, "step": 382 }, { "epoch": 0.21, "grad_norm": 1.5577503196504068, "learning_rate": 4.9613701654851285e-06, "loss": 0.7312, "step": 383 }, { "epoch": 0.21, "grad_norm": 1.515021600976259, "learning_rate": 4.96097801506685e-06, "loss": 0.7201, "step": 384 }, { "epoch": 0.21, "grad_norm": 1.5279765317209573, "learning_rate": 4.960583899887623e-06, "loss": 0.7114, "step": 385 }, { "epoch": 0.21, "grad_norm": 1.5513803339581718, "learning_rate": 4.9601878202620955e-06, "loss": 0.7649, "step": 386 }, { "epoch": 0.21, "grad_norm": 1.5051146353384688, "learning_rate": 4.959789776506482e-06, "loss": 0.7422, "step": 387 }, { "epoch": 0.21, "grad_norm": 1.5130084886173, "learning_rate": 4.95938976893857e-06, "loss": 0.7402, "step": 388 }, { "epoch": 0.21, "grad_norm": 1.5906410828516648, "learning_rate": 4.958987797877709e-06, "loss": 0.7339, "step": 389 }, { "epoch": 0.21, "grad_norm": 1.4671959605551643, "learning_rate": 4.958583863644821e-06, "loss": 0.7165, "step": 390 }, { "epoch": 0.21, "grad_norm": 1.5313647003134243, "learning_rate": 4.958177966562392e-06, "loss": 0.7429, "step": 391 }, { "epoch": 0.21, "grad_norm": 1.6552733073348538, "learning_rate": 4.957770106954477e-06, "loss": 0.7366, "step": 392 }, { "epoch": 0.21, "grad_norm": 1.5003653155002776, "learning_rate": 4.9573602851466985e-06, "loss": 0.7383, "step": 393 }, { "epoch": 0.21, "grad_norm": 1.5364136223836846, "learning_rate": 4.956948501466242e-06, "loss": 0.7251, "step": 394 }, { "epoch": 0.21, "grad_norm": 1.5399072922813681, "learning_rate": 4.956534756241863e-06, "loss": 0.7408, "step": 395 }, { "epoch": 0.21, "grad_norm": 1.6707479214203214, "learning_rate": 4.9561190498038815e-06, "loss": 0.7547, "step": 396 }, { "epoch": 0.21, "grad_norm": 1.5191936682927385, "learning_rate": 4.955701382484183e-06, "loss": 0.7331, "step": 397 }, { "epoch": 0.21, "grad_norm": 1.6665031696881971, "learning_rate": 4.9552817546162185e-06, "loss": 0.7483, "step": 398 }, { "epoch": 0.22, "grad_norm": 1.4985533308047472, "learning_rate": 4.954860166535005e-06, "loss": 0.7307, "step": 399 }, { "epoch": 0.22, "grad_norm": 1.5227571749174804, "learning_rate": 4.954436618577124e-06, "loss": 0.7714, "step": 400 }, { "epoch": 0.22, "grad_norm": 1.5018847918803848, "learning_rate": 4.954011111080722e-06, "loss": 0.7347, "step": 401 }, { "epoch": 0.22, "grad_norm": 1.47168240859341, "learning_rate": 4.95358364438551e-06, "loss": 0.746, "step": 402 }, { "epoch": 0.22, "grad_norm": 1.453107806158309, "learning_rate": 4.953154218832761e-06, "loss": 0.7321, "step": 403 }, { "epoch": 0.22, "grad_norm": 1.526997205838955, "learning_rate": 4.952722834765316e-06, "loss": 0.734, "step": 404 }, { "epoch": 0.22, "grad_norm": 1.5364149592998824, "learning_rate": 4.952289492527576e-06, "loss": 0.7076, "step": 405 }, { "epoch": 0.22, "grad_norm": 1.5024985967224656, "learning_rate": 4.951854192465507e-06, "loss": 0.7079, "step": 406 }, { "epoch": 0.22, "grad_norm": 1.4393740466207956, "learning_rate": 4.951416934926638e-06, "loss": 0.7162, "step": 407 }, { "epoch": 0.22, "grad_norm": 1.586753927728969, "learning_rate": 4.9509777202600605e-06, "loss": 0.7218, "step": 408 }, { "epoch": 0.22, "grad_norm": 1.5695847766951025, "learning_rate": 4.950536548816427e-06, "loss": 0.7436, "step": 409 }, { "epoch": 0.22, "grad_norm": 1.5201987379426554, "learning_rate": 4.950093420947957e-06, "loss": 0.7201, "step": 410 }, { "epoch": 0.22, "grad_norm": 1.5559347934270535, "learning_rate": 4.949648337008425e-06, "loss": 0.7283, "step": 411 }, { "epoch": 0.22, "grad_norm": 1.4546034533036762, "learning_rate": 4.949201297353173e-06, "loss": 0.7034, "step": 412 }, { "epoch": 0.22, "grad_norm": 1.4862868864778047, "learning_rate": 4.948752302339102e-06, "loss": 0.7157, "step": 413 }, { "epoch": 0.22, "grad_norm": 1.6423300048360043, "learning_rate": 4.948301352324674e-06, "loss": 0.76, "step": 414 }, { "epoch": 0.22, "grad_norm": 1.6043178483921359, "learning_rate": 4.947848447669912e-06, "loss": 0.7523, "step": 415 }, { "epoch": 0.22, "grad_norm": 1.4899687635013303, "learning_rate": 4.9473935887364e-06, "loss": 0.7383, "step": 416 }, { "epoch": 0.23, "grad_norm": 1.5600276801042794, "learning_rate": 4.946936775887281e-06, "loss": 0.7315, "step": 417 }, { "epoch": 0.23, "grad_norm": 1.6287167616323268, "learning_rate": 4.946478009487261e-06, "loss": 0.7471, "step": 418 }, { "epoch": 0.23, "grad_norm": 1.5492451424325926, "learning_rate": 4.9460172899026e-06, "loss": 0.7445, "step": 419 }, { "epoch": 0.23, "grad_norm": 1.7364142428411193, "learning_rate": 4.945554617501124e-06, "loss": 0.7547, "step": 420 }, { "epoch": 0.23, "grad_norm": 1.5844282684124265, "learning_rate": 4.945089992652214e-06, "loss": 0.7455, "step": 421 }, { "epoch": 0.23, "grad_norm": 1.5047319245219561, "learning_rate": 4.944623415726809e-06, "loss": 0.716, "step": 422 }, { "epoch": 0.23, "grad_norm": 1.5619591267448, "learning_rate": 4.944154887097411e-06, "loss": 0.7184, "step": 423 }, { "epoch": 0.23, "grad_norm": 1.637599120010444, "learning_rate": 4.9436844071380745e-06, "loss": 0.7389, "step": 424 }, { "epoch": 0.23, "grad_norm": 1.5148897803258639, "learning_rate": 4.943211976224416e-06, "loss": 0.7187, "step": 425 }, { "epoch": 0.23, "grad_norm": 1.5342977902142387, "learning_rate": 4.942737594733608e-06, "loss": 0.7627, "step": 426 }, { "epoch": 0.23, "grad_norm": 1.4529149998163282, "learning_rate": 4.942261263044381e-06, "loss": 0.7513, "step": 427 }, { "epoch": 0.23, "grad_norm": 1.5432847022987355, "learning_rate": 4.941782981537021e-06, "loss": 0.7473, "step": 428 }, { "epoch": 0.23, "grad_norm": 1.592621408624318, "learning_rate": 4.941302750593373e-06, "loss": 0.7496, "step": 429 }, { "epoch": 0.23, "grad_norm": 1.5546340473005302, "learning_rate": 4.9408205705968356e-06, "loss": 0.7396, "step": 430 }, { "epoch": 0.23, "grad_norm": 1.6012343677383651, "learning_rate": 4.940336441932366e-06, "loss": 0.7235, "step": 431 }, { "epoch": 0.23, "grad_norm": 1.5563388854328224, "learning_rate": 4.939850364986475e-06, "loss": 0.7156, "step": 432 }, { "epoch": 0.23, "grad_norm": 1.5336136544416714, "learning_rate": 4.93936234014723e-06, "loss": 0.7364, "step": 433 }, { "epoch": 0.23, "grad_norm": 1.469414481695815, "learning_rate": 4.938872367804255e-06, "loss": 0.7296, "step": 434 }, { "epoch": 0.23, "grad_norm": 1.4502988947537192, "learning_rate": 4.938380448348725e-06, "loss": 0.7198, "step": 435 }, { "epoch": 0.24, "grad_norm": 1.480914682835902, "learning_rate": 4.937886582173374e-06, "loss": 0.7113, "step": 436 }, { "epoch": 0.24, "grad_norm": 1.5926132750350195, "learning_rate": 4.937390769672485e-06, "loss": 0.7398, "step": 437 }, { "epoch": 0.24, "grad_norm": 1.4827817326110437, "learning_rate": 4.9368930112419e-06, "loss": 0.7032, "step": 438 }, { "epoch": 0.24, "grad_norm": 1.4891761104821308, "learning_rate": 4.936393307279011e-06, "loss": 0.7286, "step": 439 }, { "epoch": 0.24, "grad_norm": 1.4995310044085723, "learning_rate": 4.935891658182767e-06, "loss": 0.7109, "step": 440 }, { "epoch": 0.24, "grad_norm": 1.4747987498097699, "learning_rate": 4.935388064353665e-06, "loss": 0.735, "step": 441 }, { "epoch": 0.24, "grad_norm": 1.58879826591138, "learning_rate": 4.934882526193758e-06, "loss": 0.7358, "step": 442 }, { "epoch": 0.24, "grad_norm": 1.4836250775879796, "learning_rate": 4.934375044106651e-06, "loss": 0.7431, "step": 443 }, { "epoch": 0.24, "grad_norm": 1.4986361694405805, "learning_rate": 4.9338656184975e-06, "loss": 0.7094, "step": 444 }, { "epoch": 0.24, "grad_norm": 1.5367325268763512, "learning_rate": 4.933354249773013e-06, "loss": 0.7507, "step": 445 }, { "epoch": 0.24, "grad_norm": 1.5391938476801523, "learning_rate": 4.932840938341448e-06, "loss": 0.727, "step": 446 }, { "epoch": 0.24, "grad_norm": 1.5356886928646563, "learning_rate": 4.932325684612618e-06, "loss": 0.7468, "step": 447 }, { "epoch": 0.24, "grad_norm": 1.52359108021334, "learning_rate": 4.931808488997882e-06, "loss": 0.7443, "step": 448 }, { "epoch": 0.24, "grad_norm": 1.421948957418664, "learning_rate": 4.931289351910153e-06, "loss": 0.6996, "step": 449 }, { "epoch": 0.24, "grad_norm": 1.4730816811033562, "learning_rate": 4.93076827376389e-06, "loss": 0.7292, "step": 450 }, { "epoch": 0.24, "grad_norm": 1.4474330084485916, "learning_rate": 4.930245254975106e-06, "loss": 0.7153, "step": 451 }, { "epoch": 0.24, "grad_norm": 1.6160838413002914, "learning_rate": 4.929720295961361e-06, "loss": 0.7803, "step": 452 }, { "epoch": 0.24, "grad_norm": 1.588105697101647, "learning_rate": 4.9291933971417635e-06, "loss": 0.7368, "step": 453 }, { "epoch": 0.25, "grad_norm": 1.5693389478235444, "learning_rate": 4.928664558936972e-06, "loss": 0.7166, "step": 454 }, { "epoch": 0.25, "grad_norm": 1.531412193471816, "learning_rate": 4.928133781769194e-06, "loss": 0.7524, "step": 455 }, { "epoch": 0.25, "grad_norm": 1.5573172330168852, "learning_rate": 4.9276010660621835e-06, "loss": 0.7289, "step": 456 }, { "epoch": 0.25, "grad_norm": 1.5117583876987546, "learning_rate": 4.9270664122412404e-06, "loss": 0.7189, "step": 457 }, { "epoch": 0.25, "grad_norm": 1.5913149902436838, "learning_rate": 4.926529820733217e-06, "loss": 0.7453, "step": 458 }, { "epoch": 0.25, "grad_norm": 1.6342034934666014, "learning_rate": 4.925991291966508e-06, "loss": 0.7299, "step": 459 }, { "epoch": 0.25, "grad_norm": 1.576988301959619, "learning_rate": 4.925450826371056e-06, "loss": 0.7504, "step": 460 }, { "epoch": 0.25, "grad_norm": 1.506538869767856, "learning_rate": 4.924908424378352e-06, "loss": 0.7341, "step": 461 }, { "epoch": 0.25, "grad_norm": 1.5426843004674817, "learning_rate": 4.92436408642143e-06, "loss": 0.7177, "step": 462 }, { "epoch": 0.25, "grad_norm": 1.523274225181766, "learning_rate": 4.923817812934871e-06, "loss": 0.74, "step": 463 }, { "epoch": 0.25, "grad_norm": 1.541192727103094, "learning_rate": 4.923269604354802e-06, "loss": 0.7672, "step": 464 }, { "epoch": 0.25, "grad_norm": 1.5357534622023128, "learning_rate": 4.9227194611188934e-06, "loss": 0.7211, "step": 465 }, { "epoch": 0.25, "grad_norm": 1.6007674710534217, "learning_rate": 4.922167383666361e-06, "loss": 0.7486, "step": 466 }, { "epoch": 0.25, "grad_norm": 1.5482246333536338, "learning_rate": 4.921613372437964e-06, "loss": 0.7109, "step": 467 }, { "epoch": 0.25, "grad_norm": 1.528533762014916, "learning_rate": 4.921057427876007e-06, "loss": 0.7111, "step": 468 }, { "epoch": 0.25, "grad_norm": 1.528962001514952, "learning_rate": 4.9204995504243356e-06, "loss": 0.7452, "step": 469 }, { "epoch": 0.25, "grad_norm": 1.5040858791985772, "learning_rate": 4.919939740528342e-06, "loss": 0.7397, "step": 470 }, { "epoch": 0.25, "grad_norm": 1.6704906652458824, "learning_rate": 4.919377998634959e-06, "loss": 0.7455, "step": 471 }, { "epoch": 0.25, "grad_norm": 1.5301311519267438, "learning_rate": 4.91881432519266e-06, "loss": 0.7494, "step": 472 }, { "epoch": 0.26, "grad_norm": 1.5091607462368088, "learning_rate": 4.918248720651466e-06, "loss": 0.7206, "step": 473 }, { "epoch": 0.26, "grad_norm": 1.6153777159497857, "learning_rate": 4.917681185462934e-06, "loss": 0.7085, "step": 474 }, { "epoch": 0.26, "grad_norm": 1.6250762350694912, "learning_rate": 4.917111720080166e-06, "loss": 0.7157, "step": 475 }, { "epoch": 0.26, "grad_norm": 1.6050362808295005, "learning_rate": 4.916540324957803e-06, "loss": 0.7424, "step": 476 }, { "epoch": 0.26, "grad_norm": 1.4952842515973097, "learning_rate": 4.915967000552028e-06, "loss": 0.735, "step": 477 }, { "epoch": 0.26, "grad_norm": 1.518064859170877, "learning_rate": 4.915391747320563e-06, "loss": 0.7091, "step": 478 }, { "epoch": 0.26, "grad_norm": 1.5930025764866425, "learning_rate": 4.914814565722671e-06, "loss": 0.7223, "step": 479 }, { "epoch": 0.26, "grad_norm": 1.5525019310119756, "learning_rate": 4.914235456219154e-06, "loss": 0.7069, "step": 480 }, { "epoch": 0.26, "grad_norm": 1.5629096231421908, "learning_rate": 4.913654419272353e-06, "loss": 0.7058, "step": 481 }, { "epoch": 0.26, "grad_norm": 1.476698665027699, "learning_rate": 4.913071455346149e-06, "loss": 0.7327, "step": 482 }, { "epoch": 0.26, "grad_norm": 1.4649591333111762, "learning_rate": 4.912486564905959e-06, "loss": 0.7614, "step": 483 }, { "epoch": 0.26, "grad_norm": 1.468736814853037, "learning_rate": 4.911899748418742e-06, "loss": 0.7173, "step": 484 }, { "epoch": 0.26, "grad_norm": 1.5431538572459431, "learning_rate": 4.91131100635299e-06, "loss": 0.7049, "step": 485 }, { "epoch": 0.26, "grad_norm": 1.5075738129473961, "learning_rate": 4.910720339178735e-06, "loss": 0.7069, "step": 486 }, { "epoch": 0.26, "grad_norm": 1.5707594420038977, "learning_rate": 4.910127747367546e-06, "loss": 0.7331, "step": 487 }, { "epoch": 0.26, "grad_norm": 1.482840403030399, "learning_rate": 4.909533231392528e-06, "loss": 0.7224, "step": 488 }, { "epoch": 0.26, "grad_norm": 1.5167973022386587, "learning_rate": 4.908936791728323e-06, "loss": 0.7387, "step": 489 }, { "epoch": 0.26, "grad_norm": 1.6122501957795847, "learning_rate": 4.908338428851106e-06, "loss": 0.7126, "step": 490 }, { "epoch": 0.27, "grad_norm": 1.492680700048344, "learning_rate": 4.907738143238592e-06, "loss": 0.6977, "step": 491 }, { "epoch": 0.27, "grad_norm": 1.439087668433639, "learning_rate": 4.907135935370027e-06, "loss": 0.6767, "step": 492 }, { "epoch": 0.27, "grad_norm": 1.5610561031395536, "learning_rate": 4.906531805726194e-06, "loss": 0.7137, "step": 493 }, { "epoch": 0.27, "grad_norm": 1.5859970494501412, "learning_rate": 4.905925754789409e-06, "loss": 0.7261, "step": 494 }, { "epoch": 0.27, "grad_norm": 1.5212988279248842, "learning_rate": 4.905317783043523e-06, "loss": 0.7275, "step": 495 }, { "epoch": 0.27, "grad_norm": 1.6111530342514488, "learning_rate": 4.904707890973919e-06, "loss": 0.7458, "step": 496 }, { "epoch": 0.27, "grad_norm": 1.527027322186164, "learning_rate": 4.904096079067515e-06, "loss": 0.7337, "step": 497 }, { "epoch": 0.27, "grad_norm": 1.573209521706388, "learning_rate": 4.9034823478127605e-06, "loss": 0.7286, "step": 498 }, { "epoch": 0.27, "grad_norm": 1.5031450278667091, "learning_rate": 4.902866697699636e-06, "loss": 0.7135, "step": 499 }, { "epoch": 0.27, "grad_norm": 1.5204134534387403, "learning_rate": 4.902249129219659e-06, "loss": 0.7286, "step": 500 }, { "epoch": 0.27, "grad_norm": 1.4748224447193412, "learning_rate": 4.901629642865872e-06, "loss": 0.7061, "step": 501 }, { "epoch": 0.27, "grad_norm": 1.451232127995744, "learning_rate": 4.901008239132853e-06, "loss": 0.7189, "step": 502 }, { "epoch": 0.27, "grad_norm": 1.4474354756649095, "learning_rate": 4.90038491851671e-06, "loss": 0.7324, "step": 503 }, { "epoch": 0.27, "grad_norm": 1.4736416859651988, "learning_rate": 4.89975968151508e-06, "loss": 0.6999, "step": 504 }, { "epoch": 0.27, "grad_norm": 1.5251471657083409, "learning_rate": 4.899132528627131e-06, "loss": 0.71, "step": 505 }, { "epoch": 0.27, "grad_norm": 1.5824181980457084, "learning_rate": 4.898503460353562e-06, "loss": 0.7357, "step": 506 }, { "epoch": 0.27, "grad_norm": 1.5163125945610436, "learning_rate": 4.8978724771965965e-06, "loss": 0.7354, "step": 507 }, { "epoch": 0.27, "grad_norm": 1.5276365966479846, "learning_rate": 4.897239579659994e-06, "loss": 0.7429, "step": 508 }, { "epoch": 0.27, "grad_norm": 1.4907728547515495, "learning_rate": 4.896604768249035e-06, "loss": 0.718, "step": 509 }, { "epoch": 0.28, "grad_norm": 1.4474709397177077, "learning_rate": 4.895968043470532e-06, "loss": 0.7169, "step": 510 }, { "epoch": 0.28, "grad_norm": 1.4704665069534937, "learning_rate": 4.895329405832827e-06, "loss": 0.7467, "step": 511 }, { "epoch": 0.28, "grad_norm": 1.4977534080667958, "learning_rate": 4.8946888558457825e-06, "loss": 0.7079, "step": 512 }, { "epoch": 0.28, "grad_norm": 1.54281336702859, "learning_rate": 4.894046394020794e-06, "loss": 0.7387, "step": 513 }, { "epoch": 0.28, "grad_norm": 1.4903420961127067, "learning_rate": 4.893402020870781e-06, "loss": 0.7392, "step": 514 }, { "epoch": 0.28, "grad_norm": 1.4526702185140556, "learning_rate": 4.892755736910189e-06, "loss": 0.7176, "step": 515 }, { "epoch": 0.28, "grad_norm": 1.4646824970736538, "learning_rate": 4.892107542654988e-06, "loss": 0.7094, "step": 516 }, { "epoch": 0.28, "grad_norm": 1.4847419743068355, "learning_rate": 4.8914574386226764e-06, "loss": 0.7023, "step": 517 }, { "epoch": 0.28, "grad_norm": 1.5800520608504578, "learning_rate": 4.890805425332274e-06, "loss": 0.7401, "step": 518 }, { "epoch": 0.28, "grad_norm": 1.462881620153189, "learning_rate": 4.890151503304325e-06, "loss": 0.6899, "step": 519 }, { "epoch": 0.28, "grad_norm": 1.4526014480563811, "learning_rate": 4.8894956730608985e-06, "loss": 0.7284, "step": 520 }, { "epoch": 0.28, "grad_norm": 1.4786696162685073, "learning_rate": 4.8888379351255885e-06, "loss": 0.734, "step": 521 }, { "epoch": 0.28, "grad_norm": 1.4728504731949208, "learning_rate": 4.88817829002351e-06, "loss": 0.7152, "step": 522 }, { "epoch": 0.28, "grad_norm": 1.5394582136750736, "learning_rate": 4.8875167382813e-06, "loss": 0.7082, "step": 523 }, { "epoch": 0.28, "grad_norm": 1.5399883558600957, "learning_rate": 4.886853280427121e-06, "loss": 0.7335, "step": 524 }, { "epoch": 0.28, "grad_norm": 1.5453566156988168, "learning_rate": 4.886187916990653e-06, "loss": 0.7485, "step": 525 }, { "epoch": 0.28, "grad_norm": 1.4575508480302957, "learning_rate": 4.8855206485031016e-06, "loss": 0.7235, "step": 526 }, { "epoch": 0.28, "grad_norm": 1.5151665214962948, "learning_rate": 4.8848514754971896e-06, "loss": 0.7059, "step": 527 }, { "epoch": 0.29, "grad_norm": 1.5695690763018244, "learning_rate": 4.884180398507163e-06, "loss": 0.7116, "step": 528 }, { "epoch": 0.29, "grad_norm": 1.5637198117135596, "learning_rate": 4.8835074180687845e-06, "loss": 0.713, "step": 529 }, { "epoch": 0.29, "grad_norm": 1.5671808434000063, "learning_rate": 4.882832534719341e-06, "loss": 0.7149, "step": 530 }, { "epoch": 0.29, "grad_norm": 1.5567101423492482, "learning_rate": 4.882155748997636e-06, "loss": 0.7471, "step": 531 }, { "epoch": 0.29, "grad_norm": 1.5801278921905342, "learning_rate": 4.881477061443992e-06, "loss": 0.7133, "step": 532 }, { "epoch": 0.29, "grad_norm": 1.5598511515663325, "learning_rate": 4.88079647260025e-06, "loss": 0.7315, "step": 533 }, { "epoch": 0.29, "grad_norm": 1.520615149002882, "learning_rate": 4.8801139830097685e-06, "loss": 0.7357, "step": 534 }, { "epoch": 0.29, "grad_norm": 1.4560963303408214, "learning_rate": 4.879429593217424e-06, "loss": 0.7184, "step": 535 }, { "epoch": 0.29, "grad_norm": 1.4701798976059568, "learning_rate": 4.878743303769611e-06, "loss": 0.729, "step": 536 }, { "epoch": 0.29, "grad_norm": 1.603532355663668, "learning_rate": 4.878055115214238e-06, "loss": 0.7302, "step": 537 }, { "epoch": 0.29, "grad_norm": 1.5402240131040144, "learning_rate": 4.877365028100732e-06, "loss": 0.7816, "step": 538 }, { "epoch": 0.29, "grad_norm": 1.5477716868051647, "learning_rate": 4.876673042980036e-06, "loss": 0.7586, "step": 539 }, { "epoch": 0.29, "grad_norm": 1.505970596858273, "learning_rate": 4.875979160404607e-06, "loss": 0.7147, "step": 540 }, { "epoch": 0.29, "grad_norm": 1.5157430632049658, "learning_rate": 4.875283380928415e-06, "loss": 0.73, "step": 541 }, { "epoch": 0.29, "grad_norm": 1.4920737220648743, "learning_rate": 4.874585705106949e-06, "loss": 0.7097, "step": 542 }, { "epoch": 0.29, "grad_norm": 1.6076315558993488, "learning_rate": 4.873886133497209e-06, "loss": 0.716, "step": 543 }, { "epoch": 0.29, "grad_norm": 1.4616829488510272, "learning_rate": 4.873184666657709e-06, "loss": 0.7382, "step": 544 }, { "epoch": 0.29, "grad_norm": 1.5293210151802181, "learning_rate": 4.872481305148476e-06, "loss": 0.7111, "step": 545 }, { "epoch": 0.29, "grad_norm": 1.6828452698704885, "learning_rate": 4.87177604953105e-06, "loss": 0.7281, "step": 546 }, { "epoch": 0.3, "grad_norm": 1.661964068484526, "learning_rate": 4.871068900368483e-06, "loss": 0.7273, "step": 547 }, { "epoch": 0.3, "grad_norm": 1.6262337420899042, "learning_rate": 4.870359858225339e-06, "loss": 0.7335, "step": 548 }, { "epoch": 0.3, "grad_norm": 1.4583866506517125, "learning_rate": 4.869648923667694e-06, "loss": 0.7144, "step": 549 }, { "epoch": 0.3, "grad_norm": 1.5677832436516064, "learning_rate": 4.868936097263132e-06, "loss": 0.7155, "step": 550 }, { "epoch": 0.3, "grad_norm": 1.585605392923975, "learning_rate": 4.8682213795807495e-06, "loss": 0.7293, "step": 551 }, { "epoch": 0.3, "grad_norm": 1.6356027841193377, "learning_rate": 4.867504771191154e-06, "loss": 0.7239, "step": 552 }, { "epoch": 0.3, "grad_norm": 1.5301517428667857, "learning_rate": 4.866786272666461e-06, "loss": 0.7158, "step": 553 }, { "epoch": 0.3, "grad_norm": 1.5282025590710828, "learning_rate": 4.866065884580294e-06, "loss": 0.7396, "step": 554 }, { "epoch": 0.3, "grad_norm": 1.586890313765773, "learning_rate": 4.865343607507788e-06, "loss": 0.7249, "step": 555 }, { "epoch": 0.3, "grad_norm": 1.562907305960387, "learning_rate": 4.864619442025582e-06, "loss": 0.7278, "step": 556 }, { "epoch": 0.3, "grad_norm": 1.5161038784724987, "learning_rate": 4.863893388711828e-06, "loss": 0.7186, "step": 557 }, { "epoch": 0.3, "grad_norm": 1.5171105889619128, "learning_rate": 4.86316544814618e-06, "loss": 0.715, "step": 558 }, { "epoch": 0.3, "grad_norm": 1.555843615542511, "learning_rate": 4.862435620909803e-06, "loss": 0.71, "step": 559 }, { "epoch": 0.3, "grad_norm": 1.5410000752038706, "learning_rate": 4.8617039075853636e-06, "loss": 0.7067, "step": 560 }, { "epoch": 0.3, "grad_norm": 1.6070009835382544, "learning_rate": 4.860970308757038e-06, "loss": 0.6958, "step": 561 }, { "epoch": 0.3, "grad_norm": 1.5875648124196018, "learning_rate": 4.860234825010507e-06, "loss": 0.7238, "step": 562 }, { "epoch": 0.3, "grad_norm": 1.5364195230456157, "learning_rate": 4.859497456932956e-06, "loss": 0.7159, "step": 563 }, { "epoch": 0.3, "grad_norm": 1.4984426531259358, "learning_rate": 4.858758205113072e-06, "loss": 0.7036, "step": 564 }, { "epoch": 0.31, "grad_norm": 1.5566960620494432, "learning_rate": 4.8580170701410515e-06, "loss": 0.7343, "step": 565 }, { "epoch": 0.31, "grad_norm": 1.4751632638626027, "learning_rate": 4.85727405260859e-06, "loss": 0.7291, "step": 566 }, { "epoch": 0.31, "grad_norm": 1.499720719369144, "learning_rate": 4.856529153108888e-06, "loss": 0.7383, "step": 567 }, { "epoch": 0.31, "grad_norm": 1.4966270538899795, "learning_rate": 4.855782372236647e-06, "loss": 0.7256, "step": 568 }, { "epoch": 0.31, "grad_norm": 1.4416326114373974, "learning_rate": 4.855033710588071e-06, "loss": 0.7228, "step": 569 }, { "epoch": 0.31, "grad_norm": 1.5339683252577687, "learning_rate": 4.854283168760868e-06, "loss": 0.7168, "step": 570 }, { "epoch": 0.31, "grad_norm": 1.5475269177121693, "learning_rate": 4.853530747354243e-06, "loss": 0.7018, "step": 571 }, { "epoch": 0.31, "grad_norm": 1.524492218118321, "learning_rate": 4.852776446968904e-06, "loss": 0.7081, "step": 572 }, { "epoch": 0.31, "grad_norm": 1.5408955568359766, "learning_rate": 4.85202026820706e-06, "loss": 0.7525, "step": 573 }, { "epoch": 0.31, "grad_norm": 1.4592970288344935, "learning_rate": 4.851262211672417e-06, "loss": 0.7174, "step": 574 }, { "epoch": 0.31, "grad_norm": 1.5090241472840094, "learning_rate": 4.8505022779701815e-06, "loss": 0.6885, "step": 575 }, { "epoch": 0.31, "grad_norm": 1.5350377439832459, "learning_rate": 4.84974046770706e-06, "loss": 0.7025, "step": 576 }, { "epoch": 0.31, "grad_norm": 1.5782508740358758, "learning_rate": 4.8489767814912546e-06, "loss": 0.7297, "step": 577 }, { "epoch": 0.31, "grad_norm": 1.5338239606959683, "learning_rate": 4.848211219932466e-06, "loss": 0.7417, "step": 578 }, { "epoch": 0.31, "grad_norm": 1.496968169646213, "learning_rate": 4.847443783641893e-06, "loss": 0.7205, "step": 579 }, { "epoch": 0.31, "grad_norm": 1.4948026120652849, "learning_rate": 4.846674473232232e-06, "loss": 0.721, "step": 580 }, { "epoch": 0.31, "grad_norm": 1.4774122764074293, "learning_rate": 4.845903289317671e-06, "loss": 0.7036, "step": 581 }, { "epoch": 0.31, "grad_norm": 1.4583013142663652, "learning_rate": 4.845130232513901e-06, "loss": 0.7154, "step": 582 }, { "epoch": 0.31, "grad_norm": 1.4837333055066433, "learning_rate": 4.8443553034381e-06, "loss": 0.694, "step": 583 }, { "epoch": 0.32, "grad_norm": 1.4899791109446894, "learning_rate": 4.843578502708948e-06, "loss": 0.7049, "step": 584 }, { "epoch": 0.32, "grad_norm": 1.5461060438692744, "learning_rate": 4.842799830946615e-06, "loss": 0.7376, "step": 585 }, { "epoch": 0.32, "grad_norm": 1.4725726974302873, "learning_rate": 4.842019288772766e-06, "loss": 0.7169, "step": 586 }, { "epoch": 0.32, "grad_norm": 1.543525622312564, "learning_rate": 4.8412368768105596e-06, "loss": 0.7418, "step": 587 }, { "epoch": 0.32, "grad_norm": 1.5759622892901484, "learning_rate": 4.840452595684646e-06, "loss": 0.728, "step": 588 }, { "epoch": 0.32, "grad_norm": 1.4642246473519758, "learning_rate": 4.83966644602117e-06, "loss": 0.6907, "step": 589 }, { "epoch": 0.32, "grad_norm": 1.5058260849255019, "learning_rate": 4.838878428447766e-06, "loss": 0.7275, "step": 590 }, { "epoch": 0.32, "grad_norm": 1.55220262477339, "learning_rate": 4.83808854359356e-06, "loss": 0.7191, "step": 591 }, { "epoch": 0.32, "grad_norm": 1.506611863727398, "learning_rate": 4.837296792089169e-06, "loss": 0.7166, "step": 592 }, { "epoch": 0.32, "grad_norm": 1.5625565616674035, "learning_rate": 4.8365031745667015e-06, "loss": 0.7245, "step": 593 }, { "epoch": 0.32, "grad_norm": 1.518139166666513, "learning_rate": 4.835707691659753e-06, "loss": 0.7296, "step": 594 }, { "epoch": 0.32, "grad_norm": 1.5114401492598455, "learning_rate": 4.834910344003411e-06, "loss": 0.7067, "step": 595 }, { "epoch": 0.32, "grad_norm": 1.4843686208236486, "learning_rate": 4.834111132234251e-06, "loss": 0.7381, "step": 596 }, { "epoch": 0.32, "grad_norm": 1.5055818906513043, "learning_rate": 4.8333100569903365e-06, "loss": 0.7406, "step": 597 }, { "epoch": 0.32, "grad_norm": 1.5019554157577535, "learning_rate": 4.832507118911217e-06, "loss": 0.7047, "step": 598 }, { "epoch": 0.32, "grad_norm": 1.469255918458041, "learning_rate": 4.8317023186379335e-06, "loss": 0.7126, "step": 599 }, { "epoch": 0.32, "grad_norm": 1.4727256198659462, "learning_rate": 4.8308956568130094e-06, "loss": 0.6805, "step": 600 }, { "epoch": 0.32, "grad_norm": 1.4909404265280741, "learning_rate": 4.830087134080455e-06, "loss": 0.7155, "step": 601 }, { "epoch": 0.33, "grad_norm": 1.5183807650189143, "learning_rate": 4.829276751085769e-06, "loss": 0.7038, "step": 602 }, { "epoch": 0.33, "grad_norm": 1.4996011689540445, "learning_rate": 4.828464508475934e-06, "loss": 0.7001, "step": 603 }, { "epoch": 0.33, "grad_norm": 1.5046109339910183, "learning_rate": 4.827650406899415e-06, "loss": 0.6861, "step": 604 }, { "epoch": 0.33, "grad_norm": 1.4231949297147681, "learning_rate": 4.826834447006165e-06, "loss": 0.7171, "step": 605 }, { "epoch": 0.33, "grad_norm": 1.4990837200331364, "learning_rate": 4.826016629447616e-06, "loss": 0.7046, "step": 606 }, { "epoch": 0.33, "grad_norm": 1.6077451442038297, "learning_rate": 4.825196954876689e-06, "loss": 0.727, "step": 607 }, { "epoch": 0.33, "grad_norm": 1.5451738945135145, "learning_rate": 4.82437542394778e-06, "loss": 0.7018, "step": 608 }, { "epoch": 0.33, "grad_norm": 1.4219333698441254, "learning_rate": 4.823552037316775e-06, "loss": 0.6821, "step": 609 }, { "epoch": 0.33, "grad_norm": 1.4951799627862263, "learning_rate": 4.8227267956410366e-06, "loss": 0.7012, "step": 610 }, { "epoch": 0.33, "grad_norm": 1.5233005510924693, "learning_rate": 4.821899699579409e-06, "loss": 0.713, "step": 611 }, { "epoch": 0.33, "grad_norm": 1.5468941572617376, "learning_rate": 4.821070749792218e-06, "loss": 0.7381, "step": 612 }, { "epoch": 0.33, "grad_norm": 1.51417493015797, "learning_rate": 4.820239946941269e-06, "loss": 0.722, "step": 613 }, { "epoch": 0.33, "grad_norm": 1.5631086328553927, "learning_rate": 4.819407291689845e-06, "loss": 0.7086, "step": 614 }, { "epoch": 0.33, "grad_norm": 1.4760996310698735, "learning_rate": 4.818572784702713e-06, "loss": 0.7046, "step": 615 }, { "epoch": 0.33, "grad_norm": 1.5095637854019794, "learning_rate": 4.817736426646112e-06, "loss": 0.7469, "step": 616 }, { "epoch": 0.33, "grad_norm": 1.4995023557030982, "learning_rate": 4.816898218187762e-06, "loss": 0.6834, "step": 617 }, { "epoch": 0.33, "grad_norm": 1.475703948347541, "learning_rate": 4.816058159996863e-06, "loss": 0.7251, "step": 618 }, { "epoch": 0.33, "grad_norm": 1.5082716228466047, "learning_rate": 4.815216252744085e-06, "loss": 0.7038, "step": 619 }, { "epoch": 0.33, "grad_norm": 1.4826866069138764, "learning_rate": 4.814372497101579e-06, "loss": 0.7052, "step": 620 }, { "epoch": 0.34, "grad_norm": 1.5173115360882803, "learning_rate": 4.813526893742972e-06, "loss": 0.7033, "step": 621 }, { "epoch": 0.34, "grad_norm": 1.5579019508957628, "learning_rate": 4.812679443343366e-06, "loss": 0.6947, "step": 622 }, { "epoch": 0.34, "grad_norm": 1.441069255874311, "learning_rate": 4.8118301465793325e-06, "loss": 0.7014, "step": 623 }, { "epoch": 0.34, "grad_norm": 1.5143258669003272, "learning_rate": 4.810979004128924e-06, "loss": 0.6982, "step": 624 }, { "epoch": 0.34, "grad_norm": 1.5869849007123062, "learning_rate": 4.8101260166716635e-06, "loss": 0.7312, "step": 625 }, { "epoch": 0.34, "grad_norm": 1.5508542660477553, "learning_rate": 4.809271184888546e-06, "loss": 0.7057, "step": 626 }, { "epoch": 0.34, "grad_norm": 1.5227434797461896, "learning_rate": 4.808414509462042e-06, "loss": 0.707, "step": 627 }, { "epoch": 0.34, "grad_norm": 1.510794525716034, "learning_rate": 4.807555991076089e-06, "loss": 0.7125, "step": 628 }, { "epoch": 0.34, "grad_norm": 1.525893163276939, "learning_rate": 4.8066956304161e-06, "loss": 0.717, "step": 629 }, { "epoch": 0.34, "grad_norm": 1.5668441660979437, "learning_rate": 4.80583342816896e-06, "loss": 0.7185, "step": 630 }, { "epoch": 0.34, "grad_norm": 1.5480617985150187, "learning_rate": 4.804969385023018e-06, "loss": 0.6966, "step": 631 }, { "epoch": 0.34, "grad_norm": 1.4438405108995218, "learning_rate": 4.8041035016681e-06, "loss": 0.6946, "step": 632 }, { "epoch": 0.34, "grad_norm": 1.5250975770249213, "learning_rate": 4.803235778795496e-06, "loss": 0.701, "step": 633 }, { "epoch": 0.34, "grad_norm": 1.436823781370142, "learning_rate": 4.802366217097966e-06, "loss": 0.6775, "step": 634 }, { "epoch": 0.34, "grad_norm": 1.4435429777292612, "learning_rate": 4.801494817269739e-06, "loss": 0.7111, "step": 635 }, { "epoch": 0.34, "grad_norm": 1.5455920813806665, "learning_rate": 4.800621580006511e-06, "loss": 0.7257, "step": 636 }, { "epoch": 0.34, "grad_norm": 1.5178478556160844, "learning_rate": 4.799746506005446e-06, "loss": 0.7404, "step": 637 }, { "epoch": 0.34, "grad_norm": 1.4383163062863515, "learning_rate": 4.798869595965171e-06, "loss": 0.7076, "step": 638 }, { "epoch": 0.35, "grad_norm": 1.4778936094183432, "learning_rate": 4.797990850585782e-06, "loss": 0.7254, "step": 639 }, { "epoch": 0.35, "grad_norm": 1.4844685462773182, "learning_rate": 4.79711027056884e-06, "loss": 0.723, "step": 640 }, { "epoch": 0.35, "grad_norm": 1.5642562835057814, "learning_rate": 4.7962278566173695e-06, "loss": 0.7299, "step": 641 }, { "epoch": 0.35, "grad_norm": 1.4491832965654603, "learning_rate": 4.79534360943586e-06, "loss": 0.7234, "step": 642 }, { "epoch": 0.35, "grad_norm": 1.5415419736141445, "learning_rate": 4.7944575297302635e-06, "loss": 0.7238, "step": 643 }, { "epoch": 0.35, "grad_norm": 1.6336523551868445, "learning_rate": 4.793569618207996e-06, "loss": 0.74, "step": 644 }, { "epoch": 0.35, "grad_norm": 1.4406937200743128, "learning_rate": 4.792679875577937e-06, "loss": 0.723, "step": 645 }, { "epoch": 0.35, "grad_norm": 1.4196235274535982, "learning_rate": 4.7917883025504254e-06, "loss": 0.7298, "step": 646 }, { "epoch": 0.35, "grad_norm": 1.390260540002686, "learning_rate": 4.790894899837264e-06, "loss": 0.7064, "step": 647 }, { "epoch": 0.35, "grad_norm": 1.512781692796313, "learning_rate": 4.789999668151714e-06, "loss": 0.7105, "step": 648 }, { "epoch": 0.35, "grad_norm": 1.5679154076735617, "learning_rate": 4.789102608208497e-06, "loss": 0.7014, "step": 649 }, { "epoch": 0.35, "grad_norm": 1.3944573242833016, "learning_rate": 4.788203720723797e-06, "loss": 0.7157, "step": 650 }, { "epoch": 0.35, "grad_norm": 1.5036102809645322, "learning_rate": 4.7873030064152545e-06, "loss": 0.7224, "step": 651 }, { "epoch": 0.35, "grad_norm": 1.528428026853132, "learning_rate": 4.786400466001969e-06, "loss": 0.7141, "step": 652 }, { "epoch": 0.35, "grad_norm": 1.5149319066868614, "learning_rate": 4.7854961002044975e-06, "loss": 0.7087, "step": 653 }, { "epoch": 0.35, "grad_norm": 1.534445918727299, "learning_rate": 4.784589909744856e-06, "loss": 0.7022, "step": 654 }, { "epoch": 0.35, "grad_norm": 1.4628603747880093, "learning_rate": 4.783681895346513e-06, "loss": 0.7516, "step": 655 }, { "epoch": 0.35, "grad_norm": 1.4303090266479777, "learning_rate": 4.782772057734401e-06, "loss": 0.6961, "step": 656 }, { "epoch": 0.35, "grad_norm": 1.4460340465110242, "learning_rate": 4.7818603976349005e-06, "loss": 0.6662, "step": 657 }, { "epoch": 0.36, "grad_norm": 1.4393621648280108, "learning_rate": 4.7809469157758494e-06, "loss": 0.7116, "step": 658 }, { "epoch": 0.36, "grad_norm": 1.4386388951563152, "learning_rate": 4.780031612886542e-06, "loss": 0.6935, "step": 659 }, { "epoch": 0.36, "grad_norm": 1.4656561494590568, "learning_rate": 4.779114489697724e-06, "loss": 0.6855, "step": 660 }, { "epoch": 0.36, "grad_norm": 1.433201761777395, "learning_rate": 4.778195546941595e-06, "loss": 0.7179, "step": 661 }, { "epoch": 0.36, "grad_norm": 1.4664770559088305, "learning_rate": 4.777274785351809e-06, "loss": 0.716, "step": 662 }, { "epoch": 0.36, "grad_norm": 1.5809251210933277, "learning_rate": 4.776352205663469e-06, "loss": 0.7241, "step": 663 }, { "epoch": 0.36, "grad_norm": 1.564688430646361, "learning_rate": 4.7754278086131326e-06, "loss": 0.7261, "step": 664 }, { "epoch": 0.36, "grad_norm": 1.52988637088748, "learning_rate": 4.774501594938806e-06, "loss": 0.7472, "step": 665 }, { "epoch": 0.36, "grad_norm": 1.46251840925043, "learning_rate": 4.773573565379947e-06, "loss": 0.7013, "step": 666 }, { "epoch": 0.36, "grad_norm": 1.5019129563663896, "learning_rate": 4.772643720677461e-06, "loss": 0.7013, "step": 667 }, { "epoch": 0.36, "grad_norm": 1.4809256634792842, "learning_rate": 4.771712061573708e-06, "loss": 0.6802, "step": 668 }, { "epoch": 0.36, "grad_norm": 1.5200809921776797, "learning_rate": 4.770778588812489e-06, "loss": 0.7211, "step": 669 }, { "epoch": 0.36, "grad_norm": 1.5635680217274255, "learning_rate": 4.7698433031390605e-06, "loss": 0.7425, "step": 670 }, { "epoch": 0.36, "grad_norm": 1.4711285263516678, "learning_rate": 4.768906205300119e-06, "loss": 0.7322, "step": 671 }, { "epoch": 0.36, "grad_norm": 1.4998125753291016, "learning_rate": 4.7679672960438135e-06, "loss": 0.7184, "step": 672 }, { "epoch": 0.36, "grad_norm": 1.580119095353957, "learning_rate": 4.7670265761197375e-06, "loss": 0.6908, "step": 673 }, { "epoch": 0.36, "grad_norm": 1.5029927505553313, "learning_rate": 4.7660840462789296e-06, "loss": 0.7128, "step": 674 }, { "epoch": 0.36, "grad_norm": 1.4305027365293366, "learning_rate": 4.765139707273872e-06, "loss": 0.6783, "step": 675 }, { "epoch": 0.37, "grad_norm": 1.4907125427585328, "learning_rate": 4.764193559858494e-06, "loss": 0.6972, "step": 676 }, { "epoch": 0.37, "grad_norm": 1.4888307746290539, "learning_rate": 4.763245604788168e-06, "loss": 0.7102, "step": 677 }, { "epoch": 0.37, "grad_norm": 1.458487253599782, "learning_rate": 4.762295842819707e-06, "loss": 0.695, "step": 678 }, { "epoch": 0.37, "grad_norm": 1.4909661328806927, "learning_rate": 4.76134427471137e-06, "loss": 0.698, "step": 679 }, { "epoch": 0.37, "grad_norm": 1.4443747995298755, "learning_rate": 4.760390901222857e-06, "loss": 0.7012, "step": 680 }, { "epoch": 0.37, "grad_norm": 1.6047847561713728, "learning_rate": 4.759435723115308e-06, "loss": 0.6887, "step": 681 }, { "epoch": 0.37, "grad_norm": 1.5300055964464965, "learning_rate": 4.758478741151305e-06, "loss": 0.6935, "step": 682 }, { "epoch": 0.37, "grad_norm": 1.540764592025931, "learning_rate": 4.757519956094869e-06, "loss": 0.6945, "step": 683 }, { "epoch": 0.37, "grad_norm": 1.5244202893824799, "learning_rate": 4.756559368711463e-06, "loss": 0.7449, "step": 684 }, { "epoch": 0.37, "grad_norm": 1.4707014792902795, "learning_rate": 4.755596979767986e-06, "loss": 0.6897, "step": 685 }, { "epoch": 0.37, "grad_norm": 1.505566238681795, "learning_rate": 4.754632790032776e-06, "loss": 0.6986, "step": 686 }, { "epoch": 0.37, "grad_norm": 1.4593081618190449, "learning_rate": 4.75366680027561e-06, "loss": 0.6946, "step": 687 }, { "epoch": 0.37, "grad_norm": 1.4337722523699428, "learning_rate": 4.7526990112677015e-06, "loss": 0.703, "step": 688 }, { "epoch": 0.37, "grad_norm": 1.5022629544453385, "learning_rate": 4.751729423781699e-06, "loss": 0.7318, "step": 689 }, { "epoch": 0.37, "grad_norm": 1.6083511911175188, "learning_rate": 4.7507580385916906e-06, "loss": 0.7152, "step": 690 }, { "epoch": 0.37, "grad_norm": 1.5729524566244715, "learning_rate": 4.749784856473194e-06, "loss": 0.7392, "step": 691 }, { "epoch": 0.37, "grad_norm": 1.5696994281557544, "learning_rate": 4.748809878203167e-06, "loss": 0.7183, "step": 692 }, { "epoch": 0.37, "grad_norm": 1.4080326693778624, "learning_rate": 4.747833104559999e-06, "loss": 0.6983, "step": 693 }, { "epoch": 0.37, "grad_norm": 1.4650393240908666, "learning_rate": 4.746854536323511e-06, "loss": 0.7414, "step": 694 }, { "epoch": 0.38, "grad_norm": 1.5192449050043122, "learning_rate": 4.745874174274959e-06, "loss": 0.7068, "step": 695 }, { "epoch": 0.38, "grad_norm": 1.491412908537003, "learning_rate": 4.744892019197033e-06, "loss": 0.708, "step": 696 }, { "epoch": 0.38, "grad_norm": 1.4827547356283306, "learning_rate": 4.743908071873848e-06, "loss": 0.6786, "step": 697 }, { "epoch": 0.38, "grad_norm": 1.4983841298660767, "learning_rate": 4.742922333090958e-06, "loss": 0.7135, "step": 698 }, { "epoch": 0.38, "grad_norm": 1.495773158493301, "learning_rate": 4.74193480363534e-06, "loss": 0.7143, "step": 699 }, { "epoch": 0.38, "grad_norm": 1.510618707286854, "learning_rate": 4.740945484295407e-06, "loss": 0.6958, "step": 700 }, { "epoch": 0.38, "grad_norm": 1.5595198375394796, "learning_rate": 4.739954375860995e-06, "loss": 0.6882, "step": 701 }, { "epoch": 0.38, "grad_norm": 1.6411666906779143, "learning_rate": 4.738961479123373e-06, "loss": 0.6966, "step": 702 }, { "epoch": 0.38, "grad_norm": 1.4597538073681595, "learning_rate": 4.737966794875235e-06, "loss": 0.6733, "step": 703 }, { "epoch": 0.38, "grad_norm": 1.4566305627601004, "learning_rate": 4.736970323910703e-06, "loss": 0.6976, "step": 704 }, { "epoch": 0.38, "grad_norm": 1.504709873691936, "learning_rate": 4.735972067025326e-06, "loss": 0.7193, "step": 705 }, { "epoch": 0.38, "grad_norm": 1.5810385265489326, "learning_rate": 4.734972025016078e-06, "loss": 0.7184, "step": 706 }, { "epoch": 0.38, "grad_norm": 1.5466101156162482, "learning_rate": 4.733970198681358e-06, "loss": 0.694, "step": 707 }, { "epoch": 0.38, "grad_norm": 1.4883658620186135, "learning_rate": 4.732966588820991e-06, "loss": 0.7045, "step": 708 }, { "epoch": 0.38, "grad_norm": 1.5063085161256884, "learning_rate": 4.731961196236225e-06, "loss": 0.6981, "step": 709 }, { "epoch": 0.38, "grad_norm": 1.5236684172406567, "learning_rate": 4.730954021729729e-06, "loss": 0.666, "step": 710 }, { "epoch": 0.38, "grad_norm": 1.5465968869997178, "learning_rate": 4.729945066105599e-06, "loss": 0.7049, "step": 711 }, { "epoch": 0.38, "grad_norm": 1.506026816998116, "learning_rate": 4.72893433016935e-06, "loss": 0.733, "step": 712 }, { "epoch": 0.39, "grad_norm": 1.4400437721552506, "learning_rate": 4.727921814727919e-06, "loss": 0.7052, "step": 713 }, { "epoch": 0.39, "grad_norm": 1.598686277821301, "learning_rate": 4.726907520589664e-06, "loss": 0.7023, "step": 714 }, { "epoch": 0.39, "grad_norm": 1.4662385334701429, "learning_rate": 4.725891448564362e-06, "loss": 0.717, "step": 715 }, { "epoch": 0.39, "grad_norm": 1.5176253653269687, "learning_rate": 4.724873599463213e-06, "loss": 0.7246, "step": 716 }, { "epoch": 0.39, "grad_norm": 1.476851248372507, "learning_rate": 4.72385397409883e-06, "loss": 0.6958, "step": 717 }, { "epoch": 0.39, "grad_norm": 1.529461480824099, "learning_rate": 4.722832573285249e-06, "loss": 0.7165, "step": 718 }, { "epoch": 0.39, "grad_norm": 1.6254313328594803, "learning_rate": 4.72180939783792e-06, "loss": 0.6976, "step": 719 }, { "epoch": 0.39, "grad_norm": 1.457095827561779, "learning_rate": 4.720784448573712e-06, "loss": 0.6804, "step": 720 }, { "epoch": 0.39, "grad_norm": 1.5364754907356284, "learning_rate": 4.7197577263109105e-06, "loss": 0.707, "step": 721 }, { "epoch": 0.39, "grad_norm": 1.5719602774692536, "learning_rate": 4.718729231869214e-06, "loss": 0.6983, "step": 722 }, { "epoch": 0.39, "grad_norm": 1.537938662653087, "learning_rate": 4.717698966069739e-06, "loss": 0.7081, "step": 723 }, { "epoch": 0.39, "grad_norm": 1.4747867880687906, "learning_rate": 4.716666929735014e-06, "loss": 0.7078, "step": 724 }, { "epoch": 0.39, "grad_norm": 1.4714227039210013, "learning_rate": 4.715633123688981e-06, "loss": 0.6964, "step": 725 }, { "epoch": 0.39, "grad_norm": 1.4694894080028393, "learning_rate": 4.7145975487569965e-06, "loss": 0.7109, "step": 726 }, { "epoch": 0.39, "grad_norm": 1.4425760792120865, "learning_rate": 4.713560205765827e-06, "loss": 0.6768, "step": 727 }, { "epoch": 0.39, "grad_norm": 1.503753730187201, "learning_rate": 4.712521095543653e-06, "loss": 0.7295, "step": 728 }, { "epoch": 0.39, "grad_norm": 1.3939627641478862, "learning_rate": 4.711480218920064e-06, "loss": 0.6988, "step": 729 }, { "epoch": 0.39, "grad_norm": 1.488151199882484, "learning_rate": 4.71043757672606e-06, "loss": 0.7059, "step": 730 }, { "epoch": 0.39, "grad_norm": 1.464096496034454, "learning_rate": 4.709393169794052e-06, "loss": 0.684, "step": 731 }, { "epoch": 0.4, "grad_norm": 1.3874992135973894, "learning_rate": 4.708346998957859e-06, "loss": 0.7106, "step": 732 }, { "epoch": 0.4, "grad_norm": 1.5724798049563657, "learning_rate": 4.707299065052706e-06, "loss": 0.7179, "step": 733 }, { "epoch": 0.4, "grad_norm": 1.4287029751329325, "learning_rate": 4.706249368915231e-06, "loss": 0.71, "step": 734 }, { "epoch": 0.4, "grad_norm": 1.3765864793482245, "learning_rate": 4.705197911383473e-06, "loss": 0.7122, "step": 735 }, { "epoch": 0.4, "grad_norm": 1.5328948939254983, "learning_rate": 4.704144693296881e-06, "loss": 0.7331, "step": 736 }, { "epoch": 0.4, "grad_norm": 1.4315110686571646, "learning_rate": 4.703089715496307e-06, "loss": 0.6835, "step": 737 }, { "epoch": 0.4, "grad_norm": 1.3906582841960524, "learning_rate": 4.7020329788240115e-06, "loss": 0.6916, "step": 738 }, { "epoch": 0.4, "grad_norm": 1.5048912109270502, "learning_rate": 4.700974484123656e-06, "loss": 0.7348, "step": 739 }, { "epoch": 0.4, "grad_norm": 1.5078008444242137, "learning_rate": 4.699914232240307e-06, "loss": 0.6898, "step": 740 }, { "epoch": 0.4, "grad_norm": 1.4678338324976943, "learning_rate": 4.6988522240204325e-06, "loss": 0.7075, "step": 741 }, { "epoch": 0.4, "grad_norm": 1.4349153269666872, "learning_rate": 4.697788460311903e-06, "loss": 0.6879, "step": 742 }, { "epoch": 0.4, "grad_norm": 1.4821831640417114, "learning_rate": 4.696722941963993e-06, "loss": 0.721, "step": 743 }, { "epoch": 0.4, "grad_norm": 1.5103981547544825, "learning_rate": 4.695655669827377e-06, "loss": 0.7295, "step": 744 }, { "epoch": 0.4, "grad_norm": 1.4839463368703498, "learning_rate": 4.6945866447541255e-06, "loss": 0.7105, "step": 745 }, { "epoch": 0.4, "grad_norm": 1.5739027407436266, "learning_rate": 4.693515867597714e-06, "loss": 0.716, "step": 746 }, { "epoch": 0.4, "grad_norm": 1.5193457065507812, "learning_rate": 4.6924433392130135e-06, "loss": 0.7226, "step": 747 }, { "epoch": 0.4, "grad_norm": 1.4832548855883707, "learning_rate": 4.691369060456294e-06, "loss": 0.6945, "step": 748 }, { "epoch": 0.4, "grad_norm": 1.4820885022458337, "learning_rate": 4.690293032185223e-06, "loss": 0.7374, "step": 749 }, { "epoch": 0.41, "grad_norm": 1.5279412208692909, "learning_rate": 4.689215255258866e-06, "loss": 0.706, "step": 750 }, { "epoch": 0.41, "grad_norm": 1.4417186105310824, "learning_rate": 4.688135730537682e-06, "loss": 0.7285, "step": 751 }, { "epoch": 0.41, "grad_norm": 1.444710250461491, "learning_rate": 4.6870544588835275e-06, "loss": 0.6913, "step": 752 }, { "epoch": 0.41, "grad_norm": 1.5078879334094517, "learning_rate": 4.685971441159653e-06, "loss": 0.6709, "step": 753 }, { "epoch": 0.41, "grad_norm": 1.43003902309629, "learning_rate": 4.684886678230701e-06, "loss": 0.697, "step": 754 }, { "epoch": 0.41, "grad_norm": 1.4867722543555675, "learning_rate": 4.683800170962713e-06, "loss": 0.6884, "step": 755 }, { "epoch": 0.41, "grad_norm": 1.5637486505091276, "learning_rate": 4.682711920223115e-06, "loss": 0.7274, "step": 756 }, { "epoch": 0.41, "grad_norm": 1.5241638077276196, "learning_rate": 4.6816219268807325e-06, "loss": 0.7044, "step": 757 }, { "epoch": 0.41, "grad_norm": 1.5322856590893086, "learning_rate": 4.6805301918057785e-06, "loss": 0.7193, "step": 758 }, { "epoch": 0.41, "grad_norm": 1.451055393093512, "learning_rate": 4.679436715869856e-06, "loss": 0.695, "step": 759 }, { "epoch": 0.41, "grad_norm": 1.4908532257713225, "learning_rate": 4.67834149994596e-06, "loss": 0.6853, "step": 760 }, { "epoch": 0.41, "grad_norm": 1.5049301450614245, "learning_rate": 4.677244544908472e-06, "loss": 0.6993, "step": 761 }, { "epoch": 0.41, "grad_norm": 1.491454462041645, "learning_rate": 4.676145851633166e-06, "loss": 0.7265, "step": 762 }, { "epoch": 0.41, "grad_norm": 1.6029123295704035, "learning_rate": 4.675045420997199e-06, "loss": 0.7222, "step": 763 }, { "epoch": 0.41, "grad_norm": 1.477088617173531, "learning_rate": 4.673943253879118e-06, "loss": 0.694, "step": 764 }, { "epoch": 0.41, "grad_norm": 1.507173772414093, "learning_rate": 4.672839351158856e-06, "loss": 0.7291, "step": 765 }, { "epoch": 0.41, "grad_norm": 1.4568216381393146, "learning_rate": 4.671733713717731e-06, "loss": 0.7078, "step": 766 }, { "epoch": 0.41, "grad_norm": 1.4858148410846026, "learning_rate": 4.670626342438445e-06, "loss": 0.692, "step": 767 }, { "epoch": 0.41, "grad_norm": 1.4665579543911471, "learning_rate": 4.669517238205089e-06, "loss": 0.6988, "step": 768 }, { "epoch": 0.42, "grad_norm": 1.4083297921317193, "learning_rate": 4.66840640190313e-06, "loss": 0.6832, "step": 769 }, { "epoch": 0.42, "grad_norm": 1.4832895393297176, "learning_rate": 4.667293834419425e-06, "loss": 0.7222, "step": 770 }, { "epoch": 0.42, "grad_norm": 1.482826560034709, "learning_rate": 4.666179536642208e-06, "loss": 0.697, "step": 771 }, { "epoch": 0.42, "grad_norm": 1.4650372485353058, "learning_rate": 4.665063509461098e-06, "loss": 0.7345, "step": 772 }, { "epoch": 0.42, "grad_norm": 1.5090020480562505, "learning_rate": 4.66394575376709e-06, "loss": 0.6964, "step": 773 }, { "epoch": 0.42, "grad_norm": 1.45937100131912, "learning_rate": 4.662826270452565e-06, "loss": 0.6938, "step": 774 }, { "epoch": 0.42, "grad_norm": 1.4799632087131813, "learning_rate": 4.661705060411279e-06, "loss": 0.7351, "step": 775 }, { "epoch": 0.42, "grad_norm": 1.4452551898306583, "learning_rate": 4.660582124538369e-06, "loss": 0.7018, "step": 776 }, { "epoch": 0.42, "grad_norm": 1.4524406159150622, "learning_rate": 4.659457463730347e-06, "loss": 0.6962, "step": 777 }, { "epoch": 0.42, "grad_norm": 1.4970162671779115, "learning_rate": 4.658331078885105e-06, "loss": 0.7226, "step": 778 }, { "epoch": 0.42, "grad_norm": 1.6158351580771972, "learning_rate": 4.657202970901909e-06, "loss": 0.7202, "step": 779 }, { "epoch": 0.42, "grad_norm": 1.503584101836388, "learning_rate": 4.6560731406814056e-06, "loss": 0.7065, "step": 780 }, { "epoch": 0.42, "grad_norm": 1.461462421003857, "learning_rate": 4.6549415891256075e-06, "loss": 0.7101, "step": 781 }, { "epoch": 0.42, "grad_norm": 1.515314394224673, "learning_rate": 4.653808317137911e-06, "loss": 0.698, "step": 782 }, { "epoch": 0.42, "grad_norm": 1.511958903522258, "learning_rate": 4.65267332562308e-06, "loss": 0.7031, "step": 783 }, { "epoch": 0.42, "grad_norm": 1.5769414523438594, "learning_rate": 4.651536615487253e-06, "loss": 0.747, "step": 784 }, { "epoch": 0.42, "grad_norm": 1.5056173079092432, "learning_rate": 4.650398187637941e-06, "loss": 0.7023, "step": 785 }, { "epoch": 0.42, "grad_norm": 1.4179839042178055, "learning_rate": 4.649258042984026e-06, "loss": 0.716, "step": 786 }, { "epoch": 0.43, "grad_norm": 1.470125600144782, "learning_rate": 4.648116182435761e-06, "loss": 0.736, "step": 787 }, { "epoch": 0.43, "grad_norm": 1.5383147462612572, "learning_rate": 4.646972606904768e-06, "loss": 0.7182, "step": 788 }, { "epoch": 0.43, "grad_norm": 1.5981431522038163, "learning_rate": 4.6458273173040395e-06, "loss": 0.736, "step": 789 }, { "epoch": 0.43, "grad_norm": 1.4802873660429745, "learning_rate": 4.644680314547934e-06, "loss": 0.7222, "step": 790 }, { "epoch": 0.43, "grad_norm": 1.4822145464263854, "learning_rate": 4.64353159955218e-06, "loss": 0.7009, "step": 791 }, { "epoch": 0.43, "grad_norm": 1.4494956022132146, "learning_rate": 4.642381173233874e-06, "loss": 0.7046, "step": 792 }, { "epoch": 0.43, "grad_norm": 1.4632420537374933, "learning_rate": 4.641229036511475e-06, "loss": 0.7093, "step": 793 }, { "epoch": 0.43, "grad_norm": 1.4447468932218892, "learning_rate": 4.640075190304811e-06, "loss": 0.7193, "step": 794 }, { "epoch": 0.43, "grad_norm": 1.426753496472145, "learning_rate": 4.638919635535073e-06, "loss": 0.6609, "step": 795 }, { "epoch": 0.43, "grad_norm": 1.5112743184711321, "learning_rate": 4.637762373124817e-06, "loss": 0.7065, "step": 796 }, { "epoch": 0.43, "grad_norm": 1.4425000689185996, "learning_rate": 4.636603403997961e-06, "loss": 0.7118, "step": 797 }, { "epoch": 0.43, "grad_norm": 1.4806043440348629, "learning_rate": 4.635442729079788e-06, "loss": 0.6982, "step": 798 }, { "epoch": 0.43, "grad_norm": 1.4850006422032136, "learning_rate": 4.634280349296939e-06, "loss": 0.7134, "step": 799 }, { "epoch": 0.43, "grad_norm": 1.450191680263853, "learning_rate": 4.63311626557742e-06, "loss": 0.6879, "step": 800 }, { "epoch": 0.43, "grad_norm": 1.3811692004000655, "learning_rate": 4.6319504788505956e-06, "loss": 0.7018, "step": 801 }, { "epoch": 0.43, "grad_norm": 1.5327006364804148, "learning_rate": 4.630782990047189e-06, "loss": 0.6901, "step": 802 }, { "epoch": 0.43, "grad_norm": 1.558360429541224, "learning_rate": 4.629613800099286e-06, "loss": 0.7004, "step": 803 }, { "epoch": 0.43, "grad_norm": 1.53036451520124, "learning_rate": 4.628442909940325e-06, "loss": 0.7084, "step": 804 }, { "epoch": 0.43, "grad_norm": 1.5452590208701542, "learning_rate": 4.6272703205051066e-06, "loss": 0.709, "step": 805 }, { "epoch": 0.44, "grad_norm": 1.5338830420144145, "learning_rate": 4.626096032729786e-06, "loss": 0.7392, "step": 806 }, { "epoch": 0.44, "grad_norm": 1.4441626698106849, "learning_rate": 4.624920047551874e-06, "loss": 0.6919, "step": 807 }, { "epoch": 0.44, "grad_norm": 1.4512719020887692, "learning_rate": 4.623742365910237e-06, "loss": 0.7126, "step": 808 }, { "epoch": 0.44, "grad_norm": 1.4195174005641538, "learning_rate": 4.622562988745098e-06, "loss": 0.6589, "step": 809 }, { "epoch": 0.44, "grad_norm": 1.4784472771956005, "learning_rate": 4.621381916998029e-06, "loss": 0.7312, "step": 810 }, { "epoch": 0.44, "grad_norm": 1.4613491519961583, "learning_rate": 4.620199151611958e-06, "loss": 0.6778, "step": 811 }, { "epoch": 0.44, "grad_norm": 1.506979767517668, "learning_rate": 4.619014693531164e-06, "loss": 0.7134, "step": 812 }, { "epoch": 0.44, "grad_norm": 1.5062235050979, "learning_rate": 4.6178285437012806e-06, "loss": 0.7269, "step": 813 }, { "epoch": 0.44, "grad_norm": 1.4686390363848083, "learning_rate": 4.616640703069286e-06, "loss": 0.7278, "step": 814 }, { "epoch": 0.44, "grad_norm": 1.4709577184091358, "learning_rate": 4.615451172583514e-06, "loss": 0.7233, "step": 815 }, { "epoch": 0.44, "grad_norm": 1.4369753977565851, "learning_rate": 4.6142599531936435e-06, "loss": 0.7145, "step": 816 }, { "epoch": 0.44, "grad_norm": 1.5545772978612478, "learning_rate": 4.613067045850705e-06, "loss": 0.7206, "step": 817 }, { "epoch": 0.44, "grad_norm": 1.4278938238909695, "learning_rate": 4.6118724515070745e-06, "loss": 0.6792, "step": 818 }, { "epoch": 0.44, "grad_norm": 1.4173282650099115, "learning_rate": 4.610676171116475e-06, "loss": 0.6683, "step": 819 }, { "epoch": 0.44, "grad_norm": 1.4422502308925234, "learning_rate": 4.609478205633976e-06, "loss": 0.701, "step": 820 }, { "epoch": 0.44, "grad_norm": 1.5411104104132578, "learning_rate": 4.608278556015994e-06, "loss": 0.7206, "step": 821 }, { "epoch": 0.44, "grad_norm": 1.4797521028726233, "learning_rate": 4.607077223220286e-06, "loss": 0.6805, "step": 822 }, { "epoch": 0.44, "grad_norm": 1.4725654311490857, "learning_rate": 4.605874208205955e-06, "loss": 0.6863, "step": 823 }, { "epoch": 0.44, "grad_norm": 1.4745819235973838, "learning_rate": 4.604669511933449e-06, "loss": 0.6998, "step": 824 }, { "epoch": 0.45, "grad_norm": 1.4913798169767376, "learning_rate": 4.603463135364556e-06, "loss": 0.6891, "step": 825 }, { "epoch": 0.45, "grad_norm": 1.4953878013022095, "learning_rate": 4.602255079462406e-06, "loss": 0.7275, "step": 826 }, { "epoch": 0.45, "grad_norm": 1.4944359722260037, "learning_rate": 4.6010453451914685e-06, "loss": 0.7005, "step": 827 }, { "epoch": 0.45, "grad_norm": 1.4547272574406855, "learning_rate": 4.5998339335175555e-06, "loss": 0.6964, "step": 828 }, { "epoch": 0.45, "grad_norm": 1.5060501244962399, "learning_rate": 4.5986208454078155e-06, "loss": 0.6907, "step": 829 }, { "epoch": 0.45, "grad_norm": 1.4283757937542585, "learning_rate": 4.5974060818307375e-06, "loss": 0.6965, "step": 830 }, { "epoch": 0.45, "grad_norm": 1.4953183789855824, "learning_rate": 4.596189643756147e-06, "loss": 0.7018, "step": 831 }, { "epoch": 0.45, "grad_norm": 1.4761682109346137, "learning_rate": 4.594971532155208e-06, "loss": 0.7109, "step": 832 }, { "epoch": 0.45, "grad_norm": 1.435808718185347, "learning_rate": 4.593751748000417e-06, "loss": 0.6992, "step": 833 }, { "epoch": 0.45, "grad_norm": 1.4384199099124313, "learning_rate": 4.592530292265609e-06, "loss": 0.6879, "step": 834 }, { "epoch": 0.45, "grad_norm": 1.5197800203165353, "learning_rate": 4.591307165925954e-06, "loss": 0.7004, "step": 835 }, { "epoch": 0.45, "grad_norm": 1.5172044983379567, "learning_rate": 4.590082369957952e-06, "loss": 0.7414, "step": 836 }, { "epoch": 0.45, "grad_norm": 1.485682958938801, "learning_rate": 4.58885590533944e-06, "loss": 0.688, "step": 837 }, { "epoch": 0.45, "grad_norm": 1.4745533655028824, "learning_rate": 4.587627773049586e-06, "loss": 0.7291, "step": 838 }, { "epoch": 0.45, "grad_norm": 1.4188807148599707, "learning_rate": 4.586397974068886e-06, "loss": 0.6943, "step": 839 }, { "epoch": 0.45, "grad_norm": 1.4480198209148911, "learning_rate": 4.585166509379173e-06, "loss": 0.6999, "step": 840 }, { "epoch": 0.45, "grad_norm": 1.4907369375274473, "learning_rate": 4.5839333799636045e-06, "loss": 0.7199, "step": 841 }, { "epoch": 0.45, "grad_norm": 1.4352169686637486, "learning_rate": 4.58269858680667e-06, "loss": 0.712, "step": 842 }, { "epoch": 0.46, "grad_norm": 1.4581239212079053, "learning_rate": 4.581462130894186e-06, "loss": 0.7211, "step": 843 }, { "epoch": 0.46, "grad_norm": 1.4543129390094016, "learning_rate": 4.580224013213296e-06, "loss": 0.6825, "step": 844 }, { "epoch": 0.46, "grad_norm": 1.5065464071687333, "learning_rate": 4.578984234752472e-06, "loss": 0.6881, "step": 845 }, { "epoch": 0.46, "grad_norm": 1.4644110579780134, "learning_rate": 4.57774279650151e-06, "loss": 0.7079, "step": 846 }, { "epoch": 0.46, "grad_norm": 1.4270262053172142, "learning_rate": 4.576499699451532e-06, "loss": 0.6978, "step": 847 }, { "epoch": 0.46, "grad_norm": 1.5048300397253231, "learning_rate": 4.575254944594985e-06, "loss": 0.6878, "step": 848 }, { "epoch": 0.46, "grad_norm": 1.4826122051278703, "learning_rate": 4.574008532925638e-06, "loss": 0.6949, "step": 849 }, { "epoch": 0.46, "grad_norm": 1.5115028446085579, "learning_rate": 4.572760465438585e-06, "loss": 0.6967, "step": 850 }, { "epoch": 0.46, "grad_norm": 1.4094725630835154, "learning_rate": 4.571510743130239e-06, "loss": 0.6888, "step": 851 }, { "epoch": 0.46, "grad_norm": 1.4264967483727164, "learning_rate": 4.570259366998336e-06, "loss": 0.6861, "step": 852 }, { "epoch": 0.46, "grad_norm": 1.4305941838977423, "learning_rate": 4.569006338041933e-06, "loss": 0.6973, "step": 853 }, { "epoch": 0.46, "grad_norm": 1.4740369361432286, "learning_rate": 4.5677516572614045e-06, "loss": 0.6932, "step": 854 }, { "epoch": 0.46, "grad_norm": 1.4656562247735432, "learning_rate": 4.566495325658445e-06, "loss": 0.7086, "step": 855 }, { "epoch": 0.46, "grad_norm": 1.37614823946192, "learning_rate": 4.565237344236068e-06, "loss": 0.6698, "step": 856 }, { "epoch": 0.46, "grad_norm": 1.4891889935788998, "learning_rate": 4.563977713998601e-06, "loss": 0.6916, "step": 857 }, { "epoch": 0.46, "grad_norm": 1.4232825416709638, "learning_rate": 4.5627164359516915e-06, "loss": 0.6816, "step": 858 }, { "epoch": 0.46, "grad_norm": 1.486485875110106, "learning_rate": 4.5614535111023e-06, "loss": 0.699, "step": 859 }, { "epoch": 0.46, "grad_norm": 1.4229419108056687, "learning_rate": 4.560188940458703e-06, "loss": 0.6722, "step": 860 }, { "epoch": 0.46, "grad_norm": 1.53076252089041, "learning_rate": 4.558922725030491e-06, "loss": 0.7121, "step": 861 }, { "epoch": 0.47, "grad_norm": 1.5105315872614307, "learning_rate": 4.557654865828567e-06, "loss": 0.7448, "step": 862 }, { "epoch": 0.47, "grad_norm": 1.5007450583586275, "learning_rate": 4.5563853638651455e-06, "loss": 0.699, "step": 863 }, { "epoch": 0.47, "grad_norm": 1.5362015165351124, "learning_rate": 4.555114220153755e-06, "loss": 0.7145, "step": 864 }, { "epoch": 0.47, "grad_norm": 1.4646608723132877, "learning_rate": 4.553841435709233e-06, "loss": 0.7135, "step": 865 }, { "epoch": 0.47, "grad_norm": 1.4992526651694702, "learning_rate": 4.552567011547726e-06, "loss": 0.7253, "step": 866 }, { "epoch": 0.47, "grad_norm": 1.4639412269796497, "learning_rate": 4.551290948686693e-06, "loss": 0.6906, "step": 867 }, { "epoch": 0.47, "grad_norm": 1.4967657089094801, "learning_rate": 4.550013248144896e-06, "loss": 0.6747, "step": 868 }, { "epoch": 0.47, "grad_norm": 1.440027640967829, "learning_rate": 4.548733910942411e-06, "loss": 0.6964, "step": 869 }, { "epoch": 0.47, "grad_norm": 1.42340444860649, "learning_rate": 4.547452938100615e-06, "loss": 0.7027, "step": 870 }, { "epoch": 0.47, "grad_norm": 1.4760902205487396, "learning_rate": 4.546170330642194e-06, "loss": 0.7069, "step": 871 }, { "epoch": 0.47, "grad_norm": 1.4142837307416984, "learning_rate": 4.544886089591138e-06, "loss": 0.7137, "step": 872 }, { "epoch": 0.47, "grad_norm": 1.4822763974563413, "learning_rate": 4.54360021597274e-06, "loss": 0.7029, "step": 873 }, { "epoch": 0.47, "grad_norm": 1.4603418308217788, "learning_rate": 4.542312710813599e-06, "loss": 0.6911, "step": 874 }, { "epoch": 0.47, "grad_norm": 1.5184586430553368, "learning_rate": 4.5410235751416145e-06, "loss": 0.7134, "step": 875 }, { "epoch": 0.47, "grad_norm": 1.5146855151001655, "learning_rate": 4.539732809985989e-06, "loss": 0.7366, "step": 876 }, { "epoch": 0.47, "grad_norm": 1.4219410815272282, "learning_rate": 4.538440416377227e-06, "loss": 0.7106, "step": 877 }, { "epoch": 0.47, "grad_norm": 1.4592639594169916, "learning_rate": 4.537146395347128e-06, "loss": 0.7085, "step": 878 }, { "epoch": 0.47, "grad_norm": 1.4816972895328795, "learning_rate": 4.535850747928796e-06, "loss": 0.7156, "step": 879 }, { "epoch": 0.48, "grad_norm": 1.5332079791394464, "learning_rate": 4.534553475156632e-06, "loss": 0.6954, "step": 880 }, { "epoch": 0.48, "grad_norm": 1.4942025771879575, "learning_rate": 4.533254578066334e-06, "loss": 0.6731, "step": 881 }, { "epoch": 0.48, "grad_norm": 1.4541769308414492, "learning_rate": 4.531954057694897e-06, "loss": 0.7005, "step": 882 }, { "epoch": 0.48, "grad_norm": 1.401998249967958, "learning_rate": 4.530651915080614e-06, "loss": 0.6878, "step": 883 }, { "epoch": 0.48, "grad_norm": 1.4999642671004423, "learning_rate": 4.529348151263068e-06, "loss": 0.7111, "step": 884 }, { "epoch": 0.48, "grad_norm": 1.4741763233385419, "learning_rate": 4.5280427672831414e-06, "loss": 0.6918, "step": 885 }, { "epoch": 0.48, "grad_norm": 1.3974342015158387, "learning_rate": 4.526735764183009e-06, "loss": 0.693, "step": 886 }, { "epoch": 0.48, "grad_norm": 1.510352782686982, "learning_rate": 4.525427143006136e-06, "loss": 0.7021, "step": 887 }, { "epoch": 0.48, "grad_norm": 1.4669498571732937, "learning_rate": 4.524116904797281e-06, "loss": 0.6957, "step": 888 }, { "epoch": 0.48, "grad_norm": 1.5656986966710325, "learning_rate": 4.522805050602494e-06, "loss": 0.6957, "step": 889 }, { "epoch": 0.48, "grad_norm": 1.4892629986072206, "learning_rate": 4.521491581469116e-06, "loss": 0.7132, "step": 890 }, { "epoch": 0.48, "grad_norm": 1.4999323157719, "learning_rate": 4.520176498445774e-06, "loss": 0.6973, "step": 891 }, { "epoch": 0.48, "grad_norm": 1.4519177450374232, "learning_rate": 4.518859802582387e-06, "loss": 0.7106, "step": 892 }, { "epoch": 0.48, "grad_norm": 1.4247881839514382, "learning_rate": 4.5175414949301585e-06, "loss": 0.7058, "step": 893 }, { "epoch": 0.48, "grad_norm": 1.5272017254218964, "learning_rate": 4.516221576541581e-06, "loss": 0.7224, "step": 894 }, { "epoch": 0.48, "grad_norm": 1.4139913161700075, "learning_rate": 4.514900048470433e-06, "loss": 0.702, "step": 895 }, { "epoch": 0.48, "grad_norm": 1.4305084900980898, "learning_rate": 4.513576911771777e-06, "loss": 0.6974, "step": 896 }, { "epoch": 0.48, "grad_norm": 1.5063890941666422, "learning_rate": 4.512252167501959e-06, "loss": 0.7002, "step": 897 }, { "epoch": 0.48, "grad_norm": 1.44946880960161, "learning_rate": 4.510925816718612e-06, "loss": 0.6949, "step": 898 }, { "epoch": 0.49, "grad_norm": 1.5669030359353444, "learning_rate": 4.509597860480648e-06, "loss": 0.6654, "step": 899 }, { "epoch": 0.49, "grad_norm": 1.5199043554453437, "learning_rate": 4.508268299848262e-06, "loss": 0.6997, "step": 900 }, { "epoch": 0.49, "grad_norm": 1.5052724292817476, "learning_rate": 4.506937135882929e-06, "loss": 0.7047, "step": 901 }, { "epoch": 0.49, "grad_norm": 1.4926714639629517, "learning_rate": 4.505604369647407e-06, "loss": 0.7318, "step": 902 }, { "epoch": 0.49, "grad_norm": 1.621519815283688, "learning_rate": 4.50427000220573e-06, "loss": 0.7165, "step": 903 }, { "epoch": 0.49, "grad_norm": 1.4135740970328439, "learning_rate": 4.502934034623211e-06, "loss": 0.6774, "step": 904 }, { "epoch": 0.49, "grad_norm": 1.4358929400284226, "learning_rate": 4.501596467966443e-06, "loss": 0.6774, "step": 905 }, { "epoch": 0.49, "grad_norm": 1.4453732936141712, "learning_rate": 4.50025730330329e-06, "loss": 0.6975, "step": 906 }, { "epoch": 0.49, "grad_norm": 1.4690083097258988, "learning_rate": 4.4989165417028984e-06, "loss": 0.7183, "step": 907 }, { "epoch": 0.49, "grad_norm": 1.4390228621162058, "learning_rate": 4.4975741842356855e-06, "loss": 0.7009, "step": 908 }, { "epoch": 0.49, "grad_norm": 1.488057067666688, "learning_rate": 4.4962302319733445e-06, "loss": 0.6836, "step": 909 }, { "epoch": 0.49, "grad_norm": 1.564819305238743, "learning_rate": 4.494884685988839e-06, "loss": 0.7296, "step": 910 }, { "epoch": 0.49, "grad_norm": 1.5640500252311567, "learning_rate": 4.4935375473564105e-06, "loss": 0.707, "step": 911 }, { "epoch": 0.49, "grad_norm": 1.4659764904546329, "learning_rate": 4.492188817151565e-06, "loss": 0.689, "step": 912 }, { "epoch": 0.49, "grad_norm": 1.4546821405179517, "learning_rate": 4.490838496451085e-06, "loss": 0.6966, "step": 913 }, { "epoch": 0.49, "grad_norm": 1.5328823481019143, "learning_rate": 4.48948658633302e-06, "loss": 0.6899, "step": 914 }, { "epoch": 0.49, "grad_norm": 1.4700211829725374, "learning_rate": 4.488133087876688e-06, "loss": 0.6955, "step": 915 }, { "epoch": 0.49, "grad_norm": 1.4687624482442982, "learning_rate": 4.486778002162677e-06, "loss": 0.7097, "step": 916 }, { "epoch": 0.5, "grad_norm": 1.4139398814827906, "learning_rate": 4.4854213302728415e-06, "loss": 0.6993, "step": 917 }, { "epoch": 0.5, "grad_norm": 1.4694038769797697, "learning_rate": 4.484063073290301e-06, "loss": 0.6963, "step": 918 }, { "epoch": 0.5, "grad_norm": 1.6126556281089108, "learning_rate": 4.482703232299443e-06, "loss": 0.6776, "step": 919 }, { "epoch": 0.5, "grad_norm": 1.476752206129208, "learning_rate": 4.4813418083859165e-06, "loss": 0.6991, "step": 920 }, { "epoch": 0.5, "grad_norm": 1.4808314189710219, "learning_rate": 4.479978802636637e-06, "loss": 0.7081, "step": 921 }, { "epoch": 0.5, "grad_norm": 1.4736839774436377, "learning_rate": 4.478614216139781e-06, "loss": 0.7054, "step": 922 }, { "epoch": 0.5, "grad_norm": 1.5431274989189723, "learning_rate": 4.47724804998479e-06, "loss": 0.6883, "step": 923 }, { "epoch": 0.5, "grad_norm": 1.4901936906230366, "learning_rate": 4.475880305262362e-06, "loss": 0.6916, "step": 924 }, { "epoch": 0.5, "grad_norm": 1.4880692322567408, "learning_rate": 4.474510983064459e-06, "loss": 0.7127, "step": 925 }, { "epoch": 0.5, "grad_norm": 1.4900512338937697, "learning_rate": 4.473140084484301e-06, "loss": 0.6844, "step": 926 }, { "epoch": 0.5, "grad_norm": 1.5190690412449548, "learning_rate": 4.471767610616366e-06, "loss": 0.6991, "step": 927 }, { "epoch": 0.5, "grad_norm": 1.4124051063930423, "learning_rate": 4.470393562556391e-06, "loss": 0.6882, "step": 928 }, { "epoch": 0.5, "grad_norm": 1.4268466899113412, "learning_rate": 4.469017941401369e-06, "loss": 0.6888, "step": 929 }, { "epoch": 0.5, "grad_norm": 1.4642823465271668, "learning_rate": 4.467640748249549e-06, "loss": 0.7083, "step": 930 }, { "epoch": 0.5, "grad_norm": 1.617643839427449, "learning_rate": 4.466261984200435e-06, "loss": 0.7229, "step": 931 }, { "epoch": 0.5, "grad_norm": 1.4584044623808967, "learning_rate": 4.464881650354786e-06, "loss": 0.7115, "step": 932 }, { "epoch": 0.5, "grad_norm": 1.47080075707188, "learning_rate": 4.4634997478146125e-06, "loss": 0.7098, "step": 933 }, { "epoch": 0.5, "grad_norm": 1.46934420375485, "learning_rate": 4.462116277683178e-06, "loss": 0.6887, "step": 934 }, { "epoch": 0.5, "grad_norm": 1.483400336117227, "learning_rate": 4.460731241064999e-06, "loss": 0.7004, "step": 935 }, { "epoch": 0.51, "grad_norm": 1.4768562816099529, "learning_rate": 4.459344639065842e-06, "loss": 0.6686, "step": 936 }, { "epoch": 0.51, "grad_norm": 1.3769083618273978, "learning_rate": 4.457956472792721e-06, "loss": 0.6856, "step": 937 }, { "epoch": 0.51, "grad_norm": 1.423357352433923, "learning_rate": 4.456566743353901e-06, "loss": 0.7029, "step": 938 }, { "epoch": 0.51, "grad_norm": 1.4459031509434883, "learning_rate": 4.455175451858897e-06, "loss": 0.689, "step": 939 }, { "epoch": 0.51, "grad_norm": 1.47026207173997, "learning_rate": 4.453782599418465e-06, "loss": 0.659, "step": 940 }, { "epoch": 0.51, "grad_norm": 1.4534153084721624, "learning_rate": 4.452388187144614e-06, "loss": 0.7126, "step": 941 }, { "epoch": 0.51, "grad_norm": 1.4216594185164713, "learning_rate": 4.450992216150592e-06, "loss": 0.7088, "step": 942 }, { "epoch": 0.51, "grad_norm": 1.4722844450794041, "learning_rate": 4.449594687550898e-06, "loss": 0.7191, "step": 943 }, { "epoch": 0.51, "grad_norm": 1.5168584346289142, "learning_rate": 4.4481956024612695e-06, "loss": 0.6774, "step": 944 }, { "epoch": 0.51, "grad_norm": 1.3938385431300966, "learning_rate": 4.446794961998689e-06, "loss": 0.7032, "step": 945 }, { "epoch": 0.51, "grad_norm": 1.4857285681720327, "learning_rate": 4.445392767281379e-06, "loss": 0.6663, "step": 946 }, { "epoch": 0.51, "grad_norm": 1.465647843456467, "learning_rate": 4.4439890194288056e-06, "loss": 0.7054, "step": 947 }, { "epoch": 0.51, "grad_norm": 1.5206483079407873, "learning_rate": 4.442583719561671e-06, "loss": 0.7255, "step": 948 }, { "epoch": 0.51, "grad_norm": 1.463457601416387, "learning_rate": 4.441176868801921e-06, "loss": 0.6836, "step": 949 }, { "epoch": 0.51, "grad_norm": 1.4750318655196808, "learning_rate": 4.439768468272736e-06, "loss": 0.6915, "step": 950 }, { "epoch": 0.51, "grad_norm": 1.4543339490220577, "learning_rate": 4.438358519098536e-06, "loss": 0.6669, "step": 951 }, { "epoch": 0.51, "grad_norm": 1.488977599972264, "learning_rate": 4.436947022404974e-06, "loss": 0.7132, "step": 952 }, { "epoch": 0.51, "grad_norm": 1.406360032797042, "learning_rate": 4.435533979318943e-06, "loss": 0.7226, "step": 953 }, { "epoch": 0.52, "grad_norm": 1.4781700489088403, "learning_rate": 4.4341193909685685e-06, "loss": 0.6787, "step": 954 }, { "epoch": 0.52, "grad_norm": 1.4706423937294684, "learning_rate": 4.432703258483208e-06, "loss": 0.7165, "step": 955 }, { "epoch": 0.52, "grad_norm": 1.44655866061536, "learning_rate": 4.431285582993455e-06, "loss": 0.7193, "step": 956 }, { "epoch": 0.52, "grad_norm": 1.4464865072241075, "learning_rate": 4.429866365631134e-06, "loss": 0.7007, "step": 957 }, { "epoch": 0.52, "grad_norm": 1.4421977181843129, "learning_rate": 4.428445607529296e-06, "loss": 0.6611, "step": 958 }, { "epoch": 0.52, "grad_norm": 1.4585726725172325, "learning_rate": 4.42702330982223e-06, "loss": 0.7148, "step": 959 }, { "epoch": 0.52, "grad_norm": 1.4417803319585683, "learning_rate": 4.425599473645447e-06, "loss": 0.7054, "step": 960 }, { "epoch": 0.52, "grad_norm": 1.4434302629162283, "learning_rate": 4.424174100135691e-06, "loss": 0.6593, "step": 961 }, { "epoch": 0.52, "grad_norm": 1.479993005505386, "learning_rate": 4.422747190430932e-06, "loss": 0.6694, "step": 962 }, { "epoch": 0.52, "grad_norm": 1.4464645311238093, "learning_rate": 4.421318745670364e-06, "loss": 0.7293, "step": 963 }, { "epoch": 0.52, "grad_norm": 1.4715263569783918, "learning_rate": 4.41988876699441e-06, "loss": 0.6757, "step": 964 }, { "epoch": 0.52, "grad_norm": 1.460625126568726, "learning_rate": 4.418457255544716e-06, "loss": 0.6984, "step": 965 }, { "epoch": 0.52, "grad_norm": 1.426177596026811, "learning_rate": 4.4170242124641524e-06, "loss": 0.7083, "step": 966 }, { "epoch": 0.52, "grad_norm": 1.50456099694258, "learning_rate": 4.415589638896811e-06, "loss": 0.7179, "step": 967 }, { "epoch": 0.52, "grad_norm": 1.5657102670670202, "learning_rate": 4.414153535988008e-06, "loss": 0.7212, "step": 968 }, { "epoch": 0.52, "grad_norm": 1.4830564360193244, "learning_rate": 4.412715904884277e-06, "loss": 0.7058, "step": 969 }, { "epoch": 0.52, "grad_norm": 1.4968851176250444, "learning_rate": 4.411276746733375e-06, "loss": 0.7163, "step": 970 }, { "epoch": 0.52, "grad_norm": 1.4349922796628016, "learning_rate": 4.409836062684276e-06, "loss": 0.705, "step": 971 }, { "epoch": 0.52, "grad_norm": 1.5136994950363811, "learning_rate": 4.4083938538871735e-06, "loss": 0.6956, "step": 972 }, { "epoch": 0.53, "grad_norm": 1.4564206487315694, "learning_rate": 4.406950121493478e-06, "loss": 0.6793, "step": 973 }, { "epoch": 0.53, "grad_norm": 1.5110296499937608, "learning_rate": 4.405504866655816e-06, "loss": 0.7051, "step": 974 }, { "epoch": 0.53, "grad_norm": 1.4746405361668353, "learning_rate": 4.4040580905280295e-06, "loss": 0.7035, "step": 975 }, { "epoch": 0.53, "grad_norm": 1.4496214885876202, "learning_rate": 4.402609794265175e-06, "loss": 0.6977, "step": 976 }, { "epoch": 0.53, "grad_norm": 1.4328401563548627, "learning_rate": 4.401159979023524e-06, "loss": 0.6599, "step": 977 }, { "epoch": 0.53, "grad_norm": 1.4423457808626081, "learning_rate": 4.3997086459605586e-06, "loss": 0.6568, "step": 978 }, { "epoch": 0.53, "grad_norm": 1.4994494900554958, "learning_rate": 4.398255796234974e-06, "loss": 0.6902, "step": 979 }, { "epoch": 0.53, "grad_norm": 1.4749164298434847, "learning_rate": 4.396801431006675e-06, "loss": 0.7033, "step": 980 }, { "epoch": 0.53, "grad_norm": 1.4818519434706179, "learning_rate": 4.395345551436779e-06, "loss": 0.7015, "step": 981 }, { "epoch": 0.53, "grad_norm": 1.4383493894997719, "learning_rate": 4.3938881586876095e-06, "loss": 0.6777, "step": 982 }, { "epoch": 0.53, "grad_norm": 1.4348382147053163, "learning_rate": 4.392429253922699e-06, "loss": 0.708, "step": 983 }, { "epoch": 0.53, "grad_norm": 1.4311675736335487, "learning_rate": 4.390968838306788e-06, "loss": 0.6901, "step": 984 }, { "epoch": 0.53, "grad_norm": 1.4922688199510736, "learning_rate": 4.389506913005822e-06, "loss": 0.6703, "step": 985 }, { "epoch": 0.53, "grad_norm": 1.4696291548726368, "learning_rate": 4.388043479186953e-06, "loss": 0.6981, "step": 986 }, { "epoch": 0.53, "grad_norm": 1.3998983655389239, "learning_rate": 4.386578538018535e-06, "loss": 0.7021, "step": 987 }, { "epoch": 0.53, "grad_norm": 1.5284144249143878, "learning_rate": 4.385112090670129e-06, "loss": 0.7244, "step": 988 }, { "epoch": 0.53, "grad_norm": 1.4736786785285723, "learning_rate": 4.383644138312495e-06, "loss": 0.6912, "step": 989 }, { "epoch": 0.53, "grad_norm": 1.472516495914621, "learning_rate": 4.382174682117598e-06, "loss": 0.7077, "step": 990 }, { "epoch": 0.54, "grad_norm": 1.4050279655392088, "learning_rate": 4.3807037232586e-06, "loss": 0.6917, "step": 991 }, { "epoch": 0.54, "grad_norm": 1.4223896910334408, "learning_rate": 4.3792312629098655e-06, "loss": 0.6833, "step": 992 }, { "epoch": 0.54, "grad_norm": 1.4539475081290092, "learning_rate": 4.377757302246956e-06, "loss": 0.6778, "step": 993 }, { "epoch": 0.54, "grad_norm": 1.4875500018333143, "learning_rate": 4.376281842446631e-06, "loss": 0.6938, "step": 994 }, { "epoch": 0.54, "grad_norm": 1.4641940752254272, "learning_rate": 4.374804884686849e-06, "loss": 0.705, "step": 995 }, { "epoch": 0.54, "grad_norm": 1.460850179016618, "learning_rate": 4.373326430146762e-06, "loss": 0.7224, "step": 996 }, { "epoch": 0.54, "grad_norm": 1.4301007646667165, "learning_rate": 4.371846480006716e-06, "loss": 0.696, "step": 997 }, { "epoch": 0.54, "grad_norm": 1.4260691622087125, "learning_rate": 4.370365035448255e-06, "loss": 0.6878, "step": 998 }, { "epoch": 0.54, "grad_norm": 1.3894071018226728, "learning_rate": 4.368882097654113e-06, "loss": 0.6856, "step": 999 }, { "epoch": 0.54, "grad_norm": 1.3994314701748647, "learning_rate": 4.367397667808216e-06, "loss": 0.6802, "step": 1000 }, { "epoch": 0.54, "grad_norm": 1.4265588045067275, "learning_rate": 4.365911747095685e-06, "loss": 0.6595, "step": 1001 }, { "epoch": 0.54, "grad_norm": 1.51110716126548, "learning_rate": 4.364424336702825e-06, "loss": 0.7036, "step": 1002 }, { "epoch": 0.54, "grad_norm": 1.507168279078406, "learning_rate": 4.362935437817136e-06, "loss": 0.6841, "step": 1003 }, { "epoch": 0.54, "grad_norm": 1.529245155776704, "learning_rate": 4.361445051627303e-06, "loss": 0.7028, "step": 1004 }, { "epoch": 0.54, "grad_norm": 1.497219152365634, "learning_rate": 4.3599531793232e-06, "loss": 0.6942, "step": 1005 }, { "epoch": 0.54, "grad_norm": 1.4009934569032745, "learning_rate": 4.358459822095887e-06, "loss": 0.735, "step": 1006 }, { "epoch": 0.54, "grad_norm": 1.451296631457235, "learning_rate": 4.356964981137609e-06, "loss": 0.681, "step": 1007 }, { "epoch": 0.54, "grad_norm": 1.4976726644862794, "learning_rate": 4.355468657641797e-06, "loss": 0.6753, "step": 1008 }, { "epoch": 0.54, "grad_norm": 1.5105291631094648, "learning_rate": 4.353970852803064e-06, "loss": 0.6983, "step": 1009 }, { "epoch": 0.55, "grad_norm": 1.4191769888960597, "learning_rate": 4.3524715678172065e-06, "loss": 0.6854, "step": 1010 }, { "epoch": 0.55, "grad_norm": 1.4484553234083168, "learning_rate": 4.3509708038812035e-06, "loss": 0.6806, "step": 1011 }, { "epoch": 0.55, "grad_norm": 1.4403367276945152, "learning_rate": 4.349468562193211e-06, "loss": 0.6809, "step": 1012 }, { "epoch": 0.55, "grad_norm": 1.5486261721592134, "learning_rate": 4.347964843952571e-06, "loss": 0.6778, "step": 1013 }, { "epoch": 0.55, "grad_norm": 1.5838213846198965, "learning_rate": 4.346459650359798e-06, "loss": 0.7074, "step": 1014 }, { "epoch": 0.55, "grad_norm": 1.5154165727135915, "learning_rate": 4.344952982616589e-06, "loss": 0.697, "step": 1015 }, { "epoch": 0.55, "grad_norm": 1.3806862428110465, "learning_rate": 4.343444841925815e-06, "loss": 0.7117, "step": 1016 }, { "epoch": 0.55, "grad_norm": 1.4481500047336255, "learning_rate": 4.341935229491525e-06, "loss": 0.6796, "step": 1017 }, { "epoch": 0.55, "grad_norm": 1.4227150751084585, "learning_rate": 4.3404241465189425e-06, "loss": 0.6863, "step": 1018 }, { "epoch": 0.55, "grad_norm": 1.3793265374729031, "learning_rate": 4.338911594214463e-06, "loss": 0.7155, "step": 1019 }, { "epoch": 0.55, "grad_norm": 1.457917844491858, "learning_rate": 4.337397573785659e-06, "loss": 0.6868, "step": 1020 }, { "epoch": 0.55, "grad_norm": 1.458853930853277, "learning_rate": 4.33588208644127e-06, "loss": 0.6903, "step": 1021 }, { "epoch": 0.55, "grad_norm": 1.4267264345779067, "learning_rate": 4.3343651333912115e-06, "loss": 0.7049, "step": 1022 }, { "epoch": 0.55, "grad_norm": 1.4840873970405088, "learning_rate": 4.332846715846566e-06, "loss": 0.6873, "step": 1023 }, { "epoch": 0.55, "grad_norm": 1.355282211285355, "learning_rate": 4.331326835019587e-06, "loss": 0.687, "step": 1024 }, { "epoch": 0.55, "grad_norm": 1.4689735059313742, "learning_rate": 4.329805492123696e-06, "loss": 0.672, "step": 1025 }, { "epoch": 0.55, "grad_norm": 1.5082224518062608, "learning_rate": 4.328282688373479e-06, "loss": 0.667, "step": 1026 }, { "epoch": 0.55, "grad_norm": 1.4195592178662013, "learning_rate": 4.326758424984694e-06, "loss": 0.6706, "step": 1027 }, { "epoch": 0.56, "grad_norm": 1.528508273137452, "learning_rate": 4.3252327031742595e-06, "loss": 0.7083, "step": 1028 }, { "epoch": 0.56, "grad_norm": 1.4599816088658144, "learning_rate": 4.323705524160258e-06, "loss": 0.6895, "step": 1029 }, { "epoch": 0.56, "grad_norm": 1.4212523987844814, "learning_rate": 4.3221768891619385e-06, "loss": 0.6918, "step": 1030 }, { "epoch": 0.56, "grad_norm": 1.5102730583642103, "learning_rate": 4.320646799399711e-06, "loss": 0.7132, "step": 1031 }, { "epoch": 0.56, "grad_norm": 1.3885026684825015, "learning_rate": 4.319115256095149e-06, "loss": 0.693, "step": 1032 }, { "epoch": 0.56, "grad_norm": 1.4730414139784023, "learning_rate": 4.31758226047098e-06, "loss": 0.6978, "step": 1033 }, { "epoch": 0.56, "grad_norm": 1.4054435648433825, "learning_rate": 4.3160478137511e-06, "loss": 0.6634, "step": 1034 }, { "epoch": 0.56, "grad_norm": 1.4504552025842687, "learning_rate": 4.314511917160557e-06, "loss": 0.7152, "step": 1035 }, { "epoch": 0.56, "grad_norm": 1.4507154713546437, "learning_rate": 4.312974571925558e-06, "loss": 0.6992, "step": 1036 }, { "epoch": 0.56, "grad_norm": 1.4477327984420412, "learning_rate": 4.311435779273467e-06, "loss": 0.7258, "step": 1037 }, { "epoch": 0.56, "grad_norm": 1.509476875699511, "learning_rate": 4.3098955404328045e-06, "loss": 0.6834, "step": 1038 }, { "epoch": 0.56, "grad_norm": 1.4562625972165089, "learning_rate": 4.308353856633245e-06, "loss": 0.6772, "step": 1039 }, { "epoch": 0.56, "grad_norm": 1.5105820139721964, "learning_rate": 4.306810729105615e-06, "loss": 0.7256, "step": 1040 }, { "epoch": 0.56, "grad_norm": 1.413991152662039, "learning_rate": 4.305266159081895e-06, "loss": 0.6932, "step": 1041 }, { "epoch": 0.56, "grad_norm": 1.4614461438699464, "learning_rate": 4.3037201477952186e-06, "loss": 0.6787, "step": 1042 }, { "epoch": 0.56, "grad_norm": 1.4257816195450825, "learning_rate": 4.302172696479866e-06, "loss": 0.7053, "step": 1043 }, { "epoch": 0.56, "grad_norm": 1.4033291664855865, "learning_rate": 4.3006238063712725e-06, "loss": 0.677, "step": 1044 }, { "epoch": 0.56, "grad_norm": 1.4894554763849612, "learning_rate": 4.299073478706017e-06, "loss": 0.7028, "step": 1045 }, { "epoch": 0.56, "grad_norm": 1.4422357204975007, "learning_rate": 4.297521714721829e-06, "loss": 0.6465, "step": 1046 }, { "epoch": 0.57, "grad_norm": 1.533371242185831, "learning_rate": 4.295968515657583e-06, "loss": 0.7031, "step": 1047 }, { "epoch": 0.57, "grad_norm": 1.4200115367049202, "learning_rate": 4.294413882753303e-06, "loss": 0.6807, "step": 1048 }, { "epoch": 0.57, "grad_norm": 1.3987911036583658, "learning_rate": 4.2928578172501515e-06, "loss": 0.6854, "step": 1049 }, { "epoch": 0.57, "grad_norm": 1.41576643976464, "learning_rate": 4.29130032039044e-06, "loss": 0.6857, "step": 1050 }, { "epoch": 0.57, "grad_norm": 1.4200625038772123, "learning_rate": 4.2897413934176214e-06, "loss": 0.687, "step": 1051 }, { "epoch": 0.57, "grad_norm": 1.4514018635349295, "learning_rate": 4.288181037576288e-06, "loss": 0.7114, "step": 1052 }, { "epoch": 0.57, "grad_norm": 1.4504939495528466, "learning_rate": 4.2866192541121755e-06, "loss": 0.7047, "step": 1053 }, { "epoch": 0.57, "grad_norm": 1.4490405255365786, "learning_rate": 4.285056044272159e-06, "loss": 0.6695, "step": 1054 }, { "epoch": 0.57, "grad_norm": 1.4102978660202907, "learning_rate": 4.283491409304252e-06, "loss": 0.6818, "step": 1055 }, { "epoch": 0.57, "grad_norm": 1.5088150165299385, "learning_rate": 4.281925350457606e-06, "loss": 0.7131, "step": 1056 }, { "epoch": 0.57, "grad_norm": 1.4671017821378582, "learning_rate": 4.280357868982508e-06, "loss": 0.7157, "step": 1057 }, { "epoch": 0.57, "grad_norm": 1.4859093935717291, "learning_rate": 4.278788966130382e-06, "loss": 0.6907, "step": 1058 }, { "epoch": 0.57, "grad_norm": 1.4647438425418624, "learning_rate": 4.277218643153787e-06, "loss": 0.6819, "step": 1059 }, { "epoch": 0.57, "grad_norm": 1.42952430539635, "learning_rate": 4.275646901306414e-06, "loss": 0.668, "step": 1060 }, { "epoch": 0.57, "grad_norm": 1.3726599132964332, "learning_rate": 4.27407374184309e-06, "loss": 0.667, "step": 1061 }, { "epoch": 0.57, "grad_norm": 1.3980764980763134, "learning_rate": 4.272499166019771e-06, "loss": 0.6813, "step": 1062 }, { "epoch": 0.57, "grad_norm": 1.52079130965915, "learning_rate": 4.2709231750935446e-06, "loss": 0.7111, "step": 1063 }, { "epoch": 0.57, "grad_norm": 1.5313191694834525, "learning_rate": 4.2693457703226295e-06, "loss": 0.6883, "step": 1064 }, { "epoch": 0.58, "grad_norm": 1.5121511019478207, "learning_rate": 4.267766952966369e-06, "loss": 0.6762, "step": 1065 }, { "epoch": 0.58, "grad_norm": 1.4095190863388283, "learning_rate": 4.26618672428524e-06, "loss": 0.6735, "step": 1066 }, { "epoch": 0.58, "grad_norm": 1.503271172324263, "learning_rate": 4.264605085540842e-06, "loss": 0.6773, "step": 1067 }, { "epoch": 0.58, "grad_norm": 1.4494529583579039, "learning_rate": 4.2630220379959006e-06, "loss": 0.6979, "step": 1068 }, { "epoch": 0.58, "grad_norm": 1.4665366223547078, "learning_rate": 4.2614375829142684e-06, "loss": 0.6864, "step": 1069 }, { "epoch": 0.58, "grad_norm": 1.4150863928157225, "learning_rate": 4.25985172156092e-06, "loss": 0.671, "step": 1070 }, { "epoch": 0.58, "grad_norm": 1.4537842563977854, "learning_rate": 4.258264455201953e-06, "loss": 0.6896, "step": 1071 }, { "epoch": 0.58, "grad_norm": 1.4922558241119386, "learning_rate": 4.256675785104586e-06, "loss": 0.6979, "step": 1072 }, { "epoch": 0.58, "grad_norm": 1.477855786633934, "learning_rate": 4.2550857125371595e-06, "loss": 0.6853, "step": 1073 }, { "epoch": 0.58, "grad_norm": 1.5410042021208963, "learning_rate": 4.2534942387691335e-06, "loss": 0.665, "step": 1074 }, { "epoch": 0.58, "grad_norm": 1.4758049392824821, "learning_rate": 4.251901365071086e-06, "loss": 0.6731, "step": 1075 }, { "epoch": 0.58, "grad_norm": 1.4635698691359613, "learning_rate": 4.250307092714714e-06, "loss": 0.6724, "step": 1076 }, { "epoch": 0.58, "grad_norm": 1.4464314256812945, "learning_rate": 4.248711422972829e-06, "loss": 0.6869, "step": 1077 }, { "epoch": 0.58, "grad_norm": 1.4662468039710985, "learning_rate": 4.2471143571193595e-06, "loss": 0.6619, "step": 1078 }, { "epoch": 0.58, "grad_norm": 1.4747305414130254, "learning_rate": 4.2455158964293495e-06, "loss": 0.7019, "step": 1079 }, { "epoch": 0.58, "grad_norm": 1.4919202707296562, "learning_rate": 4.243916042178954e-06, "loss": 0.6801, "step": 1080 }, { "epoch": 0.58, "grad_norm": 1.4947111069729346, "learning_rate": 4.242314795645444e-06, "loss": 0.6734, "step": 1081 }, { "epoch": 0.58, "grad_norm": 1.438103541104056, "learning_rate": 4.240712158107199e-06, "loss": 0.705, "step": 1082 }, { "epoch": 0.58, "grad_norm": 1.4511241026290387, "learning_rate": 4.239108130843709e-06, "loss": 0.6956, "step": 1083 }, { "epoch": 0.59, "grad_norm": 1.5157423331213997, "learning_rate": 4.237502715135576e-06, "loss": 0.6958, "step": 1084 }, { "epoch": 0.59, "grad_norm": 1.5715598670129494, "learning_rate": 4.23589591226451e-06, "loss": 0.7186, "step": 1085 }, { "epoch": 0.59, "grad_norm": 1.460231268554097, "learning_rate": 4.234287723513326e-06, "loss": 0.7271, "step": 1086 }, { "epoch": 0.59, "grad_norm": 1.404084854098157, "learning_rate": 4.232678150165947e-06, "loss": 0.6639, "step": 1087 }, { "epoch": 0.59, "grad_norm": 1.4248075394756488, "learning_rate": 4.231067193507403e-06, "loss": 0.7099, "step": 1088 }, { "epoch": 0.59, "grad_norm": 1.4272011943152592, "learning_rate": 4.229454854823827e-06, "loss": 0.6849, "step": 1089 }, { "epoch": 0.59, "grad_norm": 1.4570738188496335, "learning_rate": 4.227841135402454e-06, "loss": 0.6621, "step": 1090 }, { "epoch": 0.59, "grad_norm": 1.439429931294694, "learning_rate": 4.226226036531622e-06, "loss": 0.6892, "step": 1091 }, { "epoch": 0.59, "grad_norm": 1.473553232108856, "learning_rate": 4.224609559500772e-06, "loss": 0.7112, "step": 1092 }, { "epoch": 0.59, "grad_norm": 1.5453455664995173, "learning_rate": 4.222991705600445e-06, "loss": 0.6849, "step": 1093 }, { "epoch": 0.59, "grad_norm": 1.476099496617057, "learning_rate": 4.221372476122278e-06, "loss": 0.6769, "step": 1094 }, { "epoch": 0.59, "grad_norm": 1.3786936524071092, "learning_rate": 4.21975187235901e-06, "loss": 0.6776, "step": 1095 }, { "epoch": 0.59, "grad_norm": 1.5608771029796804, "learning_rate": 4.218129895604477e-06, "loss": 0.7103, "step": 1096 }, { "epoch": 0.59, "grad_norm": 1.4430370514407906, "learning_rate": 4.216506547153608e-06, "loss": 0.6785, "step": 1097 }, { "epoch": 0.59, "grad_norm": 1.4462814354687104, "learning_rate": 4.21488182830243e-06, "loss": 0.6841, "step": 1098 }, { "epoch": 0.59, "grad_norm": 1.449767777391239, "learning_rate": 4.213255740348063e-06, "loss": 0.6859, "step": 1099 }, { "epoch": 0.59, "grad_norm": 1.4891479002188686, "learning_rate": 4.211628284588719e-06, "loss": 0.691, "step": 1100 }, { "epoch": 0.59, "grad_norm": 1.484847267932423, "learning_rate": 4.209999462323706e-06, "loss": 0.685, "step": 1101 }, { "epoch": 0.6, "grad_norm": 1.5025420157872247, "learning_rate": 4.208369274853417e-06, "loss": 0.7018, "step": 1102 }, { "epoch": 0.6, "grad_norm": 1.4365771527118953, "learning_rate": 4.206737723479341e-06, "loss": 0.68, "step": 1103 }, { "epoch": 0.6, "grad_norm": 1.4481220099245962, "learning_rate": 4.20510480950405e-06, "loss": 0.6887, "step": 1104 }, { "epoch": 0.6, "grad_norm": 1.501645293257132, "learning_rate": 4.20347053423121e-06, "loss": 0.7199, "step": 1105 }, { "epoch": 0.6, "grad_norm": 1.5124737294356456, "learning_rate": 4.201834898965568e-06, "loss": 0.6885, "step": 1106 }, { "epoch": 0.6, "grad_norm": 1.4609429459254872, "learning_rate": 4.200197905012961e-06, "loss": 0.7067, "step": 1107 }, { "epoch": 0.6, "grad_norm": 1.3839694778316587, "learning_rate": 4.198559553680308e-06, "loss": 0.6833, "step": 1108 }, { "epoch": 0.6, "grad_norm": 1.458444650108112, "learning_rate": 4.196919846275614e-06, "loss": 0.7052, "step": 1109 }, { "epoch": 0.6, "grad_norm": 1.4393231847781636, "learning_rate": 4.195278784107965e-06, "loss": 0.6959, "step": 1110 }, { "epoch": 0.6, "grad_norm": 1.4104037617689258, "learning_rate": 4.193636368487529e-06, "loss": 0.6786, "step": 1111 }, { "epoch": 0.6, "grad_norm": 1.4728629532916755, "learning_rate": 4.191992600725555e-06, "loss": 0.687, "step": 1112 }, { "epoch": 0.6, "grad_norm": 1.4610369502485714, "learning_rate": 4.19034748213437e-06, "loss": 0.6783, "step": 1113 }, { "epoch": 0.6, "grad_norm": 1.4594503705120148, "learning_rate": 4.188701014027382e-06, "loss": 0.6858, "step": 1114 }, { "epoch": 0.6, "grad_norm": 1.451655322925588, "learning_rate": 4.187053197719075e-06, "loss": 0.6886, "step": 1115 }, { "epoch": 0.6, "grad_norm": 1.399025077374106, "learning_rate": 4.185404034525008e-06, "loss": 0.6628, "step": 1116 }, { "epoch": 0.6, "grad_norm": 1.416495151215859, "learning_rate": 4.183753525761818e-06, "loss": 0.6829, "step": 1117 }, { "epoch": 0.6, "grad_norm": 1.5284696860205795, "learning_rate": 4.182101672747215e-06, "loss": 0.7131, "step": 1118 }, { "epoch": 0.6, "grad_norm": 1.5732202482834003, "learning_rate": 4.180448476799981e-06, "loss": 0.7054, "step": 1119 }, { "epoch": 0.6, "grad_norm": 1.4408110677098207, "learning_rate": 4.178793939239972e-06, "loss": 0.6695, "step": 1120 }, { "epoch": 0.61, "grad_norm": 1.397776640701602, "learning_rate": 4.177138061388114e-06, "loss": 0.6883, "step": 1121 }, { "epoch": 0.61, "grad_norm": 1.436366427140932, "learning_rate": 4.175480844566404e-06, "loss": 0.6712, "step": 1122 }, { "epoch": 0.61, "grad_norm": 1.5201971636554845, "learning_rate": 4.173822290097907e-06, "loss": 0.6943, "step": 1123 }, { "epoch": 0.61, "grad_norm": 1.4577447545881694, "learning_rate": 4.1721623993067565e-06, "loss": 0.6722, "step": 1124 }, { "epoch": 0.61, "grad_norm": 1.3922869108833886, "learning_rate": 4.170501173518152e-06, "loss": 0.6847, "step": 1125 }, { "epoch": 0.61, "grad_norm": 1.5016768613932518, "learning_rate": 4.168838614058361e-06, "loss": 0.7074, "step": 1126 }, { "epoch": 0.61, "grad_norm": 1.432189895215231, "learning_rate": 4.167174722254713e-06, "loss": 0.6857, "step": 1127 }, { "epoch": 0.61, "grad_norm": 1.4876775162063225, "learning_rate": 4.165509499435604e-06, "loss": 0.6574, "step": 1128 }, { "epoch": 0.61, "grad_norm": 1.5489080878027424, "learning_rate": 4.163842946930489e-06, "loss": 0.706, "step": 1129 }, { "epoch": 0.61, "grad_norm": 1.4712066611864916, "learning_rate": 4.1621750660698875e-06, "loss": 0.6756, "step": 1130 }, { "epoch": 0.61, "grad_norm": 1.4913274318359984, "learning_rate": 4.16050585818538e-06, "loss": 0.7183, "step": 1131 }, { "epoch": 0.61, "grad_norm": 1.5263120318630363, "learning_rate": 4.158835324609603e-06, "loss": 0.6841, "step": 1132 }, { "epoch": 0.61, "grad_norm": 1.523139905597414, "learning_rate": 4.1571634666762576e-06, "loss": 0.683, "step": 1133 }, { "epoch": 0.61, "grad_norm": 1.529012092210827, "learning_rate": 4.155490285720092e-06, "loss": 0.7113, "step": 1134 }, { "epoch": 0.61, "grad_norm": 1.4648768470540483, "learning_rate": 4.153815783076922e-06, "loss": 0.7039, "step": 1135 }, { "epoch": 0.61, "grad_norm": 1.587343240226047, "learning_rate": 4.152139960083611e-06, "loss": 0.6789, "step": 1136 }, { "epoch": 0.61, "grad_norm": 1.491520790450262, "learning_rate": 4.150462818078079e-06, "loss": 0.6781, "step": 1137 }, { "epoch": 0.61, "grad_norm": 1.4467009627294503, "learning_rate": 4.148784358399301e-06, "loss": 0.6943, "step": 1138 }, { "epoch": 0.62, "grad_norm": 1.4383788352734581, "learning_rate": 4.1471045823873e-06, "loss": 0.6748, "step": 1139 }, { "epoch": 0.62, "grad_norm": 1.4655179366365012, "learning_rate": 4.145423491383153e-06, "loss": 0.6824, "step": 1140 }, { "epoch": 0.62, "grad_norm": 1.5577337107381115, "learning_rate": 4.143741086728983e-06, "loss": 0.6942, "step": 1141 }, { "epoch": 0.62, "grad_norm": 1.5271550490785835, "learning_rate": 4.142057369767969e-06, "loss": 0.689, "step": 1142 }, { "epoch": 0.62, "grad_norm": 1.5283901196131227, "learning_rate": 4.14037234184433e-06, "loss": 0.7108, "step": 1143 }, { "epoch": 0.62, "grad_norm": 1.3822608012265165, "learning_rate": 4.1386860043033355e-06, "loss": 0.6627, "step": 1144 }, { "epoch": 0.62, "grad_norm": 1.5132326456532308, "learning_rate": 4.1369983584913e-06, "loss": 0.6941, "step": 1145 }, { "epoch": 0.62, "grad_norm": 1.45674137325769, "learning_rate": 4.135309405755583e-06, "loss": 0.7169, "step": 1146 }, { "epoch": 0.62, "grad_norm": 1.4416296601286693, "learning_rate": 4.133619147444586e-06, "loss": 0.693, "step": 1147 }, { "epoch": 0.62, "grad_norm": 1.3807897775529332, "learning_rate": 4.131927584907755e-06, "loss": 0.6515, "step": 1148 }, { "epoch": 0.62, "grad_norm": 1.4273184686482288, "learning_rate": 4.130234719495574e-06, "loss": 0.6698, "step": 1149 }, { "epoch": 0.62, "grad_norm": 1.4414852816596408, "learning_rate": 4.12854055255957e-06, "loss": 0.6782, "step": 1150 }, { "epoch": 0.62, "grad_norm": 1.4515130119245592, "learning_rate": 4.126845085452308e-06, "loss": 0.6905, "step": 1151 }, { "epoch": 0.62, "grad_norm": 1.4686629293589304, "learning_rate": 4.125148319527391e-06, "loss": 0.6791, "step": 1152 }, { "epoch": 0.62, "grad_norm": 1.6742700358625533, "learning_rate": 4.123450256139459e-06, "loss": 0.717, "step": 1153 }, { "epoch": 0.62, "grad_norm": 1.440543082190945, "learning_rate": 4.121750896644189e-06, "loss": 0.6764, "step": 1154 }, { "epoch": 0.62, "grad_norm": 1.4384951613345152, "learning_rate": 4.1200502423982904e-06, "loss": 0.6775, "step": 1155 }, { "epoch": 0.62, "grad_norm": 1.4591537958397283, "learning_rate": 4.1183482947595074e-06, "loss": 0.6799, "step": 1156 }, { "epoch": 0.62, "grad_norm": 1.4053078256471974, "learning_rate": 4.116645055086618e-06, "loss": 0.6775, "step": 1157 }, { "epoch": 0.63, "grad_norm": 1.4452026561499565, "learning_rate": 4.1149405247394295e-06, "loss": 0.6596, "step": 1158 }, { "epoch": 0.63, "grad_norm": 1.4593659593903618, "learning_rate": 4.113234705078782e-06, "loss": 0.7031, "step": 1159 }, { "epoch": 0.63, "grad_norm": 1.5019092588549279, "learning_rate": 4.111527597466544e-06, "loss": 0.6815, "step": 1160 }, { "epoch": 0.63, "grad_norm": 1.5011712586115724, "learning_rate": 4.10981920326561e-06, "loss": 0.696, "step": 1161 }, { "epoch": 0.63, "grad_norm": 1.429379705175454, "learning_rate": 4.108109523839906e-06, "loss": 0.6703, "step": 1162 }, { "epoch": 0.63, "grad_norm": 1.4753779424646962, "learning_rate": 4.106398560554381e-06, "loss": 0.6821, "step": 1163 }, { "epoch": 0.63, "grad_norm": 1.5225856177003603, "learning_rate": 4.104686314775009e-06, "loss": 0.6651, "step": 1164 }, { "epoch": 0.63, "grad_norm": 1.4538532274483444, "learning_rate": 4.102972787868789e-06, "loss": 0.6949, "step": 1165 }, { "epoch": 0.63, "grad_norm": 1.488500549317724, "learning_rate": 4.101257981203743e-06, "loss": 0.6749, "step": 1166 }, { "epoch": 0.63, "grad_norm": 1.501332870169803, "learning_rate": 4.099541896148914e-06, "loss": 0.6975, "step": 1167 }, { "epoch": 0.63, "grad_norm": 1.532239656846755, "learning_rate": 4.097824534074365e-06, "loss": 0.6794, "step": 1168 }, { "epoch": 0.63, "grad_norm": 1.449259408881481, "learning_rate": 4.0961058963511805e-06, "loss": 0.686, "step": 1169 }, { "epoch": 0.63, "grad_norm": 1.535020949173973, "learning_rate": 4.094385984351462e-06, "loss": 0.7146, "step": 1170 }, { "epoch": 0.63, "grad_norm": 1.4497255221331797, "learning_rate": 4.092664799448328e-06, "loss": 0.668, "step": 1171 }, { "epoch": 0.63, "grad_norm": 1.4548505590136023, "learning_rate": 4.090942343015914e-06, "loss": 0.6865, "step": 1172 }, { "epoch": 0.63, "grad_norm": 1.511929172205339, "learning_rate": 4.0892186164293715e-06, "loss": 0.6754, "step": 1173 }, { "epoch": 0.63, "grad_norm": 1.4225819311355832, "learning_rate": 4.087493621064863e-06, "loss": 0.6831, "step": 1174 }, { "epoch": 0.63, "grad_norm": 1.439372519314675, "learning_rate": 4.085767358299568e-06, "loss": 0.6616, "step": 1175 }, { "epoch": 0.64, "grad_norm": 1.4601041098879917, "learning_rate": 4.0840398295116745e-06, "loss": 0.7086, "step": 1176 }, { "epoch": 0.64, "grad_norm": 1.5086282375784361, "learning_rate": 4.082311036080384e-06, "loss": 0.7017, "step": 1177 }, { "epoch": 0.64, "grad_norm": 1.502858248024859, "learning_rate": 4.080580979385905e-06, "loss": 0.7022, "step": 1178 }, { "epoch": 0.64, "grad_norm": 1.437736154792691, "learning_rate": 4.078849660809456e-06, "loss": 0.6798, "step": 1179 }, { "epoch": 0.64, "grad_norm": 1.5259877281012197, "learning_rate": 4.077117081733264e-06, "loss": 0.7092, "step": 1180 }, { "epoch": 0.64, "grad_norm": 1.500898591726442, "learning_rate": 4.075383243540559e-06, "loss": 0.6638, "step": 1181 }, { "epoch": 0.64, "grad_norm": 1.5014733296033327, "learning_rate": 4.073648147615579e-06, "loss": 0.7127, "step": 1182 }, { "epoch": 0.64, "grad_norm": 1.4921664314676581, "learning_rate": 4.071911795343566e-06, "loss": 0.6669, "step": 1183 }, { "epoch": 0.64, "grad_norm": 1.4619837678396739, "learning_rate": 4.070174188110765e-06, "loss": 0.6935, "step": 1184 }, { "epoch": 0.64, "grad_norm": 1.4531215202256436, "learning_rate": 4.068435327304421e-06, "loss": 0.6563, "step": 1185 }, { "epoch": 0.64, "grad_norm": 1.4286568348630122, "learning_rate": 4.0666952143127815e-06, "loss": 0.6802, "step": 1186 }, { "epoch": 0.64, "grad_norm": 1.4737376828935465, "learning_rate": 4.064953850525094e-06, "loss": 0.7143, "step": 1187 }, { "epoch": 0.64, "grad_norm": 1.4029531684579104, "learning_rate": 4.063211237331603e-06, "loss": 0.694, "step": 1188 }, { "epoch": 0.64, "grad_norm": 1.4438726086991331, "learning_rate": 4.061467376123553e-06, "loss": 0.6875, "step": 1189 }, { "epoch": 0.64, "grad_norm": 1.448367601910385, "learning_rate": 4.059722268293181e-06, "loss": 0.6666, "step": 1190 }, { "epoch": 0.64, "grad_norm": 1.490648878651832, "learning_rate": 4.057975915233725e-06, "loss": 0.7019, "step": 1191 }, { "epoch": 0.64, "grad_norm": 1.4238559384569158, "learning_rate": 4.05622831833941e-06, "loss": 0.691, "step": 1192 }, { "epoch": 0.64, "grad_norm": 1.4710361367615437, "learning_rate": 4.0544794790054605e-06, "loss": 0.6515, "step": 1193 }, { "epoch": 0.64, "grad_norm": 1.4139922213837175, "learning_rate": 4.052729398628089e-06, "loss": 0.6924, "step": 1194 }, { "epoch": 0.65, "grad_norm": 1.487311231277756, "learning_rate": 4.0509780786045005e-06, "loss": 0.6664, "step": 1195 }, { "epoch": 0.65, "grad_norm": 1.4746725542454115, "learning_rate": 4.0492255203328886e-06, "loss": 0.6828, "step": 1196 }, { "epoch": 0.65, "grad_norm": 1.4343516608801103, "learning_rate": 4.047471725212437e-06, "loss": 0.6561, "step": 1197 }, { "epoch": 0.65, "grad_norm": 1.4161093835930532, "learning_rate": 4.0457166946433155e-06, "loss": 0.6814, "step": 1198 }, { "epoch": 0.65, "grad_norm": 3.022172299968139, "learning_rate": 4.04396043002668e-06, "loss": 0.7574, "step": 1199 }, { "epoch": 0.65, "grad_norm": 1.475490868493343, "learning_rate": 4.042202932764673e-06, "loss": 0.6826, "step": 1200 }, { "epoch": 0.65, "grad_norm": 1.4799558472765797, "learning_rate": 4.04044420426042e-06, "loss": 0.6969, "step": 1201 }, { "epoch": 0.65, "grad_norm": 1.435891202778176, "learning_rate": 4.038684245918031e-06, "loss": 0.6962, "step": 1202 }, { "epoch": 0.65, "grad_norm": 1.4700878299117843, "learning_rate": 4.036923059142595e-06, "loss": 0.6706, "step": 1203 }, { "epoch": 0.65, "grad_norm": 1.4435468068850252, "learning_rate": 4.035160645340184e-06, "loss": 0.7178, "step": 1204 }, { "epoch": 0.65, "grad_norm": 1.4374596822983927, "learning_rate": 4.03339700591785e-06, "loss": 0.6833, "step": 1205 }, { "epoch": 0.65, "grad_norm": 1.4471459018381778, "learning_rate": 4.031632142283623e-06, "loss": 0.6984, "step": 1206 }, { "epoch": 0.65, "grad_norm": 1.4118438146158085, "learning_rate": 4.029866055846507e-06, "loss": 0.6709, "step": 1207 }, { "epoch": 0.65, "grad_norm": 1.4712454028479345, "learning_rate": 4.028098748016488e-06, "loss": 0.7099, "step": 1208 }, { "epoch": 0.65, "grad_norm": 1.5655308868235827, "learning_rate": 4.026330220204524e-06, "loss": 0.6889, "step": 1209 }, { "epoch": 0.65, "grad_norm": 1.5145320323089113, "learning_rate": 4.0245604738225466e-06, "loss": 0.6809, "step": 1210 }, { "epoch": 0.65, "grad_norm": 1.4154513371627189, "learning_rate": 4.022789510283461e-06, "loss": 0.6986, "step": 1211 }, { "epoch": 0.65, "grad_norm": 1.4277686849617321, "learning_rate": 4.021017331001146e-06, "loss": 0.6363, "step": 1212 }, { "epoch": 0.66, "grad_norm": 1.4275230385384734, "learning_rate": 4.019243937390445e-06, "loss": 0.7079, "step": 1213 }, { "epoch": 0.66, "grad_norm": 1.4988109879388456, "learning_rate": 4.017469330867178e-06, "loss": 0.7189, "step": 1214 }, { "epoch": 0.66, "grad_norm": 1.4277077797664508, "learning_rate": 4.015693512848131e-06, "loss": 0.677, "step": 1215 }, { "epoch": 0.66, "grad_norm": 1.4628233559664636, "learning_rate": 4.013916484751055e-06, "loss": 0.6697, "step": 1216 }, { "epoch": 0.66, "grad_norm": 1.467919576933475, "learning_rate": 4.012138247994669e-06, "loss": 0.6937, "step": 1217 }, { "epoch": 0.66, "grad_norm": 1.4898329108107753, "learning_rate": 4.0103588039986556e-06, "loss": 0.6898, "step": 1218 }, { "epoch": 0.66, "grad_norm": 1.5066314175339628, "learning_rate": 4.008578154183664e-06, "loss": 0.7067, "step": 1219 }, { "epoch": 0.66, "grad_norm": 1.4751122647442556, "learning_rate": 4.006796299971304e-06, "loss": 0.6883, "step": 1220 }, { "epoch": 0.66, "grad_norm": 1.4923253809420378, "learning_rate": 4.005013242784146e-06, "loss": 0.6997, "step": 1221 }, { "epoch": 0.66, "grad_norm": 1.4491934661554058, "learning_rate": 4.003228984045723e-06, "loss": 0.6672, "step": 1222 }, { "epoch": 0.66, "grad_norm": 1.4655052619150428, "learning_rate": 4.001443525180527e-06, "loss": 0.683, "step": 1223 }, { "epoch": 0.66, "grad_norm": 1.476954550634771, "learning_rate": 3.999656867614006e-06, "loss": 0.6605, "step": 1224 }, { "epoch": 0.66, "grad_norm": 1.439966268164657, "learning_rate": 3.997869012772567e-06, "loss": 0.6884, "step": 1225 }, { "epoch": 0.66, "grad_norm": 1.4478702515335846, "learning_rate": 3.996079962083573e-06, "loss": 0.6847, "step": 1226 }, { "epoch": 0.66, "grad_norm": 1.4132022960295536, "learning_rate": 3.994289716975341e-06, "loss": 0.6888, "step": 1227 }, { "epoch": 0.66, "grad_norm": 1.4316308008115888, "learning_rate": 3.992498278877141e-06, "loss": 0.6945, "step": 1228 }, { "epoch": 0.66, "grad_norm": 1.436477426237709, "learning_rate": 3.990705649219196e-06, "loss": 0.6947, "step": 1229 }, { "epoch": 0.66, "grad_norm": 1.4353227074316313, "learning_rate": 3.988911829432682e-06, "loss": 0.7006, "step": 1230 }, { "epoch": 0.66, "grad_norm": 1.4416300537899243, "learning_rate": 3.987116820949721e-06, "loss": 0.6631, "step": 1231 }, { "epoch": 0.67, "grad_norm": 1.4959160774283353, "learning_rate": 3.985320625203389e-06, "loss": 0.6904, "step": 1232 }, { "epoch": 0.67, "grad_norm": 1.478796981694887, "learning_rate": 3.983523243627706e-06, "loss": 0.6634, "step": 1233 }, { "epoch": 0.67, "grad_norm": 1.463760263519161, "learning_rate": 3.981724677657641e-06, "loss": 0.6736, "step": 1234 }, { "epoch": 0.67, "grad_norm": 1.493557903283085, "learning_rate": 3.979924928729106e-06, "loss": 0.6882, "step": 1235 }, { "epoch": 0.67, "grad_norm": 1.5318421203992842, "learning_rate": 3.978123998278962e-06, "loss": 0.7023, "step": 1236 }, { "epoch": 0.67, "grad_norm": 1.3958996597684543, "learning_rate": 3.9763218877450085e-06, "loss": 0.6854, "step": 1237 }, { "epoch": 0.67, "grad_norm": 1.4105627209153757, "learning_rate": 3.97451859856599e-06, "loss": 0.6825, "step": 1238 }, { "epoch": 0.67, "grad_norm": 1.44279327979603, "learning_rate": 3.97271413218159e-06, "loss": 0.672, "step": 1239 }, { "epoch": 0.67, "grad_norm": 1.4375009509977055, "learning_rate": 3.970908490032433e-06, "loss": 0.6796, "step": 1240 }, { "epoch": 0.67, "grad_norm": 1.4806581743889051, "learning_rate": 3.969101673560085e-06, "loss": 0.687, "step": 1241 }, { "epoch": 0.67, "grad_norm": 1.357826650294513, "learning_rate": 3.9672936842070425e-06, "loss": 0.6869, "step": 1242 }, { "epoch": 0.67, "grad_norm": 1.4602604377337167, "learning_rate": 3.9654845234167456e-06, "loss": 0.654, "step": 1243 }, { "epoch": 0.67, "grad_norm": 1.4759322462607563, "learning_rate": 3.963674192633566e-06, "loss": 0.6686, "step": 1244 }, { "epoch": 0.67, "grad_norm": 1.428680534526892, "learning_rate": 3.9618626933028086e-06, "loss": 0.6894, "step": 1245 }, { "epoch": 0.67, "grad_norm": 1.4564077002627367, "learning_rate": 3.960050026870713e-06, "loss": 0.6795, "step": 1246 }, { "epoch": 0.67, "grad_norm": 1.5282906050973415, "learning_rate": 3.958236194784453e-06, "loss": 0.7023, "step": 1247 }, { "epoch": 0.67, "grad_norm": 1.3752908656861371, "learning_rate": 3.956421198492128e-06, "loss": 0.6324, "step": 1248 }, { "epoch": 0.67, "grad_norm": 1.4750608151003488, "learning_rate": 3.954605039442768e-06, "loss": 0.6932, "step": 1249 }, { "epoch": 0.68, "grad_norm": 1.4061350268778163, "learning_rate": 3.952787719086334e-06, "loss": 0.6667, "step": 1250 }, { "epoch": 0.68, "grad_norm": 1.4475615632639125, "learning_rate": 3.950969238873714e-06, "loss": 0.678, "step": 1251 }, { "epoch": 0.68, "grad_norm": 1.3648453296672138, "learning_rate": 3.949149600256718e-06, "loss": 0.6625, "step": 1252 }, { "epoch": 0.68, "grad_norm": 1.3946668214491953, "learning_rate": 3.947328804688086e-06, "loss": 0.6757, "step": 1253 }, { "epoch": 0.68, "grad_norm": 1.4731037186432783, "learning_rate": 3.9455068536214765e-06, "loss": 0.668, "step": 1254 }, { "epoch": 0.68, "grad_norm": 1.4759686161625194, "learning_rate": 3.943683748511475e-06, "loss": 0.6819, "step": 1255 }, { "epoch": 0.68, "grad_norm": 1.4639819701606058, "learning_rate": 3.941859490813585e-06, "loss": 0.7196, "step": 1256 }, { "epoch": 0.68, "grad_norm": 1.4497619069073933, "learning_rate": 3.9400340819842335e-06, "loss": 0.6424, "step": 1257 }, { "epoch": 0.68, "grad_norm": 1.6119990697473887, "learning_rate": 3.9382075234807625e-06, "loss": 0.6914, "step": 1258 }, { "epoch": 0.68, "grad_norm": 1.4426495438689488, "learning_rate": 3.936379816761437e-06, "loss": 0.6912, "step": 1259 }, { "epoch": 0.68, "grad_norm": 1.4678327750558744, "learning_rate": 3.934550963285432e-06, "loss": 0.7154, "step": 1260 }, { "epoch": 0.68, "grad_norm": 1.4338220580952195, "learning_rate": 3.9327209645128444e-06, "loss": 0.6681, "step": 1261 }, { "epoch": 0.68, "grad_norm": 1.4818666075165086, "learning_rate": 3.930889821904682e-06, "loss": 0.6979, "step": 1262 }, { "epoch": 0.68, "grad_norm": 1.5452065895372575, "learning_rate": 3.9290575369228664e-06, "loss": 0.703, "step": 1263 }, { "epoch": 0.68, "grad_norm": 1.4299749580270527, "learning_rate": 3.927224111030233e-06, "loss": 0.663, "step": 1264 }, { "epoch": 0.68, "grad_norm": 1.398995362298566, "learning_rate": 3.925389545690524e-06, "loss": 0.6712, "step": 1265 }, { "epoch": 0.68, "grad_norm": 1.4390594928258775, "learning_rate": 3.923553842368396e-06, "loss": 0.7085, "step": 1266 }, { "epoch": 0.68, "grad_norm": 1.483241594868306, "learning_rate": 3.921717002529411e-06, "loss": 0.6617, "step": 1267 }, { "epoch": 0.68, "grad_norm": 1.4729072543024275, "learning_rate": 3.91987902764004e-06, "loss": 0.6972, "step": 1268 }, { "epoch": 0.69, "grad_norm": 1.611759039358573, "learning_rate": 3.918039919167658e-06, "loss": 0.6762, "step": 1269 }, { "epoch": 0.69, "grad_norm": 1.5521065338336533, "learning_rate": 3.916199678580548e-06, "loss": 0.6905, "step": 1270 }, { "epoch": 0.69, "grad_norm": 1.452938749852299, "learning_rate": 3.914358307347894e-06, "loss": 0.6716, "step": 1271 }, { "epoch": 0.69, "grad_norm": 1.4363973985728244, "learning_rate": 3.912515806939786e-06, "loss": 0.6796, "step": 1272 }, { "epoch": 0.69, "grad_norm": 1.556424678895755, "learning_rate": 3.910672178827211e-06, "loss": 0.6868, "step": 1273 }, { "epoch": 0.69, "grad_norm": 1.5585001752290344, "learning_rate": 3.908827424482061e-06, "loss": 0.6691, "step": 1274 }, { "epoch": 0.69, "grad_norm": 1.468485678448706, "learning_rate": 3.906981545377124e-06, "loss": 0.6685, "step": 1275 }, { "epoch": 0.69, "grad_norm": 1.512349111926467, "learning_rate": 3.905134542986086e-06, "loss": 0.6807, "step": 1276 }, { "epoch": 0.69, "grad_norm": 1.4421225818241574, "learning_rate": 3.903286418783533e-06, "loss": 0.6903, "step": 1277 }, { "epoch": 0.69, "grad_norm": 1.4235134954060358, "learning_rate": 3.901437174244943e-06, "loss": 0.6794, "step": 1278 }, { "epoch": 0.69, "grad_norm": 1.4631192670755824, "learning_rate": 3.89958681084669e-06, "loss": 0.6591, "step": 1279 }, { "epoch": 0.69, "grad_norm": 1.4702612957677523, "learning_rate": 3.897735330066041e-06, "loss": 0.7021, "step": 1280 }, { "epoch": 0.69, "grad_norm": 1.4315597962970046, "learning_rate": 3.895882733381154e-06, "loss": 0.6507, "step": 1281 }, { "epoch": 0.69, "grad_norm": 1.512729345179895, "learning_rate": 3.894029022271082e-06, "loss": 0.6703, "step": 1282 }, { "epoch": 0.69, "grad_norm": 1.4903172237481015, "learning_rate": 3.8921741982157626e-06, "loss": 0.6734, "step": 1283 }, { "epoch": 0.69, "grad_norm": 1.4815019750181102, "learning_rate": 3.890318262696023e-06, "loss": 0.6835, "step": 1284 }, { "epoch": 0.69, "grad_norm": 1.5060681757385237, "learning_rate": 3.888461217193581e-06, "loss": 0.6996, "step": 1285 }, { "epoch": 0.69, "grad_norm": 1.4930760412683288, "learning_rate": 3.886603063191039e-06, "loss": 0.6913, "step": 1286 }, { "epoch": 0.7, "grad_norm": 1.5209062277714245, "learning_rate": 3.8847438021718805e-06, "loss": 0.6932, "step": 1287 }, { "epoch": 0.7, "grad_norm": 1.4057211634489475, "learning_rate": 3.88288343562048e-06, "loss": 0.6885, "step": 1288 }, { "epoch": 0.7, "grad_norm": 1.4466057473676517, "learning_rate": 3.881021965022088e-06, "loss": 0.6751, "step": 1289 }, { "epoch": 0.7, "grad_norm": 1.4552399164166916, "learning_rate": 3.879159391862839e-06, "loss": 0.6875, "step": 1290 }, { "epoch": 0.7, "grad_norm": 1.527639126732569, "learning_rate": 3.87729571762975e-06, "loss": 0.657, "step": 1291 }, { "epoch": 0.7, "grad_norm": 1.4534613132425964, "learning_rate": 3.875430943810714e-06, "loss": 0.668, "step": 1292 }, { "epoch": 0.7, "grad_norm": 1.437593231939739, "learning_rate": 3.873565071894503e-06, "loss": 0.6942, "step": 1293 }, { "epoch": 0.7, "grad_norm": 1.486595798418683, "learning_rate": 3.871698103370764e-06, "loss": 0.6957, "step": 1294 }, { "epoch": 0.7, "grad_norm": 1.4337377084176677, "learning_rate": 3.869830039730025e-06, "loss": 0.6977, "step": 1295 }, { "epoch": 0.7, "grad_norm": 1.3950582440804207, "learning_rate": 3.86796088246368e-06, "loss": 0.6961, "step": 1296 }, { "epoch": 0.7, "grad_norm": 1.524336346686645, "learning_rate": 3.8660906330640005e-06, "loss": 0.713, "step": 1297 }, { "epoch": 0.7, "grad_norm": 1.425202156047228, "learning_rate": 3.864219293024133e-06, "loss": 0.6684, "step": 1298 }, { "epoch": 0.7, "grad_norm": 1.5156316072372518, "learning_rate": 3.8623468638380905e-06, "loss": 0.7327, "step": 1299 }, { "epoch": 0.7, "grad_norm": 1.4442938734561663, "learning_rate": 3.860473347000755e-06, "loss": 0.7049, "step": 1300 }, { "epoch": 0.7, "grad_norm": 1.4248608984663256, "learning_rate": 3.858598744007879e-06, "loss": 0.6639, "step": 1301 }, { "epoch": 0.7, "grad_norm": 1.3999148943407593, "learning_rate": 3.856723056356085e-06, "loss": 0.6946, "step": 1302 }, { "epoch": 0.7, "grad_norm": 1.4545072567523982, "learning_rate": 3.854846285542852e-06, "loss": 0.6814, "step": 1303 }, { "epoch": 0.7, "grad_norm": 1.4656387345307877, "learning_rate": 3.852968433066536e-06, "loss": 0.6624, "step": 1304 }, { "epoch": 0.7, "grad_norm": 1.5065020365458572, "learning_rate": 3.851089500426346e-06, "loss": 0.6815, "step": 1305 }, { "epoch": 0.71, "grad_norm": 1.4131125168176697, "learning_rate": 3.849209489122359e-06, "loss": 0.7139, "step": 1306 }, { "epoch": 0.71, "grad_norm": 1.4595864739357376, "learning_rate": 3.847328400655513e-06, "loss": 0.7092, "step": 1307 }, { "epoch": 0.71, "grad_norm": 1.412050706079692, "learning_rate": 3.845446236527605e-06, "loss": 0.6823, "step": 1308 }, { "epoch": 0.71, "grad_norm": 1.5305993032041207, "learning_rate": 3.843562998241288e-06, "loss": 0.6893, "step": 1309 }, { "epoch": 0.71, "grad_norm": 1.4546514658071088, "learning_rate": 3.841678687300079e-06, "loss": 0.7321, "step": 1310 }, { "epoch": 0.71, "grad_norm": 1.4611828787250174, "learning_rate": 3.8397933052083445e-06, "loss": 0.6942, "step": 1311 }, { "epoch": 0.71, "grad_norm": 1.416585028053109, "learning_rate": 3.837906853471311e-06, "loss": 0.7121, "step": 1312 }, { "epoch": 0.71, "grad_norm": 1.4708978994452917, "learning_rate": 3.836019333595056e-06, "loss": 0.6847, "step": 1313 }, { "epoch": 0.71, "grad_norm": 1.385517129533814, "learning_rate": 3.834130747086512e-06, "loss": 0.6815, "step": 1314 }, { "epoch": 0.71, "grad_norm": 1.4593862125505896, "learning_rate": 3.832241095453462e-06, "loss": 0.6785, "step": 1315 }, { "epoch": 0.71, "grad_norm": 1.483457097251242, "learning_rate": 3.830350380204538e-06, "loss": 0.7045, "step": 1316 }, { "epoch": 0.71, "grad_norm": 1.4364117971469454, "learning_rate": 3.828458602849226e-06, "loss": 0.6845, "step": 1317 }, { "epoch": 0.71, "grad_norm": 1.4346069343115835, "learning_rate": 3.826565764897854e-06, "loss": 0.6596, "step": 1318 }, { "epoch": 0.71, "grad_norm": 1.4699129182778983, "learning_rate": 3.824671867861599e-06, "loss": 0.7098, "step": 1319 }, { "epoch": 0.71, "grad_norm": 1.370989645123936, "learning_rate": 3.822776913252485e-06, "loss": 0.6904, "step": 1320 }, { "epoch": 0.71, "grad_norm": 1.4269525282887128, "learning_rate": 3.820880902583378e-06, "loss": 0.6961, "step": 1321 }, { "epoch": 0.71, "grad_norm": 1.4680190344479656, "learning_rate": 3.81898383736799e-06, "loss": 0.6939, "step": 1322 }, { "epoch": 0.71, "grad_norm": 1.421009035376997, "learning_rate": 3.817085719120872e-06, "loss": 0.698, "step": 1323 }, { "epoch": 0.71, "grad_norm": 1.5082418636196708, "learning_rate": 3.8151865493574154e-06, "loss": 0.6905, "step": 1324 }, { "epoch": 0.72, "grad_norm": 1.3876582670150819, "learning_rate": 3.8132863295938568e-06, "loss": 0.6543, "step": 1325 }, { "epoch": 0.72, "grad_norm": 1.4310985024979603, "learning_rate": 3.811385061347263e-06, "loss": 0.6578, "step": 1326 }, { "epoch": 0.72, "grad_norm": 1.4862534183056162, "learning_rate": 3.809482746135543e-06, "loss": 0.6832, "step": 1327 }, { "epoch": 0.72, "grad_norm": 1.4710966052310679, "learning_rate": 3.8075793854774414e-06, "loss": 0.7109, "step": 1328 }, { "epoch": 0.72, "grad_norm": 1.458868223412115, "learning_rate": 3.805674980892535e-06, "loss": 0.6693, "step": 1329 }, { "epoch": 0.72, "grad_norm": 1.3994814495782784, "learning_rate": 3.803769533901236e-06, "loss": 0.6628, "step": 1330 }, { "epoch": 0.72, "grad_norm": 1.4677784404636873, "learning_rate": 3.8018630460247884e-06, "loss": 0.6811, "step": 1331 }, { "epoch": 0.72, "grad_norm": 1.449105985376217, "learning_rate": 3.7999555187852667e-06, "loss": 0.686, "step": 1332 }, { "epoch": 0.72, "grad_norm": 1.396248866032035, "learning_rate": 3.7980469537055766e-06, "loss": 0.6993, "step": 1333 }, { "epoch": 0.72, "grad_norm": 1.3517121873916558, "learning_rate": 3.7961373523094516e-06, "loss": 0.6575, "step": 1334 }, { "epoch": 0.72, "grad_norm": 1.4639218566961814, "learning_rate": 3.7942267161214497e-06, "loss": 0.6993, "step": 1335 }, { "epoch": 0.72, "grad_norm": 1.429634294188059, "learning_rate": 3.7923150466669608e-06, "loss": 0.696, "step": 1336 }, { "epoch": 0.72, "grad_norm": 1.441793048301125, "learning_rate": 3.790402345472195e-06, "loss": 0.6865, "step": 1337 }, { "epoch": 0.72, "grad_norm": 1.4690138729901152, "learning_rate": 3.7884886140641884e-06, "loss": 0.6861, "step": 1338 }, { "epoch": 0.72, "grad_norm": 1.4823050465797647, "learning_rate": 3.7865738539707987e-06, "loss": 0.6867, "step": 1339 }, { "epoch": 0.72, "grad_norm": 1.4392929248982533, "learning_rate": 3.7846580667207044e-06, "loss": 0.7017, "step": 1340 }, { "epoch": 0.72, "grad_norm": 1.4328078092681384, "learning_rate": 3.7827412538434062e-06, "loss": 0.7065, "step": 1341 }, { "epoch": 0.72, "grad_norm": 1.4075108248935673, "learning_rate": 3.7808234168692215e-06, "loss": 0.681, "step": 1342 }, { "epoch": 0.73, "grad_norm": 1.3989152177716164, "learning_rate": 3.7789045573292847e-06, "loss": 0.6834, "step": 1343 }, { "epoch": 0.73, "grad_norm": 1.5192416071932382, "learning_rate": 3.7769846767555495e-06, "loss": 0.7072, "step": 1344 }, { "epoch": 0.73, "grad_norm": 1.586550654720166, "learning_rate": 3.7750637766807824e-06, "loss": 0.708, "step": 1345 }, { "epoch": 0.73, "grad_norm": 1.3598618373829485, "learning_rate": 3.773141858638565e-06, "loss": 0.654, "step": 1346 }, { "epoch": 0.73, "grad_norm": 1.4265890823680727, "learning_rate": 3.7712189241632898e-06, "loss": 0.6741, "step": 1347 }, { "epoch": 0.73, "grad_norm": 1.394967561651, "learning_rate": 3.7692949747901643e-06, "loss": 0.6839, "step": 1348 }, { "epoch": 0.73, "grad_norm": 1.5086172702893839, "learning_rate": 3.7673700120552013e-06, "loss": 0.6726, "step": 1349 }, { "epoch": 0.73, "grad_norm": 1.4192361406334637, "learning_rate": 3.7654440374952288e-06, "loss": 0.6816, "step": 1350 }, { "epoch": 0.73, "grad_norm": 1.4572073033579365, "learning_rate": 3.7635170526478765e-06, "loss": 0.7034, "step": 1351 }, { "epoch": 0.73, "grad_norm": 1.4217558152333063, "learning_rate": 3.7615890590515847e-06, "loss": 0.6936, "step": 1352 }, { "epoch": 0.73, "grad_norm": 1.4273373628197896, "learning_rate": 3.7596600582455976e-06, "loss": 0.6585, "step": 1353 }, { "epoch": 0.73, "grad_norm": 1.4161419224311174, "learning_rate": 3.7577300517699626e-06, "loss": 0.6868, "step": 1354 }, { "epoch": 0.73, "grad_norm": 1.437711635077047, "learning_rate": 3.7557990411655326e-06, "loss": 0.6889, "step": 1355 }, { "epoch": 0.73, "grad_norm": 1.3899163134747166, "learning_rate": 3.75386702797396e-06, "loss": 0.662, "step": 1356 }, { "epoch": 0.73, "grad_norm": 1.4044239780064982, "learning_rate": 3.751934013737698e-06, "loss": 0.6957, "step": 1357 }, { "epoch": 0.73, "grad_norm": 1.4053653527343055, "learning_rate": 3.7500000000000005e-06, "loss": 0.6588, "step": 1358 }, { "epoch": 0.73, "grad_norm": 1.4673206956058404, "learning_rate": 3.7480649883049164e-06, "loss": 0.6943, "step": 1359 }, { "epoch": 0.73, "grad_norm": 1.477025691317949, "learning_rate": 3.746128980197294e-06, "loss": 0.685, "step": 1360 }, { "epoch": 0.73, "grad_norm": 1.4800506234950361, "learning_rate": 3.7441919772227757e-06, "loss": 0.6834, "step": 1361 }, { "epoch": 0.74, "grad_norm": 1.4016563784594347, "learning_rate": 3.7422539809277993e-06, "loss": 0.6691, "step": 1362 }, { "epoch": 0.74, "grad_norm": 1.3958633059237802, "learning_rate": 3.7403149928595946e-06, "loss": 0.6843, "step": 1363 }, { "epoch": 0.74, "grad_norm": 1.4156809668106873, "learning_rate": 3.7383750145661834e-06, "loss": 0.6762, "step": 1364 }, { "epoch": 0.74, "grad_norm": 1.412680827205205, "learning_rate": 3.736434047596379e-06, "loss": 0.6922, "step": 1365 }, { "epoch": 0.74, "grad_norm": 1.432978847554734, "learning_rate": 3.7344920934997825e-06, "loss": 0.69, "step": 1366 }, { "epoch": 0.74, "grad_norm": 1.4549442119814648, "learning_rate": 3.732549153826784e-06, "loss": 0.6731, "step": 1367 }, { "epoch": 0.74, "grad_norm": 1.4685748745650007, "learning_rate": 3.73060523012856e-06, "loss": 0.6616, "step": 1368 }, { "epoch": 0.74, "grad_norm": 1.433798224028261, "learning_rate": 3.7286603239570733e-06, "loss": 0.6972, "step": 1369 }, { "epoch": 0.74, "grad_norm": 1.4265964727805496, "learning_rate": 3.726714436865071e-06, "loss": 0.6819, "step": 1370 }, { "epoch": 0.74, "grad_norm": 1.5959161958621109, "learning_rate": 3.724767570406082e-06, "loss": 0.7056, "step": 1371 }, { "epoch": 0.74, "grad_norm": 1.4467192906108268, "learning_rate": 3.7228197261344194e-06, "loss": 0.6846, "step": 1372 }, { "epoch": 0.74, "grad_norm": 1.3751274727645657, "learning_rate": 3.720870905605175e-06, "loss": 0.6554, "step": 1373 }, { "epoch": 0.74, "grad_norm": 1.5046176952068258, "learning_rate": 3.7189211103742206e-06, "loss": 0.679, "step": 1374 }, { "epoch": 0.74, "grad_norm": 1.4498284708971279, "learning_rate": 3.7169703419982063e-06, "loss": 0.6613, "step": 1375 }, { "epoch": 0.74, "grad_norm": 1.4741443038078201, "learning_rate": 3.7150186020345593e-06, "loss": 0.681, "step": 1376 }, { "epoch": 0.74, "grad_norm": 1.5098842146434335, "learning_rate": 3.7130658920414818e-06, "loss": 0.6976, "step": 1377 }, { "epoch": 0.74, "grad_norm": 1.4702790147468248, "learning_rate": 3.7111122135779514e-06, "loss": 0.7025, "step": 1378 }, { "epoch": 0.74, "grad_norm": 1.5350353488961042, "learning_rate": 3.709157568203717e-06, "loss": 0.7033, "step": 1379 }, { "epoch": 0.75, "grad_norm": 1.4653701261685705, "learning_rate": 3.7072019574793034e-06, "loss": 0.6728, "step": 1380 }, { "epoch": 0.75, "grad_norm": 1.403102805846651, "learning_rate": 3.705245382966002e-06, "loss": 0.6531, "step": 1381 }, { "epoch": 0.75, "grad_norm": 1.5347459071441658, "learning_rate": 3.7032878462258735e-06, "loss": 0.6808, "step": 1382 }, { "epoch": 0.75, "grad_norm": 1.5027295861551242, "learning_rate": 3.701329348821752e-06, "loss": 0.6622, "step": 1383 }, { "epoch": 0.75, "grad_norm": 1.4290729517075254, "learning_rate": 3.699369892317234e-06, "loss": 0.6847, "step": 1384 }, { "epoch": 0.75, "grad_norm": 1.3964082605121613, "learning_rate": 3.6974094782766806e-06, "loss": 0.6767, "step": 1385 }, { "epoch": 0.75, "grad_norm": 1.4321732869433872, "learning_rate": 3.695448108265221e-06, "loss": 0.6954, "step": 1386 }, { "epoch": 0.75, "grad_norm": 1.406888557709584, "learning_rate": 3.693485783848747e-06, "loss": 0.695, "step": 1387 }, { "epoch": 0.75, "grad_norm": 1.4605944174240144, "learning_rate": 3.69152250659391e-06, "loss": 0.6717, "step": 1388 }, { "epoch": 0.75, "grad_norm": 1.43075340807193, "learning_rate": 3.6895582780681254e-06, "loss": 0.6851, "step": 1389 }, { "epoch": 0.75, "grad_norm": 1.4302482342166296, "learning_rate": 3.6875930998395644e-06, "loss": 0.68, "step": 1390 }, { "epoch": 0.75, "grad_norm": 1.411450709446237, "learning_rate": 3.685626973477159e-06, "loss": 0.6706, "step": 1391 }, { "epoch": 0.75, "grad_norm": 1.412278683481579, "learning_rate": 3.683659900550598e-06, "loss": 0.6783, "step": 1392 }, { "epoch": 0.75, "grad_norm": 1.4548086924201233, "learning_rate": 3.681691882630325e-06, "loss": 0.6657, "step": 1393 }, { "epoch": 0.75, "grad_norm": 1.4325245321868212, "learning_rate": 3.6797229212875378e-06, "loss": 0.6603, "step": 1394 }, { "epoch": 0.75, "grad_norm": 1.4446895415377339, "learning_rate": 3.6777530180941894e-06, "loss": 0.6866, "step": 1395 }, { "epoch": 0.75, "grad_norm": 1.4955298697581298, "learning_rate": 3.675782174622982e-06, "loss": 0.703, "step": 1396 }, { "epoch": 0.75, "grad_norm": 1.4033354870937857, "learning_rate": 3.6738103924473713e-06, "loss": 0.6725, "step": 1397 }, { "epoch": 0.75, "grad_norm": 1.4417870761205989, "learning_rate": 3.671837673141559e-06, "loss": 0.7051, "step": 1398 }, { "epoch": 0.76, "grad_norm": 1.4511138174130214, "learning_rate": 3.669864018280498e-06, "loss": 0.6944, "step": 1399 }, { "epoch": 0.76, "grad_norm": 1.3886353747380193, "learning_rate": 3.6678894294398877e-06, "loss": 0.6593, "step": 1400 }, { "epoch": 0.76, "grad_norm": 1.5497492525356558, "learning_rate": 3.6659139081961707e-06, "loss": 0.6768, "step": 1401 }, { "epoch": 0.76, "grad_norm": 1.4651976438235441, "learning_rate": 3.663937456126538e-06, "loss": 0.6714, "step": 1402 }, { "epoch": 0.76, "grad_norm": 1.4592024418141691, "learning_rate": 3.6619600748089203e-06, "loss": 0.6935, "step": 1403 }, { "epoch": 0.76, "grad_norm": 1.4048851197220433, "learning_rate": 3.6599817658219916e-06, "loss": 0.7084, "step": 1404 }, { "epoch": 0.76, "grad_norm": 1.5658870763316475, "learning_rate": 3.6580025307451667e-06, "loss": 0.7228, "step": 1405 }, { "epoch": 0.76, "grad_norm": 1.5498592216165945, "learning_rate": 3.6560223711585986e-06, "loss": 0.7061, "step": 1406 }, { "epoch": 0.76, "grad_norm": 1.4272069885944845, "learning_rate": 3.6540412886431796e-06, "loss": 0.691, "step": 1407 }, { "epoch": 0.76, "grad_norm": 1.4459835626815005, "learning_rate": 3.652059284780539e-06, "loss": 0.6927, "step": 1408 }, { "epoch": 0.76, "grad_norm": 1.4005559781938286, "learning_rate": 3.650076361153041e-06, "loss": 0.6736, "step": 1409 }, { "epoch": 0.76, "grad_norm": 1.4693554472347927, "learning_rate": 3.648092519343783e-06, "loss": 0.6544, "step": 1410 }, { "epoch": 0.76, "grad_norm": 1.4065241770850063, "learning_rate": 3.6461077609365985e-06, "loss": 0.6571, "step": 1411 }, { "epoch": 0.76, "grad_norm": 1.4710402295711889, "learning_rate": 3.6441220875160495e-06, "loss": 0.6625, "step": 1412 }, { "epoch": 0.76, "grad_norm": 1.4744934012569688, "learning_rate": 3.642135500667431e-06, "loss": 0.6739, "step": 1413 }, { "epoch": 0.76, "grad_norm": 1.4380504142040649, "learning_rate": 3.640148001976765e-06, "loss": 0.6735, "step": 1414 }, { "epoch": 0.76, "grad_norm": 1.4718349836050404, "learning_rate": 3.6381595930308032e-06, "loss": 0.6888, "step": 1415 }, { "epoch": 0.76, "grad_norm": 1.4071642456801707, "learning_rate": 3.6361702754170247e-06, "loss": 0.6803, "step": 1416 }, { "epoch": 0.77, "grad_norm": 1.4763343600321333, "learning_rate": 3.6341800507236314e-06, "loss": 0.6808, "step": 1417 }, { "epoch": 0.77, "grad_norm": 1.5122858625561977, "learning_rate": 3.6321889205395513e-06, "loss": 0.6732, "step": 1418 }, { "epoch": 0.77, "grad_norm": 1.4534675996468427, "learning_rate": 3.630196886454435e-06, "loss": 0.6837, "step": 1419 }, { "epoch": 0.77, "grad_norm": 1.4751210526713703, "learning_rate": 3.6282039500586545e-06, "loss": 0.6508, "step": 1420 }, { "epoch": 0.77, "grad_norm": 1.3783068888043561, "learning_rate": 3.626210112943302e-06, "loss": 0.6635, "step": 1421 }, { "epoch": 0.77, "grad_norm": 1.5228138042938633, "learning_rate": 3.62421537670019e-06, "loss": 0.6993, "step": 1422 }, { "epoch": 0.77, "grad_norm": 1.4518394506417716, "learning_rate": 3.6222197429218463e-06, "loss": 0.6889, "step": 1423 }, { "epoch": 0.77, "grad_norm": 1.465427007453531, "learning_rate": 3.6202232132015188e-06, "loss": 0.6816, "step": 1424 }, { "epoch": 0.77, "grad_norm": 1.443241725116693, "learning_rate": 3.618225789133167e-06, "loss": 0.706, "step": 1425 }, { "epoch": 0.77, "grad_norm": 1.4539300260097847, "learning_rate": 3.616227472311467e-06, "loss": 0.6767, "step": 1426 }, { "epoch": 0.77, "grad_norm": 1.4038538302762908, "learning_rate": 3.614228264331807e-06, "loss": 0.6697, "step": 1427 }, { "epoch": 0.77, "grad_norm": 1.3902851942575993, "learning_rate": 3.612228166790287e-06, "loss": 0.6831, "step": 1428 }, { "epoch": 0.77, "grad_norm": 1.4416524970845632, "learning_rate": 3.610227181283715e-06, "loss": 0.7132, "step": 1429 }, { "epoch": 0.77, "grad_norm": 1.4567500841786896, "learning_rate": 3.608225309409611e-06, "loss": 0.6383, "step": 1430 }, { "epoch": 0.77, "grad_norm": 1.4302890450103478, "learning_rate": 3.606222552766201e-06, "loss": 0.6708, "step": 1431 }, { "epoch": 0.77, "grad_norm": 1.434659424781137, "learning_rate": 3.6042189129524175e-06, "loss": 0.6924, "step": 1432 }, { "epoch": 0.77, "grad_norm": 1.4975406447177353, "learning_rate": 3.6022143915678977e-06, "loss": 0.7023, "step": 1433 }, { "epoch": 0.77, "grad_norm": 1.4818588010577205, "learning_rate": 3.6002089902129844e-06, "loss": 0.6682, "step": 1434 }, { "epoch": 0.77, "grad_norm": 1.5256876478758405, "learning_rate": 3.5982027104887202e-06, "loss": 0.7078, "step": 1435 }, { "epoch": 0.78, "grad_norm": 1.4390317096718814, "learning_rate": 3.5961955539968517e-06, "loss": 0.6866, "step": 1436 }, { "epoch": 0.78, "grad_norm": 1.4518421107650572, "learning_rate": 3.5941875223398225e-06, "loss": 0.6504, "step": 1437 }, { "epoch": 0.78, "grad_norm": 1.468406902365721, "learning_rate": 3.5921786171207788e-06, "loss": 0.6625, "step": 1438 }, { "epoch": 0.78, "grad_norm": 1.4408886575891862, "learning_rate": 3.5901688399435613e-06, "loss": 0.6631, "step": 1439 }, { "epoch": 0.78, "grad_norm": 1.3977412472995283, "learning_rate": 3.588158192412707e-06, "loss": 0.6735, "step": 1440 }, { "epoch": 0.78, "grad_norm": 1.4449244540600121, "learning_rate": 3.5861466761334485e-06, "loss": 0.6757, "step": 1441 }, { "epoch": 0.78, "grad_norm": 1.5043198074445858, "learning_rate": 3.5841342927117122e-06, "loss": 0.6737, "step": 1442 }, { "epoch": 0.78, "grad_norm": 1.456979667489677, "learning_rate": 3.582121043754116e-06, "loss": 0.6608, "step": 1443 }, { "epoch": 0.78, "grad_norm": 1.4532768033131134, "learning_rate": 3.5801069308679697e-06, "loss": 0.6693, "step": 1444 }, { "epoch": 0.78, "grad_norm": 1.3998306020111386, "learning_rate": 3.5780919556612712e-06, "loss": 0.6852, "step": 1445 }, { "epoch": 0.78, "grad_norm": 1.4246530471952497, "learning_rate": 3.5760761197427097e-06, "loss": 0.6405, "step": 1446 }, { "epoch": 0.78, "grad_norm": 1.436714984660112, "learning_rate": 3.5740594247216598e-06, "loss": 0.6898, "step": 1447 }, { "epoch": 0.78, "grad_norm": 1.508057647885206, "learning_rate": 3.57204187220818e-06, "loss": 0.6814, "step": 1448 }, { "epoch": 0.78, "grad_norm": 1.4755894612538245, "learning_rate": 3.570023463813017e-06, "loss": 0.6777, "step": 1449 }, { "epoch": 0.78, "grad_norm": 1.424527092685164, "learning_rate": 3.5680042011475996e-06, "loss": 0.7036, "step": 1450 }, { "epoch": 0.78, "grad_norm": 1.4681389313774627, "learning_rate": 3.5659840858240373e-06, "loss": 0.6982, "step": 1451 }, { "epoch": 0.78, "grad_norm": 1.4190778541136755, "learning_rate": 3.5639631194551216e-06, "loss": 0.6901, "step": 1452 }, { "epoch": 0.78, "grad_norm": 1.3819735853739117, "learning_rate": 3.561941303654324e-06, "loss": 0.6714, "step": 1453 }, { "epoch": 0.79, "grad_norm": 1.5329403415681073, "learning_rate": 3.559918640035792e-06, "loss": 0.6922, "step": 1454 }, { "epoch": 0.79, "grad_norm": 1.635281363608563, "learning_rate": 3.557895130214352e-06, "loss": 0.6631, "step": 1455 }, { "epoch": 0.79, "grad_norm": 1.4431951710659072, "learning_rate": 3.555870775805505e-06, "loss": 0.6907, "step": 1456 }, { "epoch": 0.79, "grad_norm": 1.4792101897537993, "learning_rate": 3.5538455784254262e-06, "loss": 0.6748, "step": 1457 }, { "epoch": 0.79, "grad_norm": 1.4327461487769055, "learning_rate": 3.5518195396909653e-06, "loss": 0.6907, "step": 1458 }, { "epoch": 0.79, "grad_norm": 1.5158039917728845, "learning_rate": 3.5497926612196414e-06, "loss": 0.7092, "step": 1459 }, { "epoch": 0.79, "grad_norm": 1.4302573314395517, "learning_rate": 3.547764944629646e-06, "loss": 0.6682, "step": 1460 }, { "epoch": 0.79, "grad_norm": 1.469752936176478, "learning_rate": 3.5457363915398384e-06, "loss": 0.6841, "step": 1461 }, { "epoch": 0.79, "grad_norm": 1.46267750329072, "learning_rate": 3.5437070035697463e-06, "loss": 0.7017, "step": 1462 }, { "epoch": 0.79, "grad_norm": 1.4990317932613286, "learning_rate": 3.5416767823395642e-06, "loss": 0.6518, "step": 1463 }, { "epoch": 0.79, "grad_norm": 1.3866526450325567, "learning_rate": 3.539645729470151e-06, "loss": 0.6619, "step": 1464 }, { "epoch": 0.79, "grad_norm": 1.4363663179658266, "learning_rate": 3.53761384658303e-06, "loss": 0.6654, "step": 1465 }, { "epoch": 0.79, "grad_norm": 1.4176121627538771, "learning_rate": 3.5355811353003883e-06, "loss": 0.6624, "step": 1466 }, { "epoch": 0.79, "grad_norm": 1.490991705953575, "learning_rate": 3.5335475972450715e-06, "loss": 0.7137, "step": 1467 }, { "epoch": 0.79, "grad_norm": 1.4356320406284628, "learning_rate": 3.531513234040589e-06, "loss": 0.7006, "step": 1468 }, { "epoch": 0.79, "grad_norm": 1.460979364035857, "learning_rate": 3.529478047311106e-06, "loss": 0.6832, "step": 1469 }, { "epoch": 0.79, "grad_norm": 1.426564752693756, "learning_rate": 3.5274420386814458e-06, "loss": 0.6732, "step": 1470 }, { "epoch": 0.79, "grad_norm": 1.4957971469028093, "learning_rate": 3.5254052097770895e-06, "loss": 0.6937, "step": 1471 }, { "epoch": 0.79, "grad_norm": 1.4739057169841852, "learning_rate": 3.52336756222417e-06, "loss": 0.6702, "step": 1472 }, { "epoch": 0.8, "grad_norm": 1.4509172358137132, "learning_rate": 3.521329097649478e-06, "loss": 0.6573, "step": 1473 }, { "epoch": 0.8, "grad_norm": 1.4381272421337572, "learning_rate": 3.5192898176804526e-06, "loss": 0.6921, "step": 1474 }, { "epoch": 0.8, "grad_norm": 1.428131559533256, "learning_rate": 3.517249723945186e-06, "loss": 0.6701, "step": 1475 }, { "epoch": 0.8, "grad_norm": 1.4479771899075717, "learning_rate": 3.515208818072418e-06, "loss": 0.6634, "step": 1476 }, { "epoch": 0.8, "grad_norm": 1.4212792391659979, "learning_rate": 3.5131671016915402e-06, "loss": 0.6812, "step": 1477 }, { "epoch": 0.8, "grad_norm": 1.450020166506057, "learning_rate": 3.51112457643259e-06, "loss": 0.7148, "step": 1478 }, { "epoch": 0.8, "grad_norm": 1.4386606074927404, "learning_rate": 3.509081243926247e-06, "loss": 0.708, "step": 1479 }, { "epoch": 0.8, "grad_norm": 1.4685667822066364, "learning_rate": 3.5070371058038392e-06, "loss": 0.6423, "step": 1480 }, { "epoch": 0.8, "grad_norm": 1.4505975219148286, "learning_rate": 3.504992163697339e-06, "loss": 0.6917, "step": 1481 }, { "epoch": 0.8, "grad_norm": 1.4626659210847426, "learning_rate": 3.5029464192393557e-06, "loss": 0.6633, "step": 1482 }, { "epoch": 0.8, "grad_norm": 1.4519959960750919, "learning_rate": 3.5008998740631437e-06, "loss": 0.6793, "step": 1483 }, { "epoch": 0.8, "grad_norm": 1.4681309733735879, "learning_rate": 3.498852529802593e-06, "loss": 0.674, "step": 1484 }, { "epoch": 0.8, "grad_norm": 1.4772233737799771, "learning_rate": 3.4968043880922363e-06, "loss": 0.6589, "step": 1485 }, { "epoch": 0.8, "grad_norm": 1.4252362075299936, "learning_rate": 3.494755450567239e-06, "loss": 0.6765, "step": 1486 }, { "epoch": 0.8, "grad_norm": 1.465572772558022, "learning_rate": 3.4927057188634004e-06, "loss": 0.6758, "step": 1487 }, { "epoch": 0.8, "grad_norm": 1.423228761709194, "learning_rate": 3.4906551946171603e-06, "loss": 0.6693, "step": 1488 }, { "epoch": 0.8, "grad_norm": 1.4667330220204293, "learning_rate": 3.4886038794655854e-06, "loss": 0.6738, "step": 1489 }, { "epoch": 0.8, "grad_norm": 1.4878986940036008, "learning_rate": 3.486551775046375e-06, "loss": 0.6796, "step": 1490 }, { "epoch": 0.81, "grad_norm": 1.4514472572403416, "learning_rate": 3.484498882997861e-06, "loss": 0.6938, "step": 1491 }, { "epoch": 0.81, "grad_norm": 1.4544540574241231, "learning_rate": 3.4824452049590018e-06, "loss": 0.6923, "step": 1492 }, { "epoch": 0.81, "grad_norm": 1.4226827689282213, "learning_rate": 3.4803907425693834e-06, "loss": 0.685, "step": 1493 }, { "epoch": 0.81, "grad_norm": 1.5739244860162094, "learning_rate": 3.478335497469219e-06, "loss": 0.6712, "step": 1494 }, { "epoch": 0.81, "grad_norm": 1.5232527363062855, "learning_rate": 3.4762794712993464e-06, "loss": 0.6821, "step": 1495 }, { "epoch": 0.81, "grad_norm": 1.4493853101074872, "learning_rate": 3.474222665701226e-06, "loss": 0.6627, "step": 1496 }, { "epoch": 0.81, "grad_norm": 1.450516701037901, "learning_rate": 3.472165082316943e-06, "loss": 0.6733, "step": 1497 }, { "epoch": 0.81, "grad_norm": 1.4040715849875598, "learning_rate": 3.4701067227891997e-06, "loss": 0.6741, "step": 1498 }, { "epoch": 0.81, "grad_norm": 1.4326176992805115, "learning_rate": 3.468047588761322e-06, "loss": 0.6573, "step": 1499 }, { "epoch": 0.81, "grad_norm": 1.4586247683882079, "learning_rate": 3.465987681877251e-06, "loss": 0.6833, "step": 1500 }, { "epoch": 0.81, "grad_norm": 1.4668231389434756, "learning_rate": 3.4639270037815465e-06, "loss": 0.6746, "step": 1501 }, { "epoch": 0.81, "grad_norm": 1.4135277969824904, "learning_rate": 3.461865556119384e-06, "loss": 0.6795, "step": 1502 }, { "epoch": 0.81, "grad_norm": 1.5024194279485963, "learning_rate": 3.4598033405365527e-06, "loss": 0.6629, "step": 1503 }, { "epoch": 0.81, "grad_norm": 1.4222505614492054, "learning_rate": 3.457740358679455e-06, "loss": 0.6876, "step": 1504 }, { "epoch": 0.81, "grad_norm": 1.4117759833376604, "learning_rate": 3.4556766121951065e-06, "loss": 0.6658, "step": 1505 }, { "epoch": 0.81, "grad_norm": 1.4374774532325523, "learning_rate": 3.45361210273113e-06, "loss": 0.7073, "step": 1506 }, { "epoch": 0.81, "grad_norm": 1.3806394226804501, "learning_rate": 3.451546831935761e-06, "loss": 0.6648, "step": 1507 }, { "epoch": 0.81, "grad_norm": 1.3826235326347396, "learning_rate": 3.449480801457841e-06, "loss": 0.6484, "step": 1508 }, { "epoch": 0.81, "grad_norm": 1.5323115215768597, "learning_rate": 3.447414012946818e-06, "loss": 0.6833, "step": 1509 }, { "epoch": 0.82, "grad_norm": 1.5370195494753944, "learning_rate": 3.4453464680527467e-06, "loss": 0.6865, "step": 1510 }, { "epoch": 0.82, "grad_norm": 1.4526950689268754, "learning_rate": 3.443278168426282e-06, "loss": 0.6612, "step": 1511 }, { "epoch": 0.82, "grad_norm": 1.4355853007506267, "learning_rate": 3.4412091157186853e-06, "loss": 0.698, "step": 1512 }, { "epoch": 0.82, "grad_norm": 1.477659619693687, "learning_rate": 3.439139311581819e-06, "loss": 0.6767, "step": 1513 }, { "epoch": 0.82, "grad_norm": 1.3835809385823312, "learning_rate": 3.4370687576681423e-06, "loss": 0.6499, "step": 1514 }, { "epoch": 0.82, "grad_norm": 1.4338784502020483, "learning_rate": 3.4349974556307146e-06, "loss": 0.6327, "step": 1515 }, { "epoch": 0.82, "grad_norm": 1.4582853368654562, "learning_rate": 3.4329254071231947e-06, "loss": 0.6851, "step": 1516 }, { "epoch": 0.82, "grad_norm": 1.4161559551682508, "learning_rate": 3.4308526137998337e-06, "loss": 0.6576, "step": 1517 }, { "epoch": 0.82, "grad_norm": 1.4378684910697672, "learning_rate": 3.4287790773154807e-06, "loss": 0.6436, "step": 1518 }, { "epoch": 0.82, "grad_norm": 1.4863284089977544, "learning_rate": 3.4267047993255748e-06, "loss": 0.6647, "step": 1519 }, { "epoch": 0.82, "grad_norm": 1.4842511905374058, "learning_rate": 3.42462978148615e-06, "loss": 0.6783, "step": 1520 }, { "epoch": 0.82, "grad_norm": 1.4644927791645228, "learning_rate": 3.4225540254538297e-06, "loss": 0.6577, "step": 1521 }, { "epoch": 0.82, "grad_norm": 1.4604126542005973, "learning_rate": 3.420477532885827e-06, "loss": 0.6669, "step": 1522 }, { "epoch": 0.82, "grad_norm": 1.4607813004041352, "learning_rate": 3.418400305439941e-06, "loss": 0.6948, "step": 1523 }, { "epoch": 0.82, "grad_norm": 1.3756454906116256, "learning_rate": 3.416322344774562e-06, "loss": 0.6647, "step": 1524 }, { "epoch": 0.82, "grad_norm": 1.4401353360265714, "learning_rate": 3.4142436525486616e-06, "loss": 0.6716, "step": 1525 }, { "epoch": 0.82, "grad_norm": 1.5305912655031244, "learning_rate": 3.4121642304217955e-06, "loss": 0.6767, "step": 1526 }, { "epoch": 0.82, "grad_norm": 1.4442498759749176, "learning_rate": 3.4100840800541055e-06, "loss": 0.6892, "step": 1527 }, { "epoch": 0.83, "grad_norm": 1.438169684485549, "learning_rate": 3.408003203106312e-06, "loss": 0.6851, "step": 1528 }, { "epoch": 0.83, "grad_norm": 1.3943835382910494, "learning_rate": 3.405921601239717e-06, "loss": 0.648, "step": 1529 }, { "epoch": 0.83, "grad_norm": 1.4006355912853032, "learning_rate": 3.4038392761161986e-06, "loss": 0.6642, "step": 1530 }, { "epoch": 0.83, "grad_norm": 1.4846622216917749, "learning_rate": 3.4017562293982153e-06, "loss": 0.6924, "step": 1531 }, { "epoch": 0.83, "grad_norm": 1.5619155465686159, "learning_rate": 3.399672462748801e-06, "loss": 0.7037, "step": 1532 }, { "epoch": 0.83, "grad_norm": 1.4832068570809205, "learning_rate": 3.3975879778315634e-06, "loss": 0.6737, "step": 1533 }, { "epoch": 0.83, "grad_norm": 1.4084972109081966, "learning_rate": 3.3955027763106835e-06, "loss": 0.648, "step": 1534 }, { "epoch": 0.83, "grad_norm": 1.3841866697562588, "learning_rate": 3.3934168598509164e-06, "loss": 0.6723, "step": 1535 }, { "epoch": 0.83, "grad_norm": 1.42184895281595, "learning_rate": 3.391330230117587e-06, "loss": 0.6569, "step": 1536 }, { "epoch": 0.83, "grad_norm": 1.443459023534524, "learning_rate": 3.3892428887765876e-06, "loss": 0.6641, "step": 1537 }, { "epoch": 0.83, "grad_norm": 1.4859444381300722, "learning_rate": 3.3871548374943813e-06, "loss": 0.6859, "step": 1538 }, { "epoch": 0.83, "grad_norm": 1.4554764163430918, "learning_rate": 3.385066077937997e-06, "loss": 0.6546, "step": 1539 }, { "epoch": 0.83, "grad_norm": 1.3844081799728218, "learning_rate": 3.3829766117750297e-06, "loss": 0.6954, "step": 1540 }, { "epoch": 0.83, "grad_norm": 1.4402649407967136, "learning_rate": 3.3808864406736373e-06, "loss": 0.6581, "step": 1541 }, { "epoch": 0.83, "grad_norm": 1.430465577715763, "learning_rate": 3.378795566302541e-06, "loss": 0.6828, "step": 1542 }, { "epoch": 0.83, "grad_norm": 1.4844384994639543, "learning_rate": 3.376703990331024e-06, "loss": 0.6892, "step": 1543 }, { "epoch": 0.83, "grad_norm": 1.4339510606805383, "learning_rate": 3.374611714428929e-06, "loss": 0.7041, "step": 1544 }, { "epoch": 0.83, "grad_norm": 1.4264322229433832, "learning_rate": 3.372518740266658e-06, "loss": 0.653, "step": 1545 }, { "epoch": 0.83, "grad_norm": 1.419553153461658, "learning_rate": 3.3704250695151702e-06, "loss": 0.6921, "step": 1546 }, { "epoch": 0.84, "grad_norm": 1.466816534311663, "learning_rate": 3.368330703845981e-06, "loss": 0.6846, "step": 1547 }, { "epoch": 0.84, "grad_norm": 1.4275880759251232, "learning_rate": 3.36623564493116e-06, "loss": 0.6603, "step": 1548 }, { "epoch": 0.84, "grad_norm": 1.407654131064437, "learning_rate": 3.3641398944433317e-06, "loss": 0.6617, "step": 1549 }, { "epoch": 0.84, "grad_norm": 1.4275207648833015, "learning_rate": 3.3620434540556713e-06, "loss": 0.6792, "step": 1550 }, { "epoch": 0.84, "grad_norm": 1.492037038596875, "learning_rate": 3.3599463254419047e-06, "loss": 0.6748, "step": 1551 }, { "epoch": 0.84, "grad_norm": 1.515223986663815, "learning_rate": 3.357848510276309e-06, "loss": 0.6814, "step": 1552 }, { "epoch": 0.84, "grad_norm": 1.4942424429723926, "learning_rate": 3.355750010233708e-06, "loss": 0.6998, "step": 1553 }, { "epoch": 0.84, "grad_norm": 1.4190243773611908, "learning_rate": 3.3536508269894724e-06, "loss": 0.6731, "step": 1554 }, { "epoch": 0.84, "grad_norm": 1.5327921914022695, "learning_rate": 3.351550962219519e-06, "loss": 0.671, "step": 1555 }, { "epoch": 0.84, "grad_norm": 1.4277272628423734, "learning_rate": 3.3494504176003074e-06, "loss": 0.6524, "step": 1556 }, { "epoch": 0.84, "grad_norm": 1.4010106822791062, "learning_rate": 3.347349194808842e-06, "loss": 0.6531, "step": 1557 }, { "epoch": 0.84, "grad_norm": 1.3715778254979, "learning_rate": 3.3452472955226654e-06, "loss": 0.6737, "step": 1558 }, { "epoch": 0.84, "grad_norm": 1.3630381936211158, "learning_rate": 3.3431447214198646e-06, "loss": 0.6798, "step": 1559 }, { "epoch": 0.84, "grad_norm": 1.4341667420507207, "learning_rate": 3.3410414741790625e-06, "loss": 0.6714, "step": 1560 }, { "epoch": 0.84, "grad_norm": 1.4685405835492062, "learning_rate": 3.338937555479419e-06, "loss": 0.6481, "step": 1561 }, { "epoch": 0.84, "grad_norm": 1.3856129047467936, "learning_rate": 3.3368329670006317e-06, "loss": 0.6676, "step": 1562 }, { "epoch": 0.84, "grad_norm": 1.433238417623258, "learning_rate": 3.3347277104229332e-06, "loss": 0.6541, "step": 1563 }, { "epoch": 0.84, "grad_norm": 1.634778577277858, "learning_rate": 3.3326217874270867e-06, "loss": 0.7169, "step": 1564 }, { "epoch": 0.85, "grad_norm": 1.475171766131615, "learning_rate": 3.3305151996943907e-06, "loss": 0.6769, "step": 1565 }, { "epoch": 0.85, "grad_norm": 1.4034488396198312, "learning_rate": 3.3284079489066728e-06, "loss": 0.6805, "step": 1566 }, { "epoch": 0.85, "grad_norm": 1.4489947078069931, "learning_rate": 3.3263000367462893e-06, "loss": 0.6732, "step": 1567 }, { "epoch": 0.85, "grad_norm": 1.3890977692045956, "learning_rate": 3.3241914648961272e-06, "loss": 0.658, "step": 1568 }, { "epoch": 0.85, "grad_norm": 1.3949683843173453, "learning_rate": 3.3220822350395966e-06, "loss": 0.6628, "step": 1569 }, { "epoch": 0.85, "grad_norm": 1.4978578674487864, "learning_rate": 3.3199723488606355e-06, "loss": 0.6872, "step": 1570 }, { "epoch": 0.85, "grad_norm": 1.4352475238082743, "learning_rate": 3.317861808043705e-06, "loss": 0.6797, "step": 1571 }, { "epoch": 0.85, "grad_norm": 1.5065370568373428, "learning_rate": 3.31575061427379e-06, "loss": 0.6801, "step": 1572 }, { "epoch": 0.85, "grad_norm": 1.4433368204748547, "learning_rate": 3.3136387692363923e-06, "loss": 0.7195, "step": 1573 }, { "epoch": 0.85, "grad_norm": 1.4645596173578812, "learning_rate": 3.3115262746175414e-06, "loss": 0.6833, "step": 1574 }, { "epoch": 0.85, "grad_norm": 1.3927148063146446, "learning_rate": 3.3094131321037783e-06, "loss": 0.6877, "step": 1575 }, { "epoch": 0.85, "grad_norm": 1.3968104206498386, "learning_rate": 3.3072993433821642e-06, "loss": 0.6831, "step": 1576 }, { "epoch": 0.85, "grad_norm": 1.410212463523097, "learning_rate": 3.3051849101402765e-06, "loss": 0.6821, "step": 1577 }, { "epoch": 0.85, "grad_norm": 1.4386311952753001, "learning_rate": 3.303069834066206e-06, "loss": 0.6862, "step": 1578 }, { "epoch": 0.85, "grad_norm": 1.4337233106876988, "learning_rate": 3.3009541168485587e-06, "loss": 0.6547, "step": 1579 }, { "epoch": 0.85, "grad_norm": 1.4458284658889207, "learning_rate": 3.29883776017645e-06, "loss": 0.6723, "step": 1580 }, { "epoch": 0.85, "grad_norm": 1.4555385658900377, "learning_rate": 3.2967207657395055e-06, "loss": 0.6725, "step": 1581 }, { "epoch": 0.85, "grad_norm": 1.3852876480435714, "learning_rate": 3.294603135227864e-06, "loss": 0.665, "step": 1582 }, { "epoch": 0.85, "grad_norm": 1.4492108984800172, "learning_rate": 3.292484870332169e-06, "loss": 0.7023, "step": 1583 }, { "epoch": 0.86, "grad_norm": 1.4631436273144824, "learning_rate": 3.2903659727435692e-06, "loss": 0.7025, "step": 1584 }, { "epoch": 0.86, "grad_norm": 1.4402051459361485, "learning_rate": 3.288246444153722e-06, "loss": 0.6821, "step": 1585 }, { "epoch": 0.86, "grad_norm": 1.4620426604092827, "learning_rate": 3.286126286254786e-06, "loss": 0.6829, "step": 1586 }, { "epoch": 0.86, "grad_norm": 1.4379265448681042, "learning_rate": 3.284005500739423e-06, "loss": 0.6623, "step": 1587 }, { "epoch": 0.86, "grad_norm": 1.4556864216429493, "learning_rate": 3.281884089300797e-06, "loss": 0.6835, "step": 1588 }, { "epoch": 0.86, "grad_norm": 1.4590510745124738, "learning_rate": 3.2797620536325682e-06, "loss": 0.6942, "step": 1589 }, { "epoch": 0.86, "grad_norm": 1.400802730604591, "learning_rate": 3.2776393954289e-06, "loss": 0.6628, "step": 1590 }, { "epoch": 0.86, "grad_norm": 1.4187560608417522, "learning_rate": 3.275516116384449e-06, "loss": 0.6721, "step": 1591 }, { "epoch": 0.86, "grad_norm": 1.3982097969139153, "learning_rate": 3.273392218194369e-06, "loss": 0.6677, "step": 1592 }, { "epoch": 0.86, "grad_norm": 1.404048184916487, "learning_rate": 3.271267702554307e-06, "loss": 0.6515, "step": 1593 }, { "epoch": 0.86, "grad_norm": 1.3743458146635597, "learning_rate": 3.269142571160406e-06, "loss": 0.6653, "step": 1594 }, { "epoch": 0.86, "grad_norm": 1.5011981951497197, "learning_rate": 3.267016825709297e-06, "loss": 0.6918, "step": 1595 }, { "epoch": 0.86, "grad_norm": 1.3650207616293855, "learning_rate": 3.2648904678981032e-06, "loss": 0.6633, "step": 1596 }, { "epoch": 0.86, "grad_norm": 1.5112787208992795, "learning_rate": 3.2627634994244357e-06, "loss": 0.692, "step": 1597 }, { "epoch": 0.86, "grad_norm": 1.411781931906448, "learning_rate": 3.2606359219863935e-06, "loss": 0.6697, "step": 1598 }, { "epoch": 0.86, "grad_norm": 1.4820809339840975, "learning_rate": 3.2585077372825636e-06, "loss": 0.6912, "step": 1599 }, { "epoch": 0.86, "grad_norm": 1.3790891161846972, "learning_rate": 3.2563789470120133e-06, "loss": 0.6424, "step": 1600 }, { "epoch": 0.86, "grad_norm": 1.4526794644395173, "learning_rate": 3.2542495528742984e-06, "loss": 0.6569, "step": 1601 }, { "epoch": 0.87, "grad_norm": 1.4560379548739129, "learning_rate": 3.2521195565694543e-06, "loss": 0.6936, "step": 1602 }, { "epoch": 0.87, "grad_norm": 1.4275434028050447, "learning_rate": 3.249988959797996e-06, "loss": 0.6956, "step": 1603 }, { "epoch": 0.87, "grad_norm": 1.4575922854145666, "learning_rate": 3.247857764260921e-06, "loss": 0.6907, "step": 1604 }, { "epoch": 0.87, "grad_norm": 1.433686964154054, "learning_rate": 3.2457259716597023e-06, "loss": 0.6943, "step": 1605 }, { "epoch": 0.87, "grad_norm": 1.4288653050972602, "learning_rate": 3.2435935836962906e-06, "loss": 0.686, "step": 1606 }, { "epoch": 0.87, "grad_norm": 1.471029888780123, "learning_rate": 3.241460602073112e-06, "loss": 0.6536, "step": 1607 }, { "epoch": 0.87, "grad_norm": 1.402282488946294, "learning_rate": 3.2393270284930658e-06, "loss": 0.6426, "step": 1608 }, { "epoch": 0.87, "grad_norm": 1.4538396923405672, "learning_rate": 3.2371928646595245e-06, "loss": 0.6844, "step": 1609 }, { "epoch": 0.87, "grad_norm": 1.3801107427663741, "learning_rate": 3.2350581122763325e-06, "loss": 0.6651, "step": 1610 }, { "epoch": 0.87, "grad_norm": 1.4938838963723213, "learning_rate": 3.2329227730478026e-06, "loss": 0.66, "step": 1611 }, { "epoch": 0.87, "grad_norm": 1.4439638546829163, "learning_rate": 3.230786848678717e-06, "loss": 0.6794, "step": 1612 }, { "epoch": 0.87, "grad_norm": 1.3955258160696697, "learning_rate": 3.228650340874325e-06, "loss": 0.6755, "step": 1613 }, { "epoch": 0.87, "grad_norm": 1.4311781088019526, "learning_rate": 3.2265132513403415e-06, "loss": 0.6463, "step": 1614 }, { "epoch": 0.87, "grad_norm": 1.460872099552705, "learning_rate": 3.224375581782946e-06, "loss": 0.6745, "step": 1615 }, { "epoch": 0.87, "grad_norm": 1.4122384349087687, "learning_rate": 3.2222373339087804e-06, "loss": 0.6766, "step": 1616 }, { "epoch": 0.87, "grad_norm": 1.7167891004911287, "learning_rate": 3.22009850942495e-06, "loss": 0.6582, "step": 1617 }, { "epoch": 0.87, "grad_norm": 1.4433713148959828, "learning_rate": 3.217959110039019e-06, "loss": 0.6639, "step": 1618 }, { "epoch": 0.87, "grad_norm": 1.4659194945426977, "learning_rate": 3.2158191374590097e-06, "loss": 0.6835, "step": 1619 }, { "epoch": 0.87, "grad_norm": 1.4399255972195113, "learning_rate": 3.213678593393405e-06, "loss": 0.6663, "step": 1620 }, { "epoch": 0.88, "grad_norm": 1.467659818886545, "learning_rate": 3.211537479551142e-06, "loss": 0.6613, "step": 1621 }, { "epoch": 0.88, "grad_norm": 1.4343571620654871, "learning_rate": 3.209395797641612e-06, "loss": 0.6574, "step": 1622 }, { "epoch": 0.88, "grad_norm": 1.4449950408285894, "learning_rate": 3.207253549374662e-06, "loss": 0.7206, "step": 1623 }, { "epoch": 0.88, "grad_norm": 1.4410917747146574, "learning_rate": 3.205110736460589e-06, "loss": 0.657, "step": 1624 }, { "epoch": 0.88, "grad_norm": 1.4284955198367986, "learning_rate": 3.202967360610142e-06, "loss": 0.669, "step": 1625 }, { "epoch": 0.88, "grad_norm": 1.4256815996476715, "learning_rate": 3.200823423534519e-06, "loss": 0.671, "step": 1626 }, { "epoch": 0.88, "grad_norm": 1.4364005605063699, "learning_rate": 3.1986789269453684e-06, "loss": 0.6677, "step": 1627 }, { "epoch": 0.88, "grad_norm": 1.4666914758792837, "learning_rate": 3.196533872554779e-06, "loss": 0.6887, "step": 1628 }, { "epoch": 0.88, "grad_norm": 1.4673857756251933, "learning_rate": 3.194388262075293e-06, "loss": 0.6964, "step": 1629 }, { "epoch": 0.88, "grad_norm": 1.398844354901598, "learning_rate": 3.192242097219892e-06, "loss": 0.678, "step": 1630 }, { "epoch": 0.88, "grad_norm": 1.4162295105005007, "learning_rate": 3.190095379701998e-06, "loss": 0.6514, "step": 1631 }, { "epoch": 0.88, "grad_norm": 1.502505113216928, "learning_rate": 3.1879481112354804e-06, "loss": 0.7027, "step": 1632 }, { "epoch": 0.88, "grad_norm": 1.4540188334969495, "learning_rate": 3.1858002935346444e-06, "loss": 0.6961, "step": 1633 }, { "epoch": 0.88, "grad_norm": 1.4583005853564048, "learning_rate": 3.1836519283142325e-06, "loss": 0.6774, "step": 1634 }, { "epoch": 0.88, "grad_norm": 1.415330427583445, "learning_rate": 3.181503017289428e-06, "loss": 0.6547, "step": 1635 }, { "epoch": 0.88, "grad_norm": 1.4428232054104853, "learning_rate": 3.179353562175848e-06, "loss": 0.6805, "step": 1636 }, { "epoch": 0.88, "grad_norm": 1.4533438303170776, "learning_rate": 3.177203564689545e-06, "loss": 0.6858, "step": 1637 }, { "epoch": 0.88, "grad_norm": 1.4358366994265008, "learning_rate": 3.175053026547002e-06, "loss": 0.691, "step": 1638 }, { "epoch": 0.89, "grad_norm": 1.4944348437497146, "learning_rate": 3.172901949465135e-06, "loss": 0.6945, "step": 1639 }, { "epoch": 0.89, "grad_norm": 1.3744608352662107, "learning_rate": 3.170750335161293e-06, "loss": 0.6796, "step": 1640 }, { "epoch": 0.89, "grad_norm": 1.421699849002301, "learning_rate": 3.16859818535325e-06, "loss": 0.679, "step": 1641 }, { "epoch": 0.89, "grad_norm": 1.3706191521713336, "learning_rate": 3.166445501759209e-06, "loss": 0.6236, "step": 1642 }, { "epoch": 0.89, "grad_norm": 1.4647195573946512, "learning_rate": 3.1642922860977994e-06, "loss": 0.6402, "step": 1643 }, { "epoch": 0.89, "grad_norm": 1.4229467689438768, "learning_rate": 3.1621385400880756e-06, "loss": 0.6652, "step": 1644 }, { "epoch": 0.89, "grad_norm": 1.4415736142736455, "learning_rate": 3.159984265449514e-06, "loss": 0.672, "step": 1645 }, { "epoch": 0.89, "grad_norm": 1.4928583281758974, "learning_rate": 3.157829463902015e-06, "loss": 0.6708, "step": 1646 }, { "epoch": 0.89, "grad_norm": 1.426730453232616, "learning_rate": 3.1556741371658984e-06, "loss": 0.6594, "step": 1647 }, { "epoch": 0.89, "grad_norm": 1.5249111577422731, "learning_rate": 3.153518286961903e-06, "loss": 0.6721, "step": 1648 }, { "epoch": 0.89, "grad_norm": 1.519949674388287, "learning_rate": 3.1513619150111872e-06, "loss": 0.6786, "step": 1649 }, { "epoch": 0.89, "grad_norm": 1.4175190617672313, "learning_rate": 3.1492050230353238e-06, "loss": 0.6517, "step": 1650 }, { "epoch": 0.89, "grad_norm": 1.4780212541475013, "learning_rate": 3.147047612756302e-06, "loss": 0.6777, "step": 1651 }, { "epoch": 0.89, "grad_norm": 1.458196969256038, "learning_rate": 3.1448896858965263e-06, "loss": 0.6949, "step": 1652 }, { "epoch": 0.89, "grad_norm": 1.484558461581838, "learning_rate": 3.142731244178809e-06, "loss": 0.6719, "step": 1653 }, { "epoch": 0.89, "grad_norm": 1.4821996903656973, "learning_rate": 3.1405722893263785e-06, "loss": 0.6538, "step": 1654 }, { "epoch": 0.89, "grad_norm": 1.4282294904930248, "learning_rate": 3.13841282306287e-06, "loss": 0.6631, "step": 1655 }, { "epoch": 0.89, "grad_norm": 1.4457467217351192, "learning_rate": 3.1362528471123277e-06, "loss": 0.6725, "step": 1656 }, { "epoch": 0.89, "grad_norm": 1.4348245548943666, "learning_rate": 3.1340923631992036e-06, "loss": 0.6487, "step": 1657 }, { "epoch": 0.9, "grad_norm": 1.418601428620702, "learning_rate": 3.1319313730483535e-06, "loss": 0.704, "step": 1658 }, { "epoch": 0.9, "grad_norm": 1.4084355325736586, "learning_rate": 3.129769878385039e-06, "loss": 0.6713, "step": 1659 }, { "epoch": 0.9, "grad_norm": 1.4570013070884353, "learning_rate": 3.127607880934923e-06, "loss": 0.6971, "step": 1660 }, { "epoch": 0.9, "grad_norm": 1.40885116015291, "learning_rate": 3.1254453824240703e-06, "loss": 0.6772, "step": 1661 }, { "epoch": 0.9, "grad_norm": 1.3648307463297702, "learning_rate": 3.1232823845789473e-06, "loss": 0.6362, "step": 1662 }, { "epoch": 0.9, "grad_norm": 1.483145795650575, "learning_rate": 3.1211188891264166e-06, "loss": 0.6885, "step": 1663 }, { "epoch": 0.9, "grad_norm": 1.43606641295255, "learning_rate": 3.1189548977937396e-06, "loss": 0.695, "step": 1664 }, { "epoch": 0.9, "grad_norm": 1.470099827864139, "learning_rate": 3.1167904123085736e-06, "loss": 0.6758, "step": 1665 }, { "epoch": 0.9, "grad_norm": 1.5445929937752987, "learning_rate": 3.114625434398969e-06, "loss": 0.7165, "step": 1666 }, { "epoch": 0.9, "grad_norm": 1.356805645690593, "learning_rate": 3.1124599657933715e-06, "loss": 0.6768, "step": 1667 }, { "epoch": 0.9, "grad_norm": 1.4547171721288736, "learning_rate": 3.110294008220617e-06, "loss": 0.6839, "step": 1668 }, { "epoch": 0.9, "grad_norm": 1.4535488341167346, "learning_rate": 3.108127563409931e-06, "loss": 0.6366, "step": 1669 }, { "epoch": 0.9, "grad_norm": 1.4215270852108117, "learning_rate": 3.1059606330909313e-06, "loss": 0.6907, "step": 1670 }, { "epoch": 0.9, "grad_norm": 1.4903456774408843, "learning_rate": 3.1037932189936205e-06, "loss": 0.6847, "step": 1671 }, { "epoch": 0.9, "grad_norm": 1.36106541926843, "learning_rate": 3.1016253228483868e-06, "loss": 0.6562, "step": 1672 }, { "epoch": 0.9, "grad_norm": 1.4551298551338596, "learning_rate": 3.0994569463860065e-06, "loss": 0.668, "step": 1673 }, { "epoch": 0.9, "grad_norm": 1.4205192531769153, "learning_rate": 3.097288091337635e-06, "loss": 0.6786, "step": 1674 }, { "epoch": 0.9, "grad_norm": 1.4333696744759636, "learning_rate": 3.095118759434815e-06, "loss": 0.6956, "step": 1675 }, { "epoch": 0.91, "grad_norm": 1.4459106941004647, "learning_rate": 3.0929489524094657e-06, "loss": 0.6832, "step": 1676 }, { "epoch": 0.91, "grad_norm": 1.4832142764989786, "learning_rate": 3.0907786719938876e-06, "loss": 0.6735, "step": 1677 }, { "epoch": 0.91, "grad_norm": 1.3814870310051843, "learning_rate": 3.088607919920757e-06, "loss": 0.65, "step": 1678 }, { "epoch": 0.91, "grad_norm": 1.4408140226825081, "learning_rate": 3.08643669792313e-06, "loss": 0.6816, "step": 1679 }, { "epoch": 0.91, "grad_norm": 1.5068637420924325, "learning_rate": 3.084265007734436e-06, "loss": 0.6715, "step": 1680 }, { "epoch": 0.91, "grad_norm": 1.5451575773744932, "learning_rate": 3.0820928510884786e-06, "loss": 0.7155, "step": 1681 }, { "epoch": 0.91, "grad_norm": 1.4605565828944467, "learning_rate": 3.079920229719433e-06, "loss": 0.6998, "step": 1682 }, { "epoch": 0.91, "grad_norm": 1.3823362711708929, "learning_rate": 3.0777471453618457e-06, "loss": 0.7079, "step": 1683 }, { "epoch": 0.91, "grad_norm": 1.386790162270107, "learning_rate": 3.075573599750634e-06, "loss": 0.6621, "step": 1684 }, { "epoch": 0.91, "grad_norm": 1.4185937322453854, "learning_rate": 3.073399594621083e-06, "loss": 0.6755, "step": 1685 }, { "epoch": 0.91, "grad_norm": 1.4549758628263016, "learning_rate": 3.0712251317088426e-06, "loss": 0.6882, "step": 1686 }, { "epoch": 0.91, "grad_norm": 1.465816293874543, "learning_rate": 3.069050212749932e-06, "loss": 0.6722, "step": 1687 }, { "epoch": 0.91, "grad_norm": 1.382296417825637, "learning_rate": 3.0668748394807307e-06, "loss": 0.6525, "step": 1688 }, { "epoch": 0.91, "grad_norm": 1.413211938789981, "learning_rate": 3.064699013637983e-06, "loss": 0.6685, "step": 1689 }, { "epoch": 0.91, "grad_norm": 1.390319312650803, "learning_rate": 3.062522736958794e-06, "loss": 0.6549, "step": 1690 }, { "epoch": 0.91, "grad_norm": 1.400696558841402, "learning_rate": 3.0603460111806292e-06, "loss": 0.6885, "step": 1691 }, { "epoch": 0.91, "grad_norm": 1.4335410834350422, "learning_rate": 3.0581688380413115e-06, "loss": 0.644, "step": 1692 }, { "epoch": 0.91, "grad_norm": 1.4399779865814804, "learning_rate": 3.055991219279023e-06, "loss": 0.6709, "step": 1693 }, { "epoch": 0.91, "grad_norm": 1.4318702978059226, "learning_rate": 3.0538131566322993e-06, "loss": 0.6784, "step": 1694 }, { "epoch": 0.92, "grad_norm": 1.425204578192174, "learning_rate": 3.0516346518400315e-06, "loss": 0.6555, "step": 1695 }, { "epoch": 0.92, "grad_norm": 1.368954067056628, "learning_rate": 3.049455706641464e-06, "loss": 0.7007, "step": 1696 }, { "epoch": 0.92, "grad_norm": 1.405101617059509, "learning_rate": 3.047276322776191e-06, "loss": 0.6779, "step": 1697 }, { "epoch": 0.92, "grad_norm": 1.4380267120870125, "learning_rate": 3.0450965019841593e-06, "loss": 0.6781, "step": 1698 }, { "epoch": 0.92, "grad_norm": 1.4050377668799783, "learning_rate": 3.042916246005665e-06, "loss": 0.6612, "step": 1699 }, { "epoch": 0.92, "grad_norm": 1.4115998086371284, "learning_rate": 3.0407355565813473e-06, "loss": 0.6928, "step": 1700 }, { "epoch": 0.92, "grad_norm": 1.469688303142864, "learning_rate": 3.0385544354521957e-06, "loss": 0.6756, "step": 1701 }, { "epoch": 0.92, "grad_norm": 1.3724716500502807, "learning_rate": 3.0363728843595436e-06, "loss": 0.6857, "step": 1702 }, { "epoch": 0.92, "grad_norm": 1.4326754877230694, "learning_rate": 3.0341909050450656e-06, "loss": 0.6755, "step": 1703 }, { "epoch": 0.92, "grad_norm": 1.3875912622242144, "learning_rate": 3.0320084992507814e-06, "loss": 0.6275, "step": 1704 }, { "epoch": 0.92, "grad_norm": 1.3893918030645163, "learning_rate": 3.029825668719047e-06, "loss": 0.6281, "step": 1705 }, { "epoch": 0.92, "grad_norm": 1.3952381216554528, "learning_rate": 3.0276424151925613e-06, "loss": 0.6634, "step": 1706 }, { "epoch": 0.92, "grad_norm": 1.4353933588251535, "learning_rate": 3.0254587404143604e-06, "loss": 0.6945, "step": 1707 }, { "epoch": 0.92, "grad_norm": 1.4424433356765056, "learning_rate": 3.023274646127814e-06, "loss": 0.7044, "step": 1708 }, { "epoch": 0.92, "grad_norm": 1.3978628847311483, "learning_rate": 3.021090134076629e-06, "loss": 0.6817, "step": 1709 }, { "epoch": 0.92, "grad_norm": 1.4729564904109325, "learning_rate": 3.0189052060048464e-06, "loss": 0.6722, "step": 1710 }, { "epoch": 0.92, "grad_norm": 1.503341887362476, "learning_rate": 3.016719863656837e-06, "loss": 0.6543, "step": 1711 }, { "epoch": 0.92, "grad_norm": 1.4996234102293973, "learning_rate": 3.014534108777304e-06, "loss": 0.6512, "step": 1712 }, { "epoch": 0.93, "grad_norm": 1.471298605902146, "learning_rate": 3.01234794311128e-06, "loss": 0.7034, "step": 1713 }, { "epoch": 0.93, "grad_norm": 1.4476547691248063, "learning_rate": 3.010161368404124e-06, "loss": 0.678, "step": 1714 }, { "epoch": 0.93, "grad_norm": 1.4487887930737662, "learning_rate": 3.007974386401525e-06, "loss": 0.6688, "step": 1715 }, { "epoch": 0.93, "grad_norm": 1.4525811469489387, "learning_rate": 3.0057869988494925e-06, "loss": 0.659, "step": 1716 }, { "epoch": 0.93, "grad_norm": 1.441712445671914, "learning_rate": 3.0035992074943633e-06, "loss": 0.6553, "step": 1717 }, { "epoch": 0.93, "grad_norm": 1.4873135243863604, "learning_rate": 3.0014110140827955e-06, "loss": 0.6622, "step": 1718 }, { "epoch": 0.93, "grad_norm": 1.4248041715489557, "learning_rate": 2.999222420361767e-06, "loss": 0.6596, "step": 1719 }, { "epoch": 0.93, "grad_norm": 1.4162753404158215, "learning_rate": 2.9970334280785784e-06, "loss": 0.6827, "step": 1720 }, { "epoch": 0.93, "grad_norm": 1.5004001202028687, "learning_rate": 2.9948440389808447e-06, "loss": 0.6851, "step": 1721 }, { "epoch": 0.93, "grad_norm": 1.4929316288056975, "learning_rate": 2.9926542548165e-06, "loss": 0.6637, "step": 1722 }, { "epoch": 0.93, "grad_norm": 1.433512186433944, "learning_rate": 2.990464077333794e-06, "loss": 0.675, "step": 1723 }, { "epoch": 0.93, "grad_norm": 1.5209166937914538, "learning_rate": 2.9882735082812885e-06, "loss": 0.6599, "step": 1724 }, { "epoch": 0.93, "grad_norm": 1.4866450198918832, "learning_rate": 2.9860825494078605e-06, "loss": 0.6907, "step": 1725 }, { "epoch": 0.93, "grad_norm": 1.4192255095648474, "learning_rate": 2.9838912024626964e-06, "loss": 0.6924, "step": 1726 }, { "epoch": 0.93, "grad_norm": 1.4059527456550318, "learning_rate": 2.981699469195292e-06, "loss": 0.6253, "step": 1727 }, { "epoch": 0.93, "grad_norm": 1.3966084536234236, "learning_rate": 2.979507351355454e-06, "loss": 0.6726, "step": 1728 }, { "epoch": 0.93, "grad_norm": 1.4080911502065196, "learning_rate": 2.9773148506932936e-06, "loss": 0.6639, "step": 1729 }, { "epoch": 0.93, "grad_norm": 1.4858680353118956, "learning_rate": 2.975121968959228e-06, "loss": 0.6548, "step": 1730 }, { "epoch": 0.93, "grad_norm": 1.484947270750994, "learning_rate": 2.972928707903981e-06, "loss": 0.6927, "step": 1731 }, { "epoch": 0.94, "grad_norm": 1.4016209132618882, "learning_rate": 2.9707350692785763e-06, "loss": 0.6788, "step": 1732 }, { "epoch": 0.94, "grad_norm": 1.4876762416751261, "learning_rate": 2.9685410548343386e-06, "loss": 0.6768, "step": 1733 }, { "epoch": 0.94, "grad_norm": 1.4659328650510413, "learning_rate": 2.966346666322898e-06, "loss": 0.6777, "step": 1734 }, { "epoch": 0.94, "grad_norm": 1.50873859280219, "learning_rate": 2.9641519054961765e-06, "loss": 0.6836, "step": 1735 }, { "epoch": 0.94, "grad_norm": 1.4444360511169019, "learning_rate": 2.9619567741063965e-06, "loss": 0.6809, "step": 1736 }, { "epoch": 0.94, "grad_norm": 1.3891186408164533, "learning_rate": 2.9597612739060775e-06, "loss": 0.639, "step": 1737 }, { "epoch": 0.94, "grad_norm": 1.4456046517412717, "learning_rate": 2.957565406648032e-06, "loss": 0.6357, "step": 1738 }, { "epoch": 0.94, "grad_norm": 1.5346746182252065, "learning_rate": 2.955369174085364e-06, "loss": 0.6819, "step": 1739 }, { "epoch": 0.94, "grad_norm": 1.4839818791830217, "learning_rate": 2.9531725779714713e-06, "loss": 0.6438, "step": 1740 }, { "epoch": 0.94, "grad_norm": 1.3975008297366032, "learning_rate": 2.9509756200600422e-06, "loss": 0.6626, "step": 1741 }, { "epoch": 0.94, "grad_norm": 1.3840563304708726, "learning_rate": 2.948778302105052e-06, "loss": 0.6591, "step": 1742 }, { "epoch": 0.94, "grad_norm": 1.4472958056926382, "learning_rate": 2.9465806258607653e-06, "loss": 0.668, "step": 1743 }, { "epoch": 0.94, "grad_norm": 1.4299334520464249, "learning_rate": 2.944382593081731e-06, "loss": 0.6773, "step": 1744 }, { "epoch": 0.94, "grad_norm": 1.476883629968566, "learning_rate": 2.9421842055227847e-06, "loss": 0.6603, "step": 1745 }, { "epoch": 0.94, "grad_norm": 1.5045492106436258, "learning_rate": 2.939985464939043e-06, "loss": 0.6591, "step": 1746 }, { "epoch": 0.94, "grad_norm": 1.4585027199049703, "learning_rate": 2.9377863730859053e-06, "loss": 0.665, "step": 1747 }, { "epoch": 0.94, "grad_norm": 1.4629942610038726, "learning_rate": 2.9355869317190522e-06, "loss": 0.6835, "step": 1748 }, { "epoch": 0.94, "grad_norm": 1.4087837446524767, "learning_rate": 2.9333871425944434e-06, "loss": 0.6745, "step": 1749 }, { "epoch": 0.95, "grad_norm": 1.4686690837322156, "learning_rate": 2.9311870074683135e-06, "loss": 0.6968, "step": 1750 }, { "epoch": 0.95, "grad_norm": 1.40719079047853, "learning_rate": 2.9289865280971776e-06, "loss": 0.6734, "step": 1751 }, { "epoch": 0.95, "grad_norm": 1.435613821056801, "learning_rate": 2.926785706237822e-06, "loss": 0.6748, "step": 1752 }, { "epoch": 0.95, "grad_norm": 1.3983696262363285, "learning_rate": 2.924584543647308e-06, "loss": 0.6733, "step": 1753 }, { "epoch": 0.95, "grad_norm": 1.4723792902630861, "learning_rate": 2.9223830420829693e-06, "loss": 0.6624, "step": 1754 }, { "epoch": 0.95, "grad_norm": 1.3903381242903072, "learning_rate": 2.920181203302409e-06, "loss": 0.6587, "step": 1755 }, { "epoch": 0.95, "grad_norm": 1.439248585484727, "learning_rate": 2.9179790290635007e-06, "loss": 0.6923, "step": 1756 }, { "epoch": 0.95, "grad_norm": 1.4274756611481074, "learning_rate": 2.9157765211243855e-06, "loss": 0.657, "step": 1757 }, { "epoch": 0.95, "grad_norm": 1.3878081605052355, "learning_rate": 2.91357368124347e-06, "loss": 0.6594, "step": 1758 }, { "epoch": 0.95, "grad_norm": 1.4045489259373845, "learning_rate": 2.9113705111794266e-06, "loss": 0.6638, "step": 1759 }, { "epoch": 0.95, "grad_norm": 1.504976510649229, "learning_rate": 2.9091670126911914e-06, "loss": 0.689, "step": 1760 }, { "epoch": 0.95, "grad_norm": 1.4913230896466116, "learning_rate": 2.906963187537962e-06, "loss": 0.6776, "step": 1761 }, { "epoch": 0.95, "grad_norm": 1.4986042596613673, "learning_rate": 2.904759037479198e-06, "loss": 0.6718, "step": 1762 }, { "epoch": 0.95, "grad_norm": 1.4125012860146622, "learning_rate": 2.902554564274617e-06, "loss": 0.6907, "step": 1763 }, { "epoch": 0.95, "grad_norm": 1.4365018108861767, "learning_rate": 2.9003497696841955e-06, "loss": 0.6581, "step": 1764 }, { "epoch": 0.95, "grad_norm": 1.5299138911673134, "learning_rate": 2.8981446554681663e-06, "loss": 0.6564, "step": 1765 }, { "epoch": 0.95, "grad_norm": 1.4927673852558085, "learning_rate": 2.8959392233870176e-06, "loss": 0.6485, "step": 1766 }, { "epoch": 0.95, "grad_norm": 1.3840852619296065, "learning_rate": 2.8937334752014913e-06, "loss": 0.6676, "step": 1767 }, { "epoch": 0.95, "grad_norm": 1.4634534154265404, "learning_rate": 2.8915274126725807e-06, "loss": 0.6941, "step": 1768 }, { "epoch": 0.96, "grad_norm": 1.4551285301530528, "learning_rate": 2.8893210375615316e-06, "loss": 0.6352, "step": 1769 }, { "epoch": 0.96, "grad_norm": 1.3790040917034478, "learning_rate": 2.887114351629839e-06, "loss": 0.6663, "step": 1770 }, { "epoch": 0.96, "grad_norm": 1.3838466774311018, "learning_rate": 2.884907356639245e-06, "loss": 0.6614, "step": 1771 }, { "epoch": 0.96, "grad_norm": 1.4509606520319271, "learning_rate": 2.8827000543517393e-06, "loss": 0.6563, "step": 1772 }, { "epoch": 0.96, "grad_norm": 1.4350969528903448, "learning_rate": 2.8804924465295575e-06, "loss": 0.6666, "step": 1773 }, { "epoch": 0.96, "grad_norm": 1.492630686442437, "learning_rate": 2.8782845349351774e-06, "loss": 0.6399, "step": 1774 }, { "epoch": 0.96, "grad_norm": 1.4079496111482426, "learning_rate": 2.876076321331321e-06, "loss": 0.6656, "step": 1775 }, { "epoch": 0.96, "grad_norm": 1.4582922146633133, "learning_rate": 2.873867807480951e-06, "loss": 0.6848, "step": 1776 }, { "epoch": 0.96, "grad_norm": 1.4316753422707156, "learning_rate": 2.8716589951472685e-06, "loss": 0.6765, "step": 1777 }, { "epoch": 0.96, "grad_norm": 1.3937055237411102, "learning_rate": 2.8694498860937152e-06, "loss": 0.6619, "step": 1778 }, { "epoch": 0.96, "grad_norm": 1.462630298341331, "learning_rate": 2.8672404820839676e-06, "loss": 0.6575, "step": 1779 }, { "epoch": 0.96, "grad_norm": 1.466249338030702, "learning_rate": 2.8650307848819387e-06, "loss": 0.6634, "step": 1780 }, { "epoch": 0.96, "grad_norm": 1.395540853950479, "learning_rate": 2.8628207962517763e-06, "loss": 0.6488, "step": 1781 }, { "epoch": 0.96, "grad_norm": 1.4737888245616717, "learning_rate": 2.8606105179578584e-06, "loss": 0.6816, "step": 1782 }, { "epoch": 0.96, "grad_norm": 1.4674373627601385, "learning_rate": 2.8583999517647966e-06, "loss": 0.6946, "step": 1783 }, { "epoch": 0.96, "grad_norm": 1.510696236095152, "learning_rate": 2.8561890994374318e-06, "loss": 0.6555, "step": 1784 }, { "epoch": 0.96, "grad_norm": 1.508395752667763, "learning_rate": 2.8539779627408332e-06, "loss": 0.6959, "step": 1785 }, { "epoch": 0.96, "grad_norm": 1.4096020702686347, "learning_rate": 2.851766543440297e-06, "loss": 0.6487, "step": 1786 }, { "epoch": 0.97, "grad_norm": 1.5240887970876449, "learning_rate": 2.8495548433013442e-06, "loss": 0.6988, "step": 1787 }, { "epoch": 0.97, "grad_norm": 1.4600503051210139, "learning_rate": 2.847342864089721e-06, "loss": 0.6886, "step": 1788 }, { "epoch": 0.97, "grad_norm": 1.4593720587011378, "learning_rate": 2.8451306075713977e-06, "loss": 0.6689, "step": 1789 }, { "epoch": 0.97, "grad_norm": 1.4028251641942215, "learning_rate": 2.8429180755125625e-06, "loss": 0.6428, "step": 1790 }, { "epoch": 0.97, "grad_norm": 1.4048672387232757, "learning_rate": 2.8407052696796255e-06, "loss": 0.6715, "step": 1791 }, { "epoch": 0.97, "grad_norm": 1.446749401688876, "learning_rate": 2.838492191839218e-06, "loss": 0.6765, "step": 1792 }, { "epoch": 0.97, "grad_norm": 1.465826425200976, "learning_rate": 2.8362788437581833e-06, "loss": 0.6593, "step": 1793 }, { "epoch": 0.97, "grad_norm": 1.435873500133914, "learning_rate": 2.834065227203584e-06, "loss": 0.6714, "step": 1794 }, { "epoch": 0.97, "grad_norm": 1.4165791664502998, "learning_rate": 2.8318513439426957e-06, "loss": 0.6901, "step": 1795 }, { "epoch": 0.97, "grad_norm": 1.422140173454788, "learning_rate": 2.8296371957430087e-06, "loss": 0.6891, "step": 1796 }, { "epoch": 0.97, "grad_norm": 1.4205374491536191, "learning_rate": 2.8274227843722213e-06, "loss": 0.6823, "step": 1797 }, { "epoch": 0.97, "grad_norm": 1.480616796795221, "learning_rate": 2.825208111598246e-06, "loss": 0.6629, "step": 1798 }, { "epoch": 0.97, "grad_norm": 1.3964271659441398, "learning_rate": 2.8229931791892003e-06, "loss": 0.6456, "step": 1799 }, { "epoch": 0.97, "grad_norm": 1.458114814334794, "learning_rate": 2.820777988913412e-06, "loss": 0.6867, "step": 1800 }, { "epoch": 0.97, "grad_norm": 1.3394344083942018, "learning_rate": 2.818562542539413e-06, "loss": 0.625, "step": 1801 }, { "epoch": 0.97, "grad_norm": 1.361308974126396, "learning_rate": 2.8163468418359406e-06, "loss": 0.6333, "step": 1802 }, { "epoch": 0.97, "grad_norm": 1.4350173328960545, "learning_rate": 2.8141308885719337e-06, "loss": 0.686, "step": 1803 }, { "epoch": 0.97, "grad_norm": 1.372477058820434, "learning_rate": 2.8119146845165355e-06, "loss": 0.6363, "step": 1804 }, { "epoch": 0.97, "grad_norm": 1.441383488249171, "learning_rate": 2.809698231439086e-06, "loss": 0.6815, "step": 1805 }, { "epoch": 0.98, "grad_norm": 1.394459910150164, "learning_rate": 2.8074815311091265e-06, "loss": 0.6413, "step": 1806 }, { "epoch": 0.98, "grad_norm": 1.4156367205202391, "learning_rate": 2.8052645852963957e-06, "loss": 0.6471, "step": 1807 }, { "epoch": 0.98, "grad_norm": 1.41851969456714, "learning_rate": 2.8030473957708264e-06, "loss": 0.6829, "step": 1808 }, { "epoch": 0.98, "grad_norm": 1.455882918422343, "learning_rate": 2.8008299643025477e-06, "loss": 0.6579, "step": 1809 }, { "epoch": 0.98, "grad_norm": 1.41271864775127, "learning_rate": 2.7986122926618816e-06, "loss": 0.6762, "step": 1810 }, { "epoch": 0.98, "grad_norm": 1.4099388082841382, "learning_rate": 2.79639438261934e-06, "loss": 0.6661, "step": 1811 }, { "epoch": 0.98, "grad_norm": 1.4031939283325037, "learning_rate": 2.7941762359456294e-06, "loss": 0.6449, "step": 1812 }, { "epoch": 0.98, "grad_norm": 1.4282092086390392, "learning_rate": 2.7919578544116393e-06, "loss": 0.6803, "step": 1813 }, { "epoch": 0.98, "grad_norm": 1.4559831278879358, "learning_rate": 2.789739239788452e-06, "loss": 0.653, "step": 1814 }, { "epoch": 0.98, "grad_norm": 1.4011221712337176, "learning_rate": 2.787520393847334e-06, "loss": 0.6704, "step": 1815 }, { "epoch": 0.98, "grad_norm": 1.4197532230863303, "learning_rate": 2.785301318359734e-06, "loss": 0.6696, "step": 1816 }, { "epoch": 0.98, "grad_norm": 1.424727590979412, "learning_rate": 2.78308201509729e-06, "loss": 0.6675, "step": 1817 }, { "epoch": 0.98, "grad_norm": 1.4246916501698599, "learning_rate": 2.780862485831814e-06, "loss": 0.662, "step": 1818 }, { "epoch": 0.98, "grad_norm": 1.4398128157684562, "learning_rate": 2.7786427323353043e-06, "loss": 0.6781, "step": 1819 }, { "epoch": 0.98, "grad_norm": 1.4298710234885919, "learning_rate": 2.7764227563799372e-06, "loss": 0.6738, "step": 1820 }, { "epoch": 0.98, "grad_norm": 1.4829616483799077, "learning_rate": 2.7742025597380644e-06, "loss": 0.6979, "step": 1821 }, { "epoch": 0.98, "grad_norm": 1.358252880973642, "learning_rate": 2.7719821441822166e-06, "loss": 0.6894, "step": 1822 }, { "epoch": 0.98, "grad_norm": 1.44533863427447, "learning_rate": 2.7697615114850966e-06, "loss": 0.6509, "step": 1823 }, { "epoch": 0.99, "grad_norm": 1.4529721462635954, "learning_rate": 2.7675406634195824e-06, "loss": 0.6992, "step": 1824 }, { "epoch": 0.99, "grad_norm": 1.4516864568987689, "learning_rate": 2.7653196017587237e-06, "loss": 0.6575, "step": 1825 }, { "epoch": 0.99, "grad_norm": 1.3569453323409468, "learning_rate": 2.7630983282757394e-06, "loss": 0.6608, "step": 1826 }, { "epoch": 0.99, "grad_norm": 1.526646771655887, "learning_rate": 2.7608768447440193e-06, "loss": 0.6889, "step": 1827 }, { "epoch": 0.99, "grad_norm": 1.4777334783333391, "learning_rate": 2.7586551529371207e-06, "loss": 0.6604, "step": 1828 }, { "epoch": 0.99, "grad_norm": 1.3981510288975776, "learning_rate": 2.7564332546287654e-06, "loss": 0.661, "step": 1829 }, { "epoch": 0.99, "grad_norm": 1.3713258899550036, "learning_rate": 2.754211151592841e-06, "loss": 0.6599, "step": 1830 }, { "epoch": 0.99, "grad_norm": 1.3983388383490014, "learning_rate": 2.751988845603401e-06, "loss": 0.6527, "step": 1831 }, { "epoch": 0.99, "grad_norm": 1.3858167822681602, "learning_rate": 2.7497663384346562e-06, "loss": 0.6771, "step": 1832 }, { "epoch": 0.99, "grad_norm": 1.458105550701537, "learning_rate": 2.7475436318609827e-06, "loss": 0.6927, "step": 1833 }, { "epoch": 0.99, "grad_norm": 1.4581468907425212, "learning_rate": 2.7453207276569114e-06, "loss": 0.685, "step": 1834 }, { "epoch": 0.99, "grad_norm": 1.458305859058622, "learning_rate": 2.743097627597135e-06, "loss": 0.6672, "step": 1835 }, { "epoch": 0.99, "grad_norm": 1.4617787685464778, "learning_rate": 2.7408743334565006e-06, "loss": 0.6616, "step": 1836 }, { "epoch": 0.99, "grad_norm": 1.4646428631369282, "learning_rate": 2.7386508470100105e-06, "loss": 0.7011, "step": 1837 }, { "epoch": 0.99, "grad_norm": 1.3705454656937268, "learning_rate": 2.7364271700328186e-06, "loss": 0.6429, "step": 1838 }, { "epoch": 0.99, "grad_norm": 1.3713633457019616, "learning_rate": 2.734203304300235e-06, "loss": 0.6249, "step": 1839 }, { "epoch": 0.99, "grad_norm": 1.4577819605041227, "learning_rate": 2.7319792515877185e-06, "loss": 0.6933, "step": 1840 }, { "epoch": 0.99, "grad_norm": 1.6637238015948927, "learning_rate": 2.7297550136708745e-06, "loss": 0.6576, "step": 1841 }, { "epoch": 0.99, "grad_norm": 1.386223788433957, "learning_rate": 2.7275305923254607e-06, "loss": 0.6522, "step": 1842 }, { "epoch": 1.0, "grad_norm": 1.434821857712414, "learning_rate": 2.72530598932738e-06, "loss": 0.6843, "step": 1843 }, { "epoch": 1.0, "grad_norm": 1.3849223039248673, "learning_rate": 2.723081206452677e-06, "loss": 0.6546, "step": 1844 }, { "epoch": 1.0, "grad_norm": 1.4233583067811388, "learning_rate": 2.720856245477544e-06, "loss": 0.6705, "step": 1845 }, { "epoch": 1.0, "grad_norm": 1.398200401272576, "learning_rate": 2.718631108178314e-06, "loss": 0.6655, "step": 1846 }, { "epoch": 1.0, "grad_norm": 1.3848842745032603, "learning_rate": 2.716405796331461e-06, "loss": 0.6643, "step": 1847 }, { "epoch": 1.0, "grad_norm": 1.3773993426079432, "learning_rate": 2.7141803117135978e-06, "loss": 0.6645, "step": 1848 }, { "epoch": 1.0, "grad_norm": 1.4696494227638235, "learning_rate": 2.711954656101475e-06, "loss": 0.6651, "step": 1849 }, { "epoch": 1.0, "grad_norm": 1.4233877577003469, "learning_rate": 2.709728831271981e-06, "loss": 0.6314, "step": 1850 }, { "epoch": 1.0, "grad_norm": 1.4343452843159386, "learning_rate": 2.7075028390021385e-06, "loss": 0.6859, "step": 1851 }, { "epoch": 1.0, "grad_norm": 1.412242211439471, "learning_rate": 2.7052766810691024e-06, "loss": 0.6442, "step": 1852 }, { "epoch": 1.0, "grad_norm": 1.541170391781939, "learning_rate": 2.7030503592501623e-06, "loss": 0.5422, "step": 1853 }, { "epoch": 1.0, "grad_norm": 1.544655902559114, "learning_rate": 2.7008238753227385e-06, "loss": 0.5493, "step": 1854 }, { "epoch": 1.0, "grad_norm": 1.4505336852239352, "learning_rate": 2.6985972310643784e-06, "loss": 0.5824, "step": 1855 }, { "epoch": 1.0, "grad_norm": 1.4253855813394518, "learning_rate": 2.6963704282527597e-06, "loss": 0.5927, "step": 1856 }, { "epoch": 1.0, "grad_norm": 1.444363140481304, "learning_rate": 2.694143468665685e-06, "loss": 0.5292, "step": 1857 }, { "epoch": 1.0, "grad_norm": 1.427879357352963, "learning_rate": 2.6919163540810846e-06, "loss": 0.5442, "step": 1858 }, { "epoch": 1.0, "grad_norm": 1.4743373113580296, "learning_rate": 2.6896890862770093e-06, "loss": 0.5479, "step": 1859 }, { "epoch": 1.0, "grad_norm": 1.4723062134584637, "learning_rate": 2.6874616670316338e-06, "loss": 0.553, "step": 1860 }, { "epoch": 1.0, "grad_norm": 1.6849765480213208, "learning_rate": 2.6852340981232543e-06, "loss": 0.5554, "step": 1861 }, { "epoch": 1.01, "grad_norm": 1.528014411570887, "learning_rate": 2.6830063813302862e-06, "loss": 0.5421, "step": 1862 }, { "epoch": 1.01, "grad_norm": 1.5009383405022878, "learning_rate": 2.6807785184312618e-06, "loss": 0.538, "step": 1863 }, { "epoch": 1.01, "grad_norm": 1.5731714631297795, "learning_rate": 2.6785505112048323e-06, "loss": 0.572, "step": 1864 }, { "epoch": 1.01, "grad_norm": 1.4733462706254221, "learning_rate": 2.67632236142976e-06, "loss": 0.5461, "step": 1865 }, { "epoch": 1.01, "grad_norm": 1.427797112423125, "learning_rate": 2.674094070884926e-06, "loss": 0.5517, "step": 1866 }, { "epoch": 1.01, "grad_norm": 1.4500365526282355, "learning_rate": 2.6718656413493214e-06, "loss": 0.5619, "step": 1867 }, { "epoch": 1.01, "grad_norm": 1.4479176496870072, "learning_rate": 2.669637074602048e-06, "loss": 0.5657, "step": 1868 }, { "epoch": 1.01, "grad_norm": 1.4403943107084305, "learning_rate": 2.6674083724223166e-06, "loss": 0.5467, "step": 1869 }, { "epoch": 1.01, "grad_norm": 1.4836295484090531, "learning_rate": 2.6651795365894483e-06, "loss": 0.5562, "step": 1870 }, { "epoch": 1.01, "grad_norm": 1.4446611476525921, "learning_rate": 2.662950568882869e-06, "loss": 0.5708, "step": 1871 }, { "epoch": 1.01, "grad_norm": 1.5151328288880972, "learning_rate": 2.6607214710821112e-06, "loss": 0.5462, "step": 1872 }, { "epoch": 1.01, "grad_norm": 1.4383473149127335, "learning_rate": 2.658492244966809e-06, "loss": 0.5713, "step": 1873 }, { "epoch": 1.01, "grad_norm": 1.4164131465030776, "learning_rate": 2.6562628923167026e-06, "loss": 0.5522, "step": 1874 }, { "epoch": 1.01, "grad_norm": 1.4473025350446809, "learning_rate": 2.6540334149116304e-06, "loss": 0.5383, "step": 1875 }, { "epoch": 1.01, "grad_norm": 1.452441509935077, "learning_rate": 2.65180381453153e-06, "loss": 0.5442, "step": 1876 }, { "epoch": 1.01, "grad_norm": 1.5722661422248523, "learning_rate": 2.64957409295644e-06, "loss": 0.5447, "step": 1877 }, { "epoch": 1.01, "grad_norm": 1.4718511022481098, "learning_rate": 2.647344251966493e-06, "loss": 0.5476, "step": 1878 }, { "epoch": 1.01, "grad_norm": 1.4626650220226867, "learning_rate": 2.6451142933419188e-06, "loss": 0.5371, "step": 1879 }, { "epoch": 1.02, "grad_norm": 1.5409114203231267, "learning_rate": 2.642884218863039e-06, "loss": 0.5722, "step": 1880 }, { "epoch": 1.02, "grad_norm": 1.4694811699317525, "learning_rate": 2.6406540303102714e-06, "loss": 0.5494, "step": 1881 }, { "epoch": 1.02, "grad_norm": 1.5228463918583366, "learning_rate": 2.6384237294641194e-06, "loss": 0.5607, "step": 1882 }, { "epoch": 1.02, "grad_norm": 1.4824930069202058, "learning_rate": 2.6361933181051824e-06, "loss": 0.5653, "step": 1883 }, { "epoch": 1.02, "grad_norm": 1.474900181219094, "learning_rate": 2.6339627980141425e-06, "loss": 0.552, "step": 1884 }, { "epoch": 1.02, "grad_norm": 1.4452941229433278, "learning_rate": 2.6317321709717714e-06, "loss": 0.5682, "step": 1885 }, { "epoch": 1.02, "grad_norm": 1.5440961956990955, "learning_rate": 2.629501438758927e-06, "loss": 0.5575, "step": 1886 }, { "epoch": 1.02, "grad_norm": 1.5476724257476395, "learning_rate": 2.6272706031565482e-06, "loss": 0.5359, "step": 1887 }, { "epoch": 1.02, "grad_norm": 1.467560451429543, "learning_rate": 2.6250396659456577e-06, "loss": 0.5532, "step": 1888 }, { "epoch": 1.02, "grad_norm": 1.4599466363036868, "learning_rate": 2.6228086289073617e-06, "loss": 0.5407, "step": 1889 }, { "epoch": 1.02, "grad_norm": 1.4423114384501574, "learning_rate": 2.6205774938228433e-06, "loss": 0.5484, "step": 1890 }, { "epoch": 1.02, "grad_norm": 1.489975908977279, "learning_rate": 2.6183462624733642e-06, "loss": 0.5469, "step": 1891 }, { "epoch": 1.02, "grad_norm": 1.5610112441858341, "learning_rate": 2.616114936640263e-06, "loss": 0.5568, "step": 1892 }, { "epoch": 1.02, "grad_norm": 1.5630914768014037, "learning_rate": 2.6138835181049556e-06, "loss": 0.5392, "step": 1893 }, { "epoch": 1.02, "grad_norm": 1.5153868048287007, "learning_rate": 2.6116520086489296e-06, "loss": 0.5446, "step": 1894 }, { "epoch": 1.02, "grad_norm": 1.5158925566276285, "learning_rate": 2.609420410053747e-06, "loss": 0.5611, "step": 1895 }, { "epoch": 1.02, "grad_norm": 1.5018239817576486, "learning_rate": 2.6071887241010374e-06, "loss": 0.5521, "step": 1896 }, { "epoch": 1.02, "grad_norm": 1.464359951478072, "learning_rate": 2.6049569525725068e-06, "loss": 0.5265, "step": 1897 }, { "epoch": 1.02, "grad_norm": 1.46894215636755, "learning_rate": 2.6027250972499225e-06, "loss": 0.542, "step": 1898 }, { "epoch": 1.03, "grad_norm": 1.4818534061826778, "learning_rate": 2.6004931599151223e-06, "loss": 0.5359, "step": 1899 }, { "epoch": 1.03, "grad_norm": 1.5666377059318644, "learning_rate": 2.5982611423500093e-06, "loss": 0.5552, "step": 1900 }, { "epoch": 1.03, "grad_norm": 1.554357364160214, "learning_rate": 2.5960290463365507e-06, "loss": 0.5398, "step": 1901 }, { "epoch": 1.03, "grad_norm": 1.5917894740606466, "learning_rate": 2.593796873656775e-06, "loss": 0.5678, "step": 1902 }, { "epoch": 1.03, "grad_norm": 1.4856418948778602, "learning_rate": 2.591564626092773e-06, "loss": 0.5492, "step": 1903 }, { "epoch": 1.03, "grad_norm": 1.4562695811230348, "learning_rate": 2.5893323054266947e-06, "loss": 0.5502, "step": 1904 }, { "epoch": 1.03, "grad_norm": 1.445907151780509, "learning_rate": 2.587099913440749e-06, "loss": 0.545, "step": 1905 }, { "epoch": 1.03, "grad_norm": 1.4949458519743686, "learning_rate": 2.584867451917203e-06, "loss": 0.5459, "step": 1906 }, { "epoch": 1.03, "grad_norm": 1.4585840539018402, "learning_rate": 2.582634922638375e-06, "loss": 0.5399, "step": 1907 }, { "epoch": 1.03, "grad_norm": 1.5144020624473606, "learning_rate": 2.580402327386643e-06, "loss": 0.5652, "step": 1908 }, { "epoch": 1.03, "grad_norm": 1.4997585411943795, "learning_rate": 2.578169667944434e-06, "loss": 0.5402, "step": 1909 }, { "epoch": 1.03, "grad_norm": 1.47227137082658, "learning_rate": 2.5759369460942262e-06, "loss": 0.5505, "step": 1910 }, { "epoch": 1.03, "grad_norm": 1.4759566498308514, "learning_rate": 2.5737041636185496e-06, "loss": 0.5533, "step": 1911 }, { "epoch": 1.03, "grad_norm": 1.493205879275641, "learning_rate": 2.571471322299982e-06, "loss": 0.5647, "step": 1912 }, { "epoch": 1.03, "grad_norm": 1.5369302238271634, "learning_rate": 2.5692384239211467e-06, "loss": 0.5607, "step": 1913 }, { "epoch": 1.03, "grad_norm": 1.466293296599501, "learning_rate": 2.5670054702647146e-06, "loss": 0.5348, "step": 1914 }, { "epoch": 1.03, "grad_norm": 1.5878758775590822, "learning_rate": 2.5647724631133992e-06, "loss": 0.5621, "step": 1915 }, { "epoch": 1.03, "grad_norm": 1.4834524446328787, "learning_rate": 2.5625394042499567e-06, "loss": 0.54, "step": 1916 }, { "epoch": 1.04, "grad_norm": 1.5024222149276365, "learning_rate": 2.5603062954571872e-06, "loss": 0.5118, "step": 1917 }, { "epoch": 1.04, "grad_norm": 1.4790165986061539, "learning_rate": 2.5580731385179262e-06, "loss": 0.5155, "step": 1918 }, { "epoch": 1.04, "grad_norm": 1.5332270052602448, "learning_rate": 2.5558399352150522e-06, "loss": 0.5696, "step": 1919 }, { "epoch": 1.04, "grad_norm": 1.4114329589275727, "learning_rate": 2.553606687331477e-06, "loss": 0.5249, "step": 1920 }, { "epoch": 1.04, "grad_norm": 1.4559207359121964, "learning_rate": 2.55137339665015e-06, "loss": 0.52, "step": 1921 }, { "epoch": 1.04, "grad_norm": 1.4488861959925907, "learning_rate": 2.5491400649540555e-06, "loss": 0.5301, "step": 1922 }, { "epoch": 1.04, "grad_norm": 1.4845133793549146, "learning_rate": 2.5469066940262073e-06, "loss": 0.5446, "step": 1923 }, { "epoch": 1.04, "grad_norm": 1.5791315043264416, "learning_rate": 2.5446732856496535e-06, "loss": 0.5673, "step": 1924 }, { "epoch": 1.04, "grad_norm": 1.4730074009332204, "learning_rate": 2.5424398416074725e-06, "loss": 0.5284, "step": 1925 }, { "epoch": 1.04, "grad_norm": 1.4754446247110102, "learning_rate": 2.540206363682768e-06, "loss": 0.538, "step": 1926 }, { "epoch": 1.04, "grad_norm": 1.4819151339963614, "learning_rate": 2.5379728536586733e-06, "loss": 0.5722, "step": 1927 }, { "epoch": 1.04, "grad_norm": 1.5150920751502008, "learning_rate": 2.535739313318348e-06, "loss": 0.5615, "step": 1928 }, { "epoch": 1.04, "grad_norm": 1.5599887331041409, "learning_rate": 2.533505744444972e-06, "loss": 0.5683, "step": 1929 }, { "epoch": 1.04, "grad_norm": 1.3946376528771842, "learning_rate": 2.531272148821753e-06, "loss": 0.5234, "step": 1930 }, { "epoch": 1.04, "grad_norm": 1.5021870174547483, "learning_rate": 2.5290385282319153e-06, "loss": 0.5498, "step": 1931 }, { "epoch": 1.04, "grad_norm": 1.4892520940701213, "learning_rate": 2.526804884458707e-06, "loss": 0.5248, "step": 1932 }, { "epoch": 1.04, "grad_norm": 1.472668122604296, "learning_rate": 2.524571219285393e-06, "loss": 0.5355, "step": 1933 }, { "epoch": 1.04, "grad_norm": 1.5399431719830554, "learning_rate": 2.5223375344952555e-06, "loss": 0.5503, "step": 1934 }, { "epoch": 1.04, "grad_norm": 1.4677277785505236, "learning_rate": 2.520103831871591e-06, "loss": 0.5461, "step": 1935 }, { "epoch": 1.05, "grad_norm": 1.5076914152203322, "learning_rate": 2.5178701131977135e-06, "loss": 0.5453, "step": 1936 }, { "epoch": 1.05, "grad_norm": 1.512445846300473, "learning_rate": 2.515636380256946e-06, "loss": 0.5373, "step": 1937 }, { "epoch": 1.05, "grad_norm": 1.4644480396721804, "learning_rate": 2.513402634832627e-06, "loss": 0.5395, "step": 1938 }, { "epoch": 1.05, "grad_norm": 1.471651088730441, "learning_rate": 2.5111688787080994e-06, "loss": 0.5416, "step": 1939 }, { "epoch": 1.05, "grad_norm": 1.4660312811983436, "learning_rate": 2.5089351136667204e-06, "loss": 0.5247, "step": 1940 }, { "epoch": 1.05, "grad_norm": 1.4779908181622965, "learning_rate": 2.5067013414918523e-06, "loss": 0.5332, "step": 1941 }, { "epoch": 1.05, "grad_norm": 1.512285674597304, "learning_rate": 2.5044675639668615e-06, "loss": 0.5515, "step": 1942 }, { "epoch": 1.05, "grad_norm": 1.4786857497151864, "learning_rate": 2.502233782875118e-06, "loss": 0.5598, "step": 1943 }, { "epoch": 1.05, "grad_norm": 1.5340287251527995, "learning_rate": 2.5e-06, "loss": 0.5717, "step": 1944 }, { "epoch": 1.05, "grad_norm": 1.483762820167908, "learning_rate": 2.4977662171248825e-06, "loss": 0.5118, "step": 1945 }, { "epoch": 1.05, "grad_norm": 1.4875306897443217, "learning_rate": 2.49553243603314e-06, "loss": 0.5419, "step": 1946 }, { "epoch": 1.05, "grad_norm": 1.499667106888288, "learning_rate": 2.493298658508149e-06, "loss": 0.5496, "step": 1947 }, { "epoch": 1.05, "grad_norm": 1.4536517031934768, "learning_rate": 2.491064886333279e-06, "loss": 0.549, "step": 1948 }, { "epoch": 1.05, "grad_norm": 1.479374942752317, "learning_rate": 2.4888311212919e-06, "loss": 0.5346, "step": 1949 }, { "epoch": 1.05, "grad_norm": 1.459565692606008, "learning_rate": 2.4865973651673743e-06, "loss": 0.5388, "step": 1950 }, { "epoch": 1.05, "grad_norm": 1.4912218408076479, "learning_rate": 2.4843636197430543e-06, "loss": 0.5557, "step": 1951 }, { "epoch": 1.05, "grad_norm": 1.4432490866508885, "learning_rate": 2.4821298868022873e-06, "loss": 0.5577, "step": 1952 }, { "epoch": 1.05, "grad_norm": 1.5067703285626208, "learning_rate": 2.4798961681284096e-06, "loss": 0.5396, "step": 1953 }, { "epoch": 1.06, "grad_norm": 1.4754911481381825, "learning_rate": 2.4776624655047454e-06, "loss": 0.5331, "step": 1954 }, { "epoch": 1.06, "grad_norm": 1.462436367127252, "learning_rate": 2.475428780714608e-06, "loss": 0.5189, "step": 1955 }, { "epoch": 1.06, "grad_norm": 1.5359827211868566, "learning_rate": 2.473195115541293e-06, "loss": 0.5556, "step": 1956 }, { "epoch": 1.06, "grad_norm": 1.5106144218349489, "learning_rate": 2.470961471768085e-06, "loss": 0.5712, "step": 1957 }, { "epoch": 1.06, "grad_norm": 1.495876915475342, "learning_rate": 2.468727851178248e-06, "loss": 0.547, "step": 1958 }, { "epoch": 1.06, "grad_norm": 1.4901099084442317, "learning_rate": 2.466494255555029e-06, "loss": 0.5553, "step": 1959 }, { "epoch": 1.06, "grad_norm": 1.537414321537475, "learning_rate": 2.4642606866816533e-06, "loss": 0.5412, "step": 1960 }, { "epoch": 1.06, "grad_norm": 1.4973965681986459, "learning_rate": 2.462027146341327e-06, "loss": 0.5672, "step": 1961 }, { "epoch": 1.06, "grad_norm": 1.4401652007622854, "learning_rate": 2.459793636317233e-06, "loss": 0.5263, "step": 1962 }, { "epoch": 1.06, "grad_norm": 1.5067371922511503, "learning_rate": 2.4575601583925287e-06, "loss": 0.5504, "step": 1963 }, { "epoch": 1.06, "grad_norm": 1.4928569517773722, "learning_rate": 2.4553267143503465e-06, "loss": 0.5393, "step": 1964 }, { "epoch": 1.06, "grad_norm": 1.481331074661893, "learning_rate": 2.4530933059737936e-06, "loss": 0.5624, "step": 1965 }, { "epoch": 1.06, "grad_norm": 1.4969623796891816, "learning_rate": 2.4508599350459458e-06, "loss": 0.5592, "step": 1966 }, { "epoch": 1.06, "grad_norm": 1.4905308497552625, "learning_rate": 2.4486266033498506e-06, "loss": 0.5241, "step": 1967 }, { "epoch": 1.06, "grad_norm": 1.4904210635599038, "learning_rate": 2.4463933126685236e-06, "loss": 0.5349, "step": 1968 }, { "epoch": 1.06, "grad_norm": 1.5231699610713554, "learning_rate": 2.444160064784949e-06, "loss": 0.551, "step": 1969 }, { "epoch": 1.06, "grad_norm": 1.5288048814816413, "learning_rate": 2.441926861482074e-06, "loss": 0.5485, "step": 1970 }, { "epoch": 1.06, "grad_norm": 1.6053734831278057, "learning_rate": 2.439693704542814e-06, "loss": 0.5635, "step": 1971 }, { "epoch": 1.06, "grad_norm": 1.539306189287943, "learning_rate": 2.437460595750043e-06, "loss": 0.5336, "step": 1972 }, { "epoch": 1.07, "grad_norm": 1.5215250663169457, "learning_rate": 2.4352275368866016e-06, "loss": 0.5464, "step": 1973 }, { "epoch": 1.07, "grad_norm": 1.4802395448809191, "learning_rate": 2.432994529735286e-06, "loss": 0.5439, "step": 1974 }, { "epoch": 1.07, "grad_norm": 1.5564779645038427, "learning_rate": 2.4307615760788537e-06, "loss": 0.5577, "step": 1975 }, { "epoch": 1.07, "grad_norm": 1.5068371950261898, "learning_rate": 2.428528677700019e-06, "loss": 0.5731, "step": 1976 }, { "epoch": 1.07, "grad_norm": 1.5839022762346582, "learning_rate": 2.4262958363814512e-06, "loss": 0.5279, "step": 1977 }, { "epoch": 1.07, "grad_norm": 1.446381730719909, "learning_rate": 2.424063053905775e-06, "loss": 0.5589, "step": 1978 }, { "epoch": 1.07, "grad_norm": 1.438945738210926, "learning_rate": 2.4218303320555677e-06, "loss": 0.5306, "step": 1979 }, { "epoch": 1.07, "grad_norm": 1.516592698279528, "learning_rate": 2.4195976726133574e-06, "loss": 0.5765, "step": 1980 }, { "epoch": 1.07, "grad_norm": 1.5597496633591363, "learning_rate": 2.4173650773616252e-06, "loss": 0.574, "step": 1981 }, { "epoch": 1.07, "grad_norm": 1.5109108318986628, "learning_rate": 2.415132548082798e-06, "loss": 0.5671, "step": 1982 }, { "epoch": 1.07, "grad_norm": 1.5416169986890005, "learning_rate": 2.4129000865592517e-06, "loss": 0.5454, "step": 1983 }, { "epoch": 1.07, "grad_norm": 1.4632744063267806, "learning_rate": 2.410667694573306e-06, "loss": 0.5211, "step": 1984 }, { "epoch": 1.07, "grad_norm": 1.4877181996752833, "learning_rate": 2.4084353739072286e-06, "loss": 0.5376, "step": 1985 }, { "epoch": 1.07, "grad_norm": 1.4781921626377215, "learning_rate": 2.4062031263432267e-06, "loss": 0.5365, "step": 1986 }, { "epoch": 1.07, "grad_norm": 1.3975072634530403, "learning_rate": 2.40397095366345e-06, "loss": 0.5309, "step": 1987 }, { "epoch": 1.07, "grad_norm": 1.457071716424087, "learning_rate": 2.4017388576499907e-06, "loss": 0.5821, "step": 1988 }, { "epoch": 1.07, "grad_norm": 1.4799742493983443, "learning_rate": 2.3995068400848785e-06, "loss": 0.553, "step": 1989 }, { "epoch": 1.07, "grad_norm": 1.5072893489012378, "learning_rate": 2.3972749027500783e-06, "loss": 0.5526, "step": 1990 }, { "epoch": 1.08, "grad_norm": 1.4738068501234467, "learning_rate": 2.3950430474274945e-06, "loss": 0.5331, "step": 1991 }, { "epoch": 1.08, "grad_norm": 1.5336101344790545, "learning_rate": 2.392811275898963e-06, "loss": 0.5288, "step": 1992 }, { "epoch": 1.08, "grad_norm": 1.445101766077698, "learning_rate": 2.3905795899462544e-06, "loss": 0.5426, "step": 1993 }, { "epoch": 1.08, "grad_norm": 1.4991969085582677, "learning_rate": 2.3883479913510716e-06, "loss": 0.5477, "step": 1994 }, { "epoch": 1.08, "grad_norm": 1.4094523942582688, "learning_rate": 2.3861164818950448e-06, "loss": 0.5196, "step": 1995 }, { "epoch": 1.08, "grad_norm": 1.4643613266900304, "learning_rate": 2.3838850633597373e-06, "loss": 0.5322, "step": 1996 }, { "epoch": 1.08, "grad_norm": 1.4907040982846411, "learning_rate": 2.381653737526637e-06, "loss": 0.5596, "step": 1997 }, { "epoch": 1.08, "grad_norm": 1.5136729540729195, "learning_rate": 2.379422506177157e-06, "loss": 0.5426, "step": 1998 }, { "epoch": 1.08, "grad_norm": 1.4432182516228216, "learning_rate": 2.3771913710926387e-06, "loss": 0.5246, "step": 1999 }, { "epoch": 1.08, "grad_norm": 1.5759287335098946, "learning_rate": 2.374960334054343e-06, "loss": 0.5624, "step": 2000 }, { "epoch": 1.08, "grad_norm": 1.4988638788291024, "learning_rate": 2.372729396843453e-06, "loss": 0.561, "step": 2001 }, { "epoch": 1.08, "grad_norm": 1.543032530520574, "learning_rate": 2.3704985612410742e-06, "loss": 0.5711, "step": 2002 }, { "epoch": 1.08, "grad_norm": 1.5174070656334862, "learning_rate": 2.3682678290282286e-06, "loss": 0.5446, "step": 2003 }, { "epoch": 1.08, "grad_norm": 1.5277607013234762, "learning_rate": 2.366037201985858e-06, "loss": 0.5206, "step": 2004 }, { "epoch": 1.08, "grad_norm": 1.4680636266880316, "learning_rate": 2.363806681894818e-06, "loss": 0.5583, "step": 2005 }, { "epoch": 1.08, "grad_norm": 1.4667938322797125, "learning_rate": 2.361576270535881e-06, "loss": 0.5307, "step": 2006 }, { "epoch": 1.08, "grad_norm": 1.4530020969515622, "learning_rate": 2.3593459696897294e-06, "loss": 0.5315, "step": 2007 }, { "epoch": 1.08, "grad_norm": 1.4633897858179004, "learning_rate": 2.3571157811369616e-06, "loss": 0.5463, "step": 2008 }, { "epoch": 1.08, "grad_norm": 1.5415794466180677, "learning_rate": 2.354885706658082e-06, "loss": 0.5457, "step": 2009 }, { "epoch": 1.09, "grad_norm": 1.4553086780969278, "learning_rate": 2.352655748033508e-06, "loss": 0.5277, "step": 2010 }, { "epoch": 1.09, "grad_norm": 1.4838171903510005, "learning_rate": 2.3504259070435604e-06, "loss": 0.551, "step": 2011 }, { "epoch": 1.09, "grad_norm": 1.5173402457545688, "learning_rate": 2.3481961854684705e-06, "loss": 0.5445, "step": 2012 }, { "epoch": 1.09, "grad_norm": 1.470168593418558, "learning_rate": 2.3459665850883704e-06, "loss": 0.5437, "step": 2013 }, { "epoch": 1.09, "grad_norm": 1.5321269270988167, "learning_rate": 2.343737107683298e-06, "loss": 0.5588, "step": 2014 }, { "epoch": 1.09, "grad_norm": 1.5014398572365637, "learning_rate": 2.3415077550331912e-06, "loss": 0.5488, "step": 2015 }, { "epoch": 1.09, "grad_norm": 1.4701635373644735, "learning_rate": 2.33927852891789e-06, "loss": 0.5341, "step": 2016 }, { "epoch": 1.09, "grad_norm": 1.4679533769888284, "learning_rate": 2.3370494311171316e-06, "loss": 0.5425, "step": 2017 }, { "epoch": 1.09, "grad_norm": 1.5253582788470854, "learning_rate": 2.3348204634105517e-06, "loss": 0.5304, "step": 2018 }, { "epoch": 1.09, "grad_norm": 1.5405347610724673, "learning_rate": 2.3325916275776834e-06, "loss": 0.5434, "step": 2019 }, { "epoch": 1.09, "grad_norm": 1.4802167728111564, "learning_rate": 2.330362925397953e-06, "loss": 0.5356, "step": 2020 }, { "epoch": 1.09, "grad_norm": 1.5881794735110282, "learning_rate": 2.328134358650679e-06, "loss": 0.5428, "step": 2021 }, { "epoch": 1.09, "grad_norm": 1.5452958037627385, "learning_rate": 2.3259059291150744e-06, "loss": 0.5443, "step": 2022 }, { "epoch": 1.09, "grad_norm": 1.4637466796737586, "learning_rate": 2.3236776385702402e-06, "loss": 0.516, "step": 2023 }, { "epoch": 1.09, "grad_norm": 1.4840258100559471, "learning_rate": 2.3214494887951694e-06, "loss": 0.5142, "step": 2024 }, { "epoch": 1.09, "grad_norm": 1.5705428331685378, "learning_rate": 2.319221481568739e-06, "loss": 0.5477, "step": 2025 }, { "epoch": 1.09, "grad_norm": 1.5117581863142746, "learning_rate": 2.316993618669714e-06, "loss": 0.5311, "step": 2026 }, { "epoch": 1.09, "grad_norm": 1.5749155843491496, "learning_rate": 2.3147659018767457e-06, "loss": 0.5548, "step": 2027 }, { "epoch": 1.1, "grad_norm": 1.4890745614865246, "learning_rate": 2.3125383329683666e-06, "loss": 0.5501, "step": 2028 }, { "epoch": 1.1, "grad_norm": 1.4576163617719196, "learning_rate": 2.3103109137229916e-06, "loss": 0.5683, "step": 2029 }, { "epoch": 1.1, "grad_norm": 1.5605106824353243, "learning_rate": 2.3080836459189167e-06, "loss": 0.5497, "step": 2030 }, { "epoch": 1.1, "grad_norm": 1.5373389867977894, "learning_rate": 2.3058565313343152e-06, "loss": 0.5717, "step": 2031 }, { "epoch": 1.1, "grad_norm": 1.5328294704670236, "learning_rate": 2.3036295717472415e-06, "loss": 0.5563, "step": 2032 }, { "epoch": 1.1, "grad_norm": 1.5641603055739288, "learning_rate": 2.301402768935623e-06, "loss": 0.5542, "step": 2033 }, { "epoch": 1.1, "grad_norm": 1.4685972278903172, "learning_rate": 2.2991761246772623e-06, "loss": 0.5219, "step": 2034 }, { "epoch": 1.1, "grad_norm": 1.5457093094502863, "learning_rate": 2.2969496407498377e-06, "loss": 0.5195, "step": 2035 }, { "epoch": 1.1, "grad_norm": 1.4851072102215512, "learning_rate": 2.2947233189308984e-06, "loss": 0.5315, "step": 2036 }, { "epoch": 1.1, "grad_norm": 1.4993434854830086, "learning_rate": 2.2924971609978623e-06, "loss": 0.5554, "step": 2037 }, { "epoch": 1.1, "grad_norm": 1.5425377295837994, "learning_rate": 2.2902711687280197e-06, "loss": 0.5559, "step": 2038 }, { "epoch": 1.1, "grad_norm": 1.602801856491369, "learning_rate": 2.288045343898526e-06, "loss": 0.5838, "step": 2039 }, { "epoch": 1.1, "grad_norm": 1.4994922312454517, "learning_rate": 2.285819688286403e-06, "loss": 0.5337, "step": 2040 }, { "epoch": 1.1, "grad_norm": 1.4612582633866125, "learning_rate": 2.28359420366854e-06, "loss": 0.5472, "step": 2041 }, { "epoch": 1.1, "grad_norm": 1.5180342287051396, "learning_rate": 2.281368891821686e-06, "loss": 0.5494, "step": 2042 }, { "epoch": 1.1, "grad_norm": 1.4806058573678198, "learning_rate": 2.2791437545224563e-06, "loss": 0.5425, "step": 2043 }, { "epoch": 1.1, "grad_norm": 1.4739590986382416, "learning_rate": 2.276918793547324e-06, "loss": 0.5499, "step": 2044 }, { "epoch": 1.1, "grad_norm": 1.549841907800793, "learning_rate": 2.2746940106726213e-06, "loss": 0.54, "step": 2045 }, { "epoch": 1.1, "grad_norm": 1.563174318592053, "learning_rate": 2.2724694076745397e-06, "loss": 0.5779, "step": 2046 }, { "epoch": 1.11, "grad_norm": 1.530962242007493, "learning_rate": 2.2702449863291263e-06, "loss": 0.5264, "step": 2047 }, { "epoch": 1.11, "grad_norm": 1.5295568558017254, "learning_rate": 2.2680207484122827e-06, "loss": 0.5365, "step": 2048 }, { "epoch": 1.11, "grad_norm": 1.5962795765999274, "learning_rate": 2.265796695699766e-06, "loss": 0.553, "step": 2049 }, { "epoch": 1.11, "grad_norm": 1.4876458177536442, "learning_rate": 2.263572829967182e-06, "loss": 0.5128, "step": 2050 }, { "epoch": 1.11, "grad_norm": 1.497808927962802, "learning_rate": 2.2613491529899904e-06, "loss": 0.5385, "step": 2051 }, { "epoch": 1.11, "grad_norm": 1.5481509901710009, "learning_rate": 2.2591256665434998e-06, "loss": 0.5582, "step": 2052 }, { "epoch": 1.11, "grad_norm": 1.490090680497983, "learning_rate": 2.2569023724028655e-06, "loss": 0.5572, "step": 2053 }, { "epoch": 1.11, "grad_norm": 1.484237641098667, "learning_rate": 2.254679272343089e-06, "loss": 0.5689, "step": 2054 }, { "epoch": 1.11, "grad_norm": 1.570647736775846, "learning_rate": 2.252456368139019e-06, "loss": 0.5474, "step": 2055 }, { "epoch": 1.11, "grad_norm": 1.533530282224179, "learning_rate": 2.2502336615653446e-06, "loss": 0.5499, "step": 2056 }, { "epoch": 1.11, "grad_norm": 1.5155056598498795, "learning_rate": 2.2480111543965994e-06, "loss": 0.5705, "step": 2057 }, { "epoch": 1.11, "grad_norm": 1.4808538913865987, "learning_rate": 2.245788848407159e-06, "loss": 0.5439, "step": 2058 }, { "epoch": 1.11, "grad_norm": 1.52846568191959, "learning_rate": 2.243566745371235e-06, "loss": 0.5459, "step": 2059 }, { "epoch": 1.11, "grad_norm": 1.5085080701044495, "learning_rate": 2.24134484706288e-06, "loss": 0.5298, "step": 2060 }, { "epoch": 1.11, "grad_norm": 1.5107632836997253, "learning_rate": 2.2391231552559815e-06, "loss": 0.5638, "step": 2061 }, { "epoch": 1.11, "grad_norm": 1.4866944585369397, "learning_rate": 2.2369016717242614e-06, "loss": 0.5259, "step": 2062 }, { "epoch": 1.11, "grad_norm": 1.5441915667360964, "learning_rate": 2.2346803982412776e-06, "loss": 0.5372, "step": 2063 }, { "epoch": 1.11, "grad_norm": 1.5663852213050984, "learning_rate": 2.2324593365804184e-06, "loss": 0.5293, "step": 2064 }, { "epoch": 1.12, "grad_norm": 1.4875122877931917, "learning_rate": 2.230238488514904e-06, "loss": 0.536, "step": 2065 }, { "epoch": 1.12, "grad_norm": 1.5656923921698571, "learning_rate": 2.228017855817784e-06, "loss": 0.5779, "step": 2066 }, { "epoch": 1.12, "grad_norm": 1.5345611427762955, "learning_rate": 2.225797440261936e-06, "loss": 0.5367, "step": 2067 }, { "epoch": 1.12, "grad_norm": 1.482771424521306, "learning_rate": 2.223577243620063e-06, "loss": 0.5518, "step": 2068 }, { "epoch": 1.12, "grad_norm": 1.512641017819438, "learning_rate": 2.2213572676646965e-06, "loss": 0.5494, "step": 2069 }, { "epoch": 1.12, "grad_norm": 1.5582475287769006, "learning_rate": 2.219137514168187e-06, "loss": 0.5635, "step": 2070 }, { "epoch": 1.12, "grad_norm": 1.5260121390564352, "learning_rate": 2.2169179849027117e-06, "loss": 0.5463, "step": 2071 }, { "epoch": 1.12, "grad_norm": 1.6376262681303433, "learning_rate": 2.214698681640266e-06, "loss": 0.5668, "step": 2072 }, { "epoch": 1.12, "grad_norm": 1.5642758551964795, "learning_rate": 2.212479606152667e-06, "loss": 0.5294, "step": 2073 }, { "epoch": 1.12, "grad_norm": 1.4612647381355792, "learning_rate": 2.210260760211548e-06, "loss": 0.5197, "step": 2074 }, { "epoch": 1.12, "grad_norm": 1.5199052585208601, "learning_rate": 2.208042145588361e-06, "loss": 0.5512, "step": 2075 }, { "epoch": 1.12, "grad_norm": 1.5385498132995443, "learning_rate": 2.205823764054372e-06, "loss": 0.558, "step": 2076 }, { "epoch": 1.12, "grad_norm": 1.6087465678007238, "learning_rate": 2.2036056173806607e-06, "loss": 0.5477, "step": 2077 }, { "epoch": 1.12, "grad_norm": 1.486617646480766, "learning_rate": 2.2013877073381197e-06, "loss": 0.5388, "step": 2078 }, { "epoch": 1.12, "grad_norm": 1.4756865636044976, "learning_rate": 2.199170035697453e-06, "loss": 0.5381, "step": 2079 }, { "epoch": 1.12, "grad_norm": 1.456208800900108, "learning_rate": 2.196952604229175e-06, "loss": 0.5411, "step": 2080 }, { "epoch": 1.12, "grad_norm": 1.479522784250332, "learning_rate": 2.194735414703605e-06, "loss": 0.5214, "step": 2081 }, { "epoch": 1.12, "grad_norm": 1.5924680123565949, "learning_rate": 2.1925184688908735e-06, "loss": 0.5403, "step": 2082 }, { "epoch": 1.12, "grad_norm": 1.4696288439481162, "learning_rate": 2.190301768560915e-06, "loss": 0.5414, "step": 2083 }, { "epoch": 1.13, "grad_norm": 1.4494119217525803, "learning_rate": 2.1880853154834653e-06, "loss": 0.5333, "step": 2084 }, { "epoch": 1.13, "grad_norm": 1.4837632845378426, "learning_rate": 2.185869111428067e-06, "loss": 0.5479, "step": 2085 }, { "epoch": 1.13, "grad_norm": 1.4908297235356542, "learning_rate": 2.1836531581640606e-06, "loss": 0.5567, "step": 2086 }, { "epoch": 1.13, "grad_norm": 1.54215838515537, "learning_rate": 2.181437457460588e-06, "loss": 0.5583, "step": 2087 }, { "epoch": 1.13, "grad_norm": 1.5604188506665329, "learning_rate": 2.1792220110865885e-06, "loss": 0.5735, "step": 2088 }, { "epoch": 1.13, "grad_norm": 1.562711197250171, "learning_rate": 2.1770068208108e-06, "loss": 0.5212, "step": 2089 }, { "epoch": 1.13, "grad_norm": 1.5935300906546317, "learning_rate": 2.174791888401755e-06, "loss": 0.5499, "step": 2090 }, { "epoch": 1.13, "grad_norm": 1.6170959918270176, "learning_rate": 2.1725772156277795e-06, "loss": 0.5609, "step": 2091 }, { "epoch": 1.13, "grad_norm": 1.5580244733238122, "learning_rate": 2.1703628042569925e-06, "loss": 0.5586, "step": 2092 }, { "epoch": 1.13, "grad_norm": 1.5802069022263083, "learning_rate": 2.168148656057305e-06, "loss": 0.5674, "step": 2093 }, { "epoch": 1.13, "grad_norm": 1.508695414133373, "learning_rate": 2.165934772796417e-06, "loss": 0.5604, "step": 2094 }, { "epoch": 1.13, "grad_norm": 1.5022932377601534, "learning_rate": 2.163721156241818e-06, "loss": 0.5399, "step": 2095 }, { "epoch": 1.13, "grad_norm": 1.5145422220378273, "learning_rate": 2.1615078081607824e-06, "loss": 0.5453, "step": 2096 }, { "epoch": 1.13, "grad_norm": 1.4798981579415011, "learning_rate": 2.159294730320374e-06, "loss": 0.5493, "step": 2097 }, { "epoch": 1.13, "grad_norm": 1.4211342166391716, "learning_rate": 2.157081924487438e-06, "loss": 0.5416, "step": 2098 }, { "epoch": 1.13, "grad_norm": 1.5331917566297057, "learning_rate": 2.154869392428603e-06, "loss": 0.5535, "step": 2099 }, { "epoch": 1.13, "grad_norm": 1.559170677620434, "learning_rate": 2.15265713591028e-06, "loss": 0.5536, "step": 2100 }, { "epoch": 1.13, "grad_norm": 1.5131013287523283, "learning_rate": 2.1504451566986566e-06, "loss": 0.5681, "step": 2101 }, { "epoch": 1.14, "grad_norm": 1.481050199501426, "learning_rate": 2.1482334565597044e-06, "loss": 0.544, "step": 2102 }, { "epoch": 1.14, "grad_norm": 1.4897579255839333, "learning_rate": 2.1460220372591676e-06, "loss": 0.5317, "step": 2103 }, { "epoch": 1.14, "grad_norm": 1.5405964879740979, "learning_rate": 2.1438109005625682e-06, "loss": 0.5625, "step": 2104 }, { "epoch": 1.14, "grad_norm": 1.4815460299227776, "learning_rate": 2.141600048235204e-06, "loss": 0.5539, "step": 2105 }, { "epoch": 1.14, "grad_norm": 1.4505436842219874, "learning_rate": 2.139389482042142e-06, "loss": 0.5559, "step": 2106 }, { "epoch": 1.14, "grad_norm": 1.5036839299239901, "learning_rate": 2.1371792037482245e-06, "loss": 0.5487, "step": 2107 }, { "epoch": 1.14, "grad_norm": 1.4585968090501107, "learning_rate": 2.134969215118062e-06, "loss": 0.5379, "step": 2108 }, { "epoch": 1.14, "grad_norm": 1.494166978987021, "learning_rate": 2.1327595179160332e-06, "loss": 0.565, "step": 2109 }, { "epoch": 1.14, "grad_norm": 1.5277932025747298, "learning_rate": 2.130550113906286e-06, "loss": 0.5354, "step": 2110 }, { "epoch": 1.14, "grad_norm": 1.5272415808401367, "learning_rate": 2.1283410048527323e-06, "loss": 0.5607, "step": 2111 }, { "epoch": 1.14, "grad_norm": 1.5487280070491267, "learning_rate": 2.1261321925190492e-06, "loss": 0.5584, "step": 2112 }, { "epoch": 1.14, "grad_norm": 1.5044391383681976, "learning_rate": 2.1239236786686794e-06, "loss": 0.5469, "step": 2113 }, { "epoch": 1.14, "grad_norm": 1.4539045037264633, "learning_rate": 2.1217154650648235e-06, "loss": 0.5306, "step": 2114 }, { "epoch": 1.14, "grad_norm": 1.4835681181607012, "learning_rate": 2.1195075534704433e-06, "loss": 0.5478, "step": 2115 }, { "epoch": 1.14, "grad_norm": 1.5381486646850033, "learning_rate": 2.1172999456482616e-06, "loss": 0.5287, "step": 2116 }, { "epoch": 1.14, "grad_norm": 1.5990118579744759, "learning_rate": 2.115092643360756e-06, "loss": 0.5355, "step": 2117 }, { "epoch": 1.14, "grad_norm": 1.4863017834835832, "learning_rate": 2.1128856483701625e-06, "loss": 0.5335, "step": 2118 }, { "epoch": 1.14, "grad_norm": 1.5570596561501915, "learning_rate": 2.1106789624384692e-06, "loss": 0.563, "step": 2119 }, { "epoch": 1.14, "grad_norm": 1.5006384608488375, "learning_rate": 2.1084725873274197e-06, "loss": 0.5505, "step": 2120 }, { "epoch": 1.15, "grad_norm": 1.5330173340496593, "learning_rate": 2.10626652479851e-06, "loss": 0.5385, "step": 2121 }, { "epoch": 1.15, "grad_norm": 1.4768527686547535, "learning_rate": 2.1040607766129833e-06, "loss": 0.5551, "step": 2122 }, { "epoch": 1.15, "grad_norm": 1.4989408175954275, "learning_rate": 2.1018553445318346e-06, "loss": 0.5326, "step": 2123 }, { "epoch": 1.15, "grad_norm": 1.5289773786229965, "learning_rate": 2.0996502303158057e-06, "loss": 0.5556, "step": 2124 }, { "epoch": 1.15, "grad_norm": 1.5325072852937827, "learning_rate": 2.097445435725384e-06, "loss": 0.5625, "step": 2125 }, { "epoch": 1.15, "grad_norm": 1.5126709830301106, "learning_rate": 2.0952409625208033e-06, "loss": 0.5415, "step": 2126 }, { "epoch": 1.15, "grad_norm": 1.5489243698167274, "learning_rate": 2.0930368124620385e-06, "loss": 0.5437, "step": 2127 }, { "epoch": 1.15, "grad_norm": 1.449204252826453, "learning_rate": 2.0908329873088094e-06, "loss": 0.5128, "step": 2128 }, { "epoch": 1.15, "grad_norm": 1.5670164972513752, "learning_rate": 2.0886294888205738e-06, "loss": 0.5405, "step": 2129 }, { "epoch": 1.15, "grad_norm": 1.5574006014102342, "learning_rate": 2.086426318756531e-06, "loss": 0.5457, "step": 2130 }, { "epoch": 1.15, "grad_norm": 1.49500664216848, "learning_rate": 2.084223478875615e-06, "loss": 0.5263, "step": 2131 }, { "epoch": 1.15, "grad_norm": 1.5057719759876047, "learning_rate": 2.0820209709365e-06, "loss": 0.5472, "step": 2132 }, { "epoch": 1.15, "grad_norm": 1.4733914362030163, "learning_rate": 2.0798187966975917e-06, "loss": 0.529, "step": 2133 }, { "epoch": 1.15, "grad_norm": 1.4874382450873602, "learning_rate": 2.077616957917032e-06, "loss": 0.5219, "step": 2134 }, { "epoch": 1.15, "grad_norm": 1.4808109038243638, "learning_rate": 2.0754154563526925e-06, "loss": 0.5455, "step": 2135 }, { "epoch": 1.15, "grad_norm": 1.4954571088437099, "learning_rate": 2.073214293762179e-06, "loss": 0.5245, "step": 2136 }, { "epoch": 1.15, "grad_norm": 1.4546050185950674, "learning_rate": 2.0710134719028228e-06, "loss": 0.5213, "step": 2137 }, { "epoch": 1.15, "grad_norm": 1.475253791020004, "learning_rate": 2.068812992531687e-06, "loss": 0.5186, "step": 2138 }, { "epoch": 1.16, "grad_norm": 1.4449159852083162, "learning_rate": 2.0666128574055575e-06, "loss": 0.5517, "step": 2139 }, { "epoch": 1.16, "grad_norm": 1.5178500045643328, "learning_rate": 2.0644130682809486e-06, "loss": 0.5686, "step": 2140 }, { "epoch": 1.16, "grad_norm": 1.5553807415263061, "learning_rate": 2.062213626914096e-06, "loss": 0.5298, "step": 2141 }, { "epoch": 1.16, "grad_norm": 1.4657995370333017, "learning_rate": 2.0600145350609585e-06, "loss": 0.5246, "step": 2142 }, { "epoch": 1.16, "grad_norm": 1.5632552874603491, "learning_rate": 2.0578157944772157e-06, "loss": 0.5514, "step": 2143 }, { "epoch": 1.16, "grad_norm": 1.5121950261014352, "learning_rate": 2.0556174069182694e-06, "loss": 0.5402, "step": 2144 }, { "epoch": 1.16, "grad_norm": 1.5792215053458638, "learning_rate": 2.053419374139235e-06, "loss": 0.5398, "step": 2145 }, { "epoch": 1.16, "grad_norm": 1.5373423298131779, "learning_rate": 2.0512216978949485e-06, "loss": 0.5461, "step": 2146 }, { "epoch": 1.16, "grad_norm": 1.5000743943389965, "learning_rate": 2.049024379939959e-06, "loss": 0.5422, "step": 2147 }, { "epoch": 1.16, "grad_norm": 1.5099148435157488, "learning_rate": 2.0468274220285295e-06, "loss": 0.5643, "step": 2148 }, { "epoch": 1.16, "grad_norm": 1.5553045956131843, "learning_rate": 2.0446308259146374e-06, "loss": 0.582, "step": 2149 }, { "epoch": 1.16, "grad_norm": 1.4985126395061177, "learning_rate": 2.0424345933519694e-06, "loss": 0.5244, "step": 2150 }, { "epoch": 1.16, "grad_norm": 1.5107369428865585, "learning_rate": 2.0402387260939224e-06, "loss": 0.5265, "step": 2151 }, { "epoch": 1.16, "grad_norm": 1.4802099230560992, "learning_rate": 2.0380432258936035e-06, "loss": 0.5322, "step": 2152 }, { "epoch": 1.16, "grad_norm": 1.4826225698374345, "learning_rate": 2.0358480945038243e-06, "loss": 0.5585, "step": 2153 }, { "epoch": 1.16, "grad_norm": 1.500724192716536, "learning_rate": 2.033653333677103e-06, "loss": 0.5351, "step": 2154 }, { "epoch": 1.16, "grad_norm": 1.5093720195763116, "learning_rate": 2.031458945165662e-06, "loss": 0.5506, "step": 2155 }, { "epoch": 1.16, "grad_norm": 1.480890481937993, "learning_rate": 2.029264930721425e-06, "loss": 0.5407, "step": 2156 }, { "epoch": 1.16, "grad_norm": 1.5344287621019794, "learning_rate": 2.02707129209602e-06, "loss": 0.5298, "step": 2157 }, { "epoch": 1.17, "grad_norm": 1.4440124296701229, "learning_rate": 2.024878031040772e-06, "loss": 0.5316, "step": 2158 }, { "epoch": 1.17, "grad_norm": 1.5010315864602994, "learning_rate": 2.0226851493067068e-06, "loss": 0.5297, "step": 2159 }, { "epoch": 1.17, "grad_norm": 1.5233499372833323, "learning_rate": 2.0204926486445463e-06, "loss": 0.55, "step": 2160 }, { "epoch": 1.17, "grad_norm": 1.4997248453457943, "learning_rate": 2.0183005308047086e-06, "loss": 0.5502, "step": 2161 }, { "epoch": 1.17, "grad_norm": 1.5362911292620212, "learning_rate": 2.0161087975373044e-06, "loss": 0.5422, "step": 2162 }, { "epoch": 1.17, "grad_norm": 1.5052603259971673, "learning_rate": 2.0139174505921403e-06, "loss": 0.5256, "step": 2163 }, { "epoch": 1.17, "grad_norm": 1.4829868083651943, "learning_rate": 2.011726491718712e-06, "loss": 0.5213, "step": 2164 }, { "epoch": 1.17, "grad_norm": 1.558496461953048, "learning_rate": 2.009535922666207e-06, "loss": 0.559, "step": 2165 }, { "epoch": 1.17, "grad_norm": 1.4859606625802517, "learning_rate": 2.0073457451835e-06, "loss": 0.5381, "step": 2166 }, { "epoch": 1.17, "grad_norm": 1.505486247860409, "learning_rate": 2.0051559610191558e-06, "loss": 0.535, "step": 2167 }, { "epoch": 1.17, "grad_norm": 1.5388668252501294, "learning_rate": 2.0029665719214224e-06, "loss": 0.5279, "step": 2168 }, { "epoch": 1.17, "grad_norm": 1.5711261554710358, "learning_rate": 2.0007775796382335e-06, "loss": 0.5371, "step": 2169 }, { "epoch": 1.17, "grad_norm": 1.5019393678384767, "learning_rate": 1.9985889859172054e-06, "loss": 0.5419, "step": 2170 }, { "epoch": 1.17, "grad_norm": 1.4798978740025281, "learning_rate": 1.9964007925056376e-06, "loss": 0.5523, "step": 2171 }, { "epoch": 1.17, "grad_norm": 1.532669827406204, "learning_rate": 1.994213001150508e-06, "loss": 0.5419, "step": 2172 }, { "epoch": 1.17, "grad_norm": 1.5408234677863264, "learning_rate": 1.992025613598476e-06, "loss": 0.5393, "step": 2173 }, { "epoch": 1.17, "grad_norm": 1.4892528167179608, "learning_rate": 1.9898386315958754e-06, "loss": 0.5516, "step": 2174 }, { "epoch": 1.17, "grad_norm": 1.5290906225031953, "learning_rate": 1.9876520568887207e-06, "loss": 0.5597, "step": 2175 }, { "epoch": 1.18, "grad_norm": 1.5410172507546094, "learning_rate": 1.9854658912226963e-06, "loss": 0.5356, "step": 2176 }, { "epoch": 1.18, "grad_norm": 1.5187930157451695, "learning_rate": 1.983280136343164e-06, "loss": 0.551, "step": 2177 }, { "epoch": 1.18, "grad_norm": 1.5077161492750608, "learning_rate": 1.981094793995155e-06, "loss": 0.5338, "step": 2178 }, { "epoch": 1.18, "grad_norm": 1.5247541474913893, "learning_rate": 1.9789098659233717e-06, "loss": 0.5483, "step": 2179 }, { "epoch": 1.18, "grad_norm": 1.4987307468290174, "learning_rate": 1.976725353872187e-06, "loss": 0.5474, "step": 2180 }, { "epoch": 1.18, "grad_norm": 1.5169709068649875, "learning_rate": 1.974541259585641e-06, "loss": 0.5474, "step": 2181 }, { "epoch": 1.18, "grad_norm": 1.5279141947241628, "learning_rate": 1.9723575848074386e-06, "loss": 0.554, "step": 2182 }, { "epoch": 1.18, "grad_norm": 1.5050282724655442, "learning_rate": 1.9701743312809536e-06, "loss": 0.5455, "step": 2183 }, { "epoch": 1.18, "grad_norm": 1.6005844792045207, "learning_rate": 1.9679915007492194e-06, "loss": 0.5565, "step": 2184 }, { "epoch": 1.18, "grad_norm": 1.538288630493282, "learning_rate": 1.965809094954935e-06, "loss": 0.5609, "step": 2185 }, { "epoch": 1.18, "grad_norm": 1.528022561926755, "learning_rate": 1.963627115640457e-06, "loss": 0.5263, "step": 2186 }, { "epoch": 1.18, "grad_norm": 1.4719048497565412, "learning_rate": 1.9614455645478047e-06, "loss": 0.5232, "step": 2187 }, { "epoch": 1.18, "grad_norm": 1.4838868635678315, "learning_rate": 1.959264443418654e-06, "loss": 0.5421, "step": 2188 }, { "epoch": 1.18, "grad_norm": 1.5592127603302346, "learning_rate": 1.9570837539943367e-06, "loss": 0.5713, "step": 2189 }, { "epoch": 1.18, "grad_norm": 1.552686200304658, "learning_rate": 1.9549034980158403e-06, "loss": 0.5534, "step": 2190 }, { "epoch": 1.18, "grad_norm": 1.5146061604363048, "learning_rate": 1.9527236772238096e-06, "loss": 0.5398, "step": 2191 }, { "epoch": 1.18, "grad_norm": 1.4785569161503065, "learning_rate": 1.950544293358537e-06, "loss": 0.5134, "step": 2192 }, { "epoch": 1.18, "grad_norm": 1.501035396996746, "learning_rate": 1.9483653481599697e-06, "loss": 0.5516, "step": 2193 }, { "epoch": 1.18, "grad_norm": 1.5270190371337842, "learning_rate": 1.9461868433677016e-06, "loss": 0.5407, "step": 2194 }, { "epoch": 1.19, "grad_norm": 1.538660353720542, "learning_rate": 1.944008780720978e-06, "loss": 0.5691, "step": 2195 }, { "epoch": 1.19, "grad_norm": 1.4593066715670096, "learning_rate": 1.9418311619586897e-06, "loss": 0.5241, "step": 2196 }, { "epoch": 1.19, "grad_norm": 1.5054809583705335, "learning_rate": 1.9396539888193716e-06, "loss": 0.5317, "step": 2197 }, { "epoch": 1.19, "grad_norm": 1.5388337797706226, "learning_rate": 1.9374772630412063e-06, "loss": 0.5372, "step": 2198 }, { "epoch": 1.19, "grad_norm": 1.486051173096881, "learning_rate": 1.935300986362018e-06, "loss": 0.535, "step": 2199 }, { "epoch": 1.19, "grad_norm": 1.454667962971885, "learning_rate": 1.93312516051927e-06, "loss": 0.5235, "step": 2200 }, { "epoch": 1.19, "grad_norm": 1.4572138604291673, "learning_rate": 1.930949787250069e-06, "loss": 0.5313, "step": 2201 }, { "epoch": 1.19, "grad_norm": 1.55540566338715, "learning_rate": 1.9287748682911582e-06, "loss": 0.5711, "step": 2202 }, { "epoch": 1.19, "grad_norm": 1.4996646280517256, "learning_rate": 1.9266004053789176e-06, "loss": 0.5487, "step": 2203 }, { "epoch": 1.19, "grad_norm": 1.527150399836431, "learning_rate": 1.9244264002493667e-06, "loss": 0.5521, "step": 2204 }, { "epoch": 1.19, "grad_norm": 1.5525899409821564, "learning_rate": 1.9222528546381543e-06, "loss": 0.5391, "step": 2205 }, { "epoch": 1.19, "grad_norm": 1.5257682620267576, "learning_rate": 1.9200797702805675e-06, "loss": 0.5508, "step": 2206 }, { "epoch": 1.19, "grad_norm": 1.5734156443141107, "learning_rate": 1.9179071489115218e-06, "loss": 0.535, "step": 2207 }, { "epoch": 1.19, "grad_norm": 1.5172693221763864, "learning_rate": 1.9157349922655648e-06, "loss": 0.537, "step": 2208 }, { "epoch": 1.19, "grad_norm": 1.6379593751338555, "learning_rate": 1.91356330207687e-06, "loss": 0.5465, "step": 2209 }, { "epoch": 1.19, "grad_norm": 1.5011952614249704, "learning_rate": 1.911392080079244e-06, "loss": 0.5187, "step": 2210 }, { "epoch": 1.19, "grad_norm": 1.5785397349938042, "learning_rate": 1.909221328006114e-06, "loss": 0.5527, "step": 2211 }, { "epoch": 1.19, "grad_norm": 1.4437589966867592, "learning_rate": 1.9070510475905358e-06, "loss": 0.5263, "step": 2212 }, { "epoch": 1.2, "grad_norm": 1.5082153797260482, "learning_rate": 1.9048812405651854e-06, "loss": 0.5322, "step": 2213 }, { "epoch": 1.2, "grad_norm": 1.5434713623693874, "learning_rate": 1.9027119086623647e-06, "loss": 0.5411, "step": 2214 }, { "epoch": 1.2, "grad_norm": 1.5266338598464213, "learning_rate": 1.9005430536139946e-06, "loss": 0.5358, "step": 2215 }, { "epoch": 1.2, "grad_norm": 1.5624037872924466, "learning_rate": 1.898374677151614e-06, "loss": 0.5604, "step": 2216 }, { "epoch": 1.2, "grad_norm": 1.4925298693212834, "learning_rate": 1.8962067810063806e-06, "loss": 0.5416, "step": 2217 }, { "epoch": 1.2, "grad_norm": 1.5801624447444471, "learning_rate": 1.8940393669090695e-06, "loss": 0.5325, "step": 2218 }, { "epoch": 1.2, "grad_norm": 1.494330706092874, "learning_rate": 1.8918724365900692e-06, "loss": 0.537, "step": 2219 }, { "epoch": 1.2, "grad_norm": 1.4895195455434531, "learning_rate": 1.8897059917793844e-06, "loss": 0.5545, "step": 2220 }, { "epoch": 1.2, "grad_norm": 1.4945695941820565, "learning_rate": 1.887540034206629e-06, "loss": 0.5506, "step": 2221 }, { "epoch": 1.2, "grad_norm": 1.4867763100096434, "learning_rate": 1.8853745656010314e-06, "loss": 0.5213, "step": 2222 }, { "epoch": 1.2, "grad_norm": 1.4566460098822356, "learning_rate": 1.8832095876914268e-06, "loss": 0.5109, "step": 2223 }, { "epoch": 1.2, "grad_norm": 1.5579334350004024, "learning_rate": 1.8810451022062612e-06, "loss": 0.5238, "step": 2224 }, { "epoch": 1.2, "grad_norm": 1.5424912743729768, "learning_rate": 1.8788811108735838e-06, "loss": 0.5603, "step": 2225 }, { "epoch": 1.2, "grad_norm": 1.5610502050504236, "learning_rate": 1.8767176154210537e-06, "loss": 0.5222, "step": 2226 }, { "epoch": 1.2, "grad_norm": 1.4806915669972536, "learning_rate": 1.8745546175759301e-06, "loss": 0.5456, "step": 2227 }, { "epoch": 1.2, "grad_norm": 1.5382885859737223, "learning_rate": 1.8723921190650773e-06, "loss": 0.5279, "step": 2228 }, { "epoch": 1.2, "grad_norm": 1.612727243806329, "learning_rate": 1.8702301216149616e-06, "loss": 0.5527, "step": 2229 }, { "epoch": 1.2, "grad_norm": 1.6406447009022165, "learning_rate": 1.8680686269516469e-06, "loss": 0.5314, "step": 2230 }, { "epoch": 1.2, "grad_norm": 1.4722703574958222, "learning_rate": 1.8659076368007968e-06, "loss": 0.5478, "step": 2231 }, { "epoch": 1.21, "grad_norm": 1.5370597552421599, "learning_rate": 1.8637471528876727e-06, "loss": 0.5359, "step": 2232 }, { "epoch": 1.21, "grad_norm": 1.550548590423159, "learning_rate": 1.8615871769371307e-06, "loss": 0.551, "step": 2233 }, { "epoch": 1.21, "grad_norm": 1.5016457194892483, "learning_rate": 1.8594277106736225e-06, "loss": 0.5251, "step": 2234 }, { "epoch": 1.21, "grad_norm": 1.5264181241132118, "learning_rate": 1.8572687558211923e-06, "loss": 0.562, "step": 2235 }, { "epoch": 1.21, "grad_norm": 1.5673739140064529, "learning_rate": 1.8551103141034748e-06, "loss": 0.5355, "step": 2236 }, { "epoch": 1.21, "grad_norm": 1.533271019510768, "learning_rate": 1.852952387243698e-06, "loss": 0.5436, "step": 2237 }, { "epoch": 1.21, "grad_norm": 1.509868294100292, "learning_rate": 1.850794976964677e-06, "loss": 0.543, "step": 2238 }, { "epoch": 1.21, "grad_norm": 1.5259730331736403, "learning_rate": 1.8486380849888136e-06, "loss": 0.5504, "step": 2239 }, { "epoch": 1.21, "grad_norm": 1.5029237011698158, "learning_rate": 1.846481713038098e-06, "loss": 0.5582, "step": 2240 }, { "epoch": 1.21, "grad_norm": 1.5109298875000903, "learning_rate": 1.8443258628341026e-06, "loss": 0.5379, "step": 2241 }, { "epoch": 1.21, "grad_norm": 1.5099205349824192, "learning_rate": 1.842170536097986e-06, "loss": 0.5292, "step": 2242 }, { "epoch": 1.21, "grad_norm": 1.4781397850563323, "learning_rate": 1.840015734550487e-06, "loss": 0.5276, "step": 2243 }, { "epoch": 1.21, "grad_norm": 1.547159968244762, "learning_rate": 1.837861459911925e-06, "loss": 0.5517, "step": 2244 }, { "epoch": 1.21, "grad_norm": 1.492726163886711, "learning_rate": 1.8357077139022008e-06, "loss": 0.5434, "step": 2245 }, { "epoch": 1.21, "grad_norm": 1.481411410639737, "learning_rate": 1.8335544982407916e-06, "loss": 0.5409, "step": 2246 }, { "epoch": 1.21, "grad_norm": 1.5026734533278525, "learning_rate": 1.8314018146467505e-06, "loss": 0.5493, "step": 2247 }, { "epoch": 1.21, "grad_norm": 1.5488816475007827, "learning_rate": 1.8292496648387076e-06, "loss": 0.5339, "step": 2248 }, { "epoch": 1.21, "grad_norm": 1.5458721043345767, "learning_rate": 1.8270980505348657e-06, "loss": 0.5483, "step": 2249 }, { "epoch": 1.22, "grad_norm": 1.4877577426345046, "learning_rate": 1.8249469734529995e-06, "loss": 0.553, "step": 2250 }, { "epoch": 1.22, "grad_norm": 1.5228802561474426, "learning_rate": 1.8227964353104567e-06, "loss": 0.5412, "step": 2251 }, { "epoch": 1.22, "grad_norm": 1.5195570270497647, "learning_rate": 1.820646437824152e-06, "loss": 0.5448, "step": 2252 }, { "epoch": 1.22, "grad_norm": 1.5355179309515032, "learning_rate": 1.818496982710572e-06, "loss": 0.5384, "step": 2253 }, { "epoch": 1.22, "grad_norm": 1.5557340788315108, "learning_rate": 1.8163480716857681e-06, "loss": 0.5539, "step": 2254 }, { "epoch": 1.22, "grad_norm": 1.5717122917891984, "learning_rate": 1.8141997064653566e-06, "loss": 0.5558, "step": 2255 }, { "epoch": 1.22, "grad_norm": 1.5447848184497632, "learning_rate": 1.81205188876452e-06, "loss": 0.5386, "step": 2256 }, { "epoch": 1.22, "grad_norm": 1.5333412907918922, "learning_rate": 1.8099046202980026e-06, "loss": 0.545, "step": 2257 }, { "epoch": 1.22, "grad_norm": 1.5356437939885186, "learning_rate": 1.8077579027801096e-06, "loss": 0.5388, "step": 2258 }, { "epoch": 1.22, "grad_norm": 1.5491665469137839, "learning_rate": 1.8056117379247078e-06, "loss": 0.5704, "step": 2259 }, { "epoch": 1.22, "grad_norm": 1.544907936457534, "learning_rate": 1.8034661274452209e-06, "loss": 0.5394, "step": 2260 }, { "epoch": 1.22, "grad_norm": 1.54217763310229, "learning_rate": 1.8013210730546327e-06, "loss": 0.5631, "step": 2261 }, { "epoch": 1.22, "grad_norm": 1.6032469719561566, "learning_rate": 1.7991765764654813e-06, "loss": 0.5519, "step": 2262 }, { "epoch": 1.22, "grad_norm": 1.530854132338099, "learning_rate": 1.797032639389859e-06, "loss": 0.5552, "step": 2263 }, { "epoch": 1.22, "grad_norm": 1.498132311570054, "learning_rate": 1.7948892635394123e-06, "loss": 0.5376, "step": 2264 }, { "epoch": 1.22, "grad_norm": 1.5581817393647588, "learning_rate": 1.7927464506253394e-06, "loss": 0.5641, "step": 2265 }, { "epoch": 1.22, "grad_norm": 1.512690472478597, "learning_rate": 1.7906042023583886e-06, "loss": 0.5397, "step": 2266 }, { "epoch": 1.22, "grad_norm": 1.5401466313183434, "learning_rate": 1.7884625204488581e-06, "loss": 0.5478, "step": 2267 }, { "epoch": 1.22, "grad_norm": 1.4972154850066988, "learning_rate": 1.7863214066065951e-06, "loss": 0.5254, "step": 2268 }, { "epoch": 1.23, "grad_norm": 1.5041934460845519, "learning_rate": 1.7841808625409907e-06, "loss": 0.5683, "step": 2269 }, { "epoch": 1.23, "grad_norm": 1.5083417706611069, "learning_rate": 1.782040889960982e-06, "loss": 0.5528, "step": 2270 }, { "epoch": 1.23, "grad_norm": 1.4625588646096774, "learning_rate": 1.779901490575051e-06, "loss": 0.5548, "step": 2271 }, { "epoch": 1.23, "grad_norm": 1.5564801192992943, "learning_rate": 1.77776266609122e-06, "loss": 0.531, "step": 2272 }, { "epoch": 1.23, "grad_norm": 1.5584236918964836, "learning_rate": 1.7756244182170552e-06, "loss": 0.5318, "step": 2273 }, { "epoch": 1.23, "grad_norm": 1.5543926821530805, "learning_rate": 1.7734867486596596e-06, "loss": 0.5368, "step": 2274 }, { "epoch": 1.23, "grad_norm": 1.4523561322758791, "learning_rate": 1.771349659125675e-06, "loss": 0.5365, "step": 2275 }, { "epoch": 1.23, "grad_norm": 1.5818089624070832, "learning_rate": 1.7692131513212835e-06, "loss": 0.5494, "step": 2276 }, { "epoch": 1.23, "grad_norm": 1.460215743851457, "learning_rate": 1.767077226952198e-06, "loss": 0.5341, "step": 2277 }, { "epoch": 1.23, "grad_norm": 1.4677665324029585, "learning_rate": 1.764941887723668e-06, "loss": 0.5415, "step": 2278 }, { "epoch": 1.23, "grad_norm": 1.5109279556361668, "learning_rate": 1.762807135340476e-06, "loss": 0.5112, "step": 2279 }, { "epoch": 1.23, "grad_norm": 1.5659950973787837, "learning_rate": 1.7606729715069349e-06, "loss": 0.5452, "step": 2280 }, { "epoch": 1.23, "grad_norm": 1.520390437155426, "learning_rate": 1.7585393979268892e-06, "loss": 0.5615, "step": 2281 }, { "epoch": 1.23, "grad_norm": 1.4866758784894962, "learning_rate": 1.7564064163037109e-06, "loss": 0.5299, "step": 2282 }, { "epoch": 1.23, "grad_norm": 1.5590562941483619, "learning_rate": 1.7542740283402981e-06, "loss": 0.5539, "step": 2283 }, { "epoch": 1.23, "grad_norm": 1.566457424523215, "learning_rate": 1.7521422357390794e-06, "loss": 0.5621, "step": 2284 }, { "epoch": 1.23, "grad_norm": 1.5587544128028594, "learning_rate": 1.7500110402020047e-06, "loss": 0.5396, "step": 2285 }, { "epoch": 1.23, "grad_norm": 1.4770009417890786, "learning_rate": 1.7478804434305466e-06, "loss": 0.5289, "step": 2286 }, { "epoch": 1.24, "grad_norm": 1.5242511115148387, "learning_rate": 1.7457504471257024e-06, "loss": 0.5545, "step": 2287 }, { "epoch": 1.24, "grad_norm": 1.4696445479716276, "learning_rate": 1.7436210529879871e-06, "loss": 0.5232, "step": 2288 }, { "epoch": 1.24, "grad_norm": 1.4518270604674468, "learning_rate": 1.741492262717438e-06, "loss": 0.5442, "step": 2289 }, { "epoch": 1.24, "grad_norm": 1.527071843325399, "learning_rate": 1.7393640780136073e-06, "loss": 0.5311, "step": 2290 }, { "epoch": 1.24, "grad_norm": 1.5576585817458315, "learning_rate": 1.7372365005755647e-06, "loss": 0.5403, "step": 2291 }, { "epoch": 1.24, "grad_norm": 1.527905907084912, "learning_rate": 1.7351095321018974e-06, "loss": 0.5575, "step": 2292 }, { "epoch": 1.24, "grad_norm": 1.6094932969865872, "learning_rate": 1.7329831742907035e-06, "loss": 0.5631, "step": 2293 }, { "epoch": 1.24, "grad_norm": 1.5006694040687352, "learning_rate": 1.7308574288395943e-06, "loss": 0.5613, "step": 2294 }, { "epoch": 1.24, "grad_norm": 1.5546232523044952, "learning_rate": 1.7287322974456933e-06, "loss": 0.5318, "step": 2295 }, { "epoch": 1.24, "grad_norm": 1.571256867552864, "learning_rate": 1.7266077818056326e-06, "loss": 0.5674, "step": 2296 }, { "epoch": 1.24, "grad_norm": 1.5625258725999625, "learning_rate": 1.7244838836155522e-06, "loss": 0.5628, "step": 2297 }, { "epoch": 1.24, "grad_norm": 1.4541790737639217, "learning_rate": 1.7223606045711006e-06, "loss": 0.531, "step": 2298 }, { "epoch": 1.24, "grad_norm": 1.5213672380789736, "learning_rate": 1.7202379463674322e-06, "loss": 0.5444, "step": 2299 }, { "epoch": 1.24, "grad_norm": 1.5399612677893237, "learning_rate": 1.7181159106992037e-06, "loss": 0.5481, "step": 2300 }, { "epoch": 1.24, "grad_norm": 1.5524902746542428, "learning_rate": 1.7159944992605774e-06, "loss": 0.5589, "step": 2301 }, { "epoch": 1.24, "grad_norm": 1.4578605922407053, "learning_rate": 1.7138737137452143e-06, "loss": 0.5382, "step": 2302 }, { "epoch": 1.24, "grad_norm": 1.5140361936609534, "learning_rate": 1.711753555846279e-06, "loss": 0.541, "step": 2303 }, { "epoch": 1.24, "grad_norm": 1.5441864842628068, "learning_rate": 1.7096340272564318e-06, "loss": 0.5214, "step": 2304 }, { "epoch": 1.24, "grad_norm": 1.5094137064748558, "learning_rate": 1.7075151296678324e-06, "loss": 0.5228, "step": 2305 }, { "epoch": 1.25, "grad_norm": 1.4821207663969738, "learning_rate": 1.7053968647721358e-06, "loss": 0.5474, "step": 2306 }, { "epoch": 1.25, "grad_norm": 1.5282234142147177, "learning_rate": 1.7032792342604947e-06, "loss": 0.532, "step": 2307 }, { "epoch": 1.25, "grad_norm": 1.5066754804624145, "learning_rate": 1.7011622398235511e-06, "loss": 0.5374, "step": 2308 }, { "epoch": 1.25, "grad_norm": 1.9969679629386043, "learning_rate": 1.6990458831514423e-06, "loss": 0.552, "step": 2309 }, { "epoch": 1.25, "grad_norm": 1.5452450936418518, "learning_rate": 1.6969301659337944e-06, "loss": 0.5393, "step": 2310 }, { "epoch": 1.25, "grad_norm": 1.516561390351429, "learning_rate": 1.694815089859724e-06, "loss": 0.5373, "step": 2311 }, { "epoch": 1.25, "grad_norm": 1.5373650950965705, "learning_rate": 1.6927006566178366e-06, "loss": 0.5515, "step": 2312 }, { "epoch": 1.25, "grad_norm": 1.5029056960142015, "learning_rate": 1.6905868678962225e-06, "loss": 0.5493, "step": 2313 }, { "epoch": 1.25, "grad_norm": 1.656711614223389, "learning_rate": 1.6884737253824586e-06, "loss": 0.5547, "step": 2314 }, { "epoch": 1.25, "grad_norm": 1.4694792514531625, "learning_rate": 1.6863612307636074e-06, "loss": 0.5203, "step": 2315 }, { "epoch": 1.25, "grad_norm": 1.5173982279506542, "learning_rate": 1.684249385726211e-06, "loss": 0.5374, "step": 2316 }, { "epoch": 1.25, "grad_norm": 1.4946674179395056, "learning_rate": 1.6821381919562952e-06, "loss": 0.5133, "step": 2317 }, { "epoch": 1.25, "grad_norm": 1.4835485737784835, "learning_rate": 1.6800276511393653e-06, "loss": 0.535, "step": 2318 }, { "epoch": 1.25, "grad_norm": 1.5555109570195813, "learning_rate": 1.677917764960404e-06, "loss": 0.5645, "step": 2319 }, { "epoch": 1.25, "grad_norm": 1.5475617785438274, "learning_rate": 1.6758085351038738e-06, "loss": 0.5454, "step": 2320 }, { "epoch": 1.25, "grad_norm": 1.605732324315125, "learning_rate": 1.6736999632537113e-06, "loss": 0.5539, "step": 2321 }, { "epoch": 1.25, "grad_norm": 1.5228866289106016, "learning_rate": 1.6715920510933277e-06, "loss": 0.5486, "step": 2322 }, { "epoch": 1.25, "grad_norm": 1.4829338691636644, "learning_rate": 1.6694848003056097e-06, "loss": 0.5433, "step": 2323 }, { "epoch": 1.26, "grad_norm": 1.4601595736917636, "learning_rate": 1.667378212572914e-06, "loss": 0.5467, "step": 2324 }, { "epoch": 1.26, "grad_norm": 1.5224558349947386, "learning_rate": 1.6652722895770676e-06, "loss": 0.5509, "step": 2325 }, { "epoch": 1.26, "grad_norm": 1.5016330001755167, "learning_rate": 1.6631670329993687e-06, "loss": 0.5216, "step": 2326 }, { "epoch": 1.26, "grad_norm": 1.4895028385422553, "learning_rate": 1.6610624445205814e-06, "loss": 0.5427, "step": 2327 }, { "epoch": 1.26, "grad_norm": 1.468033963296124, "learning_rate": 1.6589585258209383e-06, "loss": 0.5198, "step": 2328 }, { "epoch": 1.26, "grad_norm": 1.4907609137170859, "learning_rate": 1.6568552785801364e-06, "loss": 0.5137, "step": 2329 }, { "epoch": 1.26, "grad_norm": 1.5109539552677878, "learning_rate": 1.6547527044773348e-06, "loss": 0.5205, "step": 2330 }, { "epoch": 1.26, "grad_norm": 1.500544009190588, "learning_rate": 1.6526508051911588e-06, "loss": 0.5379, "step": 2331 }, { "epoch": 1.26, "grad_norm": 1.483543272615099, "learning_rate": 1.650549582399693e-06, "loss": 0.4975, "step": 2332 }, { "epoch": 1.26, "grad_norm": 1.476912629806613, "learning_rate": 1.6484490377804819e-06, "loss": 0.5352, "step": 2333 }, { "epoch": 1.26, "grad_norm": 1.5384011419245094, "learning_rate": 1.6463491730105282e-06, "loss": 0.5499, "step": 2334 }, { "epoch": 1.26, "grad_norm": 1.511666449431208, "learning_rate": 1.6442499897662927e-06, "loss": 0.5677, "step": 2335 }, { "epoch": 1.26, "grad_norm": 1.5785414647759748, "learning_rate": 1.6421514897236918e-06, "loss": 0.5657, "step": 2336 }, { "epoch": 1.26, "grad_norm": 1.5550540437569056, "learning_rate": 1.6400536745580955e-06, "loss": 0.5644, "step": 2337 }, { "epoch": 1.26, "grad_norm": 1.518189834001723, "learning_rate": 1.6379565459443298e-06, "loss": 0.5334, "step": 2338 }, { "epoch": 1.26, "grad_norm": 1.509920059964092, "learning_rate": 1.6358601055566689e-06, "loss": 0.5172, "step": 2339 }, { "epoch": 1.26, "grad_norm": 1.4894415735103457, "learning_rate": 1.6337643550688408e-06, "loss": 0.5447, "step": 2340 }, { "epoch": 1.26, "grad_norm": 1.5724523717838603, "learning_rate": 1.6316692961540198e-06, "loss": 0.5484, "step": 2341 }, { "epoch": 1.26, "grad_norm": 1.5798325724974465, "learning_rate": 1.6295749304848308e-06, "loss": 0.5448, "step": 2342 }, { "epoch": 1.27, "grad_norm": 1.5831262193173443, "learning_rate": 1.627481259733343e-06, "loss": 0.5503, "step": 2343 }, { "epoch": 1.27, "grad_norm": 1.4753379764782275, "learning_rate": 1.6253882855710718e-06, "loss": 0.5264, "step": 2344 }, { "epoch": 1.27, "grad_norm": 1.5230047356067242, "learning_rate": 1.6232960096689763e-06, "loss": 0.5443, "step": 2345 }, { "epoch": 1.27, "grad_norm": 1.5264289897631935, "learning_rate": 1.6212044336974598e-06, "loss": 0.5388, "step": 2346 }, { "epoch": 1.27, "grad_norm": 1.563788362268928, "learning_rate": 1.6191135593263633e-06, "loss": 0.5639, "step": 2347 }, { "epoch": 1.27, "grad_norm": 1.5281697198604824, "learning_rate": 1.6170233882249708e-06, "loss": 0.5515, "step": 2348 }, { "epoch": 1.27, "grad_norm": 1.5398061757086856, "learning_rate": 1.614933922062003e-06, "loss": 0.5531, "step": 2349 }, { "epoch": 1.27, "grad_norm": 1.532180845689891, "learning_rate": 1.6128451625056193e-06, "loss": 0.5328, "step": 2350 }, { "epoch": 1.27, "grad_norm": 1.4954056307170243, "learning_rate": 1.6107571112234135e-06, "loss": 0.5257, "step": 2351 }, { "epoch": 1.27, "grad_norm": 1.5103354432574148, "learning_rate": 1.6086697698824144e-06, "loss": 0.5439, "step": 2352 }, { "epoch": 1.27, "grad_norm": 1.4720696841561558, "learning_rate": 1.6065831401490833e-06, "loss": 0.5303, "step": 2353 }, { "epoch": 1.27, "grad_norm": 1.5157061416067863, "learning_rate": 1.6044972236893167e-06, "loss": 0.547, "step": 2354 }, { "epoch": 1.27, "grad_norm": 1.4930754745555064, "learning_rate": 1.6024120221684373e-06, "loss": 0.5429, "step": 2355 }, { "epoch": 1.27, "grad_norm": 1.5341185740810448, "learning_rate": 1.6003275372511997e-06, "loss": 0.5701, "step": 2356 }, { "epoch": 1.27, "grad_norm": 1.4682040295119185, "learning_rate": 1.5982437706017855e-06, "loss": 0.5333, "step": 2357 }, { "epoch": 1.27, "grad_norm": 1.4929022310265534, "learning_rate": 1.5961607238838022e-06, "loss": 0.5419, "step": 2358 }, { "epoch": 1.27, "grad_norm": 1.5585944083766745, "learning_rate": 1.5940783987602846e-06, "loss": 0.5554, "step": 2359 }, { "epoch": 1.27, "grad_norm": 1.5626042832236096, "learning_rate": 1.5919967968936884e-06, "loss": 0.5499, "step": 2360 }, { "epoch": 1.28, "grad_norm": 1.5279653547200678, "learning_rate": 1.589915919945894e-06, "loss": 0.552, "step": 2361 }, { "epoch": 1.28, "grad_norm": 1.4983841044656239, "learning_rate": 1.587835769578205e-06, "loss": 0.5266, "step": 2362 }, { "epoch": 1.28, "grad_norm": 1.5087211218746428, "learning_rate": 1.5857563474513392e-06, "loss": 0.5436, "step": 2363 }, { "epoch": 1.28, "grad_norm": 1.4981589301022704, "learning_rate": 1.5836776552254386e-06, "loss": 0.5577, "step": 2364 }, { "epoch": 1.28, "grad_norm": 1.5105475882444923, "learning_rate": 1.5815996945600594e-06, "loss": 0.5389, "step": 2365 }, { "epoch": 1.28, "grad_norm": 1.5360064475170376, "learning_rate": 1.579522467114174e-06, "loss": 0.5326, "step": 2366 }, { "epoch": 1.28, "grad_norm": 1.4996603656376628, "learning_rate": 1.5774459745461711e-06, "loss": 0.5359, "step": 2367 }, { "epoch": 1.28, "grad_norm": 1.4742238770827798, "learning_rate": 1.5753702185138504e-06, "loss": 0.524, "step": 2368 }, { "epoch": 1.28, "grad_norm": 1.5093747281681265, "learning_rate": 1.5732952006744254e-06, "loss": 0.5314, "step": 2369 }, { "epoch": 1.28, "grad_norm": 1.583135400133878, "learning_rate": 1.5712209226845201e-06, "loss": 0.5576, "step": 2370 }, { "epoch": 1.28, "grad_norm": 1.5387507266321248, "learning_rate": 1.569147386200167e-06, "loss": 0.5323, "step": 2371 }, { "epoch": 1.28, "grad_norm": 1.5101671639725034, "learning_rate": 1.567074592876806e-06, "loss": 0.5683, "step": 2372 }, { "epoch": 1.28, "grad_norm": 1.5221087175618324, "learning_rate": 1.565002544369286e-06, "loss": 0.5615, "step": 2373 }, { "epoch": 1.28, "grad_norm": 1.5366772469577907, "learning_rate": 1.5629312423318588e-06, "loss": 0.5655, "step": 2374 }, { "epoch": 1.28, "grad_norm": 1.5019553956145781, "learning_rate": 1.5608606884181821e-06, "loss": 0.5384, "step": 2375 }, { "epoch": 1.28, "grad_norm": 1.442788796582326, "learning_rate": 1.5587908842813142e-06, "loss": 0.5109, "step": 2376 }, { "epoch": 1.28, "grad_norm": 1.498300863153612, "learning_rate": 1.5567218315737182e-06, "loss": 0.5286, "step": 2377 }, { "epoch": 1.28, "grad_norm": 1.5128140751670678, "learning_rate": 1.5546535319472545e-06, "loss": 0.5661, "step": 2378 }, { "epoch": 1.28, "grad_norm": 1.6009499300039702, "learning_rate": 1.5525859870531823e-06, "loss": 0.5644, "step": 2379 }, { "epoch": 1.29, "grad_norm": 1.5531643770533863, "learning_rate": 1.5505191985421595e-06, "loss": 0.5653, "step": 2380 }, { "epoch": 1.29, "grad_norm": 1.4656802124366621, "learning_rate": 1.5484531680642399e-06, "loss": 0.5307, "step": 2381 }, { "epoch": 1.29, "grad_norm": 1.4906824337233568, "learning_rate": 1.5463878972688707e-06, "loss": 0.5321, "step": 2382 }, { "epoch": 1.29, "grad_norm": 1.4417007859403959, "learning_rate": 1.544323387804895e-06, "loss": 0.5299, "step": 2383 }, { "epoch": 1.29, "grad_norm": 1.5459107928490279, "learning_rate": 1.542259641320545e-06, "loss": 0.5272, "step": 2384 }, { "epoch": 1.29, "grad_norm": 1.5202339859437475, "learning_rate": 1.5401966594634483e-06, "loss": 0.5277, "step": 2385 }, { "epoch": 1.29, "grad_norm": 1.536265340637199, "learning_rate": 1.5381344438806168e-06, "loss": 0.5664, "step": 2386 }, { "epoch": 1.29, "grad_norm": 1.487922158917052, "learning_rate": 1.5360729962184543e-06, "loss": 0.5294, "step": 2387 }, { "epoch": 1.29, "grad_norm": 1.5803360981073296, "learning_rate": 1.5340123181227495e-06, "loss": 0.5487, "step": 2388 }, { "epoch": 1.29, "grad_norm": 1.5165584761415758, "learning_rate": 1.531952411238679e-06, "loss": 0.5523, "step": 2389 }, { "epoch": 1.29, "grad_norm": 1.4797133090936658, "learning_rate": 1.5298932772108013e-06, "loss": 0.5625, "step": 2390 }, { "epoch": 1.29, "grad_norm": 1.494147846574264, "learning_rate": 1.527834917683058e-06, "loss": 0.5486, "step": 2391 }, { "epoch": 1.29, "grad_norm": 1.474447567959196, "learning_rate": 1.525777334298774e-06, "loss": 0.5344, "step": 2392 }, { "epoch": 1.29, "grad_norm": 1.5676284554951678, "learning_rate": 1.5237205287006543e-06, "loss": 0.5482, "step": 2393 }, { "epoch": 1.29, "grad_norm": 1.550491234102215, "learning_rate": 1.5216645025307813e-06, "loss": 0.5459, "step": 2394 }, { "epoch": 1.29, "grad_norm": 1.4828834342423607, "learning_rate": 1.5196092574306176e-06, "loss": 0.5268, "step": 2395 }, { "epoch": 1.29, "grad_norm": 1.636615055886003, "learning_rate": 1.517554795040999e-06, "loss": 0.523, "step": 2396 }, { "epoch": 1.29, "grad_norm": 1.5741166664667947, "learning_rate": 1.5155011170021399e-06, "loss": 0.5668, "step": 2397 }, { "epoch": 1.29, "grad_norm": 1.4971870358556165, "learning_rate": 1.5134482249536261e-06, "loss": 0.5458, "step": 2398 }, { "epoch": 1.3, "grad_norm": 1.516745426012132, "learning_rate": 1.5113961205344161e-06, "loss": 0.5256, "step": 2399 }, { "epoch": 1.3, "grad_norm": 1.5217717281470797, "learning_rate": 1.5093448053828402e-06, "loss": 0.5594, "step": 2400 }, { "epoch": 1.3, "grad_norm": 1.501110031744736, "learning_rate": 1.5072942811365998e-06, "loss": 0.542, "step": 2401 }, { "epoch": 1.3, "grad_norm": 1.5343364513751112, "learning_rate": 1.5052445494327622e-06, "loss": 0.5464, "step": 2402 }, { "epoch": 1.3, "grad_norm": 1.4792943246716788, "learning_rate": 1.503195611907764e-06, "loss": 0.5218, "step": 2403 }, { "epoch": 1.3, "grad_norm": 1.5427080158326307, "learning_rate": 1.5011474701974071e-06, "loss": 0.5426, "step": 2404 }, { "epoch": 1.3, "grad_norm": 1.5178418599251564, "learning_rate": 1.4991001259368573e-06, "loss": 0.5487, "step": 2405 }, { "epoch": 1.3, "grad_norm": 1.5844037395938462, "learning_rate": 1.4970535807606453e-06, "loss": 0.5211, "step": 2406 }, { "epoch": 1.3, "grad_norm": 1.518280476702064, "learning_rate": 1.4950078363026617e-06, "loss": 0.5535, "step": 2407 }, { "epoch": 1.3, "grad_norm": 1.5343085801445755, "learning_rate": 1.4929628941961608e-06, "loss": 0.5403, "step": 2408 }, { "epoch": 1.3, "grad_norm": 1.6212136638651158, "learning_rate": 1.4909187560737542e-06, "loss": 0.5527, "step": 2409 }, { "epoch": 1.3, "grad_norm": 1.5154461711703724, "learning_rate": 1.4888754235674114e-06, "loss": 0.5296, "step": 2410 }, { "epoch": 1.3, "grad_norm": 1.4818592268691386, "learning_rate": 1.4868328983084602e-06, "loss": 0.5161, "step": 2411 }, { "epoch": 1.3, "grad_norm": 1.460191580999705, "learning_rate": 1.4847911819275829e-06, "loss": 0.5239, "step": 2412 }, { "epoch": 1.3, "grad_norm": 1.549811616173647, "learning_rate": 1.4827502760548152e-06, "loss": 0.5665, "step": 2413 }, { "epoch": 1.3, "grad_norm": 1.5672903231949111, "learning_rate": 1.4807101823195486e-06, "loss": 0.5407, "step": 2414 }, { "epoch": 1.3, "grad_norm": 1.598587075675725, "learning_rate": 1.4786709023505224e-06, "loss": 0.5385, "step": 2415 }, { "epoch": 1.3, "grad_norm": 1.5114940079702792, "learning_rate": 1.4766324377758299e-06, "loss": 0.5214, "step": 2416 }, { "epoch": 1.31, "grad_norm": 1.5536471927516866, "learning_rate": 1.4745947902229113e-06, "loss": 0.5164, "step": 2417 }, { "epoch": 1.31, "grad_norm": 1.4982588863801236, "learning_rate": 1.4725579613185549e-06, "loss": 0.548, "step": 2418 }, { "epoch": 1.31, "grad_norm": 1.5093381911065977, "learning_rate": 1.4705219526888948e-06, "loss": 0.5151, "step": 2419 }, { "epoch": 1.31, "grad_norm": 1.5094203848971215, "learning_rate": 1.4684867659594122e-06, "loss": 0.5279, "step": 2420 }, { "epoch": 1.31, "grad_norm": 1.6347677605751176, "learning_rate": 1.4664524027549291e-06, "loss": 0.5577, "step": 2421 }, { "epoch": 1.31, "grad_norm": 1.5407820275046227, "learning_rate": 1.4644188646996132e-06, "loss": 0.527, "step": 2422 }, { "epoch": 1.31, "grad_norm": 1.5591146338555022, "learning_rate": 1.4623861534169704e-06, "loss": 0.5329, "step": 2423 }, { "epoch": 1.31, "grad_norm": 1.568346247369648, "learning_rate": 1.4603542705298493e-06, "loss": 0.5253, "step": 2424 }, { "epoch": 1.31, "grad_norm": 1.5634313987337305, "learning_rate": 1.4583232176604364e-06, "loss": 0.552, "step": 2425 }, { "epoch": 1.31, "grad_norm": 1.5420546632469143, "learning_rate": 1.4562929964302543e-06, "loss": 0.5402, "step": 2426 }, { "epoch": 1.31, "grad_norm": 1.5813940711532377, "learning_rate": 1.4542636084601624e-06, "loss": 0.5321, "step": 2427 }, { "epoch": 1.31, "grad_norm": 1.521953566222143, "learning_rate": 1.4522350553703544e-06, "loss": 0.5489, "step": 2428 }, { "epoch": 1.31, "grad_norm": 1.5296313353125708, "learning_rate": 1.450207338780359e-06, "loss": 0.5269, "step": 2429 }, { "epoch": 1.31, "grad_norm": 1.4662451756883719, "learning_rate": 1.4481804603090358e-06, "loss": 0.527, "step": 2430 }, { "epoch": 1.31, "grad_norm": 1.5537481763873993, "learning_rate": 1.4461544215745737e-06, "loss": 0.5314, "step": 2431 }, { "epoch": 1.31, "grad_norm": 1.5909204698068968, "learning_rate": 1.444129224194496e-06, "loss": 0.5475, "step": 2432 }, { "epoch": 1.31, "grad_norm": 1.5609276456899235, "learning_rate": 1.4421048697856494e-06, "loss": 0.5654, "step": 2433 }, { "epoch": 1.31, "grad_norm": 1.4813151257535067, "learning_rate": 1.4400813599642083e-06, "loss": 0.5434, "step": 2434 }, { "epoch": 1.31, "grad_norm": 1.5207911859802576, "learning_rate": 1.4380586963456766e-06, "loss": 0.5251, "step": 2435 }, { "epoch": 1.32, "grad_norm": 1.5239925667980454, "learning_rate": 1.4360368805448788e-06, "loss": 0.5373, "step": 2436 }, { "epoch": 1.32, "grad_norm": 1.5433840464359676, "learning_rate": 1.4340159141759638e-06, "loss": 0.5321, "step": 2437 }, { "epoch": 1.32, "grad_norm": 1.5413803131135688, "learning_rate": 1.4319957988524008e-06, "loss": 0.5193, "step": 2438 }, { "epoch": 1.32, "grad_norm": 1.6025328393944527, "learning_rate": 1.4299765361869837e-06, "loss": 0.5789, "step": 2439 }, { "epoch": 1.32, "grad_norm": 1.5743843305555143, "learning_rate": 1.4279581277918203e-06, "loss": 0.5505, "step": 2440 }, { "epoch": 1.32, "grad_norm": 1.4457265967241906, "learning_rate": 1.4259405752783413e-06, "loss": 0.5529, "step": 2441 }, { "epoch": 1.32, "grad_norm": 1.4492904044115367, "learning_rate": 1.4239238802572908e-06, "loss": 0.5359, "step": 2442 }, { "epoch": 1.32, "grad_norm": 1.467381942313326, "learning_rate": 1.4219080443387296e-06, "loss": 0.5481, "step": 2443 }, { "epoch": 1.32, "grad_norm": 1.5163753086144085, "learning_rate": 1.4198930691320312e-06, "loss": 0.5204, "step": 2444 }, { "epoch": 1.32, "grad_norm": 1.5266416913490235, "learning_rate": 1.4178789562458847e-06, "loss": 0.5213, "step": 2445 }, { "epoch": 1.32, "grad_norm": 1.5434737839228942, "learning_rate": 1.4158657072882882e-06, "loss": 0.5535, "step": 2446 }, { "epoch": 1.32, "grad_norm": 1.5643177227207934, "learning_rate": 1.4138533238665519e-06, "loss": 0.5518, "step": 2447 }, { "epoch": 1.32, "grad_norm": 1.5550468303311513, "learning_rate": 1.4118418075872936e-06, "loss": 0.5308, "step": 2448 }, { "epoch": 1.32, "grad_norm": 1.4394396258089857, "learning_rate": 1.4098311600564397e-06, "loss": 0.51, "step": 2449 }, { "epoch": 1.32, "grad_norm": 1.491734912652739, "learning_rate": 1.4078213828792212e-06, "loss": 0.5378, "step": 2450 }, { "epoch": 1.32, "grad_norm": 1.5124226695411562, "learning_rate": 1.405812477660178e-06, "loss": 0.5524, "step": 2451 }, { "epoch": 1.32, "grad_norm": 1.4887966524076937, "learning_rate": 1.4038044460031497e-06, "loss": 0.5235, "step": 2452 }, { "epoch": 1.32, "grad_norm": 1.5217898385121553, "learning_rate": 1.4017972895112815e-06, "loss": 0.5273, "step": 2453 }, { "epoch": 1.33, "grad_norm": 1.486114508341056, "learning_rate": 1.3997910097870165e-06, "loss": 0.5099, "step": 2454 }, { "epoch": 1.33, "grad_norm": 1.4769001455271653, "learning_rate": 1.3977856084321031e-06, "loss": 0.5364, "step": 2455 }, { "epoch": 1.33, "grad_norm": 1.5216243992731857, "learning_rate": 1.395781087047583e-06, "loss": 0.5294, "step": 2456 }, { "epoch": 1.33, "grad_norm": 1.4912197611023503, "learning_rate": 1.3937774472337994e-06, "loss": 0.5445, "step": 2457 }, { "epoch": 1.33, "grad_norm": 1.536427111451776, "learning_rate": 1.3917746905903898e-06, "loss": 0.548, "step": 2458 }, { "epoch": 1.33, "grad_norm": 1.5065273794351308, "learning_rate": 1.3897728187162863e-06, "loss": 0.543, "step": 2459 }, { "epoch": 1.33, "grad_norm": 1.5314643112658084, "learning_rate": 1.3877718332097146e-06, "loss": 0.5364, "step": 2460 }, { "epoch": 1.33, "grad_norm": 1.498858579691541, "learning_rate": 1.3857717356681932e-06, "loss": 0.5305, "step": 2461 }, { "epoch": 1.33, "grad_norm": 1.5454845065426241, "learning_rate": 1.3837725276885327e-06, "loss": 0.5518, "step": 2462 }, { "epoch": 1.33, "grad_norm": 1.5013926217700058, "learning_rate": 1.3817742108668333e-06, "loss": 0.5413, "step": 2463 }, { "epoch": 1.33, "grad_norm": 1.5198449546766097, "learning_rate": 1.3797767867984825e-06, "loss": 0.543, "step": 2464 }, { "epoch": 1.33, "grad_norm": 1.6377928244269537, "learning_rate": 1.3777802570781545e-06, "loss": 0.5776, "step": 2465 }, { "epoch": 1.33, "grad_norm": 1.4969529669105437, "learning_rate": 1.3757846232998118e-06, "loss": 0.5497, "step": 2466 }, { "epoch": 1.33, "grad_norm": 1.537269298067104, "learning_rate": 1.3737898870566984e-06, "loss": 0.5479, "step": 2467 }, { "epoch": 1.33, "grad_norm": 1.5885774751275852, "learning_rate": 1.3717960499413464e-06, "loss": 0.5303, "step": 2468 }, { "epoch": 1.33, "grad_norm": 1.5435161033238816, "learning_rate": 1.369803113545566e-06, "loss": 0.5462, "step": 2469 }, { "epoch": 1.33, "grad_norm": 1.5306177002309798, "learning_rate": 1.3678110794604491e-06, "loss": 0.5414, "step": 2470 }, { "epoch": 1.33, "grad_norm": 1.5201216547111207, "learning_rate": 1.365819949276369e-06, "loss": 0.5517, "step": 2471 }, { "epoch": 1.33, "grad_norm": 1.512655957425126, "learning_rate": 1.3638297245829762e-06, "loss": 0.5356, "step": 2472 }, { "epoch": 1.34, "grad_norm": 1.4845033951825548, "learning_rate": 1.3618404069691966e-06, "loss": 0.5441, "step": 2473 }, { "epoch": 1.34, "grad_norm": 1.5353756722956944, "learning_rate": 1.3598519980232356e-06, "loss": 0.5474, "step": 2474 }, { "epoch": 1.34, "grad_norm": 1.6244375118705203, "learning_rate": 1.3578644993325701e-06, "loss": 0.5726, "step": 2475 }, { "epoch": 1.34, "grad_norm": 1.6098014663237619, "learning_rate": 1.3558779124839516e-06, "loss": 0.5317, "step": 2476 }, { "epoch": 1.34, "grad_norm": 1.5578398369070363, "learning_rate": 1.353892239063402e-06, "loss": 0.5291, "step": 2477 }, { "epoch": 1.34, "grad_norm": 1.4561525820816918, "learning_rate": 1.3519074806562165e-06, "loss": 0.5107, "step": 2478 }, { "epoch": 1.34, "grad_norm": 1.496809485177418, "learning_rate": 1.3499236388469594e-06, "loss": 0.5238, "step": 2479 }, { "epoch": 1.34, "grad_norm": 1.4719587796535136, "learning_rate": 1.3479407152194612e-06, "loss": 0.5523, "step": 2480 }, { "epoch": 1.34, "grad_norm": 1.5175817404089715, "learning_rate": 1.3459587113568208e-06, "loss": 0.5477, "step": 2481 }, { "epoch": 1.34, "grad_norm": 1.4744609306073633, "learning_rate": 1.3439776288414024e-06, "loss": 0.5342, "step": 2482 }, { "epoch": 1.34, "grad_norm": 1.6234788994052602, "learning_rate": 1.341997469254834e-06, "loss": 0.5514, "step": 2483 }, { "epoch": 1.34, "grad_norm": 1.5422256662979181, "learning_rate": 1.340018234178009e-06, "loss": 0.5287, "step": 2484 }, { "epoch": 1.34, "grad_norm": 1.5160889984046246, "learning_rate": 1.3380399251910796e-06, "loss": 0.5586, "step": 2485 }, { "epoch": 1.34, "grad_norm": 1.4832874556094222, "learning_rate": 1.3360625438734623e-06, "loss": 0.5389, "step": 2486 }, { "epoch": 1.34, "grad_norm": 1.4654823605216, "learning_rate": 1.3340860918038295e-06, "loss": 0.5429, "step": 2487 }, { "epoch": 1.34, "grad_norm": 1.537001975267488, "learning_rate": 1.332110570560114e-06, "loss": 0.5338, "step": 2488 }, { "epoch": 1.34, "grad_norm": 1.5295491063685183, "learning_rate": 1.3301359817195026e-06, "loss": 0.5115, "step": 2489 }, { "epoch": 1.34, "grad_norm": 1.5294682164959468, "learning_rate": 1.328162326858442e-06, "loss": 0.5404, "step": 2490 }, { "epoch": 1.35, "grad_norm": 1.5714838134871467, "learning_rate": 1.3261896075526303e-06, "loss": 0.556, "step": 2491 }, { "epoch": 1.35, "grad_norm": 1.5348882221684248, "learning_rate": 1.3242178253770192e-06, "loss": 0.5426, "step": 2492 }, { "epoch": 1.35, "grad_norm": 1.499047902196786, "learning_rate": 1.3222469819058112e-06, "loss": 0.549, "step": 2493 }, { "epoch": 1.35, "grad_norm": 1.5537977124706854, "learning_rate": 1.3202770787124627e-06, "loss": 0.5446, "step": 2494 }, { "epoch": 1.35, "grad_norm": 1.5272531004845014, "learning_rate": 1.318308117369675e-06, "loss": 0.5391, "step": 2495 }, { "epoch": 1.35, "grad_norm": 1.5747165863463828, "learning_rate": 1.3163400994494025e-06, "loss": 0.5384, "step": 2496 }, { "epoch": 1.35, "grad_norm": 1.5685403964073772, "learning_rate": 1.3143730265228415e-06, "loss": 0.5376, "step": 2497 }, { "epoch": 1.35, "grad_norm": 1.531452162129606, "learning_rate": 1.3124069001604367e-06, "loss": 0.5403, "step": 2498 }, { "epoch": 1.35, "grad_norm": 1.5121912185092619, "learning_rate": 1.3104417219318762e-06, "loss": 0.5325, "step": 2499 }, { "epoch": 1.35, "grad_norm": 1.554054233795943, "learning_rate": 1.3084774934060903e-06, "loss": 0.5449, "step": 2500 }, { "epoch": 1.35, "grad_norm": 1.555491527379274, "learning_rate": 1.3065142161512532e-06, "loss": 0.5322, "step": 2501 }, { "epoch": 1.35, "grad_norm": 1.5118136459990599, "learning_rate": 1.3045518917347791e-06, "loss": 0.5247, "step": 2502 }, { "epoch": 1.35, "grad_norm": 1.5361007772210933, "learning_rate": 1.3025905217233202e-06, "loss": 0.5523, "step": 2503 }, { "epoch": 1.35, "grad_norm": 1.5078831664404642, "learning_rate": 1.3006301076827676e-06, "loss": 0.5232, "step": 2504 }, { "epoch": 1.35, "grad_norm": 1.5159739661918785, "learning_rate": 1.2986706511782476e-06, "loss": 0.5532, "step": 2505 }, { "epoch": 1.35, "grad_norm": 1.5433234699057141, "learning_rate": 1.2967121537741262e-06, "loss": 0.5509, "step": 2506 }, { "epoch": 1.35, "grad_norm": 1.6005291563770876, "learning_rate": 1.2947546170339992e-06, "loss": 0.5469, "step": 2507 }, { "epoch": 1.35, "grad_norm": 1.516696624981547, "learning_rate": 1.2927980425206968e-06, "loss": 0.5674, "step": 2508 }, { "epoch": 1.35, "grad_norm": 1.4973741569345027, "learning_rate": 1.290842431796283e-06, "loss": 0.529, "step": 2509 }, { "epoch": 1.36, "grad_norm": 1.5287326526206946, "learning_rate": 1.28888778642205e-06, "loss": 0.5494, "step": 2510 }, { "epoch": 1.36, "grad_norm": 1.5772874533167165, "learning_rate": 1.2869341079585184e-06, "loss": 0.5529, "step": 2511 }, { "epoch": 1.36, "grad_norm": 1.6041643732534183, "learning_rate": 1.2849813979654413e-06, "loss": 0.547, "step": 2512 }, { "epoch": 1.36, "grad_norm": 1.5676563819641636, "learning_rate": 1.2830296580017945e-06, "loss": 0.5421, "step": 2513 }, { "epoch": 1.36, "grad_norm": 1.4884002561192269, "learning_rate": 1.2810788896257804e-06, "loss": 0.5288, "step": 2514 }, { "epoch": 1.36, "grad_norm": 1.4661461063285197, "learning_rate": 1.2791290943948265e-06, "loss": 0.5597, "step": 2515 }, { "epoch": 1.36, "grad_norm": 1.4296140128612769, "learning_rate": 1.2771802738655812e-06, "loss": 0.5253, "step": 2516 }, { "epoch": 1.36, "grad_norm": 1.5217188184780803, "learning_rate": 1.2752324295939178e-06, "loss": 0.5354, "step": 2517 }, { "epoch": 1.36, "grad_norm": 1.4663037481589725, "learning_rate": 1.2732855631349294e-06, "loss": 0.5227, "step": 2518 }, { "epoch": 1.36, "grad_norm": 1.556096631740379, "learning_rate": 1.2713396760429271e-06, "loss": 0.5474, "step": 2519 }, { "epoch": 1.36, "grad_norm": 1.5699022919863936, "learning_rate": 1.2693947698714409e-06, "loss": 0.5334, "step": 2520 }, { "epoch": 1.36, "grad_norm": 1.5227243609897134, "learning_rate": 1.2674508461732177e-06, "loss": 0.5453, "step": 2521 }, { "epoch": 1.36, "grad_norm": 1.5882298993832245, "learning_rate": 1.2655079065002181e-06, "loss": 0.5397, "step": 2522 }, { "epoch": 1.36, "grad_norm": 1.5320678982506541, "learning_rate": 1.263565952403622e-06, "loss": 0.5321, "step": 2523 }, { "epoch": 1.36, "grad_norm": 1.549211544402404, "learning_rate": 1.2616249854338164e-06, "loss": 0.5451, "step": 2524 }, { "epoch": 1.36, "grad_norm": 1.4759984701178575, "learning_rate": 1.2596850071404058e-06, "loss": 0.5331, "step": 2525 }, { "epoch": 1.36, "grad_norm": 1.5774965968559318, "learning_rate": 1.2577460190722013e-06, "loss": 0.5342, "step": 2526 }, { "epoch": 1.36, "grad_norm": 1.5342809321968465, "learning_rate": 1.2558080227772251e-06, "loss": 0.5353, "step": 2527 }, { "epoch": 1.37, "grad_norm": 1.6173721259291296, "learning_rate": 1.2538710198027066e-06, "loss": 0.5597, "step": 2528 }, { "epoch": 1.37, "grad_norm": 1.5034372907639457, "learning_rate": 1.2519350116950842e-06, "loss": 0.543, "step": 2529 }, { "epoch": 1.37, "grad_norm": 1.5477258815854176, "learning_rate": 1.2500000000000007e-06, "loss": 0.5387, "step": 2530 }, { "epoch": 1.37, "grad_norm": 1.5338201053043867, "learning_rate": 1.2480659862623026e-06, "loss": 0.5157, "step": 2531 }, { "epoch": 1.37, "grad_norm": 1.5292106345046257, "learning_rate": 1.2461329720260403e-06, "loss": 0.5199, "step": 2532 }, { "epoch": 1.37, "grad_norm": 1.5518673334530246, "learning_rate": 1.244200958834468e-06, "loss": 0.5444, "step": 2533 }, { "epoch": 1.37, "grad_norm": 1.4589972404412794, "learning_rate": 1.2422699482300374e-06, "loss": 0.5302, "step": 2534 }, { "epoch": 1.37, "grad_norm": 1.534911995824094, "learning_rate": 1.2403399417544033e-06, "loss": 0.5399, "step": 2535 }, { "epoch": 1.37, "grad_norm": 1.569123604991523, "learning_rate": 1.238410940948416e-06, "loss": 0.5364, "step": 2536 }, { "epoch": 1.37, "grad_norm": 1.6084644322244668, "learning_rate": 1.2364829473521245e-06, "loss": 0.5341, "step": 2537 }, { "epoch": 1.37, "grad_norm": 1.5551523961739115, "learning_rate": 1.2345559625047718e-06, "loss": 0.5443, "step": 2538 }, { "epoch": 1.37, "grad_norm": 1.5328288537925223, "learning_rate": 1.2326299879447989e-06, "loss": 0.56, "step": 2539 }, { "epoch": 1.37, "grad_norm": 1.5335619542854007, "learning_rate": 1.2307050252098362e-06, "loss": 0.5442, "step": 2540 }, { "epoch": 1.37, "grad_norm": 1.5662868600806252, "learning_rate": 1.2287810758367104e-06, "loss": 0.5329, "step": 2541 }, { "epoch": 1.37, "grad_norm": 1.4866541214793476, "learning_rate": 1.226858141361436e-06, "loss": 0.546, "step": 2542 }, { "epoch": 1.37, "grad_norm": 1.5599266327603216, "learning_rate": 1.2249362233192189e-06, "loss": 0.5265, "step": 2543 }, { "epoch": 1.37, "grad_norm": 1.5396771261330624, "learning_rate": 1.2230153232444511e-06, "loss": 0.5534, "step": 2544 }, { "epoch": 1.37, "grad_norm": 1.518581029406671, "learning_rate": 1.221095442670716e-06, "loss": 0.5468, "step": 2545 }, { "epoch": 1.37, "grad_norm": 1.5295820269537028, "learning_rate": 1.21917658313078e-06, "loss": 0.5358, "step": 2546 }, { "epoch": 1.38, "grad_norm": 1.532777169584868, "learning_rate": 1.217258746156594e-06, "loss": 0.5335, "step": 2547 }, { "epoch": 1.38, "grad_norm": 1.4421218264469642, "learning_rate": 1.2153419332792958e-06, "loss": 0.5106, "step": 2548 }, { "epoch": 1.38, "grad_norm": 1.4963149273298377, "learning_rate": 1.2134261460292024e-06, "loss": 0.543, "step": 2549 }, { "epoch": 1.38, "grad_norm": 1.5068301430136164, "learning_rate": 1.2115113859358118e-06, "loss": 0.5262, "step": 2550 }, { "epoch": 1.38, "grad_norm": 1.5632028959485527, "learning_rate": 1.2095976545278055e-06, "loss": 0.5464, "step": 2551 }, { "epoch": 1.38, "grad_norm": 1.4705188774051856, "learning_rate": 1.20768495333304e-06, "loss": 0.5192, "step": 2552 }, { "epoch": 1.38, "grad_norm": 1.5073102492065722, "learning_rate": 1.2057732838785514e-06, "loss": 0.5345, "step": 2553 }, { "epoch": 1.38, "grad_norm": 1.5247196083233077, "learning_rate": 1.2038626476905508e-06, "loss": 0.546, "step": 2554 }, { "epoch": 1.38, "grad_norm": 1.5275842260858816, "learning_rate": 1.201953046294424e-06, "loss": 0.5604, "step": 2555 }, { "epoch": 1.38, "grad_norm": 1.46192364113277, "learning_rate": 1.2000444812147333e-06, "loss": 0.5274, "step": 2556 }, { "epoch": 1.38, "grad_norm": 1.4981941765389097, "learning_rate": 1.1981369539752122e-06, "loss": 0.5287, "step": 2557 }, { "epoch": 1.38, "grad_norm": 1.474738153124808, "learning_rate": 1.196230466098765e-06, "loss": 0.5353, "step": 2558 }, { "epoch": 1.38, "grad_norm": 1.4989773035053533, "learning_rate": 1.1943250191074664e-06, "loss": 0.5002, "step": 2559 }, { "epoch": 1.38, "grad_norm": 1.521650194507714, "learning_rate": 1.1924206145225603e-06, "loss": 0.5499, "step": 2560 }, { "epoch": 1.38, "grad_norm": 1.58169386897708, "learning_rate": 1.1905172538644574e-06, "loss": 0.5423, "step": 2561 }, { "epoch": 1.38, "grad_norm": 1.5096702853523731, "learning_rate": 1.188614938652738e-06, "loss": 0.5324, "step": 2562 }, { "epoch": 1.38, "grad_norm": 1.5100945348617199, "learning_rate": 1.1867136704061439e-06, "loss": 0.5351, "step": 2563 }, { "epoch": 1.38, "grad_norm": 1.5287958035856366, "learning_rate": 1.1848134506425843e-06, "loss": 0.5381, "step": 2564 }, { "epoch": 1.39, "grad_norm": 1.5917653344865708, "learning_rate": 1.1829142808791294e-06, "loss": 0.5368, "step": 2565 }, { "epoch": 1.39, "grad_norm": 1.6043795201099973, "learning_rate": 1.1810161626320104e-06, "loss": 0.5533, "step": 2566 }, { "epoch": 1.39, "grad_norm": 1.5325352793324423, "learning_rate": 1.1791190974166223e-06, "loss": 0.5384, "step": 2567 }, { "epoch": 1.39, "grad_norm": 1.5466900250039932, "learning_rate": 1.177223086747516e-06, "loss": 0.5306, "step": 2568 }, { "epoch": 1.39, "grad_norm": 1.5213091537610535, "learning_rate": 1.1753281321384022e-06, "loss": 0.541, "step": 2569 }, { "epoch": 1.39, "grad_norm": 1.5517623041755664, "learning_rate": 1.1734342351021478e-06, "loss": 0.5401, "step": 2570 }, { "epoch": 1.39, "grad_norm": 1.5349674877849588, "learning_rate": 1.1715413971507747e-06, "loss": 0.5488, "step": 2571 }, { "epoch": 1.39, "grad_norm": 1.552263927272752, "learning_rate": 1.169649619795461e-06, "loss": 0.551, "step": 2572 }, { "epoch": 1.39, "grad_norm": 1.5141283837295754, "learning_rate": 1.1677589045465388e-06, "loss": 0.5256, "step": 2573 }, { "epoch": 1.39, "grad_norm": 1.5667734697725946, "learning_rate": 1.1658692529134888e-06, "loss": 0.5543, "step": 2574 }, { "epoch": 1.39, "grad_norm": 1.559178055586324, "learning_rate": 1.1639806664049452e-06, "loss": 0.5424, "step": 2575 }, { "epoch": 1.39, "grad_norm": 1.5235471731038392, "learning_rate": 1.1620931465286908e-06, "loss": 0.5448, "step": 2576 }, { "epoch": 1.39, "grad_norm": 1.540884579562855, "learning_rate": 1.1602066947916565e-06, "loss": 0.5532, "step": 2577 }, { "epoch": 1.39, "grad_norm": 1.5157676945408882, "learning_rate": 1.1583213126999215e-06, "loss": 0.5284, "step": 2578 }, { "epoch": 1.39, "grad_norm": 1.4918828634706887, "learning_rate": 1.1564370017587117e-06, "loss": 0.5279, "step": 2579 }, { "epoch": 1.39, "grad_norm": 1.5019161312650555, "learning_rate": 1.154553763472396e-06, "loss": 0.5346, "step": 2580 }, { "epoch": 1.39, "grad_norm": 1.5233319321835237, "learning_rate": 1.1526715993444872e-06, "loss": 0.533, "step": 2581 }, { "epoch": 1.39, "grad_norm": 1.4888824904524816, "learning_rate": 1.1507905108776415e-06, "loss": 0.547, "step": 2582 }, { "epoch": 1.39, "grad_norm": 1.520944891051242, "learning_rate": 1.1489104995736543e-06, "loss": 0.5163, "step": 2583 }, { "epoch": 1.4, "grad_norm": 1.4988744230459885, "learning_rate": 1.1470315669334654e-06, "loss": 0.5433, "step": 2584 }, { "epoch": 1.4, "grad_norm": 1.499826079832275, "learning_rate": 1.1451537144571482e-06, "loss": 0.5352, "step": 2585 }, { "epoch": 1.4, "grad_norm": 1.588243343876002, "learning_rate": 1.1432769436439162e-06, "loss": 0.5214, "step": 2586 }, { "epoch": 1.4, "grad_norm": 1.505071897241112, "learning_rate": 1.1414012559921208e-06, "loss": 0.5357, "step": 2587 }, { "epoch": 1.4, "grad_norm": 1.5623435176898073, "learning_rate": 1.1395266529992462e-06, "loss": 0.5603, "step": 2588 }, { "epoch": 1.4, "grad_norm": 1.5180362904161218, "learning_rate": 1.1376531361619105e-06, "loss": 0.5633, "step": 2589 }, { "epoch": 1.4, "grad_norm": 1.5947770215845296, "learning_rate": 1.1357807069758676e-06, "loss": 0.5539, "step": 2590 }, { "epoch": 1.4, "grad_norm": 1.4959823606704903, "learning_rate": 1.1339093669360001e-06, "loss": 0.5475, "step": 2591 }, { "epoch": 1.4, "grad_norm": 1.5178963956821891, "learning_rate": 1.1320391175363225e-06, "loss": 0.5511, "step": 2592 }, { "epoch": 1.4, "grad_norm": 1.51634767847209, "learning_rate": 1.1301699602699772e-06, "loss": 0.524, "step": 2593 }, { "epoch": 1.4, "grad_norm": 1.4616168166478298, "learning_rate": 1.1283018966292357e-06, "loss": 0.52, "step": 2594 }, { "epoch": 1.4, "grad_norm": 1.5035141343881662, "learning_rate": 1.126434928105497e-06, "loss": 0.5449, "step": 2595 }, { "epoch": 1.4, "grad_norm": 1.4873877559622979, "learning_rate": 1.124569056189286e-06, "loss": 0.5341, "step": 2596 }, { "epoch": 1.4, "grad_norm": 1.5360596906278856, "learning_rate": 1.1227042823702499e-06, "loss": 0.5549, "step": 2597 }, { "epoch": 1.4, "grad_norm": 1.51659873256134, "learning_rate": 1.1208406081371612e-06, "loss": 0.5341, "step": 2598 }, { "epoch": 1.4, "grad_norm": 1.499233015510438, "learning_rate": 1.1189780349779126e-06, "loss": 0.5054, "step": 2599 }, { "epoch": 1.4, "grad_norm": 1.4967798239118963, "learning_rate": 1.117116564379521e-06, "loss": 0.5396, "step": 2600 }, { "epoch": 1.4, "grad_norm": 1.5443589533344166, "learning_rate": 1.11525619782812e-06, "loss": 0.5504, "step": 2601 }, { "epoch": 1.41, "grad_norm": 1.5586971361132884, "learning_rate": 1.1133969368089617e-06, "loss": 0.5456, "step": 2602 }, { "epoch": 1.41, "grad_norm": 1.5547017497849738, "learning_rate": 1.111538782806419e-06, "loss": 0.5423, "step": 2603 }, { "epoch": 1.41, "grad_norm": 1.5548363709930408, "learning_rate": 1.1096817373039773e-06, "loss": 0.5354, "step": 2604 }, { "epoch": 1.41, "grad_norm": 1.5399007341394038, "learning_rate": 1.107825801784238e-06, "loss": 0.5451, "step": 2605 }, { "epoch": 1.41, "grad_norm": 1.5351806256851697, "learning_rate": 1.1059709777289184e-06, "loss": 0.5356, "step": 2606 }, { "epoch": 1.41, "grad_norm": 1.568347608525034, "learning_rate": 1.104117266618846e-06, "loss": 0.5506, "step": 2607 }, { "epoch": 1.41, "grad_norm": 1.5326872258915085, "learning_rate": 1.1022646699339604e-06, "loss": 0.5483, "step": 2608 }, { "epoch": 1.41, "grad_norm": 1.4525737713051026, "learning_rate": 1.1004131891533114e-06, "loss": 0.5268, "step": 2609 }, { "epoch": 1.41, "grad_norm": 1.572323246220047, "learning_rate": 1.0985628257550575e-06, "loss": 0.5242, "step": 2610 }, { "epoch": 1.41, "grad_norm": 1.5197340411330356, "learning_rate": 1.096713581216467e-06, "loss": 0.548, "step": 2611 }, { "epoch": 1.41, "grad_norm": 1.4787490365700535, "learning_rate": 1.0948654570139138e-06, "loss": 0.549, "step": 2612 }, { "epoch": 1.41, "grad_norm": 1.4901697522434216, "learning_rate": 1.0930184546228769e-06, "loss": 0.5199, "step": 2613 }, { "epoch": 1.41, "grad_norm": 1.5396797961985016, "learning_rate": 1.09117257551794e-06, "loss": 0.5289, "step": 2614 }, { "epoch": 1.41, "grad_norm": 1.4929753142352915, "learning_rate": 1.08932782117279e-06, "loss": 0.5428, "step": 2615 }, { "epoch": 1.41, "grad_norm": 1.545334729339311, "learning_rate": 1.087484193060215e-06, "loss": 0.5459, "step": 2616 }, { "epoch": 1.41, "grad_norm": 1.589395604779088, "learning_rate": 1.0856416926521058e-06, "loss": 0.5631, "step": 2617 }, { "epoch": 1.41, "grad_norm": 1.6377841609127264, "learning_rate": 1.0838003214194525e-06, "loss": 0.5229, "step": 2618 }, { "epoch": 1.41, "grad_norm": 1.6134338211167005, "learning_rate": 1.0819600808323424e-06, "loss": 0.5359, "step": 2619 }, { "epoch": 1.41, "grad_norm": 1.5096209725384275, "learning_rate": 1.080120972359961e-06, "loss": 0.5387, "step": 2620 }, { "epoch": 1.42, "grad_norm": 1.5424648890129626, "learning_rate": 1.0782829974705897e-06, "loss": 0.5423, "step": 2621 }, { "epoch": 1.42, "grad_norm": 1.5490897009000544, "learning_rate": 1.0764461576316041e-06, "loss": 0.5477, "step": 2622 }, { "epoch": 1.42, "grad_norm": 1.4940932733843555, "learning_rate": 1.0746104543094763e-06, "loss": 0.5224, "step": 2623 }, { "epoch": 1.42, "grad_norm": 1.5463245855920071, "learning_rate": 1.0727758889697681e-06, "loss": 0.5561, "step": 2624 }, { "epoch": 1.42, "grad_norm": 1.58661494909617, "learning_rate": 1.0709424630771333e-06, "loss": 0.5174, "step": 2625 }, { "epoch": 1.42, "grad_norm": 1.495067606434838, "learning_rate": 1.0691101780953187e-06, "loss": 0.5417, "step": 2626 }, { "epoch": 1.42, "grad_norm": 1.4768570428916956, "learning_rate": 1.0672790354871558e-06, "loss": 0.5469, "step": 2627 }, { "epoch": 1.42, "grad_norm": 1.571478696430078, "learning_rate": 1.0654490367145684e-06, "loss": 0.5508, "step": 2628 }, { "epoch": 1.42, "grad_norm": 1.4839699110408993, "learning_rate": 1.0636201832385642e-06, "loss": 0.5374, "step": 2629 }, { "epoch": 1.42, "grad_norm": 1.495836217432806, "learning_rate": 1.0617924765192381e-06, "loss": 0.5528, "step": 2630 }, { "epoch": 1.42, "grad_norm": 1.517827739929429, "learning_rate": 1.0599659180157678e-06, "loss": 0.552, "step": 2631 }, { "epoch": 1.42, "grad_norm": 1.4740066225794646, "learning_rate": 1.0581405091864152e-06, "loss": 0.5486, "step": 2632 }, { "epoch": 1.42, "grad_norm": 1.6058338779130064, "learning_rate": 1.0563162514885253e-06, "loss": 0.5217, "step": 2633 }, { "epoch": 1.42, "grad_norm": 1.4683221488737803, "learning_rate": 1.0544931463785237e-06, "loss": 0.5204, "step": 2634 }, { "epoch": 1.42, "grad_norm": 1.5079538607583245, "learning_rate": 1.0526711953119147e-06, "loss": 0.5411, "step": 2635 }, { "epoch": 1.42, "grad_norm": 1.4658737311883547, "learning_rate": 1.0508503997432824e-06, "loss": 0.5426, "step": 2636 }, { "epoch": 1.42, "grad_norm": 1.5005539148360991, "learning_rate": 1.049030761126287e-06, "loss": 0.5193, "step": 2637 }, { "epoch": 1.42, "grad_norm": 2.0628897904118944, "learning_rate": 1.0472122809136658e-06, "loss": 0.5354, "step": 2638 }, { "epoch": 1.43, "grad_norm": 1.5569076369239856, "learning_rate": 1.0453949605572328e-06, "loss": 0.5333, "step": 2639 }, { "epoch": 1.43, "grad_norm": 1.5058959699150756, "learning_rate": 1.043578801507874e-06, "loss": 0.5215, "step": 2640 }, { "epoch": 1.43, "grad_norm": 1.6055850251668482, "learning_rate": 1.0417638052155477e-06, "loss": 0.565, "step": 2641 }, { "epoch": 1.43, "grad_norm": 1.5529154789060153, "learning_rate": 1.039949973129287e-06, "loss": 0.5389, "step": 2642 }, { "epoch": 1.43, "grad_norm": 1.5312822696869968, "learning_rate": 1.038137306697193e-06, "loss": 0.55, "step": 2643 }, { "epoch": 1.43, "grad_norm": 1.464213306644036, "learning_rate": 1.036325807366435e-06, "loss": 0.533, "step": 2644 }, { "epoch": 1.43, "grad_norm": 1.4997371602329528, "learning_rate": 1.0345154765832548e-06, "loss": 0.5459, "step": 2645 }, { "epoch": 1.43, "grad_norm": 1.4834038314639026, "learning_rate": 1.0327063157929582e-06, "loss": 0.5397, "step": 2646 }, { "epoch": 1.43, "grad_norm": 1.5615404017012275, "learning_rate": 1.0308983264399167e-06, "loss": 0.5471, "step": 2647 }, { "epoch": 1.43, "grad_norm": 1.5089248022755382, "learning_rate": 1.0290915099675668e-06, "loss": 0.5561, "step": 2648 }, { "epoch": 1.43, "grad_norm": 1.5308883011340424, "learning_rate": 1.027285867818411e-06, "loss": 0.5517, "step": 2649 }, { "epoch": 1.43, "grad_norm": 1.5237048902298207, "learning_rate": 1.025481401434011e-06, "loss": 0.5365, "step": 2650 }, { "epoch": 1.43, "grad_norm": 1.525799428809637, "learning_rate": 1.0236781122549921e-06, "loss": 0.5173, "step": 2651 }, { "epoch": 1.43, "grad_norm": 1.4939750673331293, "learning_rate": 1.021876001721039e-06, "loss": 0.5306, "step": 2652 }, { "epoch": 1.43, "grad_norm": 1.5423681326253373, "learning_rate": 1.0200750712708946e-06, "loss": 0.5639, "step": 2653 }, { "epoch": 1.43, "grad_norm": 1.5056257409207758, "learning_rate": 1.0182753223423609e-06, "loss": 0.5307, "step": 2654 }, { "epoch": 1.43, "grad_norm": 1.5347639164946119, "learning_rate": 1.016476756372295e-06, "loss": 0.5515, "step": 2655 }, { "epoch": 1.43, "grad_norm": 1.4743874486391955, "learning_rate": 1.0146793747966113e-06, "loss": 0.5488, "step": 2656 }, { "epoch": 1.43, "grad_norm": 1.497602462602524, "learning_rate": 1.0128831790502792e-06, "loss": 0.5305, "step": 2657 }, { "epoch": 1.44, "grad_norm": 1.5112225097983354, "learning_rate": 1.011088170567319e-06, "loss": 0.5481, "step": 2658 }, { "epoch": 1.44, "grad_norm": 1.577433531857036, "learning_rate": 1.0092943507808046e-06, "loss": 0.5469, "step": 2659 }, { "epoch": 1.44, "grad_norm": 1.551445980596186, "learning_rate": 1.007501721122859e-06, "loss": 0.5485, "step": 2660 }, { "epoch": 1.44, "grad_norm": 1.5394202054128816, "learning_rate": 1.0057102830246596e-06, "loss": 0.546, "step": 2661 }, { "epoch": 1.44, "grad_norm": 1.5399392574719335, "learning_rate": 1.0039200379164274e-06, "loss": 0.5429, "step": 2662 }, { "epoch": 1.44, "grad_norm": 1.523307576024978, "learning_rate": 1.0021309872274338e-06, "loss": 0.5305, "step": 2663 }, { "epoch": 1.44, "grad_norm": 1.4865657482123495, "learning_rate": 1.0003431323859943e-06, "loss": 0.5153, "step": 2664 }, { "epoch": 1.44, "grad_norm": 1.5535965985844165, "learning_rate": 9.98556474819474e-07, "loss": 0.5229, "step": 2665 }, { "epoch": 1.44, "grad_norm": 1.5071566216176775, "learning_rate": 9.967710159542768e-07, "loss": 0.5522, "step": 2666 }, { "epoch": 1.44, "grad_norm": 1.5655044184389222, "learning_rate": 9.949867572158544e-07, "loss": 0.5516, "step": 2667 }, { "epoch": 1.44, "grad_norm": 1.4969227772640556, "learning_rate": 9.93203700028697e-07, "loss": 0.5488, "step": 2668 }, { "epoch": 1.44, "grad_norm": 1.4929733849724462, "learning_rate": 9.914218458163368e-07, "loss": 0.5544, "step": 2669 }, { "epoch": 1.44, "grad_norm": 1.4864063314171339, "learning_rate": 9.896411960013455e-07, "loss": 0.5398, "step": 2670 }, { "epoch": 1.44, "grad_norm": 1.520781681579928, "learning_rate": 9.878617520053321e-07, "loss": 0.5528, "step": 2671 }, { "epoch": 1.44, "grad_norm": 1.5070410556364693, "learning_rate": 9.860835152489453e-07, "loss": 0.5565, "step": 2672 }, { "epoch": 1.44, "grad_norm": 1.501212695306523, "learning_rate": 9.843064871518694e-07, "loss": 0.5473, "step": 2673 }, { "epoch": 1.44, "grad_norm": 1.5432014034411181, "learning_rate": 9.825306691328218e-07, "loss": 0.5454, "step": 2674 }, { "epoch": 1.44, "grad_norm": 1.5375895654179423, "learning_rate": 9.807560626095555e-07, "loss": 0.5774, "step": 2675 }, { "epoch": 1.45, "grad_norm": 1.550404352087281, "learning_rate": 9.78982668998856e-07, "loss": 0.5201, "step": 2676 }, { "epoch": 1.45, "grad_norm": 1.5200416994829313, "learning_rate": 9.772104897165388e-07, "loss": 0.5365, "step": 2677 }, { "epoch": 1.45, "grad_norm": 1.5489957791612072, "learning_rate": 9.754395261774539e-07, "loss": 0.5686, "step": 2678 }, { "epoch": 1.45, "grad_norm": 1.5136358530386012, "learning_rate": 9.736697797954766e-07, "loss": 0.5447, "step": 2679 }, { "epoch": 1.45, "grad_norm": 1.530097189495357, "learning_rate": 9.71901251983512e-07, "loss": 0.5555, "step": 2680 }, { "epoch": 1.45, "grad_norm": 1.5352014188634036, "learning_rate": 9.701339441534935e-07, "loss": 0.5393, "step": 2681 }, { "epoch": 1.45, "grad_norm": 1.5116520791902603, "learning_rate": 9.683678577163788e-07, "loss": 0.5242, "step": 2682 }, { "epoch": 1.45, "grad_norm": 1.4717703442520413, "learning_rate": 9.666029940821504e-07, "loss": 0.547, "step": 2683 }, { "epoch": 1.45, "grad_norm": 1.5374658437554893, "learning_rate": 9.648393546598167e-07, "loss": 0.513, "step": 2684 }, { "epoch": 1.45, "grad_norm": 1.5087840165331812, "learning_rate": 9.630769408574065e-07, "loss": 0.5417, "step": 2685 }, { "epoch": 1.45, "grad_norm": 1.5461917649851502, "learning_rate": 9.61315754081971e-07, "loss": 0.5357, "step": 2686 }, { "epoch": 1.45, "grad_norm": 1.5349111353378682, "learning_rate": 9.595557957395806e-07, "loss": 0.5392, "step": 2687 }, { "epoch": 1.45, "grad_norm": 1.5392555612743017, "learning_rate": 9.577970672353274e-07, "loss": 0.5206, "step": 2688 }, { "epoch": 1.45, "grad_norm": 1.533119989472604, "learning_rate": 9.560395699733206e-07, "loss": 0.5212, "step": 2689 }, { "epoch": 1.45, "grad_norm": 1.5416131025981228, "learning_rate": 9.542833053566856e-07, "loss": 0.5569, "step": 2690 }, { "epoch": 1.45, "grad_norm": 1.5498008485918402, "learning_rate": 9.525282747875636e-07, "loss": 0.5267, "step": 2691 }, { "epoch": 1.45, "grad_norm": 1.5356917276941062, "learning_rate": 9.50774479667112e-07, "loss": 0.5273, "step": 2692 }, { "epoch": 1.45, "grad_norm": 1.499697409929533, "learning_rate": 9.490219213954998e-07, "loss": 0.5486, "step": 2693 }, { "epoch": 1.45, "grad_norm": 1.5503208524581644, "learning_rate": 9.472706013719113e-07, "loss": 0.5522, "step": 2694 }, { "epoch": 1.46, "grad_norm": 1.5236103616773435, "learning_rate": 9.455205209945392e-07, "loss": 0.5223, "step": 2695 }, { "epoch": 1.46, "grad_norm": 1.5579627140970205, "learning_rate": 9.437716816605899e-07, "loss": 0.513, "step": 2696 }, { "epoch": 1.46, "grad_norm": 1.5248518310233135, "learning_rate": 9.420240847662759e-07, "loss": 0.5485, "step": 2697 }, { "epoch": 1.46, "grad_norm": 1.4695130983868134, "learning_rate": 9.402777317068193e-07, "loss": 0.5205, "step": 2698 }, { "epoch": 1.46, "grad_norm": 1.5335353183116234, "learning_rate": 9.385326238764475e-07, "loss": 0.5589, "step": 2699 }, { "epoch": 1.46, "grad_norm": 1.4779209933283264, "learning_rate": 9.367887626683975e-07, "loss": 0.521, "step": 2700 }, { "epoch": 1.46, "grad_norm": 1.4905760976414175, "learning_rate": 9.350461494749072e-07, "loss": 0.5374, "step": 2701 }, { "epoch": 1.46, "grad_norm": 1.4961943140412757, "learning_rate": 9.333047856872196e-07, "loss": 0.5662, "step": 2702 }, { "epoch": 1.46, "grad_norm": 1.5209055184838354, "learning_rate": 9.315646726955798e-07, "loss": 0.5425, "step": 2703 }, { "epoch": 1.46, "grad_norm": 1.542832127148583, "learning_rate": 9.298258118892358e-07, "loss": 0.5337, "step": 2704 }, { "epoch": 1.46, "grad_norm": 1.5376554138555052, "learning_rate": 9.280882046564337e-07, "loss": 0.5329, "step": 2705 }, { "epoch": 1.46, "grad_norm": 1.5991993472855124, "learning_rate": 9.263518523844211e-07, "loss": 0.5361, "step": 2706 }, { "epoch": 1.46, "grad_norm": 1.5399376633847102, "learning_rate": 9.246167564594419e-07, "loss": 0.5224, "step": 2707 }, { "epoch": 1.46, "grad_norm": 1.5672318646817984, "learning_rate": 9.228829182667373e-07, "loss": 0.5207, "step": 2708 }, { "epoch": 1.46, "grad_norm": 1.449971781321615, "learning_rate": 9.211503391905446e-07, "loss": 0.5017, "step": 2709 }, { "epoch": 1.46, "grad_norm": 1.504445521967651, "learning_rate": 9.194190206140954e-07, "loss": 0.5307, "step": 2710 }, { "epoch": 1.46, "grad_norm": 1.5430357146017546, "learning_rate": 9.176889639196159e-07, "loss": 0.5333, "step": 2711 }, { "epoch": 1.46, "grad_norm": 1.5611435222212402, "learning_rate": 9.159601704883253e-07, "loss": 0.5354, "step": 2712 }, { "epoch": 1.47, "grad_norm": 1.5272660256886788, "learning_rate": 9.142326417004324e-07, "loss": 0.5516, "step": 2713 }, { "epoch": 1.47, "grad_norm": 1.521465357931594, "learning_rate": 9.125063789351376e-07, "loss": 0.5416, "step": 2714 }, { "epoch": 1.47, "grad_norm": 1.585120420763191, "learning_rate": 9.107813835706303e-07, "loss": 0.5464, "step": 2715 }, { "epoch": 1.47, "grad_norm": 1.4961200816102145, "learning_rate": 9.090576569840864e-07, "loss": 0.5143, "step": 2716 }, { "epoch": 1.47, "grad_norm": 1.5248755490490384, "learning_rate": 9.073352005516731e-07, "loss": 0.5343, "step": 2717 }, { "epoch": 1.47, "grad_norm": 1.5561983020376209, "learning_rate": 9.056140156485385e-07, "loss": 0.506, "step": 2718 }, { "epoch": 1.47, "grad_norm": 1.5081120369505785, "learning_rate": 9.0389410364882e-07, "loss": 0.5311, "step": 2719 }, { "epoch": 1.47, "grad_norm": 1.5348740309715523, "learning_rate": 9.021754659256357e-07, "loss": 0.535, "step": 2720 }, { "epoch": 1.47, "grad_norm": 1.5254876352505338, "learning_rate": 9.004581038510865e-07, "loss": 0.5417, "step": 2721 }, { "epoch": 1.47, "grad_norm": 1.514840923718138, "learning_rate": 8.987420187962576e-07, "loss": 0.5347, "step": 2722 }, { "epoch": 1.47, "grad_norm": 1.5248671644090046, "learning_rate": 8.970272121312118e-07, "loss": 0.5457, "step": 2723 }, { "epoch": 1.47, "grad_norm": 1.5282456586370228, "learning_rate": 8.953136852249922e-07, "loss": 0.518, "step": 2724 }, { "epoch": 1.47, "grad_norm": 1.4947067809808758, "learning_rate": 8.936014394456205e-07, "loss": 0.5251, "step": 2725 }, { "epoch": 1.47, "grad_norm": 1.5694755052074842, "learning_rate": 8.918904761600944e-07, "loss": 0.5388, "step": 2726 }, { "epoch": 1.47, "grad_norm": 1.5234010868670627, "learning_rate": 8.901807967343898e-07, "loss": 0.5496, "step": 2727 }, { "epoch": 1.47, "grad_norm": 1.5580759190160627, "learning_rate": 8.884724025334568e-07, "loss": 0.5388, "step": 2728 }, { "epoch": 1.47, "grad_norm": 1.477833489850805, "learning_rate": 8.867652949212182e-07, "loss": 0.5322, "step": 2729 }, { "epoch": 1.47, "grad_norm": 1.5048188857550684, "learning_rate": 8.850594752605712e-07, "loss": 0.536, "step": 2730 }, { "epoch": 1.47, "grad_norm": 1.4961529914329879, "learning_rate": 8.833549449133835e-07, "loss": 0.5381, "step": 2731 }, { "epoch": 1.48, "grad_norm": 1.4676258579624122, "learning_rate": 8.816517052404933e-07, "loss": 0.5442, "step": 2732 }, { "epoch": 1.48, "grad_norm": 1.5230655452432478, "learning_rate": 8.79949757601711e-07, "loss": 0.5335, "step": 2733 }, { "epoch": 1.48, "grad_norm": 1.538497317603092, "learning_rate": 8.782491033558116e-07, "loss": 0.5351, "step": 2734 }, { "epoch": 1.48, "grad_norm": 1.5298458527765355, "learning_rate": 8.765497438605411e-07, "loss": 0.5295, "step": 2735 }, { "epoch": 1.48, "grad_norm": 1.5695337352856533, "learning_rate": 8.748516804726096e-07, "loss": 0.5433, "step": 2736 }, { "epoch": 1.48, "grad_norm": 1.498173711635199, "learning_rate": 8.731549145476928e-07, "loss": 0.5288, "step": 2737 }, { "epoch": 1.48, "grad_norm": 1.5109867231639826, "learning_rate": 8.714594474404303e-07, "loss": 0.5327, "step": 2738 }, { "epoch": 1.48, "grad_norm": 1.5611247978322786, "learning_rate": 8.697652805044265e-07, "loss": 0.5738, "step": 2739 }, { "epoch": 1.48, "grad_norm": 1.5398724293338757, "learning_rate": 8.680724150922459e-07, "loss": 0.5259, "step": 2740 }, { "epoch": 1.48, "grad_norm": 1.5949809171777478, "learning_rate": 8.663808525554146e-07, "loss": 0.5404, "step": 2741 }, { "epoch": 1.48, "grad_norm": 1.5001756129711996, "learning_rate": 8.646905942444172e-07, "loss": 0.5371, "step": 2742 }, { "epoch": 1.48, "grad_norm": 1.555272470566675, "learning_rate": 8.630016415087006e-07, "loss": 0.5489, "step": 2743 }, { "epoch": 1.48, "grad_norm": 1.4984344021951022, "learning_rate": 8.613139956966649e-07, "loss": 0.532, "step": 2744 }, { "epoch": 1.48, "grad_norm": 1.4990489709775106, "learning_rate": 8.59627658155671e-07, "loss": 0.5328, "step": 2745 }, { "epoch": 1.48, "grad_norm": 1.5743811818410014, "learning_rate": 8.579426302320323e-07, "loss": 0.5551, "step": 2746 }, { "epoch": 1.48, "grad_norm": 1.4993363784818297, "learning_rate": 8.562589132710175e-07, "loss": 0.5312, "step": 2747 }, { "epoch": 1.48, "grad_norm": 1.4826000419526773, "learning_rate": 8.545765086168484e-07, "loss": 0.5443, "step": 2748 }, { "epoch": 1.48, "grad_norm": 1.5525008477157118, "learning_rate": 8.528954176127008e-07, "loss": 0.5475, "step": 2749 }, { "epoch": 1.49, "grad_norm": 1.5026953867149297, "learning_rate": 8.512156416006989e-07, "loss": 0.5317, "step": 2750 }, { "epoch": 1.49, "grad_norm": 1.482909330553009, "learning_rate": 8.495371819219206e-07, "loss": 0.5488, "step": 2751 }, { "epoch": 1.49, "grad_norm": 1.5540179375084535, "learning_rate": 8.478600399163894e-07, "loss": 0.5311, "step": 2752 }, { "epoch": 1.49, "grad_norm": 1.5183944938180238, "learning_rate": 8.461842169230788e-07, "loss": 0.5357, "step": 2753 }, { "epoch": 1.49, "grad_norm": 1.5757006228566908, "learning_rate": 8.44509714279908e-07, "loss": 0.5339, "step": 2754 }, { "epoch": 1.49, "grad_norm": 1.4796584923966605, "learning_rate": 8.42836533323744e-07, "loss": 0.5374, "step": 2755 }, { "epoch": 1.49, "grad_norm": 1.5680824509741396, "learning_rate": 8.411646753903968e-07, "loss": 0.5559, "step": 2756 }, { "epoch": 1.49, "grad_norm": 1.4968858955095163, "learning_rate": 8.394941418146202e-07, "loss": 0.5379, "step": 2757 }, { "epoch": 1.49, "grad_norm": 1.4805481361762713, "learning_rate": 8.378249339301126e-07, "loss": 0.5326, "step": 2758 }, { "epoch": 1.49, "grad_norm": 1.5467672926249845, "learning_rate": 8.361570530695121e-07, "loss": 0.5603, "step": 2759 }, { "epoch": 1.49, "grad_norm": 1.5081850603597518, "learning_rate": 8.344905005643967e-07, "loss": 0.5596, "step": 2760 }, { "epoch": 1.49, "grad_norm": 1.5561285218230054, "learning_rate": 8.328252777452872e-07, "loss": 0.5577, "step": 2761 }, { "epoch": 1.49, "grad_norm": 1.511286621217467, "learning_rate": 8.311613859416398e-07, "loss": 0.5402, "step": 2762 }, { "epoch": 1.49, "grad_norm": 1.5155313697678314, "learning_rate": 8.294988264818488e-07, "loss": 0.5368, "step": 2763 }, { "epoch": 1.49, "grad_norm": 1.5120084125396034, "learning_rate": 8.278376006932451e-07, "loss": 0.5635, "step": 2764 }, { "epoch": 1.49, "grad_norm": 1.5064615511165569, "learning_rate": 8.26177709902094e-07, "loss": 0.5316, "step": 2765 }, { "epoch": 1.49, "grad_norm": 1.4866865662350537, "learning_rate": 8.245191554335963e-07, "loss": 0.5402, "step": 2766 }, { "epoch": 1.49, "grad_norm": 1.5413125444193265, "learning_rate": 8.228619386118863e-07, "loss": 0.5431, "step": 2767 }, { "epoch": 1.49, "grad_norm": 1.574282669723312, "learning_rate": 8.212060607600289e-07, "loss": 0.5549, "step": 2768 }, { "epoch": 1.5, "grad_norm": 1.5322648505312901, "learning_rate": 8.1955152320002e-07, "loss": 0.5397, "step": 2769 }, { "epoch": 1.5, "grad_norm": 1.5547079491541864, "learning_rate": 8.178983272527863e-07, "loss": 0.5404, "step": 2770 }, { "epoch": 1.5, "grad_norm": 1.5531345634325409, "learning_rate": 8.162464742381823e-07, "loss": 0.5388, "step": 2771 }, { "epoch": 1.5, "grad_norm": 1.504159320607765, "learning_rate": 8.145959654749924e-07, "loss": 0.5489, "step": 2772 }, { "epoch": 1.5, "grad_norm": 1.5449949233332212, "learning_rate": 8.129468022809253e-07, "loss": 0.5388, "step": 2773 }, { "epoch": 1.5, "grad_norm": 1.5137480433986632, "learning_rate": 8.112989859726184e-07, "loss": 0.5355, "step": 2774 }, { "epoch": 1.5, "grad_norm": 1.4719452051390223, "learning_rate": 8.096525178656306e-07, "loss": 0.5161, "step": 2775 }, { "epoch": 1.5, "grad_norm": 1.5351280050481138, "learning_rate": 8.080073992744467e-07, "loss": 0.5335, "step": 2776 }, { "epoch": 1.5, "grad_norm": 1.502103215894358, "learning_rate": 8.063636315124718e-07, "loss": 0.5434, "step": 2777 }, { "epoch": 1.5, "grad_norm": 1.472587708304098, "learning_rate": 8.047212158920362e-07, "loss": 0.5152, "step": 2778 }, { "epoch": 1.5, "grad_norm": 1.539641995442544, "learning_rate": 8.030801537243873e-07, "loss": 0.5359, "step": 2779 }, { "epoch": 1.5, "grad_norm": 1.538223931526719, "learning_rate": 8.014404463196935e-07, "loss": 0.5468, "step": 2780 }, { "epoch": 1.5, "grad_norm": 1.535580071602923, "learning_rate": 7.998020949870402e-07, "loss": 0.5214, "step": 2781 }, { "epoch": 1.5, "grad_norm": 1.5331659388281575, "learning_rate": 7.981651010344324e-07, "loss": 0.5592, "step": 2782 }, { "epoch": 1.5, "grad_norm": 1.540811207749569, "learning_rate": 7.965294657687908e-07, "loss": 0.5295, "step": 2783 }, { "epoch": 1.5, "grad_norm": 1.5154176995151138, "learning_rate": 7.948951904959504e-07, "loss": 0.5245, "step": 2784 }, { "epoch": 1.5, "grad_norm": 1.5160277555894262, "learning_rate": 7.932622765206602e-07, "loss": 0.5556, "step": 2785 }, { "epoch": 1.5, "grad_norm": 1.539976597092044, "learning_rate": 7.916307251465835e-07, "loss": 0.5506, "step": 2786 }, { "epoch": 1.51, "grad_norm": 1.4716218456557422, "learning_rate": 7.900005376762948e-07, "loss": 0.5508, "step": 2787 }, { "epoch": 1.51, "grad_norm": 1.5118923287672104, "learning_rate": 7.883717154112802e-07, "loss": 0.5345, "step": 2788 }, { "epoch": 1.51, "grad_norm": 1.5250807259017154, "learning_rate": 7.867442596519373e-07, "loss": 0.5774, "step": 2789 }, { "epoch": 1.51, "grad_norm": 1.5219384514820595, "learning_rate": 7.851181716975703e-07, "loss": 0.5061, "step": 2790 }, { "epoch": 1.51, "grad_norm": 1.5436681575834583, "learning_rate": 7.834934528463925e-07, "loss": 0.5381, "step": 2791 }, { "epoch": 1.51, "grad_norm": 1.48729397811252, "learning_rate": 7.818701043955238e-07, "loss": 0.531, "step": 2792 }, { "epoch": 1.51, "grad_norm": 1.594338021434946, "learning_rate": 7.802481276409896e-07, "loss": 0.5414, "step": 2793 }, { "epoch": 1.51, "grad_norm": 1.561605628531304, "learning_rate": 7.786275238777225e-07, "loss": 0.5465, "step": 2794 }, { "epoch": 1.51, "grad_norm": 1.526402684155825, "learning_rate": 7.770082943995563e-07, "loss": 0.5421, "step": 2795 }, { "epoch": 1.51, "grad_norm": 1.5104541195524153, "learning_rate": 7.75390440499228e-07, "loss": 0.5074, "step": 2796 }, { "epoch": 1.51, "grad_norm": 1.4786509144397155, "learning_rate": 7.737739634683786e-07, "loss": 0.5258, "step": 2797 }, { "epoch": 1.51, "grad_norm": 1.5234808222367802, "learning_rate": 7.721588645975473e-07, "loss": 0.5473, "step": 2798 }, { "epoch": 1.51, "grad_norm": 1.4912190408714712, "learning_rate": 7.705451451761734e-07, "loss": 0.544, "step": 2799 }, { "epoch": 1.51, "grad_norm": 1.6176575386432028, "learning_rate": 7.689328064925968e-07, "loss": 0.5594, "step": 2800 }, { "epoch": 1.51, "grad_norm": 1.513980153946076, "learning_rate": 7.673218498340531e-07, "loss": 0.531, "step": 2801 }, { "epoch": 1.51, "grad_norm": 1.4899230564145247, "learning_rate": 7.657122764866754e-07, "loss": 0.5436, "step": 2802 }, { "epoch": 1.51, "grad_norm": 1.4754495065427693, "learning_rate": 7.641040877354916e-07, "loss": 0.5164, "step": 2803 }, { "epoch": 1.51, "grad_norm": 1.4567207074583337, "learning_rate": 7.624972848644247e-07, "loss": 0.5274, "step": 2804 }, { "epoch": 1.51, "grad_norm": 1.5009254840648203, "learning_rate": 7.608918691562914e-07, "loss": 0.53, "step": 2805 }, { "epoch": 1.52, "grad_norm": 1.588931478674625, "learning_rate": 7.592878418928024e-07, "loss": 0.5738, "step": 2806 }, { "epoch": 1.52, "grad_norm": 1.5249174809046215, "learning_rate": 7.576852043545568e-07, "loss": 0.5287, "step": 2807 }, { "epoch": 1.52, "grad_norm": 1.4805565462957526, "learning_rate": 7.560839578210466e-07, "loss": 0.5427, "step": 2808 }, { "epoch": 1.52, "grad_norm": 1.5150247430053274, "learning_rate": 7.54484103570651e-07, "loss": 0.5313, "step": 2809 }, { "epoch": 1.52, "grad_norm": 1.5234957686833772, "learning_rate": 7.528856428806408e-07, "loss": 0.5338, "step": 2810 }, { "epoch": 1.52, "grad_norm": 1.5551587850875999, "learning_rate": 7.512885770271722e-07, "loss": 0.5564, "step": 2811 }, { "epoch": 1.52, "grad_norm": 1.5468114469220289, "learning_rate": 7.496929072852865e-07, "loss": 0.5071, "step": 2812 }, { "epoch": 1.52, "grad_norm": 1.5933774535640364, "learning_rate": 7.480986349289146e-07, "loss": 0.5334, "step": 2813 }, { "epoch": 1.52, "grad_norm": 1.6063221469798639, "learning_rate": 7.465057612308676e-07, "loss": 0.5267, "step": 2814 }, { "epoch": 1.52, "grad_norm": 1.5676699007431516, "learning_rate": 7.449142874628413e-07, "loss": 0.5341, "step": 2815 }, { "epoch": 1.52, "grad_norm": 1.4718855598451537, "learning_rate": 7.43324214895415e-07, "loss": 0.5156, "step": 2816 }, { "epoch": 1.52, "grad_norm": 1.5268337843179705, "learning_rate": 7.417355447980484e-07, "loss": 0.5332, "step": 2817 }, { "epoch": 1.52, "grad_norm": 1.5272280989916007, "learning_rate": 7.401482784390813e-07, "loss": 0.5543, "step": 2818 }, { "epoch": 1.52, "grad_norm": 1.4985902209103044, "learning_rate": 7.385624170857328e-07, "loss": 0.5308, "step": 2819 }, { "epoch": 1.52, "grad_norm": 1.4884095143683083, "learning_rate": 7.369779620041001e-07, "loss": 0.5422, "step": 2820 }, { "epoch": 1.52, "grad_norm": 1.5021048064918745, "learning_rate": 7.353949144591588e-07, "loss": 0.5292, "step": 2821 }, { "epoch": 1.52, "grad_norm": 1.538391322521803, "learning_rate": 7.338132757147607e-07, "loss": 0.5324, "step": 2822 }, { "epoch": 1.52, "grad_norm": 1.4790875365957636, "learning_rate": 7.322330470336314e-07, "loss": 0.5157, "step": 2823 }, { "epoch": 1.53, "grad_norm": 1.570406427239961, "learning_rate": 7.30654229677372e-07, "loss": 0.5585, "step": 2824 }, { "epoch": 1.53, "grad_norm": 1.5337524755112333, "learning_rate": 7.290768249064562e-07, "loss": 0.5217, "step": 2825 }, { "epoch": 1.53, "grad_norm": 1.4927030555816243, "learning_rate": 7.275008339802295e-07, "loss": 0.5167, "step": 2826 }, { "epoch": 1.53, "grad_norm": 1.5864212522252563, "learning_rate": 7.259262581569099e-07, "loss": 0.515, "step": 2827 }, { "epoch": 1.53, "grad_norm": 1.508495072776216, "learning_rate": 7.243530986935863e-07, "loss": 0.5479, "step": 2828 }, { "epoch": 1.53, "grad_norm": 1.5569235568243756, "learning_rate": 7.227813568462141e-07, "loss": 0.5375, "step": 2829 }, { "epoch": 1.53, "grad_norm": 1.5580689188983419, "learning_rate": 7.212110338696194e-07, "loss": 0.554, "step": 2830 }, { "epoch": 1.53, "grad_norm": 1.5191185320765983, "learning_rate": 7.196421310174934e-07, "loss": 0.5332, "step": 2831 }, { "epoch": 1.53, "grad_norm": 1.528156228463147, "learning_rate": 7.180746495423946e-07, "loss": 0.5312, "step": 2832 }, { "epoch": 1.53, "grad_norm": 1.5441071181352948, "learning_rate": 7.165085906957484e-07, "loss": 0.5458, "step": 2833 }, { "epoch": 1.53, "grad_norm": 1.5605891931670983, "learning_rate": 7.149439557278415e-07, "loss": 0.5505, "step": 2834 }, { "epoch": 1.53, "grad_norm": 1.5642922017837884, "learning_rate": 7.133807458878247e-07, "loss": 0.5617, "step": 2835 }, { "epoch": 1.53, "grad_norm": 1.5376792906837664, "learning_rate": 7.118189624237129e-07, "loss": 0.5514, "step": 2836 }, { "epoch": 1.53, "grad_norm": 1.5693186111022202, "learning_rate": 7.102586065823799e-07, "loss": 0.5271, "step": 2837 }, { "epoch": 1.53, "grad_norm": 1.5180855062207446, "learning_rate": 7.086996796095599e-07, "loss": 0.5615, "step": 2838 }, { "epoch": 1.53, "grad_norm": 1.453185243061549, "learning_rate": 7.071421827498489e-07, "loss": 0.5284, "step": 2839 }, { "epoch": 1.53, "grad_norm": 1.484365344657527, "learning_rate": 7.055861172466979e-07, "loss": 0.54, "step": 2840 }, { "epoch": 1.53, "grad_norm": 1.5079811560623353, "learning_rate": 7.040314843424173e-07, "loss": 0.5312, "step": 2841 }, { "epoch": 1.53, "grad_norm": 1.4953176615114634, "learning_rate": 7.024782852781717e-07, "loss": 0.5411, "step": 2842 }, { "epoch": 1.54, "grad_norm": 1.5078319960762623, "learning_rate": 7.009265212939831e-07, "loss": 0.5513, "step": 2843 }, { "epoch": 1.54, "grad_norm": 1.4781059875218487, "learning_rate": 6.99376193628728e-07, "loss": 0.5685, "step": 2844 }, { "epoch": 1.54, "grad_norm": 1.5039539713384897, "learning_rate": 6.978273035201341e-07, "loss": 0.5182, "step": 2845 }, { "epoch": 1.54, "grad_norm": 1.523920500396487, "learning_rate": 6.962798522047826e-07, "loss": 0.5334, "step": 2846 }, { "epoch": 1.54, "grad_norm": 1.515392555854772, "learning_rate": 6.947338409181056e-07, "loss": 0.5383, "step": 2847 }, { "epoch": 1.54, "grad_norm": 1.5422199468133304, "learning_rate": 6.931892708943857e-07, "loss": 0.5484, "step": 2848 }, { "epoch": 1.54, "grad_norm": 1.5288505702699493, "learning_rate": 6.916461433667559e-07, "loss": 0.5246, "step": 2849 }, { "epoch": 1.54, "grad_norm": 1.5139230745557657, "learning_rate": 6.90104459567196e-07, "loss": 0.5362, "step": 2850 }, { "epoch": 1.54, "grad_norm": 1.5527316276377285, "learning_rate": 6.885642207265331e-07, "loss": 0.5356, "step": 2851 }, { "epoch": 1.54, "grad_norm": 1.5456312788495274, "learning_rate": 6.870254280744429e-07, "loss": 0.5546, "step": 2852 }, { "epoch": 1.54, "grad_norm": 1.5466478382680235, "learning_rate": 6.854880828394442e-07, "loss": 0.5459, "step": 2853 }, { "epoch": 1.54, "grad_norm": 1.515848898981195, "learning_rate": 6.839521862488999e-07, "loss": 0.5175, "step": 2854 }, { "epoch": 1.54, "grad_norm": 1.5581636965628587, "learning_rate": 6.824177395290196e-07, "loss": 0.5486, "step": 2855 }, { "epoch": 1.54, "grad_norm": 1.5634090197718882, "learning_rate": 6.808847439048524e-07, "loss": 0.5661, "step": 2856 }, { "epoch": 1.54, "grad_norm": 1.5151626384764176, "learning_rate": 6.793532006002893e-07, "loss": 0.5376, "step": 2857 }, { "epoch": 1.54, "grad_norm": 1.5600486825740578, "learning_rate": 6.778231108380628e-07, "loss": 0.5468, "step": 2858 }, { "epoch": 1.54, "grad_norm": 1.4810539138951437, "learning_rate": 6.762944758397432e-07, "loss": 0.5439, "step": 2859 }, { "epoch": 1.54, "grad_norm": 1.532981693505775, "learning_rate": 6.747672968257415e-07, "loss": 0.5452, "step": 2860 }, { "epoch": 1.55, "grad_norm": 1.5670773189532547, "learning_rate": 6.732415750153062e-07, "loss": 0.5306, "step": 2861 }, { "epoch": 1.55, "grad_norm": 1.4964708896374512, "learning_rate": 6.717173116265208e-07, "loss": 0.5426, "step": 2862 }, { "epoch": 1.55, "grad_norm": 1.5666134329478059, "learning_rate": 6.701945078763051e-07, "loss": 0.5381, "step": 2863 }, { "epoch": 1.55, "grad_norm": 1.5020824412388063, "learning_rate": 6.686731649804138e-07, "loss": 0.5407, "step": 2864 }, { "epoch": 1.55, "grad_norm": 1.537947718303955, "learning_rate": 6.671532841534345e-07, "loss": 0.5535, "step": 2865 }, { "epoch": 1.55, "grad_norm": 1.5468122682237002, "learning_rate": 6.656348666087889e-07, "loss": 0.5423, "step": 2866 }, { "epoch": 1.55, "grad_norm": 1.5842865388882978, "learning_rate": 6.641179135587306e-07, "loss": 0.5388, "step": 2867 }, { "epoch": 1.55, "grad_norm": 1.6317608102767842, "learning_rate": 6.626024262143421e-07, "loss": 0.5311, "step": 2868 }, { "epoch": 1.55, "grad_norm": 1.5460964622975215, "learning_rate": 6.610884057855374e-07, "loss": 0.5091, "step": 2869 }, { "epoch": 1.55, "grad_norm": 1.5249840589865742, "learning_rate": 6.595758534810578e-07, "loss": 0.5442, "step": 2870 }, { "epoch": 1.55, "grad_norm": 1.5525582426733706, "learning_rate": 6.58064770508475e-07, "loss": 0.5488, "step": 2871 }, { "epoch": 1.55, "grad_norm": 1.5141170986162233, "learning_rate": 6.565551580741855e-07, "loss": 0.5371, "step": 2872 }, { "epoch": 1.55, "grad_norm": 1.5003087772515906, "learning_rate": 6.550470173834123e-07, "loss": 0.5296, "step": 2873 }, { "epoch": 1.55, "grad_norm": 1.5849855032546016, "learning_rate": 6.535403496402023e-07, "loss": 0.556, "step": 2874 }, { "epoch": 1.55, "grad_norm": 1.4979068746024515, "learning_rate": 6.5203515604743e-07, "loss": 0.5226, "step": 2875 }, { "epoch": 1.55, "grad_norm": 1.5071200455794203, "learning_rate": 6.505314378067887e-07, "loss": 0.5441, "step": 2876 }, { "epoch": 1.55, "grad_norm": 1.5000272089074143, "learning_rate": 6.490291961187975e-07, "loss": 0.518, "step": 2877 }, { "epoch": 1.55, "grad_norm": 1.5063502547415601, "learning_rate": 6.475284321827938e-07, "loss": 0.526, "step": 2878 }, { "epoch": 1.55, "grad_norm": 1.5088086314000204, "learning_rate": 6.460291471969365e-07, "loss": 0.5135, "step": 2879 }, { "epoch": 1.56, "grad_norm": 1.5076725294669369, "learning_rate": 6.445313423582039e-07, "loss": 0.5194, "step": 2880 }, { "epoch": 1.56, "grad_norm": 1.5184068990462063, "learning_rate": 6.430350188623912e-07, "loss": 0.5353, "step": 2881 }, { "epoch": 1.56, "grad_norm": 1.5544621084546968, "learning_rate": 6.415401779041133e-07, "loss": 0.5429, "step": 2882 }, { "epoch": 1.56, "grad_norm": 1.510575326986459, "learning_rate": 6.400468206768004e-07, "loss": 0.5488, "step": 2883 }, { "epoch": 1.56, "grad_norm": 1.4846207106848932, "learning_rate": 6.385549483726977e-07, "loss": 0.5478, "step": 2884 }, { "epoch": 1.56, "grad_norm": 1.5509923648620512, "learning_rate": 6.370645621828653e-07, "loss": 0.557, "step": 2885 }, { "epoch": 1.56, "grad_norm": 1.4884919743709024, "learning_rate": 6.35575663297176e-07, "loss": 0.5327, "step": 2886 }, { "epoch": 1.56, "grad_norm": 1.4868609236725674, "learning_rate": 6.34088252904316e-07, "loss": 0.5316, "step": 2887 }, { "epoch": 1.56, "grad_norm": 1.4910893765916067, "learning_rate": 6.32602332191784e-07, "loss": 0.5439, "step": 2888 }, { "epoch": 1.56, "grad_norm": 1.5573090690104112, "learning_rate": 6.31117902345888e-07, "loss": 0.5495, "step": 2889 }, { "epoch": 1.56, "grad_norm": 1.5203917399130158, "learning_rate": 6.296349645517453e-07, "loss": 0.528, "step": 2890 }, { "epoch": 1.56, "grad_norm": 1.5506752146677614, "learning_rate": 6.281535199932845e-07, "loss": 0.5568, "step": 2891 }, { "epoch": 1.56, "grad_norm": 1.566118629586149, "learning_rate": 6.266735698532392e-07, "loss": 0.5385, "step": 2892 }, { "epoch": 1.56, "grad_norm": 1.4680493515171738, "learning_rate": 6.251951153131511e-07, "loss": 0.5347, "step": 2893 }, { "epoch": 1.56, "grad_norm": 1.564233279196982, "learning_rate": 6.23718157553369e-07, "loss": 0.5392, "step": 2894 }, { "epoch": 1.56, "grad_norm": 1.515824822761373, "learning_rate": 6.222426977530449e-07, "loss": 0.5473, "step": 2895 }, { "epoch": 1.56, "grad_norm": 1.4528624032191506, "learning_rate": 6.207687370901356e-07, "loss": 0.5222, "step": 2896 }, { "epoch": 1.56, "grad_norm": 1.5569765601697534, "learning_rate": 6.192962767414004e-07, "loss": 0.5421, "step": 2897 }, { "epoch": 1.57, "grad_norm": 1.5442736031627184, "learning_rate": 6.178253178824029e-07, "loss": 0.5291, "step": 2898 }, { "epoch": 1.57, "grad_norm": 1.4769456846413098, "learning_rate": 6.163558616875048e-07, "loss": 0.54, "step": 2899 }, { "epoch": 1.57, "grad_norm": 1.5013171266691663, "learning_rate": 6.148879093298718e-07, "loss": 0.52, "step": 2900 }, { "epoch": 1.57, "grad_norm": 1.5928617052252823, "learning_rate": 6.134214619814657e-07, "loss": 0.5425, "step": 2901 }, { "epoch": 1.57, "grad_norm": 1.5508918287380395, "learning_rate": 6.119565208130485e-07, "loss": 0.5029, "step": 2902 }, { "epoch": 1.57, "grad_norm": 1.5173338784255466, "learning_rate": 6.104930869941783e-07, "loss": 0.5385, "step": 2903 }, { "epoch": 1.57, "grad_norm": 1.5381874871066505, "learning_rate": 6.090311616932127e-07, "loss": 0.5402, "step": 2904 }, { "epoch": 1.57, "grad_norm": 1.4992213142666329, "learning_rate": 6.075707460773008e-07, "loss": 0.5395, "step": 2905 }, { "epoch": 1.57, "grad_norm": 1.4853497397702278, "learning_rate": 6.061118413123906e-07, "loss": 0.567, "step": 2906 }, { "epoch": 1.57, "grad_norm": 1.504108469108114, "learning_rate": 6.04654448563221e-07, "loss": 0.55, "step": 2907 }, { "epoch": 1.57, "grad_norm": 1.487097458516056, "learning_rate": 6.031985689933251e-07, "loss": 0.5255, "step": 2908 }, { "epoch": 1.57, "grad_norm": 1.5323442015441964, "learning_rate": 6.017442037650262e-07, "loss": 0.5464, "step": 2909 }, { "epoch": 1.57, "grad_norm": 1.526895698225953, "learning_rate": 6.002913540394417e-07, "loss": 0.5451, "step": 2910 }, { "epoch": 1.57, "grad_norm": 1.4997403518790131, "learning_rate": 5.988400209764766e-07, "loss": 0.5393, "step": 2911 }, { "epoch": 1.57, "grad_norm": 1.5454793937449187, "learning_rate": 5.973902057348258e-07, "loss": 0.5605, "step": 2912 }, { "epoch": 1.57, "grad_norm": 1.5402760363910755, "learning_rate": 5.959419094719713e-07, "loss": 0.5202, "step": 2913 }, { "epoch": 1.57, "grad_norm": 1.4615044340160697, "learning_rate": 5.944951333441851e-07, "loss": 0.5491, "step": 2914 }, { "epoch": 1.57, "grad_norm": 1.5372161528295685, "learning_rate": 5.930498785065223e-07, "loss": 0.5783, "step": 2915 }, { "epoch": 1.57, "grad_norm": 1.5487011359213263, "learning_rate": 5.916061461128269e-07, "loss": 0.5343, "step": 2916 }, { "epoch": 1.58, "grad_norm": 1.5338632001710542, "learning_rate": 5.901639373157248e-07, "loss": 0.5286, "step": 2917 }, { "epoch": 1.58, "grad_norm": 1.483406961342771, "learning_rate": 5.887232532666262e-07, "loss": 0.5154, "step": 2918 }, { "epoch": 1.58, "grad_norm": 1.4767323171597788, "learning_rate": 5.872840951157241e-07, "loss": 0.5285, "step": 2919 }, { "epoch": 1.58, "grad_norm": 1.501217735632269, "learning_rate": 5.858464640119929e-07, "loss": 0.5479, "step": 2920 }, { "epoch": 1.58, "grad_norm": 1.5344992533070299, "learning_rate": 5.844103611031887e-07, "loss": 0.5294, "step": 2921 }, { "epoch": 1.58, "grad_norm": 1.5039103963061908, "learning_rate": 5.829757875358477e-07, "loss": 0.5545, "step": 2922 }, { "epoch": 1.58, "grad_norm": 1.5423034658308157, "learning_rate": 5.815427444552843e-07, "loss": 0.5302, "step": 2923 }, { "epoch": 1.58, "grad_norm": 1.5490439126952076, "learning_rate": 5.801112330055908e-07, "loss": 0.5282, "step": 2924 }, { "epoch": 1.58, "grad_norm": 1.5292238968635405, "learning_rate": 5.786812543296372e-07, "loss": 0.5378, "step": 2925 }, { "epoch": 1.58, "grad_norm": 1.5233431611931079, "learning_rate": 5.772528095690691e-07, "loss": 0.5232, "step": 2926 }, { "epoch": 1.58, "grad_norm": 1.5353623772386078, "learning_rate": 5.758258998643095e-07, "loss": 0.5128, "step": 2927 }, { "epoch": 1.58, "grad_norm": 1.5088644780070515, "learning_rate": 5.744005263545538e-07, "loss": 0.5095, "step": 2928 }, { "epoch": 1.58, "grad_norm": 1.4959560269498366, "learning_rate": 5.729766901777708e-07, "loss": 0.5079, "step": 2929 }, { "epoch": 1.58, "grad_norm": 1.514958568436152, "learning_rate": 5.715543924707046e-07, "loss": 0.5201, "step": 2930 }, { "epoch": 1.58, "grad_norm": 1.5834795985479306, "learning_rate": 5.701336343688671e-07, "loss": 0.5379, "step": 2931 }, { "epoch": 1.58, "grad_norm": 1.5071645640720375, "learning_rate": 5.687144170065448e-07, "loss": 0.5295, "step": 2932 }, { "epoch": 1.58, "grad_norm": 1.4463441264597934, "learning_rate": 5.67296741516792e-07, "loss": 0.5248, "step": 2933 }, { "epoch": 1.58, "grad_norm": 1.4777244193014725, "learning_rate": 5.658806090314322e-07, "loss": 0.5396, "step": 2934 }, { "epoch": 1.58, "grad_norm": 1.5545091613535706, "learning_rate": 5.644660206810576e-07, "loss": 0.5196, "step": 2935 }, { "epoch": 1.59, "grad_norm": 1.5638995104572668, "learning_rate": 5.630529775950264e-07, "loss": 0.5324, "step": 2936 }, { "epoch": 1.59, "grad_norm": 1.550067475041645, "learning_rate": 5.616414809014647e-07, "loss": 0.5443, "step": 2937 }, { "epoch": 1.59, "grad_norm": 1.5972659760452799, "learning_rate": 5.602315317272642e-07, "loss": 0.5523, "step": 2938 }, { "epoch": 1.59, "grad_norm": 1.5295045524998263, "learning_rate": 5.588231311980794e-07, "loss": 0.5257, "step": 2939 }, { "epoch": 1.59, "grad_norm": 1.497017052000618, "learning_rate": 5.574162804383293e-07, "loss": 0.514, "step": 2940 }, { "epoch": 1.59, "grad_norm": 1.603074114111661, "learning_rate": 5.560109805711955e-07, "loss": 0.5499, "step": 2941 }, { "epoch": 1.59, "grad_norm": 1.4673411369151155, "learning_rate": 5.546072327186211e-07, "loss": 0.5476, "step": 2942 }, { "epoch": 1.59, "grad_norm": 1.4586529873474938, "learning_rate": 5.532050380013115e-07, "loss": 0.5125, "step": 2943 }, { "epoch": 1.59, "grad_norm": 1.50555176131095, "learning_rate": 5.518043975387302e-07, "loss": 0.5366, "step": 2944 }, { "epoch": 1.59, "grad_norm": 1.517757352305537, "learning_rate": 5.504053124491021e-07, "loss": 0.5556, "step": 2945 }, { "epoch": 1.59, "grad_norm": 1.5002663556640088, "learning_rate": 5.490077838494079e-07, "loss": 0.53, "step": 2946 }, { "epoch": 1.59, "grad_norm": 1.5633774702734846, "learning_rate": 5.476118128553873e-07, "loss": 0.5375, "step": 2947 }, { "epoch": 1.59, "grad_norm": 1.491241121329542, "learning_rate": 5.462174005815352e-07, "loss": 0.5104, "step": 2948 }, { "epoch": 1.59, "grad_norm": 1.5782161377515265, "learning_rate": 5.448245481411041e-07, "loss": 0.5457, "step": 2949 }, { "epoch": 1.59, "grad_norm": 1.5493780822519245, "learning_rate": 5.434332566460989e-07, "loss": 0.5478, "step": 2950 }, { "epoch": 1.59, "grad_norm": 1.5571590193852398, "learning_rate": 5.4204352720728e-07, "loss": 0.5283, "step": 2951 }, { "epoch": 1.59, "grad_norm": 1.5076069490226305, "learning_rate": 5.406553609341586e-07, "loss": 0.5358, "step": 2952 }, { "epoch": 1.59, "grad_norm": 1.5427235924383544, "learning_rate": 5.39268758935001e-07, "loss": 0.5396, "step": 2953 }, { "epoch": 1.6, "grad_norm": 1.5566515649804438, "learning_rate": 5.378837223168218e-07, "loss": 0.5504, "step": 2954 }, { "epoch": 1.6, "grad_norm": 1.4910364882483547, "learning_rate": 5.365002521853882e-07, "loss": 0.5396, "step": 2955 }, { "epoch": 1.6, "grad_norm": 1.4824467011133344, "learning_rate": 5.351183496452148e-07, "loss": 0.5372, "step": 2956 }, { "epoch": 1.6, "grad_norm": 1.5897585380297798, "learning_rate": 5.337380157995656e-07, "loss": 0.501, "step": 2957 }, { "epoch": 1.6, "grad_norm": 1.5380065683803055, "learning_rate": 5.32359251750452e-07, "loss": 0.5377, "step": 2958 }, { "epoch": 1.6, "grad_norm": 1.5369269892510151, "learning_rate": 5.309820585986316e-07, "loss": 0.5502, "step": 2959 }, { "epoch": 1.6, "grad_norm": 1.5239257460326283, "learning_rate": 5.296064374436094e-07, "loss": 0.5407, "step": 2960 }, { "epoch": 1.6, "grad_norm": 1.4963984401811716, "learning_rate": 5.282323893836347e-07, "loss": 0.5157, "step": 2961 }, { "epoch": 1.6, "grad_norm": 1.5170936473476797, "learning_rate": 5.268599155157001e-07, "loss": 0.5285, "step": 2962 }, { "epoch": 1.6, "grad_norm": 1.4922422931751171, "learning_rate": 5.25489016935542e-07, "loss": 0.5338, "step": 2963 }, { "epoch": 1.6, "grad_norm": 1.4776222229413878, "learning_rate": 5.241196947376382e-07, "loss": 0.5372, "step": 2964 }, { "epoch": 1.6, "grad_norm": 1.5010060204579956, "learning_rate": 5.227519500152106e-07, "loss": 0.5439, "step": 2965 }, { "epoch": 1.6, "grad_norm": 1.5259892358724216, "learning_rate": 5.21385783860219e-07, "loss": 0.5459, "step": 2966 }, { "epoch": 1.6, "grad_norm": 1.4644123018951147, "learning_rate": 5.200211973633632e-07, "loss": 0.5138, "step": 2967 }, { "epoch": 1.6, "grad_norm": 1.5058027587857816, "learning_rate": 5.18658191614084e-07, "loss": 0.5449, "step": 2968 }, { "epoch": 1.6, "grad_norm": 1.4827127611027975, "learning_rate": 5.17296767700558e-07, "loss": 0.5281, "step": 2969 }, { "epoch": 1.6, "grad_norm": 1.5101895533833622, "learning_rate": 5.15936926709699e-07, "loss": 0.5331, "step": 2970 }, { "epoch": 1.6, "grad_norm": 1.5567784734557373, "learning_rate": 5.145786697271588e-07, "loss": 0.5339, "step": 2971 }, { "epoch": 1.6, "grad_norm": 1.5283963675840009, "learning_rate": 5.132219978373232e-07, "loss": 0.5556, "step": 2972 }, { "epoch": 1.61, "grad_norm": 1.5196202292781247, "learning_rate": 5.118669121233127e-07, "loss": 0.5454, "step": 2973 }, { "epoch": 1.61, "grad_norm": 1.5686594887964083, "learning_rate": 5.105134136669812e-07, "loss": 0.5321, "step": 2974 }, { "epoch": 1.61, "grad_norm": 1.5143662309498316, "learning_rate": 5.091615035489153e-07, "loss": 0.5192, "step": 2975 }, { "epoch": 1.61, "grad_norm": 1.560501826529351, "learning_rate": 5.078111828484347e-07, "loss": 0.5548, "step": 2976 }, { "epoch": 1.61, "grad_norm": 1.5366033503981245, "learning_rate": 5.064624526435901e-07, "loss": 0.5314, "step": 2977 }, { "epoch": 1.61, "grad_norm": 1.4871623518240051, "learning_rate": 5.051153140111608e-07, "loss": 0.5364, "step": 2978 }, { "epoch": 1.61, "grad_norm": 1.5035424068039458, "learning_rate": 5.037697680266565e-07, "loss": 0.523, "step": 2979 }, { "epoch": 1.61, "grad_norm": 1.5591524720620213, "learning_rate": 5.024258157643153e-07, "loss": 0.5458, "step": 2980 }, { "epoch": 1.61, "grad_norm": 1.5568781014423723, "learning_rate": 5.010834582971019e-07, "loss": 0.5401, "step": 2981 }, { "epoch": 1.61, "grad_norm": 1.5176448131449216, "learning_rate": 4.997426966967106e-07, "loss": 0.5352, "step": 2982 }, { "epoch": 1.61, "grad_norm": 1.5285165637478322, "learning_rate": 4.984035320335581e-07, "loss": 0.5489, "step": 2983 }, { "epoch": 1.61, "grad_norm": 1.5369816917053492, "learning_rate": 4.97065965376789e-07, "loss": 0.5177, "step": 2984 }, { "epoch": 1.61, "grad_norm": 1.5211846650371068, "learning_rate": 4.957299977942704e-07, "loss": 0.5383, "step": 2985 }, { "epoch": 1.61, "grad_norm": 1.4793532626289296, "learning_rate": 4.943956303525935e-07, "loss": 0.5189, "step": 2986 }, { "epoch": 1.61, "grad_norm": 1.5086572090078376, "learning_rate": 4.930628641170707e-07, "loss": 0.5263, "step": 2987 }, { "epoch": 1.61, "grad_norm": 1.510606877964708, "learning_rate": 4.917317001517389e-07, "loss": 0.5253, "step": 2988 }, { "epoch": 1.61, "grad_norm": 1.5513516628772015, "learning_rate": 4.904021395193529e-07, "loss": 0.5382, "step": 2989 }, { "epoch": 1.61, "grad_norm": 1.5079039028016576, "learning_rate": 4.890741832813888e-07, "loss": 0.5243, "step": 2990 }, { "epoch": 1.62, "grad_norm": 1.5122691078929158, "learning_rate": 4.877478324980412e-07, "loss": 0.5258, "step": 2991 }, { "epoch": 1.62, "grad_norm": 1.5106650577733678, "learning_rate": 4.864230882282236e-07, "loss": 0.5187, "step": 2992 }, { "epoch": 1.62, "grad_norm": 1.6660669334025386, "learning_rate": 4.850999515295676e-07, "loss": 0.5666, "step": 2993 }, { "epoch": 1.62, "grad_norm": 1.5727427256255884, "learning_rate": 4.837784234584194e-07, "loss": 0.536, "step": 2994 }, { "epoch": 1.62, "grad_norm": 1.5461950702623766, "learning_rate": 4.824585050698424e-07, "loss": 0.5492, "step": 2995 }, { "epoch": 1.62, "grad_norm": 1.510428143980405, "learning_rate": 4.811401974176141e-07, "loss": 0.5546, "step": 2996 }, { "epoch": 1.62, "grad_norm": 1.4891551442349662, "learning_rate": 4.79823501554226e-07, "loss": 0.5202, "step": 2997 }, { "epoch": 1.62, "grad_norm": 1.5509079590062433, "learning_rate": 4.785084185308845e-07, "loss": 0.5094, "step": 2998 }, { "epoch": 1.62, "grad_norm": 1.5515913418987726, "learning_rate": 4.771949493975053e-07, "loss": 0.5513, "step": 2999 }, { "epoch": 1.62, "grad_norm": 1.5246376601948284, "learning_rate": 4.7588309520271934e-07, "loss": 0.5254, "step": 3000 }, { "epoch": 1.62, "grad_norm": 1.593319959118476, "learning_rate": 4.7457285699386513e-07, "loss": 0.5754, "step": 3001 }, { "epoch": 1.62, "grad_norm": 1.5532884435856955, "learning_rate": 4.732642358169923e-07, "loss": 0.5289, "step": 3002 }, { "epoch": 1.62, "grad_norm": 1.5714144373751482, "learning_rate": 4.7195723271685893e-07, "loss": 0.5333, "step": 3003 }, { "epoch": 1.62, "grad_norm": 1.56877776253858, "learning_rate": 4.706518487369327e-07, "loss": 0.5168, "step": 3004 }, { "epoch": 1.62, "grad_norm": 1.5556915001378047, "learning_rate": 4.693480849193874e-07, "loss": 0.5408, "step": 3005 }, { "epoch": 1.62, "grad_norm": 1.511534850689453, "learning_rate": 4.6804594230510286e-07, "loss": 0.5236, "step": 3006 }, { "epoch": 1.62, "grad_norm": 1.5236235874489656, "learning_rate": 4.6674542193366645e-07, "loss": 0.5305, "step": 3007 }, { "epoch": 1.62, "grad_norm": 1.5358728146954672, "learning_rate": 4.654465248433687e-07, "loss": 0.537, "step": 3008 }, { "epoch": 1.62, "grad_norm": 1.5040137344229243, "learning_rate": 4.641492520712043e-07, "loss": 0.5461, "step": 3009 }, { "epoch": 1.63, "grad_norm": 1.5042049765431278, "learning_rate": 4.628536046528728e-07, "loss": 0.5221, "step": 3010 }, { "epoch": 1.63, "grad_norm": 1.4895820622561362, "learning_rate": 4.615595836227743e-07, "loss": 0.501, "step": 3011 }, { "epoch": 1.63, "grad_norm": 1.4947890002344772, "learning_rate": 4.60267190014011e-07, "loss": 0.5443, "step": 3012 }, { "epoch": 1.63, "grad_norm": 1.5154537204584428, "learning_rate": 4.589764248583861e-07, "loss": 0.535, "step": 3013 }, { "epoch": 1.63, "grad_norm": 1.5677041515007966, "learning_rate": 4.5768728918640175e-07, "loss": 0.5389, "step": 3014 }, { "epoch": 1.63, "grad_norm": 1.4960463994869362, "learning_rate": 4.563997840272602e-07, "loss": 0.5503, "step": 3015 }, { "epoch": 1.63, "grad_norm": 1.5297125316891593, "learning_rate": 4.551139104088628e-07, "loss": 0.5105, "step": 3016 }, { "epoch": 1.63, "grad_norm": 1.4904820735682514, "learning_rate": 4.5382966935780646e-07, "loss": 0.5334, "step": 3017 }, { "epoch": 1.63, "grad_norm": 1.51732233831999, "learning_rate": 4.5254706189938545e-07, "loss": 0.5235, "step": 3018 }, { "epoch": 1.63, "grad_norm": 1.5729949297881718, "learning_rate": 4.512660890575898e-07, "loss": 0.5489, "step": 3019 }, { "epoch": 1.63, "grad_norm": 1.5169066053794233, "learning_rate": 4.499867518551038e-07, "loss": 0.5125, "step": 3020 }, { "epoch": 1.63, "grad_norm": 1.519966159235866, "learning_rate": 4.4870905131330827e-07, "loss": 0.5432, "step": 3021 }, { "epoch": 1.63, "grad_norm": 1.5136951579601985, "learning_rate": 4.4743298845227405e-07, "loss": 0.5209, "step": 3022 }, { "epoch": 1.63, "grad_norm": 1.5221678496313238, "learning_rate": 4.461585642907676e-07, "loss": 0.5298, "step": 3023 }, { "epoch": 1.63, "grad_norm": 1.5782328906506342, "learning_rate": 4.448857798462455e-07, "loss": 0.5379, "step": 3024 }, { "epoch": 1.63, "grad_norm": 1.5346720780363965, "learning_rate": 4.436146361348545e-07, "loss": 0.5455, "step": 3025 }, { "epoch": 1.63, "grad_norm": 1.5205822420723798, "learning_rate": 4.4234513417143356e-07, "loss": 0.5449, "step": 3026 }, { "epoch": 1.63, "grad_norm": 1.5177233372090584, "learning_rate": 4.4107727496950913e-07, "loss": 0.5098, "step": 3027 }, { "epoch": 1.64, "grad_norm": 1.5600345590227114, "learning_rate": 4.3981105954129714e-07, "loss": 0.535, "step": 3028 }, { "epoch": 1.64, "grad_norm": 1.512859003171821, "learning_rate": 4.3854648889770064e-07, "loss": 0.5437, "step": 3029 }, { "epoch": 1.64, "grad_norm": 1.537476968488509, "learning_rate": 4.372835640483089e-07, "loss": 0.5295, "step": 3030 }, { "epoch": 1.64, "grad_norm": 1.5273646010396338, "learning_rate": 4.360222860013991e-07, "loss": 0.5236, "step": 3031 }, { "epoch": 1.64, "grad_norm": 1.5318248856700323, "learning_rate": 4.3476265576393256e-07, "loss": 0.5224, "step": 3032 }, { "epoch": 1.64, "grad_norm": 1.43626953892061, "learning_rate": 4.3350467434155526e-07, "loss": 0.515, "step": 3033 }, { "epoch": 1.64, "grad_norm": 1.5528083904736716, "learning_rate": 4.322483427385962e-07, "loss": 0.5418, "step": 3034 }, { "epoch": 1.64, "grad_norm": 1.4642844045850194, "learning_rate": 4.309936619580682e-07, "loss": 0.5204, "step": 3035 }, { "epoch": 1.64, "grad_norm": 1.5019923282199519, "learning_rate": 4.297406330016643e-07, "loss": 0.5037, "step": 3036 }, { "epoch": 1.64, "grad_norm": 1.5080493973899645, "learning_rate": 4.2848925686976127e-07, "loss": 0.5253, "step": 3037 }, { "epoch": 1.64, "grad_norm": 1.5112826878049774, "learning_rate": 4.272395345614155e-07, "loss": 0.5526, "step": 3038 }, { "epoch": 1.64, "grad_norm": 1.52185038420991, "learning_rate": 4.25991467074362e-07, "loss": 0.5195, "step": 3039 }, { "epoch": 1.64, "grad_norm": 1.535379810047592, "learning_rate": 4.247450554050159e-07, "loss": 0.5284, "step": 3040 }, { "epoch": 1.64, "grad_norm": 1.516526524208046, "learning_rate": 4.235003005484689e-07, "loss": 0.5293, "step": 3041 }, { "epoch": 1.64, "grad_norm": 1.5179873397940475, "learning_rate": 4.2225720349849063e-07, "loss": 0.5594, "step": 3042 }, { "epoch": 1.64, "grad_norm": 1.6009076542270277, "learning_rate": 4.2101576524752885e-07, "loss": 0.5371, "step": 3043 }, { "epoch": 1.64, "grad_norm": 1.487477724363242, "learning_rate": 4.197759867867049e-07, "loss": 0.5353, "step": 3044 }, { "epoch": 1.64, "grad_norm": 1.529356641686603, "learning_rate": 4.185378691058145e-07, "loss": 0.5397, "step": 3045 }, { "epoch": 1.64, "grad_norm": 1.4759818444837227, "learning_rate": 4.1730141319333035e-07, "loss": 0.5377, "step": 3046 }, { "epoch": 1.65, "grad_norm": 1.5034196756400457, "learning_rate": 4.160666200363958e-07, "loss": 0.5547, "step": 3047 }, { "epoch": 1.65, "grad_norm": 1.5105204052692238, "learning_rate": 4.148334906208273e-07, "loss": 0.5553, "step": 3048 }, { "epoch": 1.65, "grad_norm": 1.6147798556222677, "learning_rate": 4.1360202593111435e-07, "loss": 0.5474, "step": 3049 }, { "epoch": 1.65, "grad_norm": 1.4881423091188084, "learning_rate": 4.123722269504157e-07, "loss": 0.539, "step": 3050 }, { "epoch": 1.65, "grad_norm": 1.5395167533648735, "learning_rate": 4.1114409466056107e-07, "loss": 0.5458, "step": 3051 }, { "epoch": 1.65, "grad_norm": 1.5597053860939138, "learning_rate": 4.099176300420485e-07, "loss": 0.4956, "step": 3052 }, { "epoch": 1.65, "grad_norm": 1.5044783129908899, "learning_rate": 4.0869283407404645e-07, "loss": 0.5259, "step": 3053 }, { "epoch": 1.65, "grad_norm": 1.5239988081955478, "learning_rate": 4.0746970773439115e-07, "loss": 0.5303, "step": 3054 }, { "epoch": 1.65, "grad_norm": 1.531967956167543, "learning_rate": 4.062482519995836e-07, "loss": 0.5468, "step": 3055 }, { "epoch": 1.65, "grad_norm": 1.5171074952213761, "learning_rate": 4.0502846784479325e-07, "loss": 0.5299, "step": 3056 }, { "epoch": 1.65, "grad_norm": 1.548672445193952, "learning_rate": 4.0381035624385336e-07, "loss": 0.5548, "step": 3057 }, { "epoch": 1.65, "grad_norm": 1.503663169803186, "learning_rate": 4.0259391816926284e-07, "loss": 0.5196, "step": 3058 }, { "epoch": 1.65, "grad_norm": 1.5026318942899135, "learning_rate": 4.0137915459218494e-07, "loss": 0.5342, "step": 3059 }, { "epoch": 1.65, "grad_norm": 1.542095017738161, "learning_rate": 4.0016606648244555e-07, "loss": 0.545, "step": 3060 }, { "epoch": 1.65, "grad_norm": 1.4604632562913145, "learning_rate": 3.9895465480853186e-07, "loss": 0.5338, "step": 3061 }, { "epoch": 1.65, "grad_norm": 1.5075220279279717, "learning_rate": 3.977449205375947e-07, "loss": 0.5421, "step": 3062 }, { "epoch": 1.65, "grad_norm": 1.5397998814743727, "learning_rate": 3.9653686463544447e-07, "loss": 0.5225, "step": 3063 }, { "epoch": 1.65, "grad_norm": 1.5836086347990859, "learning_rate": 3.9533048806655096e-07, "loss": 0.5201, "step": 3064 }, { "epoch": 1.66, "grad_norm": 1.5263677564612903, "learning_rate": 3.9412579179404524e-07, "loss": 0.5401, "step": 3065 }, { "epoch": 1.66, "grad_norm": 1.4892106885299405, "learning_rate": 3.929227767797153e-07, "loss": 0.5269, "step": 3066 }, { "epoch": 1.66, "grad_norm": 1.4711341121560275, "learning_rate": 3.917214439840075e-07, "loss": 0.5083, "step": 3067 }, { "epoch": 1.66, "grad_norm": 1.5442949716429615, "learning_rate": 3.905217943660247e-07, "loss": 0.5379, "step": 3068 }, { "epoch": 1.66, "grad_norm": 1.454670767577836, "learning_rate": 3.8932382888352547e-07, "loss": 0.5097, "step": 3069 }, { "epoch": 1.66, "grad_norm": 1.5017533282870204, "learning_rate": 3.881275484929256e-07, "loss": 0.5399, "step": 3070 }, { "epoch": 1.66, "grad_norm": 1.5349392976409528, "learning_rate": 3.869329541492953e-07, "loss": 0.5266, "step": 3071 }, { "epoch": 1.66, "grad_norm": 1.610052444165038, "learning_rate": 3.8574004680635686e-07, "loss": 0.5452, "step": 3072 }, { "epoch": 1.66, "grad_norm": 1.5609872544952912, "learning_rate": 3.845488274164871e-07, "loss": 0.5167, "step": 3073 }, { "epoch": 1.66, "grad_norm": 1.580004622313261, "learning_rate": 3.833592969307151e-07, "loss": 0.5364, "step": 3074 }, { "epoch": 1.66, "grad_norm": 1.5792198780096047, "learning_rate": 3.8217145629872054e-07, "loss": 0.5372, "step": 3075 }, { "epoch": 1.66, "grad_norm": 1.5339074922416254, "learning_rate": 3.809853064688357e-07, "loss": 0.5143, "step": 3076 }, { "epoch": 1.66, "grad_norm": 1.5015206191741732, "learning_rate": 3.798008483880425e-07, "loss": 0.5052, "step": 3077 }, { "epoch": 1.66, "grad_norm": 1.5206128479434708, "learning_rate": 3.786180830019717e-07, "loss": 0.5365, "step": 3078 }, { "epoch": 1.66, "grad_norm": 1.562383788536216, "learning_rate": 3.774370112549028e-07, "loss": 0.5298, "step": 3079 }, { "epoch": 1.66, "grad_norm": 1.5237881703077334, "learning_rate": 3.7625763408976305e-07, "loss": 0.5627, "step": 3080 }, { "epoch": 1.66, "grad_norm": 1.52956027374555, "learning_rate": 3.7507995244812636e-07, "loss": 0.564, "step": 3081 }, { "epoch": 1.66, "grad_norm": 1.5018099894295551, "learning_rate": 3.7390396727021465e-07, "loss": 0.5286, "step": 3082 }, { "epoch": 1.66, "grad_norm": 1.6068472730075873, "learning_rate": 3.727296794948942e-07, "loss": 0.5137, "step": 3083 }, { "epoch": 1.67, "grad_norm": 1.535453870071085, "learning_rate": 3.7155709005967544e-07, "loss": 0.5306, "step": 3084 }, { "epoch": 1.67, "grad_norm": 1.5521258383106278, "learning_rate": 3.7038619990071525e-07, "loss": 0.5444, "step": 3085 }, { "epoch": 1.67, "grad_norm": 1.5776498978553903, "learning_rate": 3.6921700995281087e-07, "loss": 0.5287, "step": 3086 }, { "epoch": 1.67, "grad_norm": 1.5786475914815392, "learning_rate": 3.6804952114940504e-07, "loss": 0.5567, "step": 3087 }, { "epoch": 1.67, "grad_norm": 1.5641218653067606, "learning_rate": 3.668837344225806e-07, "loss": 0.5457, "step": 3088 }, { "epoch": 1.67, "grad_norm": 1.478115120684521, "learning_rate": 3.6571965070306193e-07, "loss": 0.529, "step": 3089 }, { "epoch": 1.67, "grad_norm": 1.5293725474255693, "learning_rate": 3.645572709202136e-07, "loss": 0.556, "step": 3090 }, { "epoch": 1.67, "grad_norm": 1.5263606767394562, "learning_rate": 3.6339659600203915e-07, "loss": 0.5374, "step": 3091 }, { "epoch": 1.67, "grad_norm": 1.6138484988437922, "learning_rate": 3.6223762687518315e-07, "loss": 0.558, "step": 3092 }, { "epoch": 1.67, "grad_norm": 1.4817629837435653, "learning_rate": 3.610803644649269e-07, "loss": 0.5327, "step": 3093 }, { "epoch": 1.67, "grad_norm": 1.5200921358792723, "learning_rate": 3.5992480969518936e-07, "loss": 0.5275, "step": 3094 }, { "epoch": 1.67, "grad_norm": 1.5529227033890876, "learning_rate": 3.587709634885256e-07, "loss": 0.5381, "step": 3095 }, { "epoch": 1.67, "grad_norm": 1.5180147734835103, "learning_rate": 3.576188267661271e-07, "loss": 0.5485, "step": 3096 }, { "epoch": 1.67, "grad_norm": 1.4650605130099905, "learning_rate": 3.5646840044782e-07, "loss": 0.5359, "step": 3097 }, { "epoch": 1.67, "grad_norm": 1.5453205870498852, "learning_rate": 3.5531968545206683e-07, "loss": 0.5563, "step": 3098 }, { "epoch": 1.67, "grad_norm": 1.5410757219435545, "learning_rate": 3.5417268269596186e-07, "loss": 0.5634, "step": 3099 }, { "epoch": 1.67, "grad_norm": 1.498411303400729, "learning_rate": 3.530273930952322e-07, "loss": 0.5146, "step": 3100 }, { "epoch": 1.67, "grad_norm": 1.4995305077387642, "learning_rate": 3.5188381756423917e-07, "loss": 0.5458, "step": 3101 }, { "epoch": 1.68, "grad_norm": 1.500251692338226, "learning_rate": 3.5074195701597423e-07, "loss": 0.5353, "step": 3102 }, { "epoch": 1.68, "grad_norm": 1.5126025851067857, "learning_rate": 3.4960181236205895e-07, "loss": 0.5249, "step": 3103 }, { "epoch": 1.68, "grad_norm": 1.5073692225361386, "learning_rate": 3.4846338451274746e-07, "loss": 0.5327, "step": 3104 }, { "epoch": 1.68, "grad_norm": 1.5169556575455145, "learning_rate": 3.4732667437692075e-07, "loss": 0.5512, "step": 3105 }, { "epoch": 1.68, "grad_norm": 1.5420065418401565, "learning_rate": 3.461916828620898e-07, "loss": 0.5341, "step": 3106 }, { "epoch": 1.68, "grad_norm": 1.5213696224459479, "learning_rate": 3.4505841087439264e-07, "loss": 0.5472, "step": 3107 }, { "epoch": 1.68, "grad_norm": 1.5584100084589914, "learning_rate": 3.439268593185957e-07, "loss": 0.5376, "step": 3108 }, { "epoch": 1.68, "grad_norm": 1.4557297870599046, "learning_rate": 3.427970290980906e-07, "loss": 0.5217, "step": 3109 }, { "epoch": 1.68, "grad_norm": 1.477534122898685, "learning_rate": 3.4166892111489575e-07, "loss": 0.5559, "step": 3110 }, { "epoch": 1.68, "grad_norm": 1.5495053153169542, "learning_rate": 3.4054253626965404e-07, "loss": 0.5476, "step": 3111 }, { "epoch": 1.68, "grad_norm": 1.545096204987173, "learning_rate": 3.3941787546163256e-07, "loss": 0.5441, "step": 3112 }, { "epoch": 1.68, "grad_norm": 1.4792341168445076, "learning_rate": 3.3829493958872137e-07, "loss": 0.5172, "step": 3113 }, { "epoch": 1.68, "grad_norm": 1.5185711982111156, "learning_rate": 3.371737295474359e-07, "loss": 0.5305, "step": 3114 }, { "epoch": 1.68, "grad_norm": 1.5673561432504457, "learning_rate": 3.360542462329103e-07, "loss": 0.5319, "step": 3115 }, { "epoch": 1.68, "grad_norm": 1.5610727740667043, "learning_rate": 3.3493649053890325e-07, "loss": 0.5274, "step": 3116 }, { "epoch": 1.68, "grad_norm": 1.54626109922468, "learning_rate": 3.338204633577924e-07, "loss": 0.5467, "step": 3117 }, { "epoch": 1.68, "grad_norm": 1.4912671952159016, "learning_rate": 3.327061655805755e-07, "loss": 0.5301, "step": 3118 }, { "epoch": 1.68, "grad_norm": 1.5291750783060458, "learning_rate": 3.315935980968696e-07, "loss": 0.5172, "step": 3119 }, { "epoch": 1.68, "grad_norm": 1.5131667997731209, "learning_rate": 3.3048276179491135e-07, "loss": 0.5136, "step": 3120 }, { "epoch": 1.69, "grad_norm": 1.5247511829063, "learning_rate": 3.293736575615547e-07, "loss": 0.5399, "step": 3121 }, { "epoch": 1.69, "grad_norm": 1.4817085398455612, "learning_rate": 3.2826628628226996e-07, "loss": 0.5162, "step": 3122 }, { "epoch": 1.69, "grad_norm": 1.5183343447285615, "learning_rate": 3.271606488411447e-07, "loss": 0.5276, "step": 3123 }, { "epoch": 1.69, "grad_norm": 1.5258610919894993, "learning_rate": 3.260567461208827e-07, "loss": 0.5288, "step": 3124 }, { "epoch": 1.69, "grad_norm": 1.5204594399321998, "learning_rate": 3.2495457900280134e-07, "loss": 0.561, "step": 3125 }, { "epoch": 1.69, "grad_norm": 1.4901265428312949, "learning_rate": 3.238541483668345e-07, "loss": 0.5151, "step": 3126 }, { "epoch": 1.69, "grad_norm": 1.5077009104210173, "learning_rate": 3.2275545509152794e-07, "loss": 0.5331, "step": 3127 }, { "epoch": 1.69, "grad_norm": 1.5491991174506474, "learning_rate": 3.216585000540409e-07, "loss": 0.5111, "step": 3128 }, { "epoch": 1.69, "grad_norm": 1.58765405862487, "learning_rate": 3.2056328413014456e-07, "loss": 0.5589, "step": 3129 }, { "epoch": 1.69, "grad_norm": 1.49297838719084, "learning_rate": 3.1946980819422183e-07, "loss": 0.5231, "step": 3130 }, { "epoch": 1.69, "grad_norm": 1.5357440156998057, "learning_rate": 3.18378073119267e-07, "loss": 0.5391, "step": 3131 }, { "epoch": 1.69, "grad_norm": 1.5575377899418255, "learning_rate": 3.172880797768849e-07, "loss": 0.5444, "step": 3132 }, { "epoch": 1.69, "grad_norm": 1.5467731904174702, "learning_rate": 3.161998290372881e-07, "loss": 0.535, "step": 3133 }, { "epoch": 1.69, "grad_norm": 1.5406430067676957, "learning_rate": 3.151133217692992e-07, "loss": 0.5542, "step": 3134 }, { "epoch": 1.69, "grad_norm": 1.589232475764202, "learning_rate": 3.1402855884034856e-07, "loss": 0.5439, "step": 3135 }, { "epoch": 1.69, "grad_norm": 1.5308532924623568, "learning_rate": 3.129455411164731e-07, "loss": 0.5401, "step": 3136 }, { "epoch": 1.69, "grad_norm": 1.4906647055052225, "learning_rate": 3.1186426946231864e-07, "loss": 0.5436, "step": 3137 }, { "epoch": 1.69, "grad_norm": 1.4999316946876198, "learning_rate": 3.1078474474113497e-07, "loss": 0.5325, "step": 3138 }, { "epoch": 1.7, "grad_norm": 1.5147584240721652, "learning_rate": 3.097069678147774e-07, "loss": 0.5394, "step": 3139 }, { "epoch": 1.7, "grad_norm": 1.5395239240440894, "learning_rate": 3.0863093954370706e-07, "loss": 0.5394, "step": 3140 }, { "epoch": 1.7, "grad_norm": 1.5512804586502285, "learning_rate": 3.075566607869876e-07, "loss": 0.5436, "step": 3141 }, { "epoch": 1.7, "grad_norm": 1.4444207593634597, "learning_rate": 3.064841324022866e-07, "loss": 0.5439, "step": 3142 }, { "epoch": 1.7, "grad_norm": 1.4946747986329698, "learning_rate": 3.054133552458749e-07, "loss": 0.5244, "step": 3143 }, { "epoch": 1.7, "grad_norm": 1.5229410555988616, "learning_rate": 3.04344330172624e-07, "loss": 0.5227, "step": 3144 }, { "epoch": 1.7, "grad_norm": 1.553616455089816, "learning_rate": 3.0327705803600697e-07, "loss": 0.5439, "step": 3145 }, { "epoch": 1.7, "grad_norm": 1.494878049732479, "learning_rate": 3.0221153968809704e-07, "loss": 0.5402, "step": 3146 }, { "epoch": 1.7, "grad_norm": 1.5115649589624762, "learning_rate": 3.0114777597956835e-07, "loss": 0.5321, "step": 3147 }, { "epoch": 1.7, "grad_norm": 1.4851038576699278, "learning_rate": 3.000857677596938e-07, "loss": 0.5242, "step": 3148 }, { "epoch": 1.7, "grad_norm": 1.5294616925366258, "learning_rate": 2.9902551587634445e-07, "loss": 0.5417, "step": 3149 }, { "epoch": 1.7, "grad_norm": 1.529582351184609, "learning_rate": 2.9796702117598884e-07, "loss": 0.5303, "step": 3150 }, { "epoch": 1.7, "grad_norm": 1.5235127964230288, "learning_rate": 2.969102845036934e-07, "loss": 0.517, "step": 3151 }, { "epoch": 1.7, "grad_norm": 1.5539298691316603, "learning_rate": 2.9585530670312e-07, "loss": 0.5429, "step": 3152 }, { "epoch": 1.7, "grad_norm": 1.4808087206869514, "learning_rate": 2.948020886165279e-07, "loss": 0.5158, "step": 3153 }, { "epoch": 1.7, "grad_norm": 1.560649261886151, "learning_rate": 2.937506310847696e-07, "loss": 0.5464, "step": 3154 }, { "epoch": 1.7, "grad_norm": 1.525717594268756, "learning_rate": 2.92700934947294e-07, "loss": 0.536, "step": 3155 }, { "epoch": 1.7, "grad_norm": 1.4995669882854614, "learning_rate": 2.91653001042142e-07, "loss": 0.5162, "step": 3156 }, { "epoch": 1.7, "grad_norm": 1.557947888969116, "learning_rate": 2.9060683020594867e-07, "loss": 0.5294, "step": 3157 }, { "epoch": 1.71, "grad_norm": 1.5172977687258093, "learning_rate": 2.8956242327394026e-07, "loss": 0.5317, "step": 3158 }, { "epoch": 1.71, "grad_norm": 1.5109480541709588, "learning_rate": 2.885197810799367e-07, "loss": 0.5187, "step": 3159 }, { "epoch": 1.71, "grad_norm": 1.5607717572032938, "learning_rate": 2.874789044563478e-07, "loss": 0.5379, "step": 3160 }, { "epoch": 1.71, "grad_norm": 1.477483530981726, "learning_rate": 2.864397942341737e-07, "loss": 0.5341, "step": 3161 }, { "epoch": 1.71, "grad_norm": 1.5180716499046008, "learning_rate": 2.854024512430043e-07, "loss": 0.5352, "step": 3162 }, { "epoch": 1.71, "grad_norm": 1.4857882527064001, "learning_rate": 2.843668763110194e-07, "loss": 0.5185, "step": 3163 }, { "epoch": 1.71, "grad_norm": 1.5074664184548814, "learning_rate": 2.833330702649861e-07, "loss": 0.52, "step": 3164 }, { "epoch": 1.71, "grad_norm": 1.469372856857798, "learning_rate": 2.8230103393026094e-07, "loss": 0.5266, "step": 3165 }, { "epoch": 1.71, "grad_norm": 1.4825590253442822, "learning_rate": 2.812707681307861e-07, "loss": 0.5293, "step": 3166 }, { "epoch": 1.71, "grad_norm": 1.524639015006615, "learning_rate": 2.802422736890903e-07, "loss": 0.5305, "step": 3167 }, { "epoch": 1.71, "grad_norm": 1.5127447058733725, "learning_rate": 2.792155514262887e-07, "loss": 0.5399, "step": 3168 }, { "epoch": 1.71, "grad_norm": 1.508309020527454, "learning_rate": 2.7819060216208086e-07, "loss": 0.5462, "step": 3169 }, { "epoch": 1.71, "grad_norm": 1.5333283869140326, "learning_rate": 2.771674267147517e-07, "loss": 0.5441, "step": 3170 }, { "epoch": 1.71, "grad_norm": 1.5003195927704365, "learning_rate": 2.761460259011703e-07, "loss": 0.5315, "step": 3171 }, { "epoch": 1.71, "grad_norm": 1.6002715299517352, "learning_rate": 2.751264005367876e-07, "loss": 0.5544, "step": 3172 }, { "epoch": 1.71, "grad_norm": 1.5524442227416382, "learning_rate": 2.741085514356379e-07, "loss": 0.5448, "step": 3173 }, { "epoch": 1.71, "grad_norm": 1.484248611858518, "learning_rate": 2.7309247941033623e-07, "loss": 0.5328, "step": 3174 }, { "epoch": 1.71, "grad_norm": 1.569609638769087, "learning_rate": 2.720781852720816e-07, "loss": 0.5328, "step": 3175 }, { "epoch": 1.72, "grad_norm": 1.5119899722347723, "learning_rate": 2.7106566983065076e-07, "loss": 0.5177, "step": 3176 }, { "epoch": 1.72, "grad_norm": 1.5729082452114496, "learning_rate": 2.700549338944014e-07, "loss": 0.5353, "step": 3177 }, { "epoch": 1.72, "grad_norm": 1.517364050105431, "learning_rate": 2.6904597827027144e-07, "loss": 0.5206, "step": 3178 }, { "epoch": 1.72, "grad_norm": 1.5095569756690728, "learning_rate": 2.680388037637763e-07, "loss": 0.5139, "step": 3179 }, { "epoch": 1.72, "grad_norm": 1.5347754414873265, "learning_rate": 2.6703341117900905e-07, "loss": 0.5343, "step": 3180 }, { "epoch": 1.72, "grad_norm": 1.5510601361576057, "learning_rate": 2.660298013186421e-07, "loss": 0.5482, "step": 3181 }, { "epoch": 1.72, "grad_norm": 1.569778571646343, "learning_rate": 2.650279749839227e-07, "loss": 0.5441, "step": 3182 }, { "epoch": 1.72, "grad_norm": 1.501334698182888, "learning_rate": 2.6402793297467476e-07, "loss": 0.537, "step": 3183 }, { "epoch": 1.72, "grad_norm": 1.550307819644176, "learning_rate": 2.630296760892978e-07, "loss": 0.5519, "step": 3184 }, { "epoch": 1.72, "grad_norm": 1.4907881608008007, "learning_rate": 2.6203320512476595e-07, "loss": 0.5694, "step": 3185 }, { "epoch": 1.72, "grad_norm": 1.4462594174244303, "learning_rate": 2.6103852087662753e-07, "loss": 0.5499, "step": 3186 }, { "epoch": 1.72, "grad_norm": 1.5631258036821536, "learning_rate": 2.6004562413900537e-07, "loss": 0.5652, "step": 3187 }, { "epoch": 1.72, "grad_norm": 1.5593661477634508, "learning_rate": 2.590545157045937e-07, "loss": 0.55, "step": 3188 }, { "epoch": 1.72, "grad_norm": 1.4832601162041876, "learning_rate": 2.580651963646602e-07, "loss": 0.5334, "step": 3189 }, { "epoch": 1.72, "grad_norm": 1.569352276494696, "learning_rate": 2.57077666909043e-07, "loss": 0.5055, "step": 3190 }, { "epoch": 1.72, "grad_norm": 1.501546296934881, "learning_rate": 2.56091928126152e-07, "loss": 0.5006, "step": 3191 }, { "epoch": 1.72, "grad_norm": 1.5851457138898248, "learning_rate": 2.5510798080296827e-07, "loss": 0.5619, "step": 3192 }, { "epoch": 1.72, "grad_norm": 1.5622075027914493, "learning_rate": 2.541258257250406e-07, "loss": 0.5402, "step": 3193 }, { "epoch": 1.72, "grad_norm": 1.5513273563953336, "learning_rate": 2.531454636764896e-07, "loss": 0.5591, "step": 3194 }, { "epoch": 1.73, "grad_norm": 1.52547649522814, "learning_rate": 2.5216689544000193e-07, "loss": 0.5466, "step": 3195 }, { "epoch": 1.73, "grad_norm": 1.6111331309750754, "learning_rate": 2.511901217968332e-07, "loss": 0.545, "step": 3196 }, { "epoch": 1.73, "grad_norm": 1.6036911036630666, "learning_rate": 2.5021514352680577e-07, "loss": 0.5241, "step": 3197 }, { "epoch": 1.73, "grad_norm": 1.5296911713525367, "learning_rate": 2.4924196140831027e-07, "loss": 0.5228, "step": 3198 }, { "epoch": 1.73, "grad_norm": 1.5315576357480138, "learning_rate": 2.482705762183013e-07, "loss": 0.528, "step": 3199 }, { "epoch": 1.73, "grad_norm": 1.5372201695900665, "learning_rate": 2.4730098873229967e-07, "loss": 0.5619, "step": 3200 }, { "epoch": 1.73, "grad_norm": 1.5080058028949508, "learning_rate": 2.4633319972439064e-07, "loss": 0.5291, "step": 3201 }, { "epoch": 1.73, "grad_norm": 1.4686027623915736, "learning_rate": 2.45367209967225e-07, "loss": 0.539, "step": 3202 }, { "epoch": 1.73, "grad_norm": 1.5076056882685516, "learning_rate": 2.4440302023201496e-07, "loss": 0.5075, "step": 3203 }, { "epoch": 1.73, "grad_norm": 1.5831406735455846, "learning_rate": 2.434406312885376e-07, "loss": 0.5309, "step": 3204 }, { "epoch": 1.73, "grad_norm": 1.55981139554718, "learning_rate": 2.4248004390513113e-07, "loss": 0.5334, "step": 3205 }, { "epoch": 1.73, "grad_norm": 1.5607797090795938, "learning_rate": 2.415212588486959e-07, "loss": 0.5167, "step": 3206 }, { "epoch": 1.73, "grad_norm": 1.4911613698805732, "learning_rate": 2.405642768846925e-07, "loss": 0.5046, "step": 3207 }, { "epoch": 1.73, "grad_norm": 1.5013756866069075, "learning_rate": 2.396090987771435e-07, "loss": 0.5239, "step": 3208 }, { "epoch": 1.73, "grad_norm": 1.5170324524899408, "learning_rate": 2.3865572528862986e-07, "loss": 0.5525, "step": 3209 }, { "epoch": 1.73, "grad_norm": 1.5598510814533373, "learning_rate": 2.3770415718029349e-07, "loss": 0.5295, "step": 3210 }, { "epoch": 1.73, "grad_norm": 1.5053815710116991, "learning_rate": 2.3675439521183313e-07, "loss": 0.5485, "step": 3211 }, { "epoch": 1.73, "grad_norm": 1.4773041376032992, "learning_rate": 2.3580644014150667e-07, "loss": 0.5031, "step": 3212 }, { "epoch": 1.74, "grad_norm": 1.5224982310298063, "learning_rate": 2.3486029272612842e-07, "loss": 0.5265, "step": 3213 }, { "epoch": 1.74, "grad_norm": 1.5218444599901642, "learning_rate": 2.339159537210714e-07, "loss": 0.5337, "step": 3214 }, { "epoch": 1.74, "grad_norm": 1.5539108135654682, "learning_rate": 2.3297342388026306e-07, "loss": 0.5406, "step": 3215 }, { "epoch": 1.74, "grad_norm": 1.5171618541298646, "learning_rate": 2.320327039561865e-07, "loss": 0.5125, "step": 3216 }, { "epoch": 1.74, "grad_norm": 1.5034176323078168, "learning_rate": 2.3109379469988147e-07, "loss": 0.5223, "step": 3217 }, { "epoch": 1.74, "grad_norm": 1.515292716966931, "learning_rate": 2.3015669686094088e-07, "loss": 0.5221, "step": 3218 }, { "epoch": 1.74, "grad_norm": 1.4955031286298377, "learning_rate": 2.29221411187511e-07, "loss": 0.5243, "step": 3219 }, { "epoch": 1.74, "grad_norm": 1.4991851437382253, "learning_rate": 2.2828793842629315e-07, "loss": 0.5195, "step": 3220 }, { "epoch": 1.74, "grad_norm": 1.5240258856394462, "learning_rate": 2.2735627932253923e-07, "loss": 0.5362, "step": 3221 }, { "epoch": 1.74, "grad_norm": 1.519583035626743, "learning_rate": 2.2642643462005454e-07, "loss": 0.5237, "step": 3222 }, { "epoch": 1.74, "grad_norm": 1.4929663564275217, "learning_rate": 2.2549840506119526e-07, "loss": 0.5274, "step": 3223 }, { "epoch": 1.74, "grad_norm": 1.5012462440025776, "learning_rate": 2.2457219138686815e-07, "loss": 0.542, "step": 3224 }, { "epoch": 1.74, "grad_norm": 1.5335311695429583, "learning_rate": 2.236477943365309e-07, "loss": 0.5408, "step": 3225 }, { "epoch": 1.74, "grad_norm": 1.533366192543834, "learning_rate": 2.2272521464819147e-07, "loss": 0.5327, "step": 3226 }, { "epoch": 1.74, "grad_norm": 1.5729950652483882, "learning_rate": 2.2180445305840514e-07, "loss": 0.5528, "step": 3227 }, { "epoch": 1.74, "grad_norm": 1.5174867539341557, "learning_rate": 2.2088551030227668e-07, "loss": 0.5179, "step": 3228 }, { "epoch": 1.74, "grad_norm": 1.5331561946500343, "learning_rate": 2.1996838711345864e-07, "loss": 0.5076, "step": 3229 }, { "epoch": 1.74, "grad_norm": 1.532453149313305, "learning_rate": 2.190530842241509e-07, "loss": 0.5302, "step": 3230 }, { "epoch": 1.74, "grad_norm": 1.538025211633109, "learning_rate": 2.181396023651003e-07, "loss": 0.5391, "step": 3231 }, { "epoch": 1.75, "grad_norm": 1.576457132451282, "learning_rate": 2.17227942265599e-07, "loss": 0.5402, "step": 3232 }, { "epoch": 1.75, "grad_norm": 1.499031169107865, "learning_rate": 2.1631810465348653e-07, "loss": 0.5133, "step": 3233 }, { "epoch": 1.75, "grad_norm": 1.5778953962295088, "learning_rate": 2.1541009025514536e-07, "loss": 0.5555, "step": 3234 }, { "epoch": 1.75, "grad_norm": 1.5091292285057343, "learning_rate": 2.145038997955029e-07, "loss": 0.5174, "step": 3235 }, { "epoch": 1.75, "grad_norm": 1.501095107599011, "learning_rate": 2.135995339980318e-07, "loss": 0.5593, "step": 3236 }, { "epoch": 1.75, "grad_norm": 1.4632811103030323, "learning_rate": 2.1269699358474617e-07, "loss": 0.5092, "step": 3237 }, { "epoch": 1.75, "grad_norm": 1.5829526548607902, "learning_rate": 2.117962792762035e-07, "loss": 0.5324, "step": 3238 }, { "epoch": 1.75, "grad_norm": 1.4901227993907071, "learning_rate": 2.1089739179150359e-07, "loss": 0.5067, "step": 3239 }, { "epoch": 1.75, "grad_norm": 1.568407692194484, "learning_rate": 2.100003318482871e-07, "loss": 0.5478, "step": 3240 }, { "epoch": 1.75, "grad_norm": 1.492792256513661, "learning_rate": 2.0910510016273672e-07, "loss": 0.5309, "step": 3241 }, { "epoch": 1.75, "grad_norm": 1.5335845579171787, "learning_rate": 2.0821169744957486e-07, "loss": 0.5499, "step": 3242 }, { "epoch": 1.75, "grad_norm": 1.5263783030481444, "learning_rate": 2.073201244220635e-07, "loss": 0.5505, "step": 3243 }, { "epoch": 1.75, "grad_norm": 1.5553721731280692, "learning_rate": 2.0643038179200464e-07, "loss": 0.5502, "step": 3244 }, { "epoch": 1.75, "grad_norm": 1.5164722094461205, "learning_rate": 2.0554247026973784e-07, "loss": 0.5416, "step": 3245 }, { "epoch": 1.75, "grad_norm": 1.4893965151226372, "learning_rate": 2.0465639056414106e-07, "loss": 0.5169, "step": 3246 }, { "epoch": 1.75, "grad_norm": 1.4420657227444655, "learning_rate": 2.037721433826309e-07, "loss": 0.5295, "step": 3247 }, { "epoch": 1.75, "grad_norm": 1.5430984342874174, "learning_rate": 2.0288972943116048e-07, "loss": 0.5377, "step": 3248 }, { "epoch": 1.75, "grad_norm": 1.5573582457966673, "learning_rate": 2.0200914941421817e-07, "loss": 0.5308, "step": 3249 }, { "epoch": 1.76, "grad_norm": 1.547647018774257, "learning_rate": 2.011304040348297e-07, "loss": 0.523, "step": 3250 }, { "epoch": 1.76, "grad_norm": 1.534993775994673, "learning_rate": 2.0025349399455496e-07, "loss": 0.5352, "step": 3251 }, { "epoch": 1.76, "grad_norm": 1.5358596305178644, "learning_rate": 1.9937841999348866e-07, "loss": 0.5358, "step": 3252 }, { "epoch": 1.76, "grad_norm": 1.4921130598684287, "learning_rate": 1.9850518273026116e-07, "loss": 0.5287, "step": 3253 }, { "epoch": 1.76, "grad_norm": 1.5514676669540908, "learning_rate": 1.9763378290203443e-07, "loss": 0.5419, "step": 3254 }, { "epoch": 1.76, "grad_norm": 1.532998546948384, "learning_rate": 1.9676422120450455e-07, "loss": 0.5417, "step": 3255 }, { "epoch": 1.76, "grad_norm": 1.5524171876973771, "learning_rate": 1.9589649833190033e-07, "loss": 0.5246, "step": 3256 }, { "epoch": 1.76, "grad_norm": 1.5855925843514471, "learning_rate": 1.9503061497698177e-07, "loss": 0.5483, "step": 3257 }, { "epoch": 1.76, "grad_norm": 1.468972872399596, "learning_rate": 1.9416657183104038e-07, "loss": 0.5399, "step": 3258 }, { "epoch": 1.76, "grad_norm": 1.4767638417094713, "learning_rate": 1.9330436958389958e-07, "loss": 0.526, "step": 3259 }, { "epoch": 1.76, "grad_norm": 1.4737236107367169, "learning_rate": 1.9244400892391157e-07, "loss": 0.5202, "step": 3260 }, { "epoch": 1.76, "grad_norm": 1.5543557949769022, "learning_rate": 1.915854905379594e-07, "loss": 0.5155, "step": 3261 }, { "epoch": 1.76, "grad_norm": 1.5308623341717296, "learning_rate": 1.9072881511145392e-07, "loss": 0.5256, "step": 3262 }, { "epoch": 1.76, "grad_norm": 1.622897178149009, "learning_rate": 1.8987398332833696e-07, "loss": 0.552, "step": 3263 }, { "epoch": 1.76, "grad_norm": 1.5704892919056832, "learning_rate": 1.8902099587107592e-07, "loss": 0.518, "step": 3264 }, { "epoch": 1.76, "grad_norm": 1.5432306107289924, "learning_rate": 1.8816985342066774e-07, "loss": 0.5259, "step": 3265 }, { "epoch": 1.76, "grad_norm": 1.535270141830991, "learning_rate": 1.8732055665663523e-07, "loss": 0.5357, "step": 3266 }, { "epoch": 1.76, "grad_norm": 1.5076672662748642, "learning_rate": 1.8647310625702796e-07, "loss": 0.5471, "step": 3267 }, { "epoch": 1.76, "grad_norm": 1.5606469394091476, "learning_rate": 1.8562750289842084e-07, "loss": 0.5339, "step": 3268 }, { "epoch": 1.77, "grad_norm": 1.582572967973636, "learning_rate": 1.847837472559158e-07, "loss": 0.5341, "step": 3269 }, { "epoch": 1.77, "grad_norm": 1.4902803671533498, "learning_rate": 1.8394184000313815e-07, "loss": 0.4928, "step": 3270 }, { "epoch": 1.77, "grad_norm": 1.4919419662363766, "learning_rate": 1.831017818122377e-07, "loss": 0.5345, "step": 3271 }, { "epoch": 1.77, "grad_norm": 1.558537687498934, "learning_rate": 1.8226357335388855e-07, "loss": 0.5403, "step": 3272 }, { "epoch": 1.77, "grad_norm": 1.5213618520700014, "learning_rate": 1.814272152972879e-07, "loss": 0.5269, "step": 3273 }, { "epoch": 1.77, "grad_norm": 1.5484478890740887, "learning_rate": 1.8059270831015467e-07, "loss": 0.5394, "step": 3274 }, { "epoch": 1.77, "grad_norm": 1.5036854494913128, "learning_rate": 1.7976005305873207e-07, "loss": 0.5477, "step": 3275 }, { "epoch": 1.77, "grad_norm": 1.5515864986462202, "learning_rate": 1.78929250207783e-07, "loss": 0.5325, "step": 3276 }, { "epoch": 1.77, "grad_norm": 1.5563563621077092, "learning_rate": 1.7810030042059195e-07, "loss": 0.5476, "step": 3277 }, { "epoch": 1.77, "grad_norm": 1.515210668165353, "learning_rate": 1.772732043589645e-07, "loss": 0.5389, "step": 3278 }, { "epoch": 1.77, "grad_norm": 1.6184766237379573, "learning_rate": 1.7644796268322523e-07, "loss": 0.543, "step": 3279 }, { "epoch": 1.77, "grad_norm": 1.5554896448365394, "learning_rate": 1.7562457605221962e-07, "loss": 0.5496, "step": 3280 }, { "epoch": 1.77, "grad_norm": 1.496124250973499, "learning_rate": 1.748030451233121e-07, "loss": 0.5426, "step": 3281 }, { "epoch": 1.77, "grad_norm": 1.5364455247234798, "learning_rate": 1.7398337055238385e-07, "loss": 0.5355, "step": 3282 }, { "epoch": 1.77, "grad_norm": 1.5495628420613028, "learning_rate": 1.7316555299383558e-07, "loss": 0.5319, "step": 3283 }, { "epoch": 1.77, "grad_norm": 1.514025194702398, "learning_rate": 1.7234959310058501e-07, "loss": 0.5407, "step": 3284 }, { "epoch": 1.77, "grad_norm": 1.5291439703930199, "learning_rate": 1.7153549152406608e-07, "loss": 0.5379, "step": 3285 }, { "epoch": 1.77, "grad_norm": 1.4994309198303943, "learning_rate": 1.7072324891423047e-07, "loss": 0.5251, "step": 3286 }, { "epoch": 1.78, "grad_norm": 1.495157695870461, "learning_rate": 1.699128659195448e-07, "loss": 0.5353, "step": 3287 }, { "epoch": 1.78, "grad_norm": 1.5232678644423314, "learning_rate": 1.6910434318699153e-07, "loss": 0.5212, "step": 3288 }, { "epoch": 1.78, "grad_norm": 1.4738609804504252, "learning_rate": 1.682976813620671e-07, "loss": 0.5324, "step": 3289 }, { "epoch": 1.78, "grad_norm": 1.5134021445454096, "learning_rate": 1.6749288108878297e-07, "loss": 0.5388, "step": 3290 }, { "epoch": 1.78, "grad_norm": 1.4716910894831468, "learning_rate": 1.6668994300966385e-07, "loss": 0.5353, "step": 3291 }, { "epoch": 1.78, "grad_norm": 1.5406022865253852, "learning_rate": 1.6588886776574886e-07, "loss": 0.5495, "step": 3292 }, { "epoch": 1.78, "grad_norm": 1.5365358175064299, "learning_rate": 1.650896559965892e-07, "loss": 0.52, "step": 3293 }, { "epoch": 1.78, "grad_norm": 1.5559896656128136, "learning_rate": 1.642923083402473e-07, "loss": 0.5395, "step": 3294 }, { "epoch": 1.78, "grad_norm": 1.5273105743123592, "learning_rate": 1.634968254332994e-07, "loss": 0.5456, "step": 3295 }, { "epoch": 1.78, "grad_norm": 1.5263734187971405, "learning_rate": 1.627032079108312e-07, "loss": 0.519, "step": 3296 }, { "epoch": 1.78, "grad_norm": 1.483482141605053, "learning_rate": 1.6191145640644057e-07, "loss": 0.5539, "step": 3297 }, { "epoch": 1.78, "grad_norm": 1.4964475981159742, "learning_rate": 1.6112157155223463e-07, "loss": 0.5271, "step": 3298 }, { "epoch": 1.78, "grad_norm": 1.5280912331285503, "learning_rate": 1.6033355397883033e-07, "loss": 0.558, "step": 3299 }, { "epoch": 1.78, "grad_norm": 1.4911713093234806, "learning_rate": 1.5954740431535442e-07, "loss": 0.5071, "step": 3300 }, { "epoch": 1.78, "grad_norm": 1.4830801168406738, "learning_rate": 1.5876312318944098e-07, "loss": 0.5379, "step": 3301 }, { "epoch": 1.78, "grad_norm": 1.468039520365842, "learning_rate": 1.579807112272344e-07, "loss": 0.515, "step": 3302 }, { "epoch": 1.78, "grad_norm": 1.510800035850026, "learning_rate": 1.5720016905338558e-07, "loss": 0.5341, "step": 3303 }, { "epoch": 1.78, "grad_norm": 1.51167553957568, "learning_rate": 1.5642149729105272e-07, "loss": 0.5291, "step": 3304 }, { "epoch": 1.78, "grad_norm": 1.580205573180925, "learning_rate": 1.5564469656190045e-07, "loss": 0.5384, "step": 3305 }, { "epoch": 1.79, "grad_norm": 1.6181171683857603, "learning_rate": 1.548697674861005e-07, "loss": 0.5496, "step": 3306 }, { "epoch": 1.79, "grad_norm": 1.5109538198573713, "learning_rate": 1.5409671068232907e-07, "loss": 0.5383, "step": 3307 }, { "epoch": 1.79, "grad_norm": 1.5778272891396083, "learning_rate": 1.5332552676776912e-07, "loss": 0.5465, "step": 3308 }, { "epoch": 1.79, "grad_norm": 1.4670497016493378, "learning_rate": 1.5255621635810737e-07, "loss": 0.5432, "step": 3309 }, { "epoch": 1.79, "grad_norm": 1.5142161050594989, "learning_rate": 1.517887800675344e-07, "loss": 0.5376, "step": 3310 }, { "epoch": 1.79, "grad_norm": 1.536676692630233, "learning_rate": 1.5102321850874653e-07, "loss": 0.5272, "step": 3311 }, { "epoch": 1.79, "grad_norm": 1.5191440234148252, "learning_rate": 1.5025953229294094e-07, "loss": 0.5337, "step": 3312 }, { "epoch": 1.79, "grad_norm": 1.5627707799542534, "learning_rate": 1.4949772202981855e-07, "loss": 0.5371, "step": 3313 }, { "epoch": 1.79, "grad_norm": 1.507995023376064, "learning_rate": 1.487377883275834e-07, "loss": 0.5067, "step": 3314 }, { "epoch": 1.79, "grad_norm": 1.555121051136358, "learning_rate": 1.4797973179294072e-07, "loss": 0.5521, "step": 3315 }, { "epoch": 1.79, "grad_norm": 1.5497945991340936, "learning_rate": 1.4722355303109642e-07, "loss": 0.5315, "step": 3316 }, { "epoch": 1.79, "grad_norm": 1.5358294511265933, "learning_rate": 1.4646925264575756e-07, "loss": 0.5288, "step": 3317 }, { "epoch": 1.79, "grad_norm": 1.457200661607398, "learning_rate": 1.45716831239133e-07, "loss": 0.5256, "step": 3318 }, { "epoch": 1.79, "grad_norm": 1.5375345504263709, "learning_rate": 1.4496628941192913e-07, "loss": 0.532, "step": 3319 }, { "epoch": 1.79, "grad_norm": 1.4967824108769459, "learning_rate": 1.442176277633539e-07, "loss": 0.5367, "step": 3320 }, { "epoch": 1.79, "grad_norm": 1.4880490922646694, "learning_rate": 1.4347084689111307e-07, "loss": 0.5306, "step": 3321 }, { "epoch": 1.79, "grad_norm": 1.4884619129905818, "learning_rate": 1.4272594739141053e-07, "loss": 0.5252, "step": 3322 }, { "epoch": 1.79, "grad_norm": 1.4992724603502148, "learning_rate": 1.4198292985894897e-07, "loss": 0.5406, "step": 3323 }, { "epoch": 1.8, "grad_norm": 1.601682462932139, "learning_rate": 1.4124179488692823e-07, "loss": 0.5486, "step": 3324 }, { "epoch": 1.8, "grad_norm": 1.5284944874063917, "learning_rate": 1.4050254306704507e-07, "loss": 0.5549, "step": 3325 }, { "epoch": 1.8, "grad_norm": 1.523669928062916, "learning_rate": 1.3976517498949323e-07, "loss": 0.5593, "step": 3326 }, { "epoch": 1.8, "grad_norm": 1.4924445110622013, "learning_rate": 1.3902969124296228e-07, "loss": 0.5297, "step": 3327 }, { "epoch": 1.8, "grad_norm": 1.5205228892755718, "learning_rate": 1.3829609241463732e-07, "loss": 0.538, "step": 3328 }, { "epoch": 1.8, "grad_norm": 1.5375279561209563, "learning_rate": 1.3756437909019786e-07, "loss": 0.5458, "step": 3329 }, { "epoch": 1.8, "grad_norm": 1.513861213879298, "learning_rate": 1.3683455185382e-07, "loss": 0.5536, "step": 3330 }, { "epoch": 1.8, "grad_norm": 1.532474855900511, "learning_rate": 1.361066112881726e-07, "loss": 0.5415, "step": 3331 }, { "epoch": 1.8, "grad_norm": 1.5190032597570389, "learning_rate": 1.3538055797441828e-07, "loss": 0.5458, "step": 3332 }, { "epoch": 1.8, "grad_norm": 1.5131069899223417, "learning_rate": 1.3465639249221313e-07, "loss": 0.5266, "step": 3333 }, { "epoch": 1.8, "grad_norm": 1.5104516667342542, "learning_rate": 1.3393411541970658e-07, "loss": 0.5315, "step": 3334 }, { "epoch": 1.8, "grad_norm": 1.503825120499846, "learning_rate": 1.3321372733353988e-07, "loss": 0.539, "step": 3335 }, { "epoch": 1.8, "grad_norm": 1.5122259691772018, "learning_rate": 1.324952288088466e-07, "loss": 0.5135, "step": 3336 }, { "epoch": 1.8, "grad_norm": 1.5498156415896198, "learning_rate": 1.3177862041925116e-07, "loss": 0.5435, "step": 3337 }, { "epoch": 1.8, "grad_norm": 1.5103887388173547, "learning_rate": 1.310639027368693e-07, "loss": 0.5352, "step": 3338 }, { "epoch": 1.8, "grad_norm": 1.5603619184869915, "learning_rate": 1.3035107633230737e-07, "loss": 0.5255, "step": 3339 }, { "epoch": 1.8, "grad_norm": 1.5073397133476838, "learning_rate": 1.2964014177466123e-07, "loss": 0.5287, "step": 3340 }, { "epoch": 1.8, "grad_norm": 1.4987701360019337, "learning_rate": 1.2893109963151684e-07, "loss": 0.5299, "step": 3341 }, { "epoch": 1.8, "grad_norm": 1.522815997906821, "learning_rate": 1.2822395046895032e-07, "loss": 0.5077, "step": 3342 }, { "epoch": 1.81, "grad_norm": 1.5110380952189901, "learning_rate": 1.2751869485152447e-07, "loss": 0.5438, "step": 3343 }, { "epoch": 1.81, "grad_norm": 1.5390298883448912, "learning_rate": 1.2681533334229168e-07, "loss": 0.5289, "step": 3344 }, { "epoch": 1.81, "grad_norm": 1.472296233641574, "learning_rate": 1.2611386650279167e-07, "loss": 0.5209, "step": 3345 }, { "epoch": 1.81, "grad_norm": 1.5715637985231456, "learning_rate": 1.2541429489305145e-07, "loss": 0.5456, "step": 3346 }, { "epoch": 1.81, "grad_norm": 1.545181199046119, "learning_rate": 1.247166190715854e-07, "loss": 0.5138, "step": 3347 }, { "epoch": 1.81, "grad_norm": 1.4987375714863533, "learning_rate": 1.240208395953943e-07, "loss": 0.5157, "step": 3348 }, { "epoch": 1.81, "grad_norm": 1.5227452307440328, "learning_rate": 1.233269570199644e-07, "loss": 0.5675, "step": 3349 }, { "epoch": 1.81, "grad_norm": 1.4455895235684775, "learning_rate": 1.226349718992681e-07, "loss": 0.5194, "step": 3350 }, { "epoch": 1.81, "grad_norm": 1.5935504097630513, "learning_rate": 1.2194488478576266e-07, "loss": 0.5438, "step": 3351 }, { "epoch": 1.81, "grad_norm": 1.5046129631924945, "learning_rate": 1.2125669623038989e-07, "loss": 0.5388, "step": 3352 }, { "epoch": 1.81, "grad_norm": 1.5170798400796972, "learning_rate": 1.2057040678257638e-07, "loss": 0.5396, "step": 3353 }, { "epoch": 1.81, "grad_norm": 1.4984629512002001, "learning_rate": 1.1988601699023244e-07, "loss": 0.5167, "step": 3354 }, { "epoch": 1.81, "grad_norm": 1.4744429643967978, "learning_rate": 1.1920352739975105e-07, "loss": 0.5387, "step": 3355 }, { "epoch": 1.81, "grad_norm": 1.531329129683208, "learning_rate": 1.1852293855600856e-07, "loss": 0.5467, "step": 3356 }, { "epoch": 1.81, "grad_norm": 1.4944882063182243, "learning_rate": 1.1784425100236419e-07, "loss": 0.5371, "step": 3357 }, { "epoch": 1.81, "grad_norm": 1.4752505902704225, "learning_rate": 1.1716746528065926e-07, "loss": 0.5232, "step": 3358 }, { "epoch": 1.81, "grad_norm": 1.4970476620001685, "learning_rate": 1.1649258193121599e-07, "loss": 0.5246, "step": 3359 }, { "epoch": 1.81, "grad_norm": 1.5414466183925166, "learning_rate": 1.1581960149283839e-07, "loss": 0.5331, "step": 3360 }, { "epoch": 1.82, "grad_norm": 1.5040703234721486, "learning_rate": 1.1514852450281111e-07, "loss": 0.5268, "step": 3361 }, { "epoch": 1.82, "grad_norm": 1.5011057109912287, "learning_rate": 1.1447935149689865e-07, "loss": 0.5299, "step": 3362 }, { "epoch": 1.82, "grad_norm": 1.4910175929177494, "learning_rate": 1.138120830093467e-07, "loss": 0.5363, "step": 3363 }, { "epoch": 1.82, "grad_norm": 1.5670228290506063, "learning_rate": 1.1314671957287888e-07, "loss": 0.5427, "step": 3364 }, { "epoch": 1.82, "grad_norm": 1.545610067664737, "learning_rate": 1.1248326171869967e-07, "loss": 0.5398, "step": 3365 }, { "epoch": 1.82, "grad_norm": 1.4986395422808245, "learning_rate": 1.1182170997649067e-07, "loss": 0.5241, "step": 3366 }, { "epoch": 1.82, "grad_norm": 1.5098346753712757, "learning_rate": 1.1116206487441189e-07, "loss": 0.5345, "step": 3367 }, { "epoch": 1.82, "grad_norm": 1.551793271146352, "learning_rate": 1.1050432693910179e-07, "loss": 0.5392, "step": 3368 }, { "epoch": 1.82, "grad_norm": 1.525241785108272, "learning_rate": 1.0984849669567616e-07, "loss": 0.5373, "step": 3369 }, { "epoch": 1.82, "grad_norm": 1.5057019688542832, "learning_rate": 1.0919457466772726e-07, "loss": 0.5467, "step": 3370 }, { "epoch": 1.82, "grad_norm": 1.5588093861304535, "learning_rate": 1.0854256137732416e-07, "loss": 0.5486, "step": 3371 }, { "epoch": 1.82, "grad_norm": 1.5585606380425734, "learning_rate": 1.0789245734501186e-07, "loss": 0.53, "step": 3372 }, { "epoch": 1.82, "grad_norm": 1.5386190287747838, "learning_rate": 1.0724426308981156e-07, "loss": 0.5272, "step": 3373 }, { "epoch": 1.82, "grad_norm": 1.512214223102978, "learning_rate": 1.0659797912921905e-07, "loss": 0.5475, "step": 3374 }, { "epoch": 1.82, "grad_norm": 1.5309836523684632, "learning_rate": 1.0595360597920629e-07, "loss": 0.5389, "step": 3375 }, { "epoch": 1.82, "grad_norm": 1.4822695120976273, "learning_rate": 1.0531114415421817e-07, "loss": 0.5212, "step": 3376 }, { "epoch": 1.82, "grad_norm": 1.573790687212412, "learning_rate": 1.0467059416717412e-07, "loss": 0.5339, "step": 3377 }, { "epoch": 1.82, "grad_norm": 1.5175856070734965, "learning_rate": 1.0403195652946784e-07, "loss": 0.5312, "step": 3378 }, { "epoch": 1.82, "grad_norm": 1.5175604822783177, "learning_rate": 1.0339523175096538e-07, "loss": 0.5353, "step": 3379 }, { "epoch": 1.83, "grad_norm": 1.5413860565229038, "learning_rate": 1.0276042034000649e-07, "loss": 0.5215, "step": 3380 }, { "epoch": 1.83, "grad_norm": 1.5785393600906952, "learning_rate": 1.0212752280340327e-07, "loss": 0.5562, "step": 3381 }, { "epoch": 1.83, "grad_norm": 1.5071323114421673, "learning_rate": 1.0149653964643902e-07, "loss": 0.5348, "step": 3382 }, { "epoch": 1.83, "grad_norm": 1.5257902073673515, "learning_rate": 1.0086747137286912e-07, "loss": 0.5492, "step": 3383 }, { "epoch": 1.83, "grad_norm": 1.4916240723255072, "learning_rate": 1.0024031848492044e-07, "loss": 0.5051, "step": 3384 }, { "epoch": 1.83, "grad_norm": 1.493933758127766, "learning_rate": 9.961508148329052e-08, "loss": 0.5209, "step": 3385 }, { "epoch": 1.83, "grad_norm": 1.5324942400911525, "learning_rate": 9.899176086714729e-08, "loss": 0.5083, "step": 3386 }, { "epoch": 1.83, "grad_norm": 1.498114874280335, "learning_rate": 9.837035713412823e-08, "loss": 0.5337, "step": 3387 }, { "epoch": 1.83, "grad_norm": 1.532435170255003, "learning_rate": 9.775087078034151e-08, "loss": 0.5325, "step": 3388 }, { "epoch": 1.83, "grad_norm": 1.5232075361250972, "learning_rate": 9.7133302300364e-08, "loss": 0.5339, "step": 3389 }, { "epoch": 1.83, "grad_norm": 1.4959542235317076, "learning_rate": 9.651765218724018e-08, "loss": 0.518, "step": 3390 }, { "epoch": 1.83, "grad_norm": 1.4721463205652112, "learning_rate": 9.590392093248552e-08, "loss": 0.5219, "step": 3391 }, { "epoch": 1.83, "grad_norm": 1.4899873006498225, "learning_rate": 9.529210902608138e-08, "loss": 0.5136, "step": 3392 }, { "epoch": 1.83, "grad_norm": 1.5599038716258393, "learning_rate": 9.468221695647789e-08, "loss": 0.532, "step": 3393 }, { "epoch": 1.83, "grad_norm": 1.5142409754935444, "learning_rate": 9.407424521059166e-08, "loss": 0.5559, "step": 3394 }, { "epoch": 1.83, "grad_norm": 1.548876191020073, "learning_rate": 9.346819427380638e-08, "loss": 0.5412, "step": 3395 }, { "epoch": 1.83, "grad_norm": 1.5800882092348736, "learning_rate": 9.286406462997305e-08, "loss": 0.5491, "step": 3396 }, { "epoch": 1.83, "grad_norm": 1.5040644057247694, "learning_rate": 9.226185676140836e-08, "loss": 0.5391, "step": 3397 }, { "epoch": 1.84, "grad_norm": 1.504651508784372, "learning_rate": 9.166157114889412e-08, "loss": 0.5572, "step": 3398 }, { "epoch": 1.84, "grad_norm": 1.4490279172665426, "learning_rate": 9.106320827167809e-08, "loss": 0.5383, "step": 3399 }, { "epoch": 1.84, "grad_norm": 1.5512164916056315, "learning_rate": 9.046676860747255e-08, "loss": 0.5327, "step": 3400 }, { "epoch": 1.84, "grad_norm": 1.5183350596804723, "learning_rate": 8.987225263245442e-08, "loss": 0.5333, "step": 3401 }, { "epoch": 1.84, "grad_norm": 1.539084799224373, "learning_rate": 8.927966082126566e-08, "loss": 0.5444, "step": 3402 }, { "epoch": 1.84, "grad_norm": 1.486294100663676, "learning_rate": 8.868899364701061e-08, "loss": 0.5387, "step": 3403 }, { "epoch": 1.84, "grad_norm": 1.4878983086266562, "learning_rate": 8.810025158125845e-08, "loss": 0.537, "step": 3404 }, { "epoch": 1.84, "grad_norm": 1.534169374493222, "learning_rate": 8.75134350940407e-08, "loss": 0.5247, "step": 3405 }, { "epoch": 1.84, "grad_norm": 1.504761652182076, "learning_rate": 8.692854465385148e-08, "loss": 0.5368, "step": 3406 }, { "epoch": 1.84, "grad_norm": 1.5167314357162618, "learning_rate": 8.634558072764698e-08, "loss": 0.5205, "step": 3407 }, { "epoch": 1.84, "grad_norm": 1.4631696200005364, "learning_rate": 8.57645437808463e-08, "loss": 0.5311, "step": 3408 }, { "epoch": 1.84, "grad_norm": 1.5064492152546434, "learning_rate": 8.518543427732951e-08, "loss": 0.5189, "step": 3409 }, { "epoch": 1.84, "grad_norm": 1.5327204655733309, "learning_rate": 8.460825267943757e-08, "loss": 0.5123, "step": 3410 }, { "epoch": 1.84, "grad_norm": 1.5410714145038449, "learning_rate": 8.403299944797244e-08, "loss": 0.5267, "step": 3411 }, { "epoch": 1.84, "grad_norm": 1.4687969791670414, "learning_rate": 8.345967504219732e-08, "loss": 0.5295, "step": 3412 }, { "epoch": 1.84, "grad_norm": 1.5044299439111644, "learning_rate": 8.28882799198344e-08, "loss": 0.5089, "step": 3413 }, { "epoch": 1.84, "grad_norm": 1.4583133662054464, "learning_rate": 8.231881453706625e-08, "loss": 0.5077, "step": 3414 }, { "epoch": 1.84, "grad_norm": 1.4951916843261506, "learning_rate": 8.175127934853477e-08, "loss": 0.5417, "step": 3415 }, { "epoch": 1.84, "grad_norm": 1.5063853931559739, "learning_rate": 8.11856748073403e-08, "loss": 0.5316, "step": 3416 }, { "epoch": 1.85, "grad_norm": 1.4367843890722698, "learning_rate": 8.062200136504217e-08, "loss": 0.5331, "step": 3417 }, { "epoch": 1.85, "grad_norm": 1.5077950973251126, "learning_rate": 8.006025947165875e-08, "loss": 0.5278, "step": 3418 }, { "epoch": 1.85, "grad_norm": 1.4941724848626283, "learning_rate": 7.950044957566489e-08, "loss": 0.5273, "step": 3419 }, { "epoch": 1.85, "grad_norm": 1.5279625940627146, "learning_rate": 7.894257212399393e-08, "loss": 0.5251, "step": 3420 }, { "epoch": 1.85, "grad_norm": 1.5189003990375927, "learning_rate": 7.838662756203652e-08, "loss": 0.5304, "step": 3421 }, { "epoch": 1.85, "grad_norm": 1.502415930482314, "learning_rate": 7.783261633363986e-08, "loss": 0.537, "step": 3422 }, { "epoch": 1.85, "grad_norm": 1.4892272609803354, "learning_rate": 7.728053888110681e-08, "loss": 0.5188, "step": 3423 }, { "epoch": 1.85, "grad_norm": 1.5296493281667716, "learning_rate": 7.673039564519813e-08, "loss": 0.5109, "step": 3424 }, { "epoch": 1.85, "grad_norm": 1.537472890418263, "learning_rate": 7.618218706512887e-08, "loss": 0.566, "step": 3425 }, { "epoch": 1.85, "grad_norm": 1.5842943911105398, "learning_rate": 7.563591357857003e-08, "loss": 0.555, "step": 3426 }, { "epoch": 1.85, "grad_norm": 1.5423221387550359, "learning_rate": 7.509157562164832e-08, "loss": 0.5318, "step": 3427 }, { "epoch": 1.85, "grad_norm": 1.544335266779868, "learning_rate": 7.454917362894415e-08, "loss": 0.5552, "step": 3428 }, { "epoch": 1.85, "grad_norm": 1.5396592585088316, "learning_rate": 7.40087080334928e-08, "loss": 0.5383, "step": 3429 }, { "epoch": 1.85, "grad_norm": 1.5248412006179375, "learning_rate": 7.347017926678385e-08, "loss": 0.5374, "step": 3430 }, { "epoch": 1.85, "grad_norm": 1.5407859668444266, "learning_rate": 7.293358775876002e-08, "loss": 0.5434, "step": 3431 }, { "epoch": 1.85, "grad_norm": 1.5182704995584373, "learning_rate": 7.239893393781783e-08, "loss": 0.5159, "step": 3432 }, { "epoch": 1.85, "grad_norm": 1.522773288737733, "learning_rate": 7.186621823080637e-08, "loss": 0.5404, "step": 3433 }, { "epoch": 1.85, "grad_norm": 1.5291233384392575, "learning_rate": 7.133544106302791e-08, "loss": 0.5446, "step": 3434 }, { "epoch": 1.86, "grad_norm": 1.5034040692493469, "learning_rate": 7.080660285823687e-08, "loss": 0.5359, "step": 3435 }, { "epoch": 1.86, "grad_norm": 1.50221909732861, "learning_rate": 7.027970403863965e-08, "loss": 0.5332, "step": 3436 }, { "epoch": 1.86, "grad_norm": 1.4933833240015482, "learning_rate": 6.975474502489449e-08, "loss": 0.513, "step": 3437 }, { "epoch": 1.86, "grad_norm": 1.4546609161164976, "learning_rate": 6.923172623611057e-08, "loss": 0.5195, "step": 3438 }, { "epoch": 1.86, "grad_norm": 1.494552801422546, "learning_rate": 6.871064808984834e-08, "loss": 0.5311, "step": 3439 }, { "epoch": 1.86, "grad_norm": 1.493244803019665, "learning_rate": 6.819151100211836e-08, "loss": 0.529, "step": 3440 }, { "epoch": 1.86, "grad_norm": 1.5259300514149738, "learning_rate": 6.767431538738268e-08, "loss": 0.54, "step": 3441 }, { "epoch": 1.86, "grad_norm": 1.514292068442964, "learning_rate": 6.715906165855212e-08, "loss": 0.5363, "step": 3442 }, { "epoch": 1.86, "grad_norm": 1.6076126136919344, "learning_rate": 6.664575022698794e-08, "loss": 0.5285, "step": 3443 }, { "epoch": 1.86, "grad_norm": 1.5097121784757588, "learning_rate": 6.613438150250062e-08, "loss": 0.517, "step": 3444 }, { "epoch": 1.86, "grad_norm": 1.5015461006160244, "learning_rate": 6.562495589334916e-08, "loss": 0.5314, "step": 3445 }, { "epoch": 1.86, "grad_norm": 1.5601015662547264, "learning_rate": 6.511747380624211e-08, "loss": 0.5388, "step": 3446 }, { "epoch": 1.86, "grad_norm": 1.5649290589744003, "learning_rate": 6.461193564633538e-08, "loss": 0.5146, "step": 3447 }, { "epoch": 1.86, "grad_norm": 1.537096996241829, "learning_rate": 6.410834181723363e-08, "loss": 0.5279, "step": 3448 }, { "epoch": 1.86, "grad_norm": 1.5486499380299785, "learning_rate": 6.360669272098885e-08, "loss": 0.5262, "step": 3449 }, { "epoch": 1.86, "grad_norm": 1.523996678006501, "learning_rate": 6.310698875810068e-08, "loss": 0.5359, "step": 3450 }, { "epoch": 1.86, "grad_norm": 1.5509321488539387, "learning_rate": 6.260923032751554e-08, "loss": 0.5539, "step": 3451 }, { "epoch": 1.86, "grad_norm": 1.6332417780547515, "learning_rate": 6.211341782662722e-08, "loss": 0.5683, "step": 3452 }, { "epoch": 1.86, "grad_norm": 1.4908562080404677, "learning_rate": 6.16195516512752e-08, "loss": 0.5305, "step": 3453 }, { "epoch": 1.87, "grad_norm": 1.5014756564261793, "learning_rate": 6.112763219574575e-08, "loss": 0.5517, "step": 3454 }, { "epoch": 1.87, "grad_norm": 1.5646883076058964, "learning_rate": 6.063765985276998e-08, "loss": 0.5274, "step": 3455 }, { "epoch": 1.87, "grad_norm": 1.5951510097926414, "learning_rate": 6.014963501352556e-08, "loss": 0.543, "step": 3456 }, { "epoch": 1.87, "grad_norm": 1.5006924130493586, "learning_rate": 5.966355806763441e-08, "loss": 0.5441, "step": 3457 }, { "epoch": 1.87, "grad_norm": 1.5003246313325842, "learning_rate": 5.9179429403164454e-08, "loss": 0.5194, "step": 3458 }, { "epoch": 1.87, "grad_norm": 1.615453565612747, "learning_rate": 5.8697249406627354e-08, "loss": 0.5251, "step": 3459 }, { "epoch": 1.87, "grad_norm": 1.5429543933866816, "learning_rate": 5.821701846297906e-08, "loss": 0.5349, "step": 3460 }, { "epoch": 1.87, "grad_norm": 1.4867091207366216, "learning_rate": 5.773873695561955e-08, "loss": 0.532, "step": 3461 }, { "epoch": 1.87, "grad_norm": 1.5242059023835839, "learning_rate": 5.726240526639199e-08, "loss": 0.5504, "step": 3462 }, { "epoch": 1.87, "grad_norm": 1.5202043882798715, "learning_rate": 5.67880237755844e-08, "loss": 0.5328, "step": 3463 }, { "epoch": 1.87, "grad_norm": 1.5028210391548937, "learning_rate": 5.631559286192606e-08, "loss": 0.5116, "step": 3464 }, { "epoch": 1.87, "grad_norm": 1.5296173160002902, "learning_rate": 5.5845112902589703e-08, "loss": 0.5342, "step": 3465 }, { "epoch": 1.87, "grad_norm": 1.529930049471618, "learning_rate": 5.537658427319098e-08, "loss": 0.5413, "step": 3466 }, { "epoch": 1.87, "grad_norm": 1.605699080813501, "learning_rate": 5.4910007347786784e-08, "loss": 0.5613, "step": 3467 }, { "epoch": 1.87, "grad_norm": 1.542857353431393, "learning_rate": 5.44453824988761e-08, "loss": 0.5497, "step": 3468 }, { "epoch": 1.87, "grad_norm": 1.5260546096763432, "learning_rate": 5.398271009739997e-08, "loss": 0.5383, "step": 3469 }, { "epoch": 1.87, "grad_norm": 1.598765324321124, "learning_rate": 5.352199051274015e-08, "loss": 0.543, "step": 3470 }, { "epoch": 1.87, "grad_norm": 1.4861161008871098, "learning_rate": 5.3063224112719355e-08, "loss": 0.5122, "step": 3471 }, { "epoch": 1.87, "grad_norm": 1.5213732930002373, "learning_rate": 5.2606411263601e-08, "loss": 0.5123, "step": 3472 }, { "epoch": 1.88, "grad_norm": 1.4929804633784831, "learning_rate": 5.21515523300889e-08, "loss": 0.539, "step": 3473 }, { "epoch": 1.88, "grad_norm": 1.5141803380332792, "learning_rate": 5.169864767532673e-08, "loss": 0.5475, "step": 3474 }, { "epoch": 1.88, "grad_norm": 1.5764799208699516, "learning_rate": 5.12476976608986e-08, "loss": 0.5465, "step": 3475 }, { "epoch": 1.88, "grad_norm": 1.5282580414064568, "learning_rate": 5.0798702646827626e-08, "loss": 0.5283, "step": 3476 }, { "epoch": 1.88, "grad_norm": 1.5159026437417562, "learning_rate": 5.0351662991575677e-08, "loss": 0.5398, "step": 3477 }, { "epoch": 1.88, "grad_norm": 1.5530807149909336, "learning_rate": 4.990657905204421e-08, "loss": 0.5226, "step": 3478 }, { "epoch": 1.88, "grad_norm": 1.5221207267499663, "learning_rate": 4.9463451183573155e-08, "loss": 0.5319, "step": 3479 }, { "epoch": 1.88, "grad_norm": 1.4823966830713025, "learning_rate": 4.9022279739940335e-08, "loss": 0.5497, "step": 3480 }, { "epoch": 1.88, "grad_norm": 1.569242105073898, "learning_rate": 4.8583065073362354e-08, "loss": 0.5413, "step": 3481 }, { "epoch": 1.88, "grad_norm": 1.4915658138240235, "learning_rate": 4.814580753449316e-08, "loss": 0.506, "step": 3482 }, { "epoch": 1.88, "grad_norm": 1.5105840568415214, "learning_rate": 4.7710507472424336e-08, "loss": 0.5524, "step": 3483 }, { "epoch": 1.88, "grad_norm": 1.521515153044988, "learning_rate": 4.7277165234684285e-08, "loss": 0.5397, "step": 3484 }, { "epoch": 1.88, "grad_norm": 1.4886237190418077, "learning_rate": 4.684578116723903e-08, "loss": 0.5237, "step": 3485 }, { "epoch": 1.88, "grad_norm": 1.4911263073259062, "learning_rate": 4.641635561449087e-08, "loss": 0.5166, "step": 3486 }, { "epoch": 1.88, "grad_norm": 1.4609609301496427, "learning_rate": 4.598888891927833e-08, "loss": 0.5114, "step": 3487 }, { "epoch": 1.88, "grad_norm": 1.505584864552631, "learning_rate": 4.55633814228762e-08, "loss": 0.5319, "step": 3488 }, { "epoch": 1.88, "grad_norm": 1.4941178622597366, "learning_rate": 4.513983346499523e-08, "loss": 0.5485, "step": 3489 }, { "epoch": 1.88, "grad_norm": 1.5638078363921148, "learning_rate": 4.4718245383781886e-08, "loss": 0.5437, "step": 3490 }, { "epoch": 1.89, "grad_norm": 1.5079288067018721, "learning_rate": 4.429861751581749e-08, "loss": 0.5065, "step": 3491 }, { "epoch": 1.89, "grad_norm": 1.5614370568911735, "learning_rate": 4.3880950196118764e-08, "loss": 0.5442, "step": 3492 }, { "epoch": 1.89, "grad_norm": 1.5066227034678081, "learning_rate": 4.3465243758137045e-08, "loss": 0.5215, "step": 3493 }, { "epoch": 1.89, "grad_norm": 1.468977974323818, "learning_rate": 4.305149853375823e-08, "loss": 0.5431, "step": 3494 }, { "epoch": 1.89, "grad_norm": 1.5176712499462606, "learning_rate": 4.263971485330198e-08, "loss": 0.5241, "step": 3495 }, { "epoch": 1.89, "grad_norm": 1.5017460958870954, "learning_rate": 4.222989304552283e-08, "loss": 0.5485, "step": 3496 }, { "epoch": 1.89, "grad_norm": 1.5625540376042875, "learning_rate": 4.182203343760849e-08, "loss": 0.5477, "step": 3497 }, { "epoch": 1.89, "grad_norm": 1.554286215959356, "learning_rate": 4.141613635517988e-08, "loss": 0.5143, "step": 3498 }, { "epoch": 1.89, "grad_norm": 1.5003035575995742, "learning_rate": 4.101220212229196e-08, "loss": 0.5311, "step": 3499 }, { "epoch": 1.89, "grad_norm": 1.5114375748503084, "learning_rate": 4.061023106143147e-08, "loss": 0.5476, "step": 3500 }, { "epoch": 1.89, "grad_norm": 1.5190823481587667, "learning_rate": 4.021022349351838e-08, "loss": 0.5176, "step": 3501 }, { "epoch": 1.89, "grad_norm": 1.476058912585976, "learning_rate": 3.98121797379053e-08, "loss": 0.5244, "step": 3502 }, { "epoch": 1.89, "grad_norm": 1.5251027104533803, "learning_rate": 3.941610011237718e-08, "loss": 0.5197, "step": 3503 }, { "epoch": 1.89, "grad_norm": 1.4812566882490288, "learning_rate": 3.902198493314968e-08, "loss": 0.5326, "step": 3504 }, { "epoch": 1.89, "grad_norm": 1.4868905685835723, "learning_rate": 3.862983451487168e-08, "loss": 0.5367, "step": 3505 }, { "epoch": 1.89, "grad_norm": 1.6311091366678747, "learning_rate": 3.82396491706219e-08, "loss": 0.555, "step": 3506 }, { "epoch": 1.89, "grad_norm": 1.5026024332623678, "learning_rate": 3.785142921191198e-08, "loss": 0.542, "step": 3507 }, { "epoch": 1.89, "grad_norm": 1.5407540373001336, "learning_rate": 3.7465174948682904e-08, "loss": 0.523, "step": 3508 }, { "epoch": 1.89, "grad_norm": 1.4893611363125088, "learning_rate": 3.708088668930715e-08, "loss": 0.4968, "step": 3509 }, { "epoch": 1.9, "grad_norm": 1.4995130221015036, "learning_rate": 3.669856474058708e-08, "loss": 0.5381, "step": 3510 }, { "epoch": 1.9, "grad_norm": 1.5243363788839486, "learning_rate": 3.631820940775577e-08, "loss": 0.5363, "step": 3511 }, { "epoch": 1.9, "grad_norm": 1.4977695356548737, "learning_rate": 3.5939820994475574e-08, "loss": 0.5233, "step": 3512 }, { "epoch": 1.9, "grad_norm": 1.5044249392940616, "learning_rate": 3.556339980283929e-08, "loss": 0.5488, "step": 3513 }, { "epoch": 1.9, "grad_norm": 1.5850300818356975, "learning_rate": 3.518894613336876e-08, "loss": 0.555, "step": 3514 }, { "epoch": 1.9, "grad_norm": 1.5326245317161664, "learning_rate": 3.481646028501484e-08, "loss": 0.5361, "step": 3515 }, { "epoch": 1.9, "grad_norm": 1.5411464628302638, "learning_rate": 3.4445942555157706e-08, "loss": 0.5266, "step": 3516 }, { "epoch": 1.9, "grad_norm": 1.5176083434774688, "learning_rate": 3.407739323960574e-08, "loss": 0.5464, "step": 3517 }, { "epoch": 1.9, "grad_norm": 1.5144568200958524, "learning_rate": 3.371081263259662e-08, "loss": 0.5432, "step": 3518 }, { "epoch": 1.9, "grad_norm": 1.5206196103389378, "learning_rate": 3.3346201026795696e-08, "loss": 0.5424, "step": 3519 }, { "epoch": 1.9, "grad_norm": 1.5618011983204765, "learning_rate": 3.298355871329595e-08, "loss": 0.5147, "step": 3520 }, { "epoch": 1.9, "grad_norm": 1.5020252435750048, "learning_rate": 3.262288598161911e-08, "loss": 0.5371, "step": 3521 }, { "epoch": 1.9, "grad_norm": 1.5724741745536428, "learning_rate": 3.2264183119714296e-08, "loss": 0.5456, "step": 3522 }, { "epoch": 1.9, "grad_norm": 1.5182855150206336, "learning_rate": 3.1907450413956595e-08, "loss": 0.5423, "step": 3523 }, { "epoch": 1.9, "grad_norm": 1.4745263049766573, "learning_rate": 3.155268814915041e-08, "loss": 0.5161, "step": 3524 }, { "epoch": 1.9, "grad_norm": 1.532906550039599, "learning_rate": 3.1199896608525014e-08, "loss": 0.5534, "step": 3525 }, { "epoch": 1.9, "grad_norm": 1.544773923681092, "learning_rate": 3.084907607373788e-08, "loss": 0.5394, "step": 3526 }, { "epoch": 1.9, "grad_norm": 1.550123125154269, "learning_rate": 3.050022682487108e-08, "loss": 0.5203, "step": 3527 }, { "epoch": 1.91, "grad_norm": 1.5087424223781916, "learning_rate": 3.0153349140435165e-08, "loss": 0.5141, "step": 3528 }, { "epoch": 1.91, "grad_norm": 1.5366739587694396, "learning_rate": 2.980844329736443e-08, "loss": 0.5302, "step": 3529 }, { "epoch": 1.91, "grad_norm": 1.5803064969462808, "learning_rate": 2.9465509571020845e-08, "loss": 0.5496, "step": 3530 }, { "epoch": 1.91, "grad_norm": 1.5290542919317303, "learning_rate": 2.9124548235190397e-08, "loss": 0.5455, "step": 3531 }, { "epoch": 1.91, "grad_norm": 1.52299599434127, "learning_rate": 2.8785559562085342e-08, "loss": 0.5493, "step": 3532 }, { "epoch": 1.91, "grad_norm": 1.5861718033419436, "learning_rate": 2.8448543822342247e-08, "loss": 0.5288, "step": 3533 }, { "epoch": 1.91, "grad_norm": 1.5129144861137798, "learning_rate": 2.811350128502338e-08, "loss": 0.5118, "step": 3534 }, { "epoch": 1.91, "grad_norm": 1.4578631131015487, "learning_rate": 2.7780432217614785e-08, "loss": 0.5331, "step": 3535 }, { "epoch": 1.91, "grad_norm": 1.5366289705287848, "learning_rate": 2.7449336886028188e-08, "loss": 0.541, "step": 3536 }, { "epoch": 1.91, "grad_norm": 1.5031881822809963, "learning_rate": 2.7120215554598538e-08, "loss": 0.538, "step": 3537 }, { "epoch": 1.91, "grad_norm": 1.4731474483307068, "learning_rate": 2.6793068486084818e-08, "loss": 0.5207, "step": 3538 }, { "epoch": 1.91, "grad_norm": 1.5158941022002779, "learning_rate": 2.646789594167032e-08, "loss": 0.5614, "step": 3539 }, { "epoch": 1.91, "grad_norm": 1.5272554682387607, "learning_rate": 2.6144698180961548e-08, "loss": 0.5261, "step": 3540 }, { "epoch": 1.91, "grad_norm": 1.4969630460975913, "learning_rate": 2.5823475461989046e-08, "loss": 0.523, "step": 3541 }, { "epoch": 1.91, "grad_norm": 1.55943689880302, "learning_rate": 2.5504228041205448e-08, "loss": 0.5181, "step": 3542 }, { "epoch": 1.91, "grad_norm": 1.5597261707550951, "learning_rate": 2.5186956173487152e-08, "loss": 0.5346, "step": 3543 }, { "epoch": 1.91, "grad_norm": 1.5652600809431354, "learning_rate": 2.4871660112133487e-08, "loss": 0.5312, "step": 3544 }, { "epoch": 1.91, "grad_norm": 1.519248040126365, "learning_rate": 2.4558340108865875e-08, "loss": 0.534, "step": 3545 }, { "epoch": 1.91, "grad_norm": 1.4987724666016193, "learning_rate": 2.424699641382866e-08, "loss": 0.5091, "step": 3546 }, { "epoch": 1.92, "grad_norm": 1.546235781762029, "learning_rate": 2.393762927558746e-08, "loss": 0.5456, "step": 3547 }, { "epoch": 1.92, "grad_norm": 1.4984981672357927, "learning_rate": 2.363023894113081e-08, "loss": 0.5266, "step": 3548 }, { "epoch": 1.92, "grad_norm": 1.4718903883491172, "learning_rate": 2.33248256558688e-08, "loss": 0.5385, "step": 3549 }, { "epoch": 1.92, "grad_norm": 1.5221203183417333, "learning_rate": 2.3021389663632487e-08, "loss": 0.5616, "step": 3550 }, { "epoch": 1.92, "grad_norm": 1.551797812356741, "learning_rate": 2.2719931206675317e-08, "loss": 0.5362, "step": 3551 }, { "epoch": 1.92, "grad_norm": 1.502571552058275, "learning_rate": 2.2420450525671155e-08, "loss": 0.5309, "step": 3552 }, { "epoch": 1.92, "grad_norm": 1.548801353171606, "learning_rate": 2.2122947859715415e-08, "loss": 0.5284, "step": 3553 }, { "epoch": 1.92, "grad_norm": 1.540844204426275, "learning_rate": 2.1827423446323938e-08, "loss": 0.5248, "step": 3554 }, { "epoch": 1.92, "grad_norm": 1.5249799629206027, "learning_rate": 2.1533877521433267e-08, "loss": 0.5154, "step": 3555 }, { "epoch": 1.92, "grad_norm": 1.5059819132187582, "learning_rate": 2.1242310319400384e-08, "loss": 0.5316, "step": 3556 }, { "epoch": 1.92, "grad_norm": 1.5063587316910314, "learning_rate": 2.0952722073002974e-08, "loss": 0.5191, "step": 3557 }, { "epoch": 1.92, "grad_norm": 1.5122130117834942, "learning_rate": 2.066511301343832e-08, "loss": 0.5273, "step": 3558 }, { "epoch": 1.92, "grad_norm": 1.520568889795215, "learning_rate": 2.0379483370323306e-08, "loss": 0.5231, "step": 3559 }, { "epoch": 1.92, "grad_norm": 1.5121735688802889, "learning_rate": 2.009583337169524e-08, "loss": 0.5371, "step": 3560 }, { "epoch": 1.92, "grad_norm": 1.5231731917753475, "learning_rate": 1.9814163244010754e-08, "loss": 0.5531, "step": 3561 }, { "epoch": 1.92, "grad_norm": 1.5081679144833855, "learning_rate": 1.9534473212144967e-08, "loss": 0.5464, "step": 3562 }, { "epoch": 1.92, "grad_norm": 1.5332974361154414, "learning_rate": 1.9256763499393704e-08, "loss": 0.5374, "step": 3563 }, { "epoch": 1.92, "grad_norm": 1.5001155830657, "learning_rate": 1.8981034327470727e-08, "loss": 0.5236, "step": 3564 }, { "epoch": 1.93, "grad_norm": 1.5513066750536328, "learning_rate": 1.8707285916508557e-08, "loss": 0.5242, "step": 3565 }, { "epoch": 1.93, "grad_norm": 1.496831156065505, "learning_rate": 1.843551848505848e-08, "loss": 0.5314, "step": 3566 }, { "epoch": 1.93, "grad_norm": 1.5264206150657997, "learning_rate": 1.8165732250090828e-08, "loss": 0.5107, "step": 3567 }, { "epoch": 1.93, "grad_norm": 1.5173338618578884, "learning_rate": 1.789792742699331e-08, "loss": 0.5248, "step": 3568 }, { "epoch": 1.93, "grad_norm": 1.5239435564863095, "learning_rate": 1.76321042295724e-08, "loss": 0.5321, "step": 3569 }, { "epoch": 1.93, "grad_norm": 1.5138134175697686, "learning_rate": 1.736826287005222e-08, "loss": 0.5315, "step": 3570 }, { "epoch": 1.93, "grad_norm": 1.5973480444156383, "learning_rate": 1.7106403559074836e-08, "loss": 0.5576, "step": 3571 }, { "epoch": 1.93, "grad_norm": 1.4571737214081757, "learning_rate": 1.6846526505699402e-08, "loss": 0.5155, "step": 3572 }, { "epoch": 1.93, "grad_norm": 1.424539753038676, "learning_rate": 1.6588631917403285e-08, "loss": 0.5145, "step": 3573 }, { "epoch": 1.93, "grad_norm": 1.5370158792846558, "learning_rate": 1.633272000008068e-08, "loss": 0.5268, "step": 3574 }, { "epoch": 1.93, "grad_norm": 1.4707843804324667, "learning_rate": 1.607879095804288e-08, "loss": 0.5356, "step": 3575 }, { "epoch": 1.93, "grad_norm": 1.491448112121299, "learning_rate": 1.5826844994017986e-08, "loss": 0.5501, "step": 3576 }, { "epoch": 1.93, "grad_norm": 1.4675319952753278, "learning_rate": 1.5576882309151498e-08, "loss": 0.522, "step": 3577 }, { "epoch": 1.93, "grad_norm": 1.5097912406185834, "learning_rate": 1.532890310300461e-08, "loss": 0.5317, "step": 3578 }, { "epoch": 1.93, "grad_norm": 1.5385061596567298, "learning_rate": 1.5082907573555906e-08, "loss": 0.5436, "step": 3579 }, { "epoch": 1.93, "grad_norm": 1.5356216528420876, "learning_rate": 1.4838895917199392e-08, "loss": 0.5434, "step": 3580 }, { "epoch": 1.93, "grad_norm": 1.5279090009107692, "learning_rate": 1.4596868328746183e-08, "loss": 0.5338, "step": 3581 }, { "epoch": 1.93, "grad_norm": 1.5624719247140293, "learning_rate": 1.435682500142227e-08, "loss": 0.5158, "step": 3582 }, { "epoch": 1.93, "grad_norm": 1.5594682578106032, "learning_rate": 1.4118766126870465e-08, "loss": 0.534, "step": 3583 }, { "epoch": 1.94, "grad_norm": 1.5058374133226362, "learning_rate": 1.3882691895148737e-08, "loss": 0.5432, "step": 3584 }, { "epoch": 1.94, "grad_norm": 1.499386678021753, "learning_rate": 1.3648602494730768e-08, "loss": 0.5283, "step": 3585 }, { "epoch": 1.94, "grad_norm": 1.5047329007577406, "learning_rate": 1.3416498112505671e-08, "loss": 0.5362, "step": 3586 }, { "epoch": 1.94, "grad_norm": 1.4816712680596535, "learning_rate": 1.3186378933777166e-08, "loss": 0.5359, "step": 3587 }, { "epoch": 1.94, "grad_norm": 1.489115722565326, "learning_rate": 1.2958245142265235e-08, "loss": 0.5044, "step": 3588 }, { "epoch": 1.94, "grad_norm": 1.5465138366327573, "learning_rate": 1.2732096920103631e-08, "loss": 0.5447, "step": 3589 }, { "epoch": 1.94, "grad_norm": 1.4570807720946775, "learning_rate": 1.2507934447841264e-08, "loss": 0.5197, "step": 3590 }, { "epoch": 1.94, "grad_norm": 1.4816217081211256, "learning_rate": 1.2285757904442475e-08, "loss": 0.5096, "step": 3591 }, { "epoch": 1.94, "grad_norm": 1.5251238609974005, "learning_rate": 1.2065567467284823e-08, "loss": 0.5361, "step": 3592 }, { "epoch": 1.94, "grad_norm": 1.5021084058405751, "learning_rate": 1.1847363312161297e-08, "loss": 0.5429, "step": 3593 }, { "epoch": 1.94, "grad_norm": 1.5144895936838176, "learning_rate": 1.1631145613278105e-08, "loss": 0.5477, "step": 3594 }, { "epoch": 1.94, "grad_norm": 1.5166445912108701, "learning_rate": 1.1416914543256608e-08, "loss": 0.5277, "step": 3595 }, { "epoch": 1.94, "grad_norm": 1.5534077166841016, "learning_rate": 1.120467027313138e-08, "loss": 0.5329, "step": 3596 }, { "epoch": 1.94, "grad_norm": 1.5823373624465136, "learning_rate": 1.0994412972351043e-08, "loss": 0.5558, "step": 3597 }, { "epoch": 1.94, "grad_norm": 1.5085120779789167, "learning_rate": 1.078614280877771e-08, "loss": 0.5166, "step": 3598 }, { "epoch": 1.94, "grad_norm": 1.5257471935500773, "learning_rate": 1.0579859948687543e-08, "loss": 0.532, "step": 3599 }, { "epoch": 1.94, "grad_norm": 1.5533232339395444, "learning_rate": 1.0375564556769357e-08, "loss": 0.5392, "step": 3600 }, { "epoch": 1.94, "grad_norm": 1.5192527647471457, "learning_rate": 1.0173256796125741e-08, "loss": 0.5252, "step": 3601 }, { "epoch": 1.95, "grad_norm": 1.550434241491198, "learning_rate": 9.972936828272495e-09, "loss": 0.5156, "step": 3602 }, { "epoch": 1.95, "grad_norm": 1.5719725721655813, "learning_rate": 9.774604813138078e-09, "loss": 0.5343, "step": 3603 }, { "epoch": 1.95, "grad_norm": 1.4916153060158188, "learning_rate": 9.578260909064163e-09, "loss": 0.5369, "step": 3604 }, { "epoch": 1.95, "grad_norm": 1.5194683386006904, "learning_rate": 9.383905272804528e-09, "loss": 0.534, "step": 3605 }, { "epoch": 1.95, "grad_norm": 1.5290973593613133, "learning_rate": 9.191538059526717e-09, "loss": 0.524, "step": 3606 }, { "epoch": 1.95, "grad_norm": 1.4352546026073003, "learning_rate": 9.001159422809825e-09, "loss": 0.5255, "step": 3607 }, { "epoch": 1.95, "grad_norm": 1.518274660376929, "learning_rate": 8.81276951464588e-09, "loss": 0.5338, "step": 3608 }, { "epoch": 1.95, "grad_norm": 1.5242952995320473, "learning_rate": 8.626368485438742e-09, "loss": 0.5261, "step": 3609 }, { "epoch": 1.95, "grad_norm": 1.49748900879463, "learning_rate": 8.44195648400492e-09, "loss": 0.5205, "step": 3610 }, { "epoch": 1.95, "grad_norm": 1.5428653592460022, "learning_rate": 8.259533657572205e-09, "loss": 0.5536, "step": 3611 }, { "epoch": 1.95, "grad_norm": 1.5159403501376998, "learning_rate": 8.07910015178104e-09, "loss": 0.5367, "step": 3612 }, { "epoch": 1.95, "grad_norm": 1.4656836200544285, "learning_rate": 7.900656110683413e-09, "loss": 0.5177, "step": 3613 }, { "epoch": 1.95, "grad_norm": 1.5092293232034522, "learning_rate": 7.72420167674287e-09, "loss": 0.5385, "step": 3614 }, { "epoch": 1.95, "grad_norm": 1.5000044152909169, "learning_rate": 7.549736990835054e-09, "loss": 0.5197, "step": 3615 }, { "epoch": 1.95, "grad_norm": 1.4858535003167839, "learning_rate": 7.377262192245771e-09, "loss": 0.5198, "step": 3616 }, { "epoch": 1.95, "grad_norm": 1.494109287774961, "learning_rate": 7.20677741867376e-09, "loss": 0.5495, "step": 3617 }, { "epoch": 1.95, "grad_norm": 1.5588613936462967, "learning_rate": 7.0382828062279254e-09, "loss": 0.5479, "step": 3618 }, { "epoch": 1.95, "grad_norm": 1.5515597505116612, "learning_rate": 6.871778489428715e-09, "loss": 0.5405, "step": 3619 }, { "epoch": 1.95, "grad_norm": 1.536067722714768, "learning_rate": 6.7072646012072975e-09, "loss": 0.5328, "step": 3620 }, { "epoch": 1.96, "grad_norm": 1.5262785766482572, "learning_rate": 6.544741272906385e-09, "loss": 0.5197, "step": 3621 }, { "epoch": 1.96, "grad_norm": 1.5400517209523157, "learning_rate": 6.384208634278577e-09, "loss": 0.5571, "step": 3622 }, { "epoch": 1.96, "grad_norm": 1.492331752199506, "learning_rate": 6.225666813487741e-09, "loss": 0.5081, "step": 3623 }, { "epoch": 1.96, "grad_norm": 1.5316077729366722, "learning_rate": 6.0691159371087386e-09, "loss": 0.5457, "step": 3624 }, { "epoch": 1.96, "grad_norm": 1.5787381616833591, "learning_rate": 5.9145561301260365e-09, "loss": 0.5374, "step": 3625 }, { "epoch": 1.96, "grad_norm": 1.4659861409140742, "learning_rate": 5.76198751593482e-09, "loss": 0.5209, "step": 3626 }, { "epoch": 1.96, "grad_norm": 1.4876560503936822, "learning_rate": 5.611410216340984e-09, "loss": 0.5309, "step": 3627 }, { "epoch": 1.96, "grad_norm": 1.5126679417689397, "learning_rate": 5.462824351560314e-09, "loss": 0.5319, "step": 3628 }, { "epoch": 1.96, "grad_norm": 1.5318671258883056, "learning_rate": 5.3162300402181955e-09, "loss": 0.533, "step": 3629 }, { "epoch": 1.96, "grad_norm": 1.4793742739649185, "learning_rate": 5.171627399351009e-09, "loss": 0.5356, "step": 3630 }, { "epoch": 1.96, "grad_norm": 1.5713209875743919, "learning_rate": 5.029016544404741e-09, "loss": 0.5441, "step": 3631 }, { "epoch": 1.96, "grad_norm": 1.5367117194462983, "learning_rate": 4.888397589234428e-09, "loss": 0.5426, "step": 3632 }, { "epoch": 1.96, "grad_norm": 1.490427483606169, "learning_rate": 4.749770646105822e-09, "loss": 0.5318, "step": 3633 }, { "epoch": 1.96, "grad_norm": 1.502692983963111, "learning_rate": 4.613135825694004e-09, "loss": 0.528, "step": 3634 }, { "epoch": 1.96, "grad_norm": 1.508632147070389, "learning_rate": 4.478493237083381e-09, "loss": 0.5243, "step": 3635 }, { "epoch": 1.96, "grad_norm": 1.57043132335188, "learning_rate": 4.3458429877679675e-09, "loss": 0.5361, "step": 3636 }, { "epoch": 1.96, "grad_norm": 1.5114931873977848, "learning_rate": 4.215185183651383e-09, "loss": 0.5193, "step": 3637 }, { "epoch": 1.96, "grad_norm": 1.5077508190109468, "learning_rate": 4.0865199290462955e-09, "loss": 0.5191, "step": 3638 }, { "epoch": 1.97, "grad_norm": 1.509151115483144, "learning_rate": 3.959847326674704e-09, "loss": 0.5397, "step": 3639 }, { "epoch": 1.97, "grad_norm": 1.5207118807445783, "learning_rate": 3.835167477667656e-09, "loss": 0.5609, "step": 3640 }, { "epoch": 1.97, "grad_norm": 1.547403985574007, "learning_rate": 3.712480481564973e-09, "loss": 0.5443, "step": 3641 }, { "epoch": 1.97, "grad_norm": 1.486329520433631, "learning_rate": 3.591786436316358e-09, "loss": 0.5403, "step": 3642 }, { "epoch": 1.97, "grad_norm": 1.4899690895814688, "learning_rate": 3.4730854382794552e-09, "loss": 0.4955, "step": 3643 }, { "epoch": 1.97, "grad_norm": 1.511232546809363, "learning_rate": 3.3563775822212375e-09, "loss": 0.5549, "step": 3644 }, { "epoch": 1.97, "grad_norm": 1.5136976996958662, "learning_rate": 3.241662961317171e-09, "loss": 0.5255, "step": 3645 }, { "epoch": 1.97, "grad_norm": 1.5369278249477965, "learning_rate": 3.1289416671514972e-09, "loss": 0.5623, "step": 3646 }, { "epoch": 1.97, "grad_norm": 1.567372458201838, "learning_rate": 3.018213789717228e-09, "loss": 0.5344, "step": 3647 }, { "epoch": 1.97, "grad_norm": 1.5200524699716351, "learning_rate": 2.909479417415595e-09, "loss": 0.5475, "step": 3648 }, { "epoch": 1.97, "grad_norm": 1.539873298710087, "learning_rate": 2.802738637056324e-09, "loss": 0.5255, "step": 3649 }, { "epoch": 1.97, "grad_norm": 1.5172006931466702, "learning_rate": 2.6979915338579134e-09, "loss": 0.5302, "step": 3650 }, { "epoch": 1.97, "grad_norm": 1.4636079237105102, "learning_rate": 2.5952381914465253e-09, "loss": 0.5346, "step": 3651 }, { "epoch": 1.97, "grad_norm": 1.522900222902114, "learning_rate": 2.4944786918568166e-09, "loss": 0.5257, "step": 3652 }, { "epoch": 1.97, "grad_norm": 1.5220298488887405, "learning_rate": 2.3957131155324943e-09, "loss": 0.5394, "step": 3653 }, { "epoch": 1.97, "grad_norm": 1.4892015485925112, "learning_rate": 2.298941541323818e-09, "loss": 0.5174, "step": 3654 }, { "epoch": 1.97, "grad_norm": 1.5449841238429136, "learning_rate": 2.2041640464903757e-09, "loss": 0.5633, "step": 3655 }, { "epoch": 1.97, "grad_norm": 1.505302222804852, "learning_rate": 2.1113807066988622e-09, "loss": 0.5209, "step": 3656 }, { "epoch": 1.97, "grad_norm": 1.476047493085665, "learning_rate": 2.020591596024746e-09, "loss": 0.5089, "step": 3657 }, { "epoch": 1.98, "grad_norm": 1.4735571055462653, "learning_rate": 1.931796786950879e-09, "loss": 0.5421, "step": 3658 }, { "epoch": 1.98, "grad_norm": 1.5016400339004516, "learning_rate": 1.8449963503680557e-09, "loss": 0.5388, "step": 3659 }, { "epoch": 1.98, "grad_norm": 1.5360140429077047, "learning_rate": 1.7601903555744537e-09, "loss": 0.5473, "step": 3660 }, { "epoch": 1.98, "grad_norm": 1.5158632194588875, "learning_rate": 1.677378870276747e-09, "loss": 0.5176, "step": 3661 }, { "epoch": 1.98, "grad_norm": 1.4722959876109, "learning_rate": 1.5965619605884387e-09, "loss": 0.5055, "step": 3662 }, { "epoch": 1.98, "grad_norm": 1.4532288752478746, "learning_rate": 1.5177396910312502e-09, "loss": 0.4988, "step": 3663 }, { "epoch": 1.98, "grad_norm": 1.5489595397973053, "learning_rate": 1.4409121245337332e-09, "loss": 0.5394, "step": 3664 }, { "epoch": 1.98, "grad_norm": 1.5444444011284908, "learning_rate": 1.3660793224332113e-09, "loss": 0.5397, "step": 3665 }, { "epoch": 1.98, "grad_norm": 1.5311965814326394, "learning_rate": 1.2932413444727287e-09, "loss": 0.5436, "step": 3666 }, { "epoch": 1.98, "grad_norm": 1.5093057481054428, "learning_rate": 1.2223982488043796e-09, "loss": 0.5505, "step": 3667 }, { "epoch": 1.98, "grad_norm": 1.5099928540852792, "learning_rate": 1.1535500919865328e-09, "loss": 0.5368, "step": 3668 }, { "epoch": 1.98, "grad_norm": 1.5029307634998585, "learning_rate": 1.0866969289849426e-09, "loss": 0.5383, "step": 3669 }, { "epoch": 1.98, "grad_norm": 1.497135292914717, "learning_rate": 1.0218388131735813e-09, "loss": 0.5213, "step": 3670 }, { "epoch": 1.98, "grad_norm": 1.5115759896704035, "learning_rate": 9.589757963324175e-10, "loss": 0.5298, "step": 3671 }, { "epoch": 1.98, "grad_norm": 1.5296627888363978, "learning_rate": 8.98107928649361e-10, "loss": 0.544, "step": 3672 }, { "epoch": 1.98, "grad_norm": 1.531100902178494, "learning_rate": 8.39235258719151e-10, "loss": 0.5344, "step": 3673 }, { "epoch": 1.98, "grad_norm": 1.5370967746243722, "learning_rate": 7.82357833543912e-10, "loss": 0.5659, "step": 3674 }, { "epoch": 1.98, "grad_norm": 1.5251845585681503, "learning_rate": 7.274756985323205e-10, "loss": 0.5325, "step": 3675 }, { "epoch": 1.99, "grad_norm": 1.4520475962004131, "learning_rate": 6.745888975007164e-10, "loss": 0.5269, "step": 3676 }, { "epoch": 1.99, "grad_norm": 1.5457332550332468, "learning_rate": 6.236974726717138e-10, "loss": 0.5433, "step": 3677 }, { "epoch": 1.99, "grad_norm": 1.494540805724315, "learning_rate": 5.748014646755895e-10, "loss": 0.5176, "step": 3678 }, { "epoch": 1.99, "grad_norm": 1.5282502540632146, "learning_rate": 5.279009125494505e-10, "loss": 0.5391, "step": 3679 }, { "epoch": 1.99, "grad_norm": 1.5047005875386976, "learning_rate": 4.829958537366786e-10, "loss": 0.5219, "step": 3680 }, { "epoch": 1.99, "grad_norm": 1.5463462451687864, "learning_rate": 4.4008632408831797e-10, "loss": 0.534, "step": 3681 }, { "epoch": 1.99, "grad_norm": 1.460778915857265, "learning_rate": 3.991723578614104e-10, "loss": 0.5211, "step": 3682 }, { "epoch": 1.99, "grad_norm": 1.5156083242757175, "learning_rate": 3.602539877206601e-10, "loss": 0.5394, "step": 3683 }, { "epoch": 1.99, "grad_norm": 1.482323105963454, "learning_rate": 3.2333124473704623e-10, "loss": 0.5287, "step": 3684 }, { "epoch": 1.99, "grad_norm": 1.5125531845174482, "learning_rate": 2.884041583883779e-10, "loss": 0.5532, "step": 3685 }, { "epoch": 1.99, "grad_norm": 1.5029885233605034, "learning_rate": 2.5547275655929405e-10, "loss": 0.5423, "step": 3686 }, { "epoch": 1.99, "grad_norm": 1.554194722877597, "learning_rate": 2.245370655409862e-10, "loss": 0.5375, "step": 3687 }, { "epoch": 1.99, "grad_norm": 1.5201387172873504, "learning_rate": 1.955971100317533e-10, "loss": 0.5424, "step": 3688 }, { "epoch": 1.99, "grad_norm": 1.5204066874417936, "learning_rate": 1.6865291313616916e-10, "loss": 0.5077, "step": 3689 }, { "epoch": 1.99, "grad_norm": 1.5017088793740738, "learning_rate": 1.4370449636535998e-10, "loss": 0.5348, "step": 3690 }, { "epoch": 1.99, "grad_norm": 1.5239535079098743, "learning_rate": 1.2075187963755951e-10, "loss": 0.5309, "step": 3691 }, { "epoch": 1.99, "grad_norm": 1.524403166901212, "learning_rate": 9.979508127699878e-11, "loss": 0.54, "step": 3692 }, { "epoch": 1.99, "grad_norm": 1.510523154268843, "learning_rate": 8.083411801529384e-11, "loss": 0.5539, "step": 3693 }, { "epoch": 1.99, "grad_norm": 1.526320200965389, "learning_rate": 6.386900498978054e-11, "loss": 0.5204, "step": 3694 }, { "epoch": 2.0, "grad_norm": 1.466513497228869, "learning_rate": 4.88997557451798e-11, "loss": 0.5197, "step": 3695 }, { "epoch": 2.0, "grad_norm": 1.4961474710519727, "learning_rate": 3.592638223220979e-11, "loss": 0.5194, "step": 3696 }, { "epoch": 2.0, "grad_norm": 1.4514233313794258, "learning_rate": 2.494889480869622e-11, "loss": 0.5199, "step": 3697 }, { "epoch": 2.0, "grad_norm": 1.4750431452704758, "learning_rate": 1.5967302238462103e-11, "loss": 0.5267, "step": 3698 }, { "epoch": 2.0, "grad_norm": 1.5661468413135173, "learning_rate": 8.98161169188283e-12, "loss": 0.5185, "step": 3699 }, { "epoch": 2.0, "grad_norm": 1.5602947134534375, "learning_rate": 3.991828746441329e-12, "loss": 0.5376, "step": 3700 }, { "epoch": 2.0, "grad_norm": 1.534479974353284, "learning_rate": 9.979573858953652e-13, "loss": 0.5244, "step": 3701 }, { "epoch": 2.0, "grad_norm": 1.470378409136364, "learning_rate": 0.0, "loss": 0.5276, "step": 3702 }, { "epoch": 2.0, "step": 3702, "total_flos": 4002559796641792.0, "train_loss": 0.6348554471790436, "train_runtime": 40436.1288, "train_samples_per_second": 11.723, "train_steps_per_second": 0.092 } ], "logging_steps": 1.0, "max_steps": 3702, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "total_flos": 4002559796641792.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }