{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 6705, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.900990099009901e-08, "loss": 1.8008, "step": 1 }, { "epoch": 0.0, "learning_rate": 1.9801980198019803e-07, "loss": 1.6997, "step": 2 }, { "epoch": 0.0, "learning_rate": 2.9702970297029703e-07, "loss": 1.7573, "step": 3 }, { "epoch": 0.0, "learning_rate": 3.9603960396039606e-07, "loss": 0.2666, "step": 4 }, { "epoch": 0.0, "learning_rate": 4.950495049504951e-07, "loss": 1.8799, "step": 5 }, { "epoch": 0.0, "learning_rate": 5.940594059405941e-07, "loss": 1.7519, "step": 6 }, { "epoch": 0.0, "learning_rate": 6.930693069306931e-07, "loss": 1.8857, "step": 7 }, { "epoch": 0.0, "learning_rate": 7.920792079207921e-07, "loss": 1.6681, "step": 8 }, { "epoch": 0.0, "learning_rate": 8.910891089108911e-07, "loss": 1.8789, "step": 9 }, { "epoch": 0.0, "learning_rate": 9.900990099009902e-07, "loss": 1.6538, "step": 10 }, { "epoch": 0.0, "learning_rate": 1.0891089108910893e-06, "loss": 1.7071, "step": 11 }, { "epoch": 0.0, "learning_rate": 1.1881188118811881e-06, "loss": 1.6477, "step": 12 }, { "epoch": 0.0, "learning_rate": 1.2871287128712872e-06, "loss": 1.7317, "step": 13 }, { "epoch": 0.0, "learning_rate": 1.3861386138613863e-06, "loss": 1.5339, "step": 14 }, { "epoch": 0.0, "learning_rate": 1.4851485148514852e-06, "loss": 0.2865, "step": 15 }, { "epoch": 0.0, "learning_rate": 1.5841584158415842e-06, "loss": 1.5569, "step": 16 }, { "epoch": 0.0, "learning_rate": 1.6831683168316833e-06, "loss": 1.5173, "step": 17 }, { "epoch": 0.0, "learning_rate": 1.7821782178217822e-06, "loss": 1.6748, "step": 18 }, { "epoch": 0.0, "learning_rate": 1.8811881188118813e-06, "loss": 1.4095, "step": 19 }, { "epoch": 0.0, "learning_rate": 1.9801980198019803e-06, "loss": 1.5112, "step": 20 }, { "epoch": 0.0, "learning_rate": 2.0792079207920794e-06, "loss": 1.4426, "step": 21 }, { "epoch": 0.0, "learning_rate": 2.1782178217821785e-06, "loss": 1.4519, "step": 22 }, { "epoch": 0.0, "learning_rate": 2.2772277227722776e-06, "loss": 1.3741, "step": 23 }, { "epoch": 0.0, "learning_rate": 2.3762376237623762e-06, "loss": 1.3699, "step": 24 }, { "epoch": 0.0, "learning_rate": 2.4752475247524753e-06, "loss": 1.4852, "step": 25 }, { "epoch": 0.0, "learning_rate": 2.5742574257425744e-06, "loss": 1.2739, "step": 26 }, { "epoch": 0.0, "learning_rate": 2.6732673267326735e-06, "loss": 1.2723, "step": 27 }, { "epoch": 0.0, "learning_rate": 2.7722772277227726e-06, "loss": 1.4288, "step": 28 }, { "epoch": 0.0, "learning_rate": 2.8712871287128712e-06, "loss": 1.3896, "step": 29 }, { "epoch": 0.0, "learning_rate": 2.9702970297029703e-06, "loss": 1.3858, "step": 30 }, { "epoch": 0.0, "learning_rate": 3.0693069306930694e-06, "loss": 1.3657, "step": 31 }, { "epoch": 0.0, "learning_rate": 3.1683168316831685e-06, "loss": 1.3617, "step": 32 }, { "epoch": 0.0, "learning_rate": 3.2673267326732676e-06, "loss": 1.198, "step": 33 }, { "epoch": 0.01, "learning_rate": 3.3663366336633666e-06, "loss": 1.2789, "step": 34 }, { "epoch": 0.01, "learning_rate": 3.4653465346534653e-06, "loss": 1.2667, "step": 35 }, { "epoch": 0.01, "learning_rate": 3.5643564356435644e-06, "loss": 1.2473, "step": 36 }, { "epoch": 0.01, "learning_rate": 3.6633663366336635e-06, "loss": 1.191, "step": 37 }, { "epoch": 0.01, "learning_rate": 3.7623762376237625e-06, "loss": 1.3272, "step": 38 }, { "epoch": 0.01, "learning_rate": 3.861386138613862e-06, "loss": 1.2961, "step": 39 }, { "epoch": 0.01, "learning_rate": 3.960396039603961e-06, "loss": 0.2714, "step": 40 }, { "epoch": 0.01, "learning_rate": 4.05940594059406e-06, "loss": 1.1592, "step": 41 }, { "epoch": 0.01, "learning_rate": 4.158415841584159e-06, "loss": 1.2421, "step": 42 }, { "epoch": 0.01, "learning_rate": 4.2574257425742575e-06, "loss": 1.2084, "step": 43 }, { "epoch": 0.01, "learning_rate": 4.356435643564357e-06, "loss": 1.1972, "step": 44 }, { "epoch": 0.01, "learning_rate": 4.455445544554456e-06, "loss": 1.1788, "step": 45 }, { "epoch": 0.01, "learning_rate": 4.554455445544555e-06, "loss": 1.1083, "step": 46 }, { "epoch": 0.01, "learning_rate": 4.653465346534654e-06, "loss": 1.25, "step": 47 }, { "epoch": 0.01, "learning_rate": 4.7524752475247525e-06, "loss": 1.1786, "step": 48 }, { "epoch": 0.01, "learning_rate": 4.851485148514852e-06, "loss": 1.0604, "step": 49 }, { "epoch": 0.01, "learning_rate": 4.950495049504951e-06, "loss": 0.2862, "step": 50 }, { "epoch": 0.01, "learning_rate": 5.04950495049505e-06, "loss": 1.2306, "step": 51 }, { "epoch": 0.01, "learning_rate": 5.148514851485149e-06, "loss": 1.1807, "step": 52 }, { "epoch": 0.01, "learning_rate": 5.247524752475248e-06, "loss": 1.1159, "step": 53 }, { "epoch": 0.01, "learning_rate": 5.346534653465347e-06, "loss": 1.0779, "step": 54 }, { "epoch": 0.01, "learning_rate": 5.4455445544554465e-06, "loss": 1.2411, "step": 55 }, { "epoch": 0.01, "learning_rate": 5.544554455445545e-06, "loss": 1.1311, "step": 56 }, { "epoch": 0.01, "learning_rate": 5.643564356435644e-06, "loss": 1.1468, "step": 57 }, { "epoch": 0.01, "learning_rate": 5.7425742574257425e-06, "loss": 1.1318, "step": 58 }, { "epoch": 0.01, "learning_rate": 5.841584158415842e-06, "loss": 1.1427, "step": 59 }, { "epoch": 0.01, "learning_rate": 5.940594059405941e-06, "loss": 1.1353, "step": 60 }, { "epoch": 0.01, "learning_rate": 6.03960396039604e-06, "loss": 1.1566, "step": 61 }, { "epoch": 0.01, "learning_rate": 6.138613861386139e-06, "loss": 1.0438, "step": 62 }, { "epoch": 0.01, "learning_rate": 6.237623762376238e-06, "loss": 1.2064, "step": 63 }, { "epoch": 0.01, "learning_rate": 6.336633663366337e-06, "loss": 1.055, "step": 64 }, { "epoch": 0.01, "learning_rate": 6.4356435643564364e-06, "loss": 1.0528, "step": 65 }, { "epoch": 0.01, "learning_rate": 6.534653465346535e-06, "loss": 1.1261, "step": 66 }, { "epoch": 0.01, "learning_rate": 6.633663366336635e-06, "loss": 1.0472, "step": 67 }, { "epoch": 0.01, "learning_rate": 6.732673267326733e-06, "loss": 1.1621, "step": 68 }, { "epoch": 0.01, "learning_rate": 6.831683168316833e-06, "loss": 0.989, "step": 69 }, { "epoch": 0.01, "learning_rate": 6.930693069306931e-06, "loss": 1.1131, "step": 70 }, { "epoch": 0.01, "learning_rate": 7.02970297029703e-06, "loss": 1.1338, "step": 71 }, { "epoch": 0.01, "learning_rate": 7.128712871287129e-06, "loss": 1.1039, "step": 72 }, { "epoch": 0.01, "learning_rate": 7.227722772277228e-06, "loss": 1.1594, "step": 73 }, { "epoch": 0.01, "learning_rate": 7.326732673267327e-06, "loss": 1.1941, "step": 74 }, { "epoch": 0.01, "learning_rate": 7.425742574257426e-06, "loss": 1.0136, "step": 75 }, { "epoch": 0.01, "learning_rate": 7.524752475247525e-06, "loss": 0.2627, "step": 76 }, { "epoch": 0.01, "learning_rate": 7.6237623762376246e-06, "loss": 1.1273, "step": 77 }, { "epoch": 0.01, "learning_rate": 7.722772277227724e-06, "loss": 1.0313, "step": 78 }, { "epoch": 0.01, "learning_rate": 7.821782178217822e-06, "loss": 1.1105, "step": 79 }, { "epoch": 0.01, "learning_rate": 7.920792079207921e-06, "loss": 0.9856, "step": 80 }, { "epoch": 0.01, "learning_rate": 8.019801980198021e-06, "loss": 0.965, "step": 81 }, { "epoch": 0.01, "learning_rate": 8.11881188118812e-06, "loss": 1.094, "step": 82 }, { "epoch": 0.01, "learning_rate": 8.217821782178218e-06, "loss": 1.0773, "step": 83 }, { "epoch": 0.01, "learning_rate": 8.316831683168318e-06, "loss": 0.9835, "step": 84 }, { "epoch": 0.01, "learning_rate": 8.415841584158416e-06, "loss": 1.0608, "step": 85 }, { "epoch": 0.01, "learning_rate": 8.514851485148515e-06, "loss": 1.0889, "step": 86 }, { "epoch": 0.01, "learning_rate": 8.613861386138615e-06, "loss": 1.0121, "step": 87 }, { "epoch": 0.01, "learning_rate": 8.712871287128714e-06, "loss": 1.0484, "step": 88 }, { "epoch": 0.01, "learning_rate": 8.811881188118812e-06, "loss": 1.0484, "step": 89 }, { "epoch": 0.01, "learning_rate": 8.910891089108911e-06, "loss": 0.9984, "step": 90 }, { "epoch": 0.01, "learning_rate": 9.009900990099011e-06, "loss": 0.2619, "step": 91 }, { "epoch": 0.01, "learning_rate": 9.10891089108911e-06, "loss": 1.0579, "step": 92 }, { "epoch": 0.01, "learning_rate": 9.20792079207921e-06, "loss": 1.1332, "step": 93 }, { "epoch": 0.01, "learning_rate": 9.306930693069308e-06, "loss": 1.0233, "step": 94 }, { "epoch": 0.01, "learning_rate": 9.405940594059405e-06, "loss": 1.0392, "step": 95 }, { "epoch": 0.01, "learning_rate": 9.504950495049505e-06, "loss": 0.9835, "step": 96 }, { "epoch": 0.01, "learning_rate": 9.603960396039604e-06, "loss": 1.1509, "step": 97 }, { "epoch": 0.01, "learning_rate": 9.702970297029704e-06, "loss": 0.9473, "step": 98 }, { "epoch": 0.01, "learning_rate": 9.801980198019802e-06, "loss": 1.0217, "step": 99 }, { "epoch": 0.01, "learning_rate": 9.900990099009901e-06, "loss": 1.0162, "step": 100 }, { "epoch": 0.02, "learning_rate": 1e-05, "loss": 1.1482, "step": 101 }, { "epoch": 0.02, "learning_rate": 1.00990099009901e-05, "loss": 1.0329, "step": 102 }, { "epoch": 0.02, "learning_rate": 1.01980198019802e-05, "loss": 1.1181, "step": 103 }, { "epoch": 0.02, "learning_rate": 1.0297029702970298e-05, "loss": 0.9701, "step": 104 }, { "epoch": 0.02, "learning_rate": 1.0396039603960397e-05, "loss": 1.112, "step": 105 }, { "epoch": 0.02, "learning_rate": 1.0495049504950497e-05, "loss": 0.9845, "step": 106 }, { "epoch": 0.02, "learning_rate": 1.0594059405940596e-05, "loss": 1.0077, "step": 107 }, { "epoch": 0.02, "learning_rate": 1.0693069306930694e-05, "loss": 1.0986, "step": 108 }, { "epoch": 0.02, "learning_rate": 1.0792079207920793e-05, "loss": 1.0302, "step": 109 }, { "epoch": 0.02, "learning_rate": 1.0891089108910893e-05, "loss": 1.0243, "step": 110 }, { "epoch": 0.02, "learning_rate": 1.0990099009900992e-05, "loss": 1.0347, "step": 111 }, { "epoch": 0.02, "learning_rate": 1.108910891089109e-05, "loss": 0.9925, "step": 112 }, { "epoch": 0.02, "learning_rate": 1.118811881188119e-05, "loss": 1.0407, "step": 113 }, { "epoch": 0.02, "learning_rate": 1.1287128712871288e-05, "loss": 1.0507, "step": 114 }, { "epoch": 0.02, "learning_rate": 1.1386138613861385e-05, "loss": 1.0148, "step": 115 }, { "epoch": 0.02, "learning_rate": 1.1485148514851485e-05, "loss": 0.9519, "step": 116 }, { "epoch": 0.02, "learning_rate": 1.1584158415841584e-05, "loss": 0.3028, "step": 117 }, { "epoch": 0.02, "learning_rate": 1.1683168316831684e-05, "loss": 1.0575, "step": 118 }, { "epoch": 0.02, "learning_rate": 1.1782178217821782e-05, "loss": 1.066, "step": 119 }, { "epoch": 0.02, "learning_rate": 1.1881188118811881e-05, "loss": 0.9741, "step": 120 }, { "epoch": 0.02, "learning_rate": 1.198019801980198e-05, "loss": 1.016, "step": 121 }, { "epoch": 0.02, "learning_rate": 1.207920792079208e-05, "loss": 1.0738, "step": 122 }, { "epoch": 0.02, "learning_rate": 1.217821782178218e-05, "loss": 1.0513, "step": 123 }, { "epoch": 0.02, "learning_rate": 1.2277227722772278e-05, "loss": 0.9336, "step": 124 }, { "epoch": 0.02, "learning_rate": 1.2376237623762377e-05, "loss": 1.0289, "step": 125 }, { "epoch": 0.02, "learning_rate": 1.2475247524752477e-05, "loss": 0.9952, "step": 126 }, { "epoch": 0.02, "learning_rate": 1.2574257425742576e-05, "loss": 1.0842, "step": 127 }, { "epoch": 0.02, "learning_rate": 1.2673267326732674e-05, "loss": 0.993, "step": 128 }, { "epoch": 0.02, "learning_rate": 1.2772277227722773e-05, "loss": 1.0469, "step": 129 }, { "epoch": 0.02, "learning_rate": 1.2871287128712873e-05, "loss": 0.9943, "step": 130 }, { "epoch": 0.02, "learning_rate": 1.2970297029702972e-05, "loss": 0.9897, "step": 131 }, { "epoch": 0.02, "learning_rate": 1.306930693069307e-05, "loss": 1.0265, "step": 132 }, { "epoch": 0.02, "learning_rate": 1.316831683168317e-05, "loss": 0.9575, "step": 133 }, { "epoch": 0.02, "learning_rate": 1.326732673267327e-05, "loss": 1.0799, "step": 134 }, { "epoch": 0.02, "learning_rate": 1.3366336633663369e-05, "loss": 0.9527, "step": 135 }, { "epoch": 0.02, "learning_rate": 1.3465346534653467e-05, "loss": 1.014, "step": 136 }, { "epoch": 0.02, "learning_rate": 1.3564356435643566e-05, "loss": 0.989, "step": 137 }, { "epoch": 0.02, "learning_rate": 1.3663366336633666e-05, "loss": 0.9127, "step": 138 }, { "epoch": 0.02, "learning_rate": 1.3762376237623762e-05, "loss": 1.0361, "step": 139 }, { "epoch": 0.02, "learning_rate": 1.3861386138613861e-05, "loss": 1.037, "step": 140 }, { "epoch": 0.02, "learning_rate": 1.396039603960396e-05, "loss": 0.9834, "step": 141 }, { "epoch": 0.02, "learning_rate": 1.405940594059406e-05, "loss": 1.0312, "step": 142 }, { "epoch": 0.02, "learning_rate": 1.4158415841584158e-05, "loss": 1.0893, "step": 143 }, { "epoch": 0.02, "learning_rate": 1.4257425742574257e-05, "loss": 1.0209, "step": 144 }, { "epoch": 0.02, "learning_rate": 1.4356435643564357e-05, "loss": 1.0182, "step": 145 }, { "epoch": 0.02, "learning_rate": 1.4455445544554456e-05, "loss": 1.0373, "step": 146 }, { "epoch": 0.02, "learning_rate": 1.4554455445544556e-05, "loss": 0.9764, "step": 147 }, { "epoch": 0.02, "learning_rate": 1.4653465346534654e-05, "loss": 0.955, "step": 148 }, { "epoch": 0.02, "learning_rate": 1.4752475247524753e-05, "loss": 1.0629, "step": 149 }, { "epoch": 0.02, "learning_rate": 1.4851485148514853e-05, "loss": 1.0799, "step": 150 }, { "epoch": 0.02, "learning_rate": 1.4950495049504952e-05, "loss": 0.976, "step": 151 }, { "epoch": 0.02, "learning_rate": 1.504950495049505e-05, "loss": 0.9237, "step": 152 }, { "epoch": 0.02, "learning_rate": 1.514851485148515e-05, "loss": 0.9626, "step": 153 }, { "epoch": 0.02, "learning_rate": 1.5247524752475249e-05, "loss": 0.9717, "step": 154 }, { "epoch": 0.02, "learning_rate": 1.534653465346535e-05, "loss": 0.9555, "step": 155 }, { "epoch": 0.02, "learning_rate": 1.5445544554455448e-05, "loss": 0.9807, "step": 156 }, { "epoch": 0.02, "learning_rate": 1.5544554455445548e-05, "loss": 0.2888, "step": 157 }, { "epoch": 0.02, "learning_rate": 1.5643564356435644e-05, "loss": 0.9844, "step": 158 }, { "epoch": 0.02, "learning_rate": 1.5742574257425743e-05, "loss": 1.1104, "step": 159 }, { "epoch": 0.02, "learning_rate": 1.5841584158415843e-05, "loss": 1.0492, "step": 160 }, { "epoch": 0.02, "learning_rate": 1.5940594059405942e-05, "loss": 0.9358, "step": 161 }, { "epoch": 0.02, "learning_rate": 1.6039603960396042e-05, "loss": 1.0113, "step": 162 }, { "epoch": 0.02, "learning_rate": 1.613861386138614e-05, "loss": 0.2379, "step": 163 }, { "epoch": 0.02, "learning_rate": 1.623762376237624e-05, "loss": 1.0367, "step": 164 }, { "epoch": 0.02, "learning_rate": 1.6336633663366337e-05, "loss": 1.0756, "step": 165 }, { "epoch": 0.02, "learning_rate": 1.6435643564356436e-05, "loss": 0.9809, "step": 166 }, { "epoch": 0.02, "learning_rate": 1.6534653465346536e-05, "loss": 0.9848, "step": 167 }, { "epoch": 0.03, "learning_rate": 1.6633663366336635e-05, "loss": 0.9824, "step": 168 }, { "epoch": 0.03, "learning_rate": 1.6732673267326735e-05, "loss": 1.0594, "step": 169 }, { "epoch": 0.03, "learning_rate": 1.683168316831683e-05, "loss": 1.0215, "step": 170 }, { "epoch": 0.03, "learning_rate": 1.693069306930693e-05, "loss": 0.9796, "step": 171 }, { "epoch": 0.03, "learning_rate": 1.702970297029703e-05, "loss": 0.9184, "step": 172 }, { "epoch": 0.03, "learning_rate": 1.712871287128713e-05, "loss": 0.2703, "step": 173 }, { "epoch": 0.03, "learning_rate": 1.722772277227723e-05, "loss": 1.0678, "step": 174 }, { "epoch": 0.03, "learning_rate": 1.732673267326733e-05, "loss": 1.0364, "step": 175 }, { "epoch": 0.03, "learning_rate": 1.7425742574257428e-05, "loss": 0.9178, "step": 176 }, { "epoch": 0.03, "learning_rate": 1.7524752475247528e-05, "loss": 0.8885, "step": 177 }, { "epoch": 0.03, "learning_rate": 1.7623762376237624e-05, "loss": 0.8916, "step": 178 }, { "epoch": 0.03, "learning_rate": 1.7722772277227723e-05, "loss": 1.0187, "step": 179 }, { "epoch": 0.03, "learning_rate": 1.7821782178217823e-05, "loss": 0.9091, "step": 180 }, { "epoch": 0.03, "learning_rate": 1.7920792079207922e-05, "loss": 1.0049, "step": 181 }, { "epoch": 0.03, "learning_rate": 1.8019801980198022e-05, "loss": 1.025, "step": 182 }, { "epoch": 0.03, "learning_rate": 1.811881188118812e-05, "loss": 0.883, "step": 183 }, { "epoch": 0.03, "learning_rate": 1.821782178217822e-05, "loss": 1.0384, "step": 184 }, { "epoch": 0.03, "learning_rate": 1.831683168316832e-05, "loss": 0.9924, "step": 185 }, { "epoch": 0.03, "learning_rate": 1.841584158415842e-05, "loss": 1.0336, "step": 186 }, { "epoch": 0.03, "learning_rate": 1.8514851485148516e-05, "loss": 0.9341, "step": 187 }, { "epoch": 0.03, "learning_rate": 1.8613861386138615e-05, "loss": 0.8708, "step": 188 }, { "epoch": 0.03, "learning_rate": 1.8712871287128715e-05, "loss": 0.9933, "step": 189 }, { "epoch": 0.03, "learning_rate": 1.881188118811881e-05, "loss": 1.0116, "step": 190 }, { "epoch": 0.03, "learning_rate": 1.891089108910891e-05, "loss": 0.9195, "step": 191 }, { "epoch": 0.03, "learning_rate": 1.900990099009901e-05, "loss": 0.9445, "step": 192 }, { "epoch": 0.03, "learning_rate": 1.910891089108911e-05, "loss": 0.2567, "step": 193 }, { "epoch": 0.03, "learning_rate": 1.920792079207921e-05, "loss": 0.9144, "step": 194 }, { "epoch": 0.03, "learning_rate": 1.930693069306931e-05, "loss": 0.9102, "step": 195 }, { "epoch": 0.03, "learning_rate": 1.9405940594059408e-05, "loss": 0.9859, "step": 196 }, { "epoch": 0.03, "learning_rate": 1.9504950495049508e-05, "loss": 1.0026, "step": 197 }, { "epoch": 0.03, "learning_rate": 1.9603960396039604e-05, "loss": 0.9963, "step": 198 }, { "epoch": 0.03, "learning_rate": 1.9702970297029703e-05, "loss": 0.9532, "step": 199 }, { "epoch": 0.03, "learning_rate": 1.9801980198019803e-05, "loss": 0.9629, "step": 200 }, { "epoch": 0.03, "learning_rate": 1.9900990099009902e-05, "loss": 0.9509, "step": 201 }, { "epoch": 0.03, "learning_rate": 2e-05, "loss": 0.9109, "step": 202 }, { "epoch": 0.03, "learning_rate": 1.9999998833076913e-05, "loss": 1.0208, "step": 203 }, { "epoch": 0.03, "learning_rate": 1.9999995332307916e-05, "loss": 1.0248, "step": 204 }, { "epoch": 0.03, "learning_rate": 1.9999989497693827e-05, "loss": 0.9317, "step": 205 }, { "epoch": 0.03, "learning_rate": 1.9999981329236014e-05, "loss": 1.0083, "step": 206 }, { "epoch": 0.03, "learning_rate": 1.9999970826936375e-05, "loss": 0.8925, "step": 207 }, { "epoch": 0.03, "learning_rate": 1.9999957990797367e-05, "loss": 1.0206, "step": 208 }, { "epoch": 0.03, "learning_rate": 1.9999942820821983e-05, "loss": 0.9054, "step": 209 }, { "epoch": 0.03, "learning_rate": 1.9999925317013763e-05, "loss": 0.9861, "step": 210 }, { "epoch": 0.03, "learning_rate": 1.9999905479376794e-05, "loss": 0.2925, "step": 211 }, { "epoch": 0.03, "learning_rate": 1.9999883307915706e-05, "loss": 0.8682, "step": 212 }, { "epoch": 0.03, "learning_rate": 1.999985880263567e-05, "loss": 0.9616, "step": 213 }, { "epoch": 0.03, "learning_rate": 1.9999831963542413e-05, "loss": 1.0012, "step": 214 }, { "epoch": 0.03, "learning_rate": 1.999980279064219e-05, "loss": 0.882, "step": 215 }, { "epoch": 0.03, "learning_rate": 1.9999771283941814e-05, "loss": 1.0612, "step": 216 }, { "epoch": 0.03, "learning_rate": 1.999973744344864e-05, "loss": 0.9103, "step": 217 }, { "epoch": 0.03, "learning_rate": 1.9999701269170557e-05, "loss": 0.8103, "step": 218 }, { "epoch": 0.03, "learning_rate": 1.9999662761116017e-05, "loss": 1.0388, "step": 219 }, { "epoch": 0.03, "learning_rate": 1.9999621919294004e-05, "loss": 0.9756, "step": 220 }, { "epoch": 0.03, "learning_rate": 1.999957874371405e-05, "loss": 1.0303, "step": 221 }, { "epoch": 0.03, "learning_rate": 1.9999533234386228e-05, "loss": 1.0236, "step": 222 }, { "epoch": 0.03, "learning_rate": 1.9999485391321164e-05, "loss": 0.9699, "step": 223 }, { "epoch": 0.03, "learning_rate": 1.9999435214530023e-05, "loss": 0.9763, "step": 224 }, { "epoch": 0.03, "learning_rate": 1.9999382704024513e-05, "loss": 1.0204, "step": 225 }, { "epoch": 0.03, "learning_rate": 1.9999327859816894e-05, "loss": 0.9729, "step": 226 }, { "epoch": 0.03, "learning_rate": 1.9999270681919962e-05, "loss": 0.9285, "step": 227 }, { "epoch": 0.03, "learning_rate": 1.999921117034706e-05, "loss": 0.3126, "step": 228 }, { "epoch": 0.03, "learning_rate": 1.9999149325112083e-05, "loss": 0.8653, "step": 229 }, { "epoch": 0.03, "learning_rate": 1.999908514622946e-05, "loss": 0.9361, "step": 230 }, { "epoch": 0.03, "learning_rate": 1.9999018633714166e-05, "loss": 0.9269, "step": 231 }, { "epoch": 0.03, "learning_rate": 1.999894978758173e-05, "loss": 0.9694, "step": 232 }, { "epoch": 0.03, "learning_rate": 1.999887860784822e-05, "loss": 0.931, "step": 233 }, { "epoch": 0.03, "learning_rate": 1.9998805094530245e-05, "loss": 0.9346, "step": 234 }, { "epoch": 0.04, "learning_rate": 1.999872924764496e-05, "loss": 0.9564, "step": 235 }, { "epoch": 0.04, "learning_rate": 1.9998651067210077e-05, "loss": 0.9806, "step": 236 }, { "epoch": 0.04, "learning_rate": 1.9998570553243828e-05, "loss": 1.0366, "step": 237 }, { "epoch": 0.04, "learning_rate": 1.9998487705765012e-05, "loss": 1.0736, "step": 238 }, { "epoch": 0.04, "learning_rate": 1.999840252479296e-05, "loss": 0.9429, "step": 239 }, { "epoch": 0.04, "learning_rate": 1.999831501034756e-05, "loss": 0.9126, "step": 240 }, { "epoch": 0.04, "learning_rate": 1.999822516244923e-05, "loss": 0.9067, "step": 241 }, { "epoch": 0.04, "learning_rate": 1.999813298111894e-05, "loss": 0.8779, "step": 242 }, { "epoch": 0.04, "learning_rate": 1.99980384663782e-05, "loss": 0.9111, "step": 243 }, { "epoch": 0.04, "learning_rate": 1.9997941618249076e-05, "loss": 0.9684, "step": 244 }, { "epoch": 0.04, "learning_rate": 1.9997842436754164e-05, "loss": 0.9205, "step": 245 }, { "epoch": 0.04, "learning_rate": 1.999774092191662e-05, "loss": 0.9306, "step": 246 }, { "epoch": 0.04, "learning_rate": 1.9997637073760125e-05, "loss": 0.8723, "step": 247 }, { "epoch": 0.04, "learning_rate": 1.9997530892308923e-05, "loss": 0.9494, "step": 248 }, { "epoch": 0.04, "learning_rate": 1.9997422377587795e-05, "loss": 0.9807, "step": 249 }, { "epoch": 0.04, "learning_rate": 1.9997311529622064e-05, "loss": 0.8908, "step": 250 }, { "epoch": 0.04, "learning_rate": 1.9997198348437598e-05, "loss": 0.9953, "step": 251 }, { "epoch": 0.04, "learning_rate": 1.999708283406082e-05, "loss": 0.315, "step": 252 }, { "epoch": 0.04, "learning_rate": 1.999696498651868e-05, "loss": 0.9407, "step": 253 }, { "epoch": 0.04, "learning_rate": 1.9996844805838687e-05, "loss": 0.9725, "step": 254 }, { "epoch": 0.04, "learning_rate": 1.9996722292048888e-05, "loss": 0.865, "step": 255 }, { "epoch": 0.04, "learning_rate": 1.999659744517788e-05, "loss": 0.9286, "step": 256 }, { "epoch": 0.04, "learning_rate": 1.9996470265254793e-05, "loss": 0.9271, "step": 257 }, { "epoch": 0.04, "learning_rate": 1.999634075230931e-05, "loss": 0.9504, "step": 258 }, { "epoch": 0.04, "learning_rate": 1.9996208906371663e-05, "loss": 0.8753, "step": 259 }, { "epoch": 0.04, "learning_rate": 1.9996074727472616e-05, "loss": 1.0379, "step": 260 }, { "epoch": 0.04, "learning_rate": 1.9995938215643492e-05, "loss": 0.9221, "step": 261 }, { "epoch": 0.04, "learning_rate": 1.9995799370916144e-05, "loss": 0.9016, "step": 262 }, { "epoch": 0.04, "learning_rate": 1.999565819332298e-05, "loss": 0.8589, "step": 263 }, { "epoch": 0.04, "learning_rate": 1.9995514682896946e-05, "loss": 0.9553, "step": 264 }, { "epoch": 0.04, "learning_rate": 1.9995368839671537e-05, "loss": 0.9226, "step": 265 }, { "epoch": 0.04, "learning_rate": 1.999522066368079e-05, "loss": 1.0524, "step": 266 }, { "epoch": 0.04, "learning_rate": 1.9995070154959286e-05, "loss": 1.0461, "step": 267 }, { "epoch": 0.04, "learning_rate": 1.9994917313542155e-05, "loss": 0.9844, "step": 268 }, { "epoch": 0.04, "learning_rate": 1.9994762139465062e-05, "loss": 0.9872, "step": 269 }, { "epoch": 0.04, "learning_rate": 1.9994604632764228e-05, "loss": 0.9144, "step": 270 }, { "epoch": 0.04, "learning_rate": 1.999444479347641e-05, "loss": 0.9718, "step": 271 }, { "epoch": 0.04, "learning_rate": 1.9994282621638913e-05, "loss": 0.9329, "step": 272 }, { "epoch": 0.04, "learning_rate": 1.9994118117289582e-05, "loss": 0.9804, "step": 273 }, { "epoch": 0.04, "learning_rate": 1.9993951280466813e-05, "loss": 0.9263, "step": 274 }, { "epoch": 0.04, "learning_rate": 1.9993782111209543e-05, "loss": 1.0053, "step": 275 }, { "epoch": 0.04, "learning_rate": 1.9993610609557254e-05, "loss": 0.9233, "step": 276 }, { "epoch": 0.04, "learning_rate": 1.999343677554997e-05, "loss": 0.868, "step": 277 }, { "epoch": 0.04, "learning_rate": 1.9993260609228262e-05, "loss": 0.9085, "step": 278 }, { "epoch": 0.04, "learning_rate": 1.9993082110633244e-05, "loss": 0.9131, "step": 279 }, { "epoch": 0.04, "learning_rate": 1.999290127980658e-05, "loss": 0.9488, "step": 280 }, { "epoch": 0.04, "learning_rate": 1.9992718116790463e-05, "loss": 0.9342, "step": 281 }, { "epoch": 0.04, "learning_rate": 1.9992532621627647e-05, "loss": 0.956, "step": 282 }, { "epoch": 0.04, "learning_rate": 1.9992344794361425e-05, "loss": 0.9086, "step": 283 }, { "epoch": 0.04, "learning_rate": 1.999215463503563e-05, "loss": 0.8838, "step": 284 }, { "epoch": 0.04, "learning_rate": 1.999196214369464e-05, "loss": 0.8877, "step": 285 }, { "epoch": 0.04, "learning_rate": 1.9991767320383386e-05, "loss": 0.9061, "step": 286 }, { "epoch": 0.04, "learning_rate": 1.9991570165147333e-05, "loss": 0.9423, "step": 287 }, { "epoch": 0.04, "learning_rate": 1.999137067803249e-05, "loss": 0.8514, "step": 288 }, { "epoch": 0.04, "learning_rate": 1.999116885908542e-05, "loss": 0.839, "step": 289 }, { "epoch": 0.04, "learning_rate": 1.9990964708353227e-05, "loss": 0.9637, "step": 290 }, { "epoch": 0.04, "learning_rate": 1.999075822588355e-05, "loss": 0.8632, "step": 291 }, { "epoch": 0.04, "learning_rate": 1.999054941172458e-05, "loss": 0.8569, "step": 292 }, { "epoch": 0.04, "learning_rate": 1.9990338265925057e-05, "loss": 0.8276, "step": 293 }, { "epoch": 0.04, "learning_rate": 1.9990124788534252e-05, "loss": 0.9192, "step": 294 }, { "epoch": 0.04, "learning_rate": 1.9989908979601987e-05, "loss": 0.8615, "step": 295 }, { "epoch": 0.04, "learning_rate": 1.9989690839178633e-05, "loss": 1.0402, "step": 296 }, { "epoch": 0.04, "learning_rate": 1.9989470367315103e-05, "loss": 0.934, "step": 297 }, { "epoch": 0.04, "learning_rate": 1.998924756406284e-05, "loss": 1.0553, "step": 298 }, { "epoch": 0.04, "learning_rate": 1.9989022429473858e-05, "loss": 0.9416, "step": 299 }, { "epoch": 0.04, "learning_rate": 1.9988794963600687e-05, "loss": 0.952, "step": 300 }, { "epoch": 0.04, "learning_rate": 1.9988565166496423e-05, "loss": 1.0077, "step": 301 }, { "epoch": 0.05, "learning_rate": 1.9988333038214694e-05, "loss": 0.9409, "step": 302 }, { "epoch": 0.05, "learning_rate": 1.9988098578809675e-05, "loss": 0.905, "step": 303 }, { "epoch": 0.05, "learning_rate": 1.9987861788336084e-05, "loss": 1.0123, "step": 304 }, { "epoch": 0.05, "learning_rate": 1.9987622666849183e-05, "loss": 0.9885, "step": 305 }, { "epoch": 0.05, "learning_rate": 1.9987381214404787e-05, "loss": 0.9522, "step": 306 }, { "epoch": 0.05, "learning_rate": 1.9987137431059236e-05, "loss": 0.8957, "step": 307 }, { "epoch": 0.05, "learning_rate": 1.9986891316869434e-05, "loss": 0.9597, "step": 308 }, { "epoch": 0.05, "learning_rate": 1.998664287189282e-05, "loss": 0.9537, "step": 309 }, { "epoch": 0.05, "learning_rate": 1.998639209618737e-05, "loss": 1.042, "step": 310 }, { "epoch": 0.05, "learning_rate": 1.998613898981162e-05, "loss": 0.9273, "step": 311 }, { "epoch": 0.05, "learning_rate": 1.9985883552824633e-05, "loss": 0.96, "step": 312 }, { "epoch": 0.05, "learning_rate": 1.998562578528603e-05, "loss": 0.9647, "step": 313 }, { "epoch": 0.05, "learning_rate": 1.998536568725597e-05, "loss": 1.0266, "step": 314 }, { "epoch": 0.05, "learning_rate": 1.998510325879515e-05, "loss": 0.9008, "step": 315 }, { "epoch": 0.05, "learning_rate": 1.9984838499964825e-05, "loss": 0.9012, "step": 316 }, { "epoch": 0.05, "learning_rate": 1.9984571410826777e-05, "loss": 0.9212, "step": 317 }, { "epoch": 0.05, "learning_rate": 1.9984301991443348e-05, "loss": 0.9921, "step": 318 }, { "epoch": 0.05, "learning_rate": 1.9984030241877413e-05, "loss": 0.8628, "step": 319 }, { "epoch": 0.05, "learning_rate": 1.9983756162192393e-05, "loss": 1.0321, "step": 320 }, { "epoch": 0.05, "learning_rate": 1.9983479752452258e-05, "loss": 0.974, "step": 321 }, { "epoch": 0.05, "learning_rate": 1.998320101272151e-05, "loss": 0.8914, "step": 322 }, { "epoch": 0.05, "learning_rate": 1.9982919943065213e-05, "loss": 0.9992, "step": 323 }, { "epoch": 0.05, "learning_rate": 1.9982636543548956e-05, "loss": 0.9557, "step": 324 }, { "epoch": 0.05, "learning_rate": 1.9982350814238884e-05, "loss": 0.9055, "step": 325 }, { "epoch": 0.05, "learning_rate": 1.998206275520168e-05, "loss": 1.0226, "step": 326 }, { "epoch": 0.05, "learning_rate": 1.9981772366504576e-05, "loss": 0.8326, "step": 327 }, { "epoch": 0.05, "learning_rate": 1.998147964821534e-05, "loss": 0.9004, "step": 328 }, { "epoch": 0.05, "learning_rate": 1.998118460040229e-05, "loss": 0.9131, "step": 329 }, { "epoch": 0.05, "learning_rate": 1.9980887223134284e-05, "loss": 0.7548, "step": 330 }, { "epoch": 0.05, "learning_rate": 1.9980587516480726e-05, "loss": 0.9411, "step": 331 }, { "epoch": 0.05, "learning_rate": 1.9980285480511567e-05, "loss": 0.9368, "step": 332 }, { "epoch": 0.05, "learning_rate": 1.997998111529729e-05, "loss": 0.937, "step": 333 }, { "epoch": 0.05, "learning_rate": 1.997967442090894e-05, "loss": 0.9514, "step": 334 }, { "epoch": 0.05, "learning_rate": 1.997936539741808e-05, "loss": 0.9268, "step": 335 }, { "epoch": 0.05, "learning_rate": 1.9979054044896843e-05, "loss": 0.897, "step": 336 }, { "epoch": 0.05, "learning_rate": 1.997874036341789e-05, "loss": 0.8963, "step": 337 }, { "epoch": 0.05, "learning_rate": 1.997842435305443e-05, "loss": 0.9599, "step": 338 }, { "epoch": 0.05, "learning_rate": 1.997810601388021e-05, "loss": 0.8666, "step": 339 }, { "epoch": 0.05, "learning_rate": 1.997778534596954e-05, "loss": 0.9395, "step": 340 }, { "epoch": 0.05, "learning_rate": 1.997746234939724e-05, "loss": 0.7928, "step": 341 }, { "epoch": 0.05, "learning_rate": 1.9977137024238705e-05, "loss": 0.8631, "step": 342 }, { "epoch": 0.05, "learning_rate": 1.9976809370569856e-05, "loss": 0.9473, "step": 343 }, { "epoch": 0.05, "learning_rate": 1.9976479388467165e-05, "loss": 0.9648, "step": 344 }, { "epoch": 0.05, "learning_rate": 1.9976147078007644e-05, "loss": 1.045, "step": 345 }, { "epoch": 0.05, "learning_rate": 1.9975812439268846e-05, "loss": 0.9513, "step": 346 }, { "epoch": 0.05, "learning_rate": 1.9975475472328875e-05, "loss": 0.8777, "step": 347 }, { "epoch": 0.05, "learning_rate": 1.997513617726637e-05, "loss": 0.9196, "step": 348 }, { "epoch": 0.05, "learning_rate": 1.9974794554160522e-05, "loss": 0.9602, "step": 349 }, { "epoch": 0.05, "learning_rate": 1.9974450603091056e-05, "loss": 0.8777, "step": 350 }, { "epoch": 0.05, "learning_rate": 1.997410432413825e-05, "loss": 0.9371, "step": 351 }, { "epoch": 0.05, "learning_rate": 1.9973755717382908e-05, "loss": 0.3342, "step": 352 }, { "epoch": 0.05, "learning_rate": 1.997340478290641e-05, "loss": 0.9451, "step": 353 }, { "epoch": 0.05, "learning_rate": 1.997305152079064e-05, "loss": 0.9129, "step": 354 }, { "epoch": 0.05, "learning_rate": 1.9972695931118053e-05, "loss": 0.9832, "step": 355 }, { "epoch": 0.05, "learning_rate": 1.9972338013971635e-05, "loss": 0.9316, "step": 356 }, { "epoch": 0.05, "learning_rate": 1.997197776943492e-05, "loss": 0.883, "step": 357 }, { "epoch": 0.05, "learning_rate": 1.997161519759198e-05, "loss": 0.8919, "step": 358 }, { "epoch": 0.05, "learning_rate": 1.997125029852744e-05, "loss": 0.9344, "step": 359 }, { "epoch": 0.05, "learning_rate": 1.997088307232646e-05, "loss": 0.9724, "step": 360 }, { "epoch": 0.05, "learning_rate": 1.9970513519074743e-05, "loss": 0.8876, "step": 361 }, { "epoch": 0.05, "learning_rate": 1.9970141638858537e-05, "loss": 0.8967, "step": 362 }, { "epoch": 0.05, "learning_rate": 1.9969767431764633e-05, "loss": 0.9489, "step": 363 }, { "epoch": 0.05, "learning_rate": 1.9969390897880364e-05, "loss": 0.901, "step": 364 }, { "epoch": 0.05, "learning_rate": 1.996901203729361e-05, "loss": 0.9152, "step": 365 }, { "epoch": 0.05, "learning_rate": 1.9968630850092792e-05, "loss": 0.9487, "step": 366 }, { "epoch": 0.05, "learning_rate": 1.996824733636687e-05, "loss": 0.9645, "step": 367 }, { "epoch": 0.05, "learning_rate": 1.9967861496205354e-05, "loss": 0.8547, "step": 368 }, { "epoch": 0.06, "learning_rate": 1.996747332969829e-05, "loss": 0.9282, "step": 369 }, { "epoch": 0.06, "learning_rate": 1.996708283693627e-05, "loss": 0.8946, "step": 370 }, { "epoch": 0.06, "learning_rate": 1.9966690018010427e-05, "loss": 0.2885, "step": 371 }, { "epoch": 0.06, "learning_rate": 1.996629487301245e-05, "loss": 0.8543, "step": 372 }, { "epoch": 0.06, "learning_rate": 1.9965897402034546e-05, "loss": 0.9461, "step": 373 }, { "epoch": 0.06, "learning_rate": 1.996549760516948e-05, "loss": 0.9423, "step": 374 }, { "epoch": 0.06, "learning_rate": 1.9965095482510573e-05, "loss": 0.9118, "step": 375 }, { "epoch": 0.06, "learning_rate": 1.9964691034151658e-05, "loss": 0.9121, "step": 376 }, { "epoch": 0.06, "learning_rate": 1.9964284260187133e-05, "loss": 0.94, "step": 377 }, { "epoch": 0.06, "learning_rate": 1.9963875160711934e-05, "loss": 0.885, "step": 378 }, { "epoch": 0.06, "learning_rate": 1.9963463735821535e-05, "loss": 1.0216, "step": 379 }, { "epoch": 0.06, "learning_rate": 1.9963049985611958e-05, "loss": 0.9798, "step": 380 }, { "epoch": 0.06, "learning_rate": 1.9962633910179773e-05, "loss": 0.9294, "step": 381 }, { "epoch": 0.06, "learning_rate": 1.9962215509622073e-05, "loss": 0.8462, "step": 382 }, { "epoch": 0.06, "learning_rate": 1.996179478403651e-05, "loss": 0.8739, "step": 383 }, { "epoch": 0.06, "learning_rate": 1.9961371733521285e-05, "loss": 0.8414, "step": 384 }, { "epoch": 0.06, "learning_rate": 1.996094635817512e-05, "loss": 0.889, "step": 385 }, { "epoch": 0.06, "learning_rate": 1.9960518658097294e-05, "loss": 0.2616, "step": 386 }, { "epoch": 0.06, "learning_rate": 1.9960088633387625e-05, "loss": 0.88, "step": 387 }, { "epoch": 0.06, "learning_rate": 1.995965628414648e-05, "loss": 0.9104, "step": 388 }, { "epoch": 0.06, "learning_rate": 1.9959221610474756e-05, "loss": 0.9165, "step": 389 }, { "epoch": 0.06, "learning_rate": 1.99587846124739e-05, "loss": 0.8673, "step": 390 }, { "epoch": 0.06, "learning_rate": 1.9958345290245904e-05, "loss": 0.8502, "step": 391 }, { "epoch": 0.06, "learning_rate": 1.9957903643893293e-05, "loss": 0.8852, "step": 392 }, { "epoch": 0.06, "learning_rate": 1.9957459673519147e-05, "loss": 0.9846, "step": 393 }, { "epoch": 0.06, "learning_rate": 1.995701337922708e-05, "loss": 0.8054, "step": 394 }, { "epoch": 0.06, "learning_rate": 1.995656476112125e-05, "loss": 0.9427, "step": 395 }, { "epoch": 0.06, "learning_rate": 1.9956113819306356e-05, "loss": 0.859, "step": 396 }, { "epoch": 0.06, "learning_rate": 1.995566055388764e-05, "loss": 0.8362, "step": 397 }, { "epoch": 0.06, "learning_rate": 1.9955204964970893e-05, "loss": 0.8244, "step": 398 }, { "epoch": 0.06, "learning_rate": 1.9954747052662436e-05, "loss": 0.7892, "step": 399 }, { "epoch": 0.06, "learning_rate": 1.9954286817069145e-05, "loss": 0.8501, "step": 400 }, { "epoch": 0.06, "learning_rate": 1.9953824258298426e-05, "loss": 0.9291, "step": 401 }, { "epoch": 0.06, "learning_rate": 1.9953359376458232e-05, "loss": 0.8731, "step": 402 }, { "epoch": 0.06, "learning_rate": 1.9952892171657066e-05, "loss": 0.9751, "step": 403 }, { "epoch": 0.06, "learning_rate": 1.995242264400396e-05, "loss": 0.7893, "step": 404 }, { "epoch": 0.06, "learning_rate": 1.9951950793608504e-05, "loss": 0.9508, "step": 405 }, { "epoch": 0.06, "learning_rate": 1.9951476620580808e-05, "loss": 0.9558, "step": 406 }, { "epoch": 0.06, "learning_rate": 1.9951000125031546e-05, "loss": 0.9368, "step": 407 }, { "epoch": 0.06, "learning_rate": 1.9950521307071916e-05, "loss": 0.8898, "step": 408 }, { "epoch": 0.06, "learning_rate": 1.995004016681368e-05, "loss": 0.8134, "step": 409 }, { "epoch": 0.06, "learning_rate": 1.9949556704369115e-05, "loss": 0.8307, "step": 410 }, { "epoch": 0.06, "learning_rate": 1.9949070919851065e-05, "loss": 0.9297, "step": 411 }, { "epoch": 0.06, "learning_rate": 1.9948582813372893e-05, "loss": 0.884, "step": 412 }, { "epoch": 0.06, "learning_rate": 1.9948092385048528e-05, "loss": 0.276, "step": 413 }, { "epoch": 0.06, "learning_rate": 1.994759963499242e-05, "loss": 0.9525, "step": 414 }, { "epoch": 0.06, "learning_rate": 1.994710456331957e-05, "loss": 0.8561, "step": 415 }, { "epoch": 0.06, "learning_rate": 1.9946607170145523e-05, "loss": 0.9378, "step": 416 }, { "epoch": 0.06, "learning_rate": 1.9946107455586362e-05, "loss": 0.8814, "step": 417 }, { "epoch": 0.06, "learning_rate": 1.9945605419758715e-05, "loss": 0.8429, "step": 418 }, { "epoch": 0.06, "learning_rate": 1.994510106277974e-05, "loss": 0.9664, "step": 419 }, { "epoch": 0.06, "learning_rate": 1.9944594384767158e-05, "loss": 0.8569, "step": 420 }, { "epoch": 0.06, "learning_rate": 1.9944085385839215e-05, "loss": 0.9583, "step": 421 }, { "epoch": 0.06, "learning_rate": 1.9943574066114705e-05, "loss": 0.8849, "step": 422 }, { "epoch": 0.06, "learning_rate": 1.9943060425712955e-05, "loss": 0.8595, "step": 423 }, { "epoch": 0.06, "learning_rate": 1.994254446475385e-05, "loss": 0.8356, "step": 424 }, { "epoch": 0.06, "learning_rate": 1.9942026183357802e-05, "loss": 0.8678, "step": 425 }, { "epoch": 0.06, "learning_rate": 1.9941505581645774e-05, "loss": 0.9045, "step": 426 }, { "epoch": 0.06, "learning_rate": 1.994098265973926e-05, "loss": 0.8507, "step": 427 }, { "epoch": 0.06, "learning_rate": 1.9940457417760308e-05, "loss": 0.8947, "step": 428 }, { "epoch": 0.06, "learning_rate": 1.99399298558315e-05, "loss": 0.9595, "step": 429 }, { "epoch": 0.06, "learning_rate": 1.9939399974075958e-05, "loss": 0.984, "step": 430 }, { "epoch": 0.06, "learning_rate": 1.993886777261735e-05, "loss": 0.9357, "step": 431 }, { "epoch": 0.06, "learning_rate": 1.9938333251579887e-05, "loss": 0.9048, "step": 432 }, { "epoch": 0.06, "learning_rate": 1.993779641108831e-05, "loss": 0.885, "step": 433 }, { "epoch": 0.06, "learning_rate": 1.993725725126792e-05, "loss": 0.8067, "step": 434 }, { "epoch": 0.06, "learning_rate": 1.9936715772244542e-05, "loss": 0.845, "step": 435 }, { "epoch": 0.07, "learning_rate": 1.9936171974144547e-05, "loss": 0.8681, "step": 436 }, { "epoch": 0.07, "learning_rate": 1.9935625857094853e-05, "loss": 0.8482, "step": 437 }, { "epoch": 0.07, "learning_rate": 1.9935077421222914e-05, "loss": 0.8761, "step": 438 }, { "epoch": 0.07, "learning_rate": 1.9934526666656727e-05, "loss": 0.8837, "step": 439 }, { "epoch": 0.07, "learning_rate": 1.9933973593524828e-05, "loss": 0.9695, "step": 440 }, { "epoch": 0.07, "learning_rate": 1.99334182019563e-05, "loss": 0.9342, "step": 441 }, { "epoch": 0.07, "learning_rate": 1.993286049208076e-05, "loss": 0.9204, "step": 442 }, { "epoch": 0.07, "learning_rate": 1.9932300464028364e-05, "loss": 0.8879, "step": 443 }, { "epoch": 0.07, "learning_rate": 1.9931738117929824e-05, "loss": 0.8904, "step": 444 }, { "epoch": 0.07, "learning_rate": 1.9931173453916374e-05, "loss": 0.9328, "step": 445 }, { "epoch": 0.07, "learning_rate": 1.9930606472119805e-05, "loss": 0.9498, "step": 446 }, { "epoch": 0.07, "learning_rate": 1.993003717267244e-05, "loss": 0.9017, "step": 447 }, { "epoch": 0.07, "learning_rate": 1.992946555570714e-05, "loss": 0.92, "step": 448 }, { "epoch": 0.07, "learning_rate": 1.9928891621357312e-05, "loss": 0.9814, "step": 449 }, { "epoch": 0.07, "learning_rate": 1.992831536975691e-05, "loss": 0.8618, "step": 450 }, { "epoch": 0.07, "learning_rate": 1.9927736801040418e-05, "loss": 0.7811, "step": 451 }, { "epoch": 0.07, "learning_rate": 1.9927155915342866e-05, "loss": 0.8898, "step": 452 }, { "epoch": 0.07, "learning_rate": 1.992657271279982e-05, "loss": 0.9273, "step": 453 }, { "epoch": 0.07, "learning_rate": 1.9925987193547397e-05, "loss": 0.7945, "step": 454 }, { "epoch": 0.07, "learning_rate": 1.9925399357722245e-05, "loss": 0.9027, "step": 455 }, { "epoch": 0.07, "learning_rate": 1.9924809205461554e-05, "loss": 0.7659, "step": 456 }, { "epoch": 0.07, "learning_rate": 1.992421673690306e-05, "loss": 1.0432, "step": 457 }, { "epoch": 0.07, "learning_rate": 1.9923621952185034e-05, "loss": 0.9187, "step": 458 }, { "epoch": 0.07, "learning_rate": 1.992302485144629e-05, "loss": 0.8676, "step": 459 }, { "epoch": 0.07, "learning_rate": 1.9922425434826182e-05, "loss": 0.8701, "step": 460 }, { "epoch": 0.07, "learning_rate": 1.9921823702464605e-05, "loss": 0.8749, "step": 461 }, { "epoch": 0.07, "learning_rate": 1.9921219654501996e-05, "loss": 0.8932, "step": 462 }, { "epoch": 0.07, "learning_rate": 1.9920613291079324e-05, "loss": 0.2928, "step": 463 }, { "epoch": 0.07, "learning_rate": 1.9920004612338112e-05, "loss": 0.8354, "step": 464 }, { "epoch": 0.07, "learning_rate": 1.991939361842041e-05, "loss": 0.9437, "step": 465 }, { "epoch": 0.07, "learning_rate": 1.9918780309468822e-05, "loss": 0.8123, "step": 466 }, { "epoch": 0.07, "learning_rate": 1.991816468562648e-05, "loss": 0.8567, "step": 467 }, { "epoch": 0.07, "learning_rate": 1.9917546747037058e-05, "loss": 0.9067, "step": 468 }, { "epoch": 0.07, "learning_rate": 1.9916926493844783e-05, "loss": 0.8701, "step": 469 }, { "epoch": 0.07, "learning_rate": 1.9916303926194406e-05, "loss": 0.3248, "step": 470 }, { "epoch": 0.07, "learning_rate": 1.9915679044231223e-05, "loss": 0.8119, "step": 471 }, { "epoch": 0.07, "learning_rate": 1.9915051848101074e-05, "loss": 0.8938, "step": 472 }, { "epoch": 0.07, "learning_rate": 1.991442233795034e-05, "loss": 0.9198, "step": 473 }, { "epoch": 0.07, "learning_rate": 1.9913790513925936e-05, "loss": 0.9081, "step": 474 }, { "epoch": 0.07, "learning_rate": 1.991315637617532e-05, "loss": 0.8487, "step": 475 }, { "epoch": 0.07, "learning_rate": 1.991251992484649e-05, "loss": 0.8893, "step": 476 }, { "epoch": 0.07, "learning_rate": 1.9911881160087983e-05, "loss": 0.8816, "step": 477 }, { "epoch": 0.07, "learning_rate": 1.9911240082048883e-05, "loss": 0.8157, "step": 478 }, { "epoch": 0.07, "learning_rate": 1.99105966908788e-05, "loss": 0.9881, "step": 479 }, { "epoch": 0.07, "learning_rate": 1.9909950986727895e-05, "loss": 0.9073, "step": 480 }, { "epoch": 0.07, "learning_rate": 1.9909302969746864e-05, "loss": 0.8707, "step": 481 }, { "epoch": 0.07, "learning_rate": 1.990865264008695e-05, "loss": 0.8502, "step": 482 }, { "epoch": 0.07, "learning_rate": 1.9907999997899917e-05, "loss": 0.8691, "step": 483 }, { "epoch": 0.07, "learning_rate": 1.9907345043338097e-05, "loss": 0.7427, "step": 484 }, { "epoch": 0.07, "learning_rate": 1.9906687776554336e-05, "loss": 0.9018, "step": 485 }, { "epoch": 0.07, "learning_rate": 1.9906028197702032e-05, "loss": 0.8833, "step": 486 }, { "epoch": 0.07, "learning_rate": 1.9905366306935122e-05, "loss": 0.9013, "step": 487 }, { "epoch": 0.07, "learning_rate": 1.990470210440808e-05, "loss": 0.8673, "step": 488 }, { "epoch": 0.07, "learning_rate": 1.9904035590275926e-05, "loss": 0.8687, "step": 489 }, { "epoch": 0.07, "learning_rate": 1.9903366764694206e-05, "loss": 0.8809, "step": 490 }, { "epoch": 0.07, "learning_rate": 1.9902695627819016e-05, "loss": 0.869, "step": 491 }, { "epoch": 0.07, "learning_rate": 1.9902022179806992e-05, "loss": 0.8359, "step": 492 }, { "epoch": 0.07, "learning_rate": 1.9901346420815307e-05, "loss": 0.8282, "step": 493 }, { "epoch": 0.07, "learning_rate": 1.9900668351001668e-05, "loss": 0.9214, "step": 494 }, { "epoch": 0.07, "learning_rate": 1.9899987970524327e-05, "loss": 0.9036, "step": 495 }, { "epoch": 0.07, "learning_rate": 1.9899305279542077e-05, "loss": 0.286, "step": 496 }, { "epoch": 0.07, "learning_rate": 1.9898620278214244e-05, "loss": 0.2963, "step": 497 }, { "epoch": 0.07, "learning_rate": 1.98979329667007e-05, "loss": 0.9768, "step": 498 }, { "epoch": 0.07, "learning_rate": 1.9897243345161858e-05, "loss": 0.317, "step": 499 }, { "epoch": 0.07, "learning_rate": 1.9896551413758652e-05, "loss": 0.925, "step": 500 }, { "epoch": 0.07, "learning_rate": 1.989585717265258e-05, "loss": 0.8091, "step": 501 }, { "epoch": 0.07, "learning_rate": 1.9895160622005658e-05, "loss": 0.9363, "step": 502 }, { "epoch": 0.08, "learning_rate": 1.9894461761980455e-05, "loss": 0.8247, "step": 503 }, { "epoch": 0.08, "learning_rate": 1.9893760592740077e-05, "loss": 0.9337, "step": 504 }, { "epoch": 0.08, "learning_rate": 1.989305711444816e-05, "loss": 0.9428, "step": 505 }, { "epoch": 0.08, "learning_rate": 1.989235132726889e-05, "loss": 0.8863, "step": 506 }, { "epoch": 0.08, "learning_rate": 1.9891643231366987e-05, "loss": 1.0137, "step": 507 }, { "epoch": 0.08, "learning_rate": 1.9890932826907702e-05, "loss": 0.8323, "step": 508 }, { "epoch": 0.08, "learning_rate": 1.989022011405684e-05, "loss": 0.9301, "step": 509 }, { "epoch": 0.08, "learning_rate": 1.9889505092980738e-05, "loss": 0.8478, "step": 510 }, { "epoch": 0.08, "learning_rate": 1.9888787763846268e-05, "loss": 0.8264, "step": 511 }, { "epoch": 0.08, "learning_rate": 1.988806812682084e-05, "loss": 0.8478, "step": 512 }, { "epoch": 0.08, "learning_rate": 1.988734618207241e-05, "loss": 0.8308, "step": 513 }, { "epoch": 0.08, "learning_rate": 1.9886621929769473e-05, "loss": 0.9386, "step": 514 }, { "epoch": 0.08, "learning_rate": 1.988589537008105e-05, "loss": 0.8763, "step": 515 }, { "epoch": 0.08, "learning_rate": 1.9885166503176715e-05, "loss": 0.8765, "step": 516 }, { "epoch": 0.08, "learning_rate": 1.988443532922657e-05, "loss": 0.9058, "step": 517 }, { "epoch": 0.08, "learning_rate": 1.9883701848401265e-05, "loss": 0.9099, "step": 518 }, { "epoch": 0.08, "learning_rate": 1.9882966060871978e-05, "loss": 0.9523, "step": 519 }, { "epoch": 0.08, "learning_rate": 1.9882227966810434e-05, "loss": 0.8764, "step": 520 }, { "epoch": 0.08, "learning_rate": 1.988148756638889e-05, "loss": 0.8446, "step": 521 }, { "epoch": 0.08, "learning_rate": 1.9880744859780144e-05, "loss": 0.8582, "step": 522 }, { "epoch": 0.08, "learning_rate": 1.987999984715754e-05, "loss": 0.8318, "step": 523 }, { "epoch": 0.08, "learning_rate": 1.987925252869494e-05, "loss": 0.9015, "step": 524 }, { "epoch": 0.08, "learning_rate": 1.9878502904566764e-05, "loss": 0.8874, "step": 525 }, { "epoch": 0.08, "learning_rate": 1.9877750974947964e-05, "loss": 0.9145, "step": 526 }, { "epoch": 0.08, "learning_rate": 1.9876996740014026e-05, "loss": 0.9236, "step": 527 }, { "epoch": 0.08, "learning_rate": 1.9876240199940976e-05, "loss": 0.8689, "step": 528 }, { "epoch": 0.08, "learning_rate": 1.9875481354905383e-05, "loss": 0.9401, "step": 529 }, { "epoch": 0.08, "learning_rate": 1.9874720205084345e-05, "loss": 0.8747, "step": 530 }, { "epoch": 0.08, "learning_rate": 1.98739567506555e-05, "loss": 0.8706, "step": 531 }, { "epoch": 0.08, "learning_rate": 1.9873190991797038e-05, "loss": 0.8665, "step": 532 }, { "epoch": 0.08, "learning_rate": 1.9872422928687665e-05, "loss": 0.7476, "step": 533 }, { "epoch": 0.08, "learning_rate": 1.9871652561506642e-05, "loss": 0.8856, "step": 534 }, { "epoch": 0.08, "learning_rate": 1.9870879890433752e-05, "loss": 0.9263, "step": 535 }, { "epoch": 0.08, "learning_rate": 1.9870104915649334e-05, "loss": 0.7681, "step": 536 }, { "epoch": 0.08, "learning_rate": 1.9869327637334248e-05, "loss": 0.8862, "step": 537 }, { "epoch": 0.08, "learning_rate": 1.98685480556699e-05, "loss": 0.891, "step": 538 }, { "epoch": 0.08, "learning_rate": 1.986776617083824e-05, "loss": 0.9138, "step": 539 }, { "epoch": 0.08, "learning_rate": 1.9866981983021742e-05, "loss": 0.9078, "step": 540 }, { "epoch": 0.08, "learning_rate": 1.986619549240342e-05, "loss": 0.8531, "step": 541 }, { "epoch": 0.08, "learning_rate": 1.9865406699166834e-05, "loss": 0.2859, "step": 542 }, { "epoch": 0.08, "learning_rate": 1.9864615603496074e-05, "loss": 0.8389, "step": 543 }, { "epoch": 0.08, "learning_rate": 1.9863822205575772e-05, "loss": 0.9341, "step": 544 }, { "epoch": 0.08, "learning_rate": 1.986302650559109e-05, "loss": 0.8472, "step": 545 }, { "epoch": 0.08, "learning_rate": 1.9862228503727737e-05, "loss": 0.9104, "step": 546 }, { "epoch": 0.08, "learning_rate": 1.986142820017195e-05, "loss": 0.9388, "step": 547 }, { "epoch": 0.08, "learning_rate": 1.9860625595110515e-05, "loss": 0.9254, "step": 548 }, { "epoch": 0.08, "learning_rate": 1.9859820688730737e-05, "loss": 0.8287, "step": 549 }, { "epoch": 0.08, "learning_rate": 1.9859013481220478e-05, "loss": 0.7983, "step": 550 }, { "epoch": 0.08, "learning_rate": 1.9858203972768125e-05, "loss": 0.8015, "step": 551 }, { "epoch": 0.08, "learning_rate": 1.98573921635626e-05, "loss": 0.9009, "step": 552 }, { "epoch": 0.08, "learning_rate": 1.9856578053793374e-05, "loss": 0.921, "step": 553 }, { "epoch": 0.08, "learning_rate": 1.9855761643650444e-05, "loss": 0.9147, "step": 554 }, { "epoch": 0.08, "learning_rate": 1.9854942933324348e-05, "loss": 0.8662, "step": 555 }, { "epoch": 0.08, "learning_rate": 1.985412192300616e-05, "loss": 0.8731, "step": 556 }, { "epoch": 0.08, "learning_rate": 1.985329861288749e-05, "loss": 0.8467, "step": 557 }, { "epoch": 0.08, "learning_rate": 1.9852473003160493e-05, "loss": 0.8592, "step": 558 }, { "epoch": 0.08, "learning_rate": 1.9851645094017843e-05, "loss": 0.9109, "step": 559 }, { "epoch": 0.08, "learning_rate": 1.985081488565277e-05, "loss": 0.8233, "step": 560 }, { "epoch": 0.08, "learning_rate": 1.9849982378259028e-05, "loss": 0.9058, "step": 561 }, { "epoch": 0.08, "learning_rate": 1.984914757203091e-05, "loss": 0.8895, "step": 562 }, { "epoch": 0.08, "learning_rate": 1.984831046716325e-05, "loss": 0.864, "step": 563 }, { "epoch": 0.08, "learning_rate": 1.9847471063851414e-05, "loss": 0.8832, "step": 564 }, { "epoch": 0.08, "learning_rate": 1.9846629362291306e-05, "loss": 0.891, "step": 565 }, { "epoch": 0.08, "learning_rate": 1.9845785362679363e-05, "loss": 0.8353, "step": 566 }, { "epoch": 0.08, "learning_rate": 1.984493906521257e-05, "loss": 0.8674, "step": 567 }, { "epoch": 0.08, "learning_rate": 1.984409047008843e-05, "loss": 0.8078, "step": 568 }, { "epoch": 0.08, "learning_rate": 1.9843239577504997e-05, "loss": 0.8366, "step": 569 }, { "epoch": 0.09, "learning_rate": 1.9842386387660857e-05, "loss": 0.8317, "step": 570 }, { "epoch": 0.09, "learning_rate": 1.9841530900755133e-05, "loss": 0.8919, "step": 571 }, { "epoch": 0.09, "learning_rate": 1.9840673116987475e-05, "loss": 0.9108, "step": 572 }, { "epoch": 0.09, "learning_rate": 1.9839813036558086e-05, "loss": 0.7967, "step": 573 }, { "epoch": 0.09, "learning_rate": 1.9838950659667685e-05, "loss": 0.8951, "step": 574 }, { "epoch": 0.09, "learning_rate": 1.9838085986517547e-05, "loss": 0.8885, "step": 575 }, { "epoch": 0.09, "learning_rate": 1.983721901730947e-05, "loss": 0.98, "step": 576 }, { "epoch": 0.09, "learning_rate": 1.983634975224579e-05, "loss": 0.885, "step": 577 }, { "epoch": 0.09, "learning_rate": 1.983547819152938e-05, "loss": 0.939, "step": 578 }, { "epoch": 0.09, "learning_rate": 1.9834604335363653e-05, "loss": 0.9033, "step": 579 }, { "epoch": 0.09, "learning_rate": 1.9833728183952547e-05, "loss": 0.8535, "step": 580 }, { "epoch": 0.09, "learning_rate": 1.9832849737500547e-05, "loss": 0.8259, "step": 581 }, { "epoch": 0.09, "learning_rate": 1.9831968996212666e-05, "loss": 0.8027, "step": 582 }, { "epoch": 0.09, "learning_rate": 1.983108596029446e-05, "loss": 0.9385, "step": 583 }, { "epoch": 0.09, "learning_rate": 1.983020062995201e-05, "loss": 0.8715, "step": 584 }, { "epoch": 0.09, "learning_rate": 1.9829313005391943e-05, "loss": 0.8105, "step": 585 }, { "epoch": 0.09, "learning_rate": 1.9828423086821417e-05, "loss": 0.8368, "step": 586 }, { "epoch": 0.09, "learning_rate": 1.9827530874448122e-05, "loss": 0.763, "step": 587 }, { "epoch": 0.09, "learning_rate": 1.9826636368480287e-05, "loss": 0.8374, "step": 588 }, { "epoch": 0.09, "learning_rate": 1.9825739569126678e-05, "loss": 0.8237, "step": 589 }, { "epoch": 0.09, "learning_rate": 1.9824840476596596e-05, "loss": 0.8268, "step": 590 }, { "epoch": 0.09, "learning_rate": 1.9823939091099872e-05, "loss": 0.8538, "step": 591 }, { "epoch": 0.09, "learning_rate": 1.9823035412846876e-05, "loss": 0.9207, "step": 592 }, { "epoch": 0.09, "learning_rate": 1.9822129442048515e-05, "loss": 0.8148, "step": 593 }, { "epoch": 0.09, "learning_rate": 1.9821221178916225e-05, "loss": 1.0126, "step": 594 }, { "epoch": 0.09, "learning_rate": 1.982031062366198e-05, "loss": 0.9105, "step": 595 }, { "epoch": 0.09, "learning_rate": 1.981939777649829e-05, "loss": 0.8529, "step": 596 }, { "epoch": 0.09, "learning_rate": 1.981848263763821e-05, "loss": 0.8982, "step": 597 }, { "epoch": 0.09, "learning_rate": 1.9817565207295307e-05, "loss": 0.8766, "step": 598 }, { "epoch": 0.09, "learning_rate": 1.9816645485683697e-05, "loss": 0.8468, "step": 599 }, { "epoch": 0.09, "learning_rate": 1.9815723473018028e-05, "loss": 0.9364, "step": 600 }, { "epoch": 0.09, "learning_rate": 1.981479916951349e-05, "loss": 0.8531, "step": 601 }, { "epoch": 0.09, "learning_rate": 1.98138725753858e-05, "loss": 0.8927, "step": 602 }, { "epoch": 0.09, "learning_rate": 1.9812943690851204e-05, "loss": 0.8477, "step": 603 }, { "epoch": 0.09, "learning_rate": 1.9812012516126498e-05, "loss": 0.9113, "step": 604 }, { "epoch": 0.09, "learning_rate": 1.9811079051428998e-05, "loss": 0.8644, "step": 605 }, { "epoch": 0.09, "learning_rate": 1.981014329697656e-05, "loss": 0.9048, "step": 606 }, { "epoch": 0.09, "learning_rate": 1.980920525298758e-05, "loss": 0.321, "step": 607 }, { "epoch": 0.09, "learning_rate": 1.9808264919680974e-05, "loss": 0.8664, "step": 608 }, { "epoch": 0.09, "learning_rate": 1.9807322297276207e-05, "loss": 0.8068, "step": 609 }, { "epoch": 0.09, "learning_rate": 1.9806377385993278e-05, "loss": 0.8822, "step": 610 }, { "epoch": 0.09, "learning_rate": 1.980543018605271e-05, "loss": 0.8967, "step": 611 }, { "epoch": 0.09, "learning_rate": 1.980448069767556e-05, "loss": 0.9501, "step": 612 }, { "epoch": 0.09, "learning_rate": 1.9803528921083428e-05, "loss": 0.8852, "step": 613 }, { "epoch": 0.09, "learning_rate": 1.9802574856498444e-05, "loss": 0.9688, "step": 614 }, { "epoch": 0.09, "learning_rate": 1.9801618504143273e-05, "loss": 0.8092, "step": 615 }, { "epoch": 0.09, "learning_rate": 1.9800659864241112e-05, "loss": 0.909, "step": 616 }, { "epoch": 0.09, "learning_rate": 1.9799698937015694e-05, "loss": 0.748, "step": 617 }, { "epoch": 0.09, "learning_rate": 1.9798735722691282e-05, "loss": 0.7818, "step": 618 }, { "epoch": 0.09, "learning_rate": 1.9797770221492678e-05, "loss": 0.8388, "step": 619 }, { "epoch": 0.09, "learning_rate": 1.979680243364521e-05, "loss": 0.8158, "step": 620 }, { "epoch": 0.09, "learning_rate": 1.9795832359374757e-05, "loss": 0.8485, "step": 621 }, { "epoch": 0.09, "learning_rate": 1.9794859998907707e-05, "loss": 0.8601, "step": 622 }, { "epoch": 0.09, "learning_rate": 1.9793885352471e-05, "loss": 0.9454, "step": 623 }, { "epoch": 0.09, "learning_rate": 1.9792908420292097e-05, "loss": 0.8438, "step": 624 }, { "epoch": 0.09, "learning_rate": 1.9791929202599007e-05, "loss": 0.8418, "step": 625 }, { "epoch": 0.09, "learning_rate": 1.979094769962026e-05, "loss": 0.9085, "step": 626 }, { "epoch": 0.09, "learning_rate": 1.9789963911584926e-05, "loss": 0.9247, "step": 627 }, { "epoch": 0.09, "learning_rate": 1.9788977838722602e-05, "loss": 0.8309, "step": 628 }, { "epoch": 0.09, "learning_rate": 1.9787989481263427e-05, "loss": 0.8782, "step": 629 }, { "epoch": 0.09, "learning_rate": 1.9786998839438067e-05, "loss": 0.9002, "step": 630 }, { "epoch": 0.09, "learning_rate": 1.9786005913477722e-05, "loss": 0.8543, "step": 631 }, { "epoch": 0.09, "learning_rate": 1.9785010703614123e-05, "loss": 0.8683, "step": 632 }, { "epoch": 0.09, "learning_rate": 1.978401321007954e-05, "loss": 0.3252, "step": 633 }, { "epoch": 0.09, "learning_rate": 1.9783013433106776e-05, "loss": 0.8791, "step": 634 }, { "epoch": 0.09, "learning_rate": 1.9782011372929157e-05, "loss": 0.8625, "step": 635 }, { "epoch": 0.09, "learning_rate": 1.978100702978055e-05, "loss": 0.7653, "step": 636 }, { "epoch": 0.1, "learning_rate": 1.9780000403895352e-05, "loss": 0.9011, "step": 637 }, { "epoch": 0.1, "learning_rate": 1.97789914955085e-05, "loss": 0.8073, "step": 638 }, { "epoch": 0.1, "learning_rate": 1.9777980304855453e-05, "loss": 0.9463, "step": 639 }, { "epoch": 0.1, "learning_rate": 1.9776966832172206e-05, "loss": 0.8266, "step": 640 }, { "epoch": 0.1, "learning_rate": 1.977595107769529e-05, "loss": 0.9463, "step": 641 }, { "epoch": 0.1, "learning_rate": 1.977493304166177e-05, "loss": 0.7781, "step": 642 }, { "epoch": 0.1, "learning_rate": 1.9773912724309237e-05, "loss": 0.9773, "step": 643 }, { "epoch": 0.1, "learning_rate": 1.9772890125875812e-05, "loss": 0.869, "step": 644 }, { "epoch": 0.1, "learning_rate": 1.977186524660016e-05, "loss": 0.8339, "step": 645 }, { "epoch": 0.1, "learning_rate": 1.9770838086721477e-05, "loss": 0.8128, "step": 646 }, { "epoch": 0.1, "learning_rate": 1.9769808646479473e-05, "loss": 0.8878, "step": 647 }, { "epoch": 0.1, "learning_rate": 1.9768776926114414e-05, "loss": 0.8465, "step": 648 }, { "epoch": 0.1, "learning_rate": 1.976774292586708e-05, "loss": 0.9315, "step": 649 }, { "epoch": 0.1, "learning_rate": 1.97667066459788e-05, "loss": 0.9177, "step": 650 }, { "epoch": 0.1, "learning_rate": 1.9765668086691415e-05, "loss": 0.3385, "step": 651 }, { "epoch": 0.1, "learning_rate": 1.9764627248247323e-05, "loss": 0.8727, "step": 652 }, { "epoch": 0.1, "learning_rate": 1.9763584130889428e-05, "loss": 0.8615, "step": 653 }, { "epoch": 0.1, "learning_rate": 1.9762538734861177e-05, "loss": 0.9148, "step": 654 }, { "epoch": 0.1, "learning_rate": 1.9761491060406554e-05, "loss": 0.3197, "step": 655 }, { "epoch": 0.1, "learning_rate": 1.976044110777007e-05, "loss": 0.8729, "step": 656 }, { "epoch": 0.1, "learning_rate": 1.975938887719677e-05, "loss": 0.8655, "step": 657 }, { "epoch": 0.1, "learning_rate": 1.975833436893222e-05, "loss": 0.9419, "step": 658 }, { "epoch": 0.1, "learning_rate": 1.9757277583222535e-05, "loss": 0.8565, "step": 659 }, { "epoch": 0.1, "learning_rate": 1.9756218520314347e-05, "loss": 0.8304, "step": 660 }, { "epoch": 0.1, "learning_rate": 1.975515718045483e-05, "loss": 0.8978, "step": 661 }, { "epoch": 0.1, "learning_rate": 1.9754093563891678e-05, "loss": 0.8365, "step": 662 }, { "epoch": 0.1, "learning_rate": 1.975302767087313e-05, "loss": 0.8335, "step": 663 }, { "epoch": 0.1, "learning_rate": 1.9751959501647942e-05, "loss": 0.3326, "step": 664 }, { "epoch": 0.1, "learning_rate": 1.975088905646541e-05, "loss": 0.8592, "step": 665 }, { "epoch": 0.1, "learning_rate": 1.9749816335575364e-05, "loss": 0.8272, "step": 666 }, { "epoch": 0.1, "learning_rate": 1.9748741339228156e-05, "loss": 0.9455, "step": 667 }, { "epoch": 0.1, "learning_rate": 1.9747664067674675e-05, "loss": 0.9311, "step": 668 }, { "epoch": 0.1, "learning_rate": 1.974658452116634e-05, "loss": 0.8883, "step": 669 }, { "epoch": 0.1, "learning_rate": 1.97455026999551e-05, "loss": 0.8678, "step": 670 }, { "epoch": 0.1, "learning_rate": 1.9744418604293436e-05, "loss": 0.8464, "step": 671 }, { "epoch": 0.1, "learning_rate": 1.974333223443436e-05, "loss": 0.8196, "step": 672 }, { "epoch": 0.1, "learning_rate": 1.9742243590631412e-05, "loss": 0.8472, "step": 673 }, { "epoch": 0.1, "learning_rate": 1.9741152673138667e-05, "loss": 0.7921, "step": 674 }, { "epoch": 0.1, "learning_rate": 1.9740059482210723e-05, "loss": 0.8189, "step": 675 }, { "epoch": 0.1, "learning_rate": 1.973896401810272e-05, "loss": 0.8556, "step": 676 }, { "epoch": 0.1, "learning_rate": 1.973786628107032e-05, "loss": 0.821, "step": 677 }, { "epoch": 0.1, "learning_rate": 1.973676627136972e-05, "loss": 0.9011, "step": 678 }, { "epoch": 0.1, "learning_rate": 1.973566398925764e-05, "loss": 0.7828, "step": 679 }, { "epoch": 0.1, "learning_rate": 1.9734559434991344e-05, "loss": 0.8786, "step": 680 }, { "epoch": 0.1, "learning_rate": 1.973345260882861e-05, "loss": 0.9751, "step": 681 }, { "epoch": 0.1, "learning_rate": 1.9732343511027755e-05, "loss": 0.842, "step": 682 }, { "epoch": 0.1, "learning_rate": 1.9731232141847633e-05, "loss": 0.8613, "step": 683 }, { "epoch": 0.1, "learning_rate": 1.9730118501547614e-05, "loss": 0.8432, "step": 684 }, { "epoch": 0.1, "learning_rate": 1.9729002590387605e-05, "loss": 0.8871, "step": 685 }, { "epoch": 0.1, "learning_rate": 1.9727884408628048e-05, "loss": 0.8577, "step": 686 }, { "epoch": 0.1, "learning_rate": 1.9726763956529897e-05, "loss": 0.7907, "step": 687 }, { "epoch": 0.1, "learning_rate": 1.972564123435466e-05, "loss": 0.8144, "step": 688 }, { "epoch": 0.1, "learning_rate": 1.972451624236436e-05, "loss": 0.9163, "step": 689 }, { "epoch": 0.1, "learning_rate": 1.972338898082155e-05, "loss": 0.9008, "step": 690 }, { "epoch": 0.1, "learning_rate": 1.972225944998932e-05, "loss": 0.7845, "step": 691 }, { "epoch": 0.1, "learning_rate": 1.972112765013128e-05, "loss": 0.8939, "step": 692 }, { "epoch": 0.1, "learning_rate": 1.9719993581511576e-05, "loss": 0.8474, "step": 693 }, { "epoch": 0.1, "learning_rate": 1.9718857244394888e-05, "loss": 0.2953, "step": 694 }, { "epoch": 0.1, "learning_rate": 1.971771863904641e-05, "loss": 0.8622, "step": 695 }, { "epoch": 0.1, "learning_rate": 1.9716577765731886e-05, "loss": 0.876, "step": 696 }, { "epoch": 0.1, "learning_rate": 1.9715434624717567e-05, "loss": 0.8365, "step": 697 }, { "epoch": 0.1, "learning_rate": 1.971428921627025e-05, "loss": 0.8307, "step": 698 }, { "epoch": 0.1, "learning_rate": 1.9713141540657263e-05, "loss": 0.8544, "step": 699 }, { "epoch": 0.1, "learning_rate": 1.971199159814644e-05, "loss": 0.7771, "step": 700 }, { "epoch": 0.1, "learning_rate": 1.971083938900617e-05, "loss": 0.8269, "step": 701 }, { "epoch": 0.1, "learning_rate": 1.970968491350536e-05, "loss": 0.8646, "step": 702 }, { "epoch": 0.1, "learning_rate": 1.9708528171913447e-05, "loss": 0.8272, "step": 703 }, { "epoch": 0.1, "learning_rate": 1.9707369164500397e-05, "loss": 0.8709, "step": 704 }, { "epoch": 0.11, "learning_rate": 1.97062078915367e-05, "loss": 0.8312, "step": 705 }, { "epoch": 0.11, "learning_rate": 1.9705044353293386e-05, "loss": 0.327, "step": 706 }, { "epoch": 0.11, "learning_rate": 1.9703878550042e-05, "loss": 0.8176, "step": 707 }, { "epoch": 0.11, "learning_rate": 1.9702710482054627e-05, "loss": 0.9115, "step": 708 }, { "epoch": 0.11, "learning_rate": 1.9701540149603874e-05, "loss": 0.8403, "step": 709 }, { "epoch": 0.11, "learning_rate": 1.9700367552962883e-05, "loss": 0.8248, "step": 710 }, { "epoch": 0.11, "learning_rate": 1.9699192692405315e-05, "loss": 0.8153, "step": 711 }, { "epoch": 0.11, "learning_rate": 1.9698015568205366e-05, "loss": 0.8026, "step": 712 }, { "epoch": 0.11, "learning_rate": 1.9696836180637754e-05, "loss": 0.8168, "step": 713 }, { "epoch": 0.11, "learning_rate": 1.969565452997774e-05, "loss": 0.9349, "step": 714 }, { "epoch": 0.11, "learning_rate": 1.9694470616501096e-05, "loss": 0.9045, "step": 715 }, { "epoch": 0.11, "learning_rate": 1.9693284440484132e-05, "loss": 0.873, "step": 716 }, { "epoch": 0.11, "learning_rate": 1.969209600220368e-05, "loss": 0.8521, "step": 717 }, { "epoch": 0.11, "learning_rate": 1.969090530193711e-05, "loss": 0.8811, "step": 718 }, { "epoch": 0.11, "learning_rate": 1.9689712339962303e-05, "loss": 0.8059, "step": 719 }, { "epoch": 0.11, "learning_rate": 1.9688517116557686e-05, "loss": 0.8798, "step": 720 }, { "epoch": 0.11, "learning_rate": 1.96873196320022e-05, "loss": 0.8548, "step": 721 }, { "epoch": 0.11, "learning_rate": 1.9686119886575328e-05, "loss": 0.8893, "step": 722 }, { "epoch": 0.11, "learning_rate": 1.9684917880557063e-05, "loss": 0.8578, "step": 723 }, { "epoch": 0.11, "learning_rate": 1.968371361422794e-05, "loss": 0.8771, "step": 724 }, { "epoch": 0.11, "learning_rate": 1.9682507087869015e-05, "loss": 0.8347, "step": 725 }, { "epoch": 0.11, "learning_rate": 1.9681298301761873e-05, "loss": 0.9057, "step": 726 }, { "epoch": 0.11, "learning_rate": 1.968008725618862e-05, "loss": 0.856, "step": 727 }, { "epoch": 0.11, "learning_rate": 1.9678873951431908e-05, "loss": 0.9013, "step": 728 }, { "epoch": 0.11, "learning_rate": 1.9677658387774897e-05, "loss": 0.7795, "step": 729 }, { "epoch": 0.11, "learning_rate": 1.967644056550128e-05, "loss": 0.8619, "step": 730 }, { "epoch": 0.11, "learning_rate": 1.9675220484895272e-05, "loss": 0.8744, "step": 731 }, { "epoch": 0.11, "learning_rate": 1.9673998146241634e-05, "loss": 0.8247, "step": 732 }, { "epoch": 0.11, "learning_rate": 1.967277354982563e-05, "loss": 0.8709, "step": 733 }, { "epoch": 0.11, "learning_rate": 1.967154669593307e-05, "loss": 0.9366, "step": 734 }, { "epoch": 0.11, "learning_rate": 1.9670317584850277e-05, "loss": 0.8879, "step": 735 }, { "epoch": 0.11, "learning_rate": 1.966908621686411e-05, "loss": 0.9051, "step": 736 }, { "epoch": 0.11, "learning_rate": 1.966785259226195e-05, "loss": 0.8447, "step": 737 }, { "epoch": 0.11, "learning_rate": 1.966661671133171e-05, "loss": 0.764, "step": 738 }, { "epoch": 0.11, "learning_rate": 1.9665378574361816e-05, "loss": 0.8052, "step": 739 }, { "epoch": 0.11, "learning_rate": 1.966413818164124e-05, "loss": 0.8713, "step": 740 }, { "epoch": 0.11, "learning_rate": 1.9662895533459464e-05, "loss": 0.8283, "step": 741 }, { "epoch": 0.11, "learning_rate": 1.9661650630106507e-05, "loss": 0.8806, "step": 742 }, { "epoch": 0.11, "learning_rate": 1.9660403471872908e-05, "loss": 0.8156, "step": 743 }, { "epoch": 0.11, "learning_rate": 1.9659154059049737e-05, "loss": 0.8729, "step": 744 }, { "epoch": 0.11, "learning_rate": 1.965790239192858e-05, "loss": 0.899, "step": 745 }, { "epoch": 0.11, "learning_rate": 1.965664847080157e-05, "loss": 0.9069, "step": 746 }, { "epoch": 0.11, "learning_rate": 1.9655392295961345e-05, "loss": 0.918, "step": 747 }, { "epoch": 0.11, "learning_rate": 1.965413386770108e-05, "loss": 0.9282, "step": 748 }, { "epoch": 0.11, "learning_rate": 1.9652873186314467e-05, "loss": 0.9069, "step": 749 }, { "epoch": 0.11, "learning_rate": 1.9651610252095735e-05, "loss": 0.831, "step": 750 }, { "epoch": 0.11, "learning_rate": 1.9650345065339627e-05, "loss": 0.8172, "step": 751 }, { "epoch": 0.11, "learning_rate": 1.964907762634143e-05, "loss": 0.854, "step": 752 }, { "epoch": 0.11, "learning_rate": 1.9647807935396933e-05, "loss": 0.8929, "step": 753 }, { "epoch": 0.11, "learning_rate": 1.9646535992802467e-05, "loss": 0.9202, "step": 754 }, { "epoch": 0.11, "learning_rate": 1.9645261798854885e-05, "loss": 0.8073, "step": 755 }, { "epoch": 0.11, "learning_rate": 1.9643985353851564e-05, "loss": 0.9821, "step": 756 }, { "epoch": 0.11, "learning_rate": 1.9642706658090404e-05, "loss": 0.8726, "step": 757 }, { "epoch": 0.11, "learning_rate": 1.9641425711869833e-05, "loss": 0.8085, "step": 758 }, { "epoch": 0.11, "learning_rate": 1.964014251548881e-05, "loss": 0.7489, "step": 759 }, { "epoch": 0.11, "learning_rate": 1.9638857069246805e-05, "loss": 0.8207, "step": 760 }, { "epoch": 0.11, "learning_rate": 1.9637569373443825e-05, "loss": 0.8397, "step": 761 }, { "epoch": 0.11, "learning_rate": 1.96362794283804e-05, "loss": 0.8496, "step": 762 }, { "epoch": 0.11, "learning_rate": 1.9634987234357583e-05, "loss": 0.7604, "step": 763 }, { "epoch": 0.11, "learning_rate": 1.963369279167695e-05, "loss": 0.8262, "step": 764 }, { "epoch": 0.11, "learning_rate": 1.9632396100640607e-05, "loss": 0.8966, "step": 765 }, { "epoch": 0.11, "learning_rate": 1.963109716155118e-05, "loss": 0.7928, "step": 766 }, { "epoch": 0.11, "learning_rate": 1.962979597471182e-05, "loss": 0.8982, "step": 767 }, { "epoch": 0.11, "learning_rate": 1.9628492540426206e-05, "loss": 0.9063, "step": 768 }, { "epoch": 0.11, "learning_rate": 1.962718685899854e-05, "loss": 0.9141, "step": 769 }, { "epoch": 0.11, "learning_rate": 1.9625878930733546e-05, "loss": 0.9483, "step": 770 }, { "epoch": 0.11, "learning_rate": 1.9624568755936476e-05, "loss": 0.9351, "step": 771 }, { "epoch": 0.12, "learning_rate": 1.9623256334913102e-05, "loss": 0.7254, "step": 772 }, { "epoch": 0.12, "learning_rate": 1.962194166796973e-05, "loss": 0.8579, "step": 773 }, { "epoch": 0.12, "learning_rate": 1.962062475541317e-05, "loss": 0.9336, "step": 774 }, { "epoch": 0.12, "learning_rate": 1.961930559755078e-05, "loss": 0.3165, "step": 775 }, { "epoch": 0.12, "learning_rate": 1.9617984194690432e-05, "loss": 0.8092, "step": 776 }, { "epoch": 0.12, "learning_rate": 1.9616660547140514e-05, "loss": 0.7997, "step": 777 }, { "epoch": 0.12, "learning_rate": 1.961533465520995e-05, "loss": 0.9028, "step": 778 }, { "epoch": 0.12, "learning_rate": 1.9614006519208176e-05, "loss": 0.9662, "step": 779 }, { "epoch": 0.12, "learning_rate": 1.961267613944517e-05, "loss": 0.2855, "step": 780 }, { "epoch": 0.12, "learning_rate": 1.9611343516231413e-05, "loss": 0.8733, "step": 781 }, { "epoch": 0.12, "learning_rate": 1.961000864987792e-05, "loss": 0.8698, "step": 782 }, { "epoch": 0.12, "learning_rate": 1.9608671540696233e-05, "loss": 0.7728, "step": 783 }, { "epoch": 0.12, "learning_rate": 1.9607332188998406e-05, "loss": 0.8943, "step": 784 }, { "epoch": 0.12, "learning_rate": 1.960599059509703e-05, "loss": 0.7779, "step": 785 }, { "epoch": 0.12, "learning_rate": 1.9604646759305207e-05, "loss": 0.8909, "step": 786 }, { "epoch": 0.12, "learning_rate": 1.9603300681936565e-05, "loss": 0.7284, "step": 787 }, { "epoch": 0.12, "learning_rate": 1.9601952363305267e-05, "loss": 0.876, "step": 788 }, { "epoch": 0.12, "learning_rate": 1.960060180372599e-05, "loss": 0.8338, "step": 789 }, { "epoch": 0.12, "learning_rate": 1.9599249003513917e-05, "loss": 0.8586, "step": 790 }, { "epoch": 0.12, "learning_rate": 1.959789396298479e-05, "loss": 0.8971, "step": 791 }, { "epoch": 0.12, "learning_rate": 1.959653668245485e-05, "loss": 0.8191, "step": 792 }, { "epoch": 0.12, "learning_rate": 1.9595177162240855e-05, "loss": 0.8542, "step": 793 }, { "epoch": 0.12, "learning_rate": 1.9593815402660106e-05, "loss": 0.8254, "step": 794 }, { "epoch": 0.12, "learning_rate": 1.9592451404030418e-05, "loss": 0.9176, "step": 795 }, { "epoch": 0.12, "learning_rate": 1.9591085166670126e-05, "loss": 0.9057, "step": 796 }, { "epoch": 0.12, "learning_rate": 1.958971669089808e-05, "loss": 0.8022, "step": 797 }, { "epoch": 0.12, "learning_rate": 1.958834597703367e-05, "loss": 0.8284, "step": 798 }, { "epoch": 0.12, "learning_rate": 1.95869730253968e-05, "loss": 0.81, "step": 799 }, { "epoch": 0.12, "learning_rate": 1.958559783630789e-05, "loss": 0.8801, "step": 800 }, { "epoch": 0.12, "learning_rate": 1.958422041008789e-05, "loss": 0.8092, "step": 801 }, { "epoch": 0.12, "learning_rate": 1.9582840747058276e-05, "loss": 0.7709, "step": 802 }, { "epoch": 0.12, "learning_rate": 1.9581458847541032e-05, "loss": 0.8437, "step": 803 }, { "epoch": 0.12, "learning_rate": 1.9580074711858677e-05, "loss": 0.7824, "step": 804 }, { "epoch": 0.12, "learning_rate": 1.9578688340334243e-05, "loss": 0.8793, "step": 805 }, { "epoch": 0.12, "learning_rate": 1.9577299733291293e-05, "loss": 0.8134, "step": 806 }, { "epoch": 0.12, "learning_rate": 1.9575908891053902e-05, "loss": 0.8232, "step": 807 }, { "epoch": 0.12, "learning_rate": 1.9574515813946676e-05, "loss": 0.8509, "step": 808 }, { "epoch": 0.12, "learning_rate": 1.9573120502294728e-05, "loss": 0.8223, "step": 809 }, { "epoch": 0.12, "learning_rate": 1.9571722956423716e-05, "loss": 0.7753, "step": 810 }, { "epoch": 0.12, "learning_rate": 1.9570323176659794e-05, "loss": 0.3364, "step": 811 }, { "epoch": 0.12, "learning_rate": 1.9568921163329656e-05, "loss": 0.8955, "step": 812 }, { "epoch": 0.12, "learning_rate": 1.9567516916760505e-05, "loss": 0.864, "step": 813 }, { "epoch": 0.12, "learning_rate": 1.9566110437280078e-05, "loss": 0.8901, "step": 814 }, { "epoch": 0.12, "learning_rate": 1.9564701725216618e-05, "loss": 0.8392, "step": 815 }, { "epoch": 0.12, "learning_rate": 1.95632907808989e-05, "loss": 0.8202, "step": 816 }, { "epoch": 0.12, "learning_rate": 1.956187760465622e-05, "loss": 0.8895, "step": 817 }, { "epoch": 0.12, "learning_rate": 1.9560462196818384e-05, "loss": 0.9107, "step": 818 }, { "epoch": 0.12, "learning_rate": 1.9559044557715733e-05, "loss": 0.8791, "step": 819 }, { "epoch": 0.12, "learning_rate": 1.955762468767912e-05, "loss": 0.8438, "step": 820 }, { "epoch": 0.12, "learning_rate": 1.9556202587039924e-05, "loss": 0.886, "step": 821 }, { "epoch": 0.12, "learning_rate": 1.9554778256130035e-05, "loss": 0.9047, "step": 822 }, { "epoch": 0.12, "learning_rate": 1.955335169528187e-05, "loss": 0.7835, "step": 823 }, { "epoch": 0.12, "learning_rate": 1.9551922904828372e-05, "loss": 0.8268, "step": 824 }, { "epoch": 0.12, "learning_rate": 1.9550491885103e-05, "loss": 0.8784, "step": 825 }, { "epoch": 0.12, "learning_rate": 1.954905863643972e-05, "loss": 0.3473, "step": 826 }, { "epoch": 0.12, "learning_rate": 1.9547623159173044e-05, "loss": 0.917, "step": 827 }, { "epoch": 0.12, "learning_rate": 1.9546185453637983e-05, "loss": 0.8492, "step": 828 }, { "epoch": 0.12, "learning_rate": 1.9544745520170078e-05, "loss": 0.8918, "step": 829 }, { "epoch": 0.12, "learning_rate": 1.9543303359105386e-05, "loss": 0.7928, "step": 830 }, { "epoch": 0.12, "learning_rate": 1.9541858970780485e-05, "loss": 0.8059, "step": 831 }, { "epoch": 0.12, "learning_rate": 1.9540412355532474e-05, "loss": 0.8078, "step": 832 }, { "epoch": 0.12, "learning_rate": 1.953896351369897e-05, "loss": 0.9448, "step": 833 }, { "epoch": 0.12, "learning_rate": 1.953751244561811e-05, "loss": 0.9225, "step": 834 }, { "epoch": 0.12, "learning_rate": 1.9536059151628552e-05, "loss": 0.7868, "step": 835 }, { "epoch": 0.12, "learning_rate": 1.9534603632069473e-05, "loss": 0.8237, "step": 836 }, { "epoch": 0.12, "learning_rate": 1.953314588728057e-05, "loss": 0.8023, "step": 837 }, { "epoch": 0.12, "learning_rate": 1.953168591760205e-05, "loss": 0.905, "step": 838 }, { "epoch": 0.13, "learning_rate": 1.9530223723374656e-05, "loss": 0.7985, "step": 839 }, { "epoch": 0.13, "learning_rate": 1.9528759304939643e-05, "loss": 0.8329, "step": 840 }, { "epoch": 0.13, "learning_rate": 1.9527292662638777e-05, "loss": 0.855, "step": 841 }, { "epoch": 0.13, "learning_rate": 1.9525823796814354e-05, "loss": 0.8553, "step": 842 }, { "epoch": 0.13, "learning_rate": 1.9524352707809182e-05, "loss": 0.8988, "step": 843 }, { "epoch": 0.13, "learning_rate": 1.9522879395966593e-05, "loss": 0.8414, "step": 844 }, { "epoch": 0.13, "learning_rate": 1.9521403861630436e-05, "loss": 0.9137, "step": 845 }, { "epoch": 0.13, "learning_rate": 1.9519926105145073e-05, "loss": 0.8374, "step": 846 }, { "epoch": 0.13, "learning_rate": 1.9518446126855397e-05, "loss": 0.804, "step": 847 }, { "epoch": 0.13, "learning_rate": 1.9516963927106802e-05, "loss": 0.805, "step": 848 }, { "epoch": 0.13, "learning_rate": 1.9515479506245222e-05, "loss": 0.8382, "step": 849 }, { "epoch": 0.13, "learning_rate": 1.951399286461709e-05, "loss": 0.8554, "step": 850 }, { "epoch": 0.13, "learning_rate": 1.951250400256937e-05, "loss": 0.8405, "step": 851 }, { "epoch": 0.13, "learning_rate": 1.9511012920449538e-05, "loss": 0.7929, "step": 852 }, { "epoch": 0.13, "learning_rate": 1.950951961860559e-05, "loss": 0.7976, "step": 853 }, { "epoch": 0.13, "learning_rate": 1.9508024097386037e-05, "loss": 0.8681, "step": 854 }, { "epoch": 0.13, "learning_rate": 1.950652635713991e-05, "loss": 0.9201, "step": 855 }, { "epoch": 0.13, "learning_rate": 1.9505026398216763e-05, "loss": 0.8736, "step": 856 }, { "epoch": 0.13, "learning_rate": 1.9503524220966664e-05, "loss": 0.8433, "step": 857 }, { "epoch": 0.13, "learning_rate": 1.950201982574019e-05, "loss": 0.9143, "step": 858 }, { "epoch": 0.13, "learning_rate": 1.9500513212888453e-05, "loss": 0.7474, "step": 859 }, { "epoch": 0.13, "learning_rate": 1.949900438276307e-05, "loss": 0.867, "step": 860 }, { "epoch": 0.13, "learning_rate": 1.9497493335716178e-05, "loss": 0.7995, "step": 861 }, { "epoch": 0.13, "learning_rate": 1.9495980072100433e-05, "loss": 0.891, "step": 862 }, { "epoch": 0.13, "learning_rate": 1.9494464592269004e-05, "loss": 0.7843, "step": 863 }, { "epoch": 0.13, "learning_rate": 1.9492946896575583e-05, "loss": 0.9919, "step": 864 }, { "epoch": 0.13, "learning_rate": 1.949142698537438e-05, "loss": 0.8556, "step": 865 }, { "epoch": 0.13, "learning_rate": 1.9489904859020116e-05, "loss": 0.8359, "step": 866 }, { "epoch": 0.13, "learning_rate": 1.9488380517868033e-05, "loss": 0.8755, "step": 867 }, { "epoch": 0.13, "learning_rate": 1.9486853962273887e-05, "loss": 0.837, "step": 868 }, { "epoch": 0.13, "learning_rate": 1.948532519259395e-05, "loss": 0.8466, "step": 869 }, { "epoch": 0.13, "learning_rate": 1.948379420918502e-05, "loss": 0.7767, "step": 870 }, { "epoch": 0.13, "learning_rate": 1.9482261012404403e-05, "loss": 0.9451, "step": 871 }, { "epoch": 0.13, "learning_rate": 1.9480725602609918e-05, "loss": 0.7342, "step": 872 }, { "epoch": 0.13, "learning_rate": 1.9479187980159912e-05, "loss": 0.7508, "step": 873 }, { "epoch": 0.13, "learning_rate": 1.9477648145413242e-05, "loss": 0.8197, "step": 874 }, { "epoch": 0.13, "learning_rate": 1.947610609872928e-05, "loss": 0.8304, "step": 875 }, { "epoch": 0.13, "learning_rate": 1.9474561840467915e-05, "loss": 0.8162, "step": 876 }, { "epoch": 0.13, "learning_rate": 1.9473015370989557e-05, "loss": 0.8209, "step": 877 }, { "epoch": 0.13, "learning_rate": 1.9471466690655126e-05, "loss": 0.8249, "step": 878 }, { "epoch": 0.13, "learning_rate": 1.946991579982606e-05, "loss": 0.7992, "step": 879 }, { "epoch": 0.13, "learning_rate": 1.946836269886431e-05, "loss": 0.8227, "step": 880 }, { "epoch": 0.13, "learning_rate": 1.946680738813235e-05, "loss": 0.853, "step": 881 }, { "epoch": 0.13, "learning_rate": 1.9465249867993162e-05, "loss": 0.9044, "step": 882 }, { "epoch": 0.13, "learning_rate": 1.9463690138810256e-05, "loss": 0.7924, "step": 883 }, { "epoch": 0.13, "learning_rate": 1.9462128200947638e-05, "loss": 0.776, "step": 884 }, { "epoch": 0.13, "learning_rate": 1.9460564054769844e-05, "loss": 0.763, "step": 885 }, { "epoch": 0.13, "learning_rate": 1.9458997700641927e-05, "loss": 0.3187, "step": 886 }, { "epoch": 0.13, "learning_rate": 1.9457429138929443e-05, "loss": 0.8444, "step": 887 }, { "epoch": 0.13, "learning_rate": 1.9455858369998472e-05, "loss": 0.8073, "step": 888 }, { "epoch": 0.13, "learning_rate": 1.9454285394215605e-05, "loss": 0.914, "step": 889 }, { "epoch": 0.13, "learning_rate": 1.9452710211947957e-05, "loss": 0.9049, "step": 890 }, { "epoch": 0.13, "learning_rate": 1.945113282356315e-05, "loss": 0.7753, "step": 891 }, { "epoch": 0.13, "learning_rate": 1.9449553229429315e-05, "loss": 0.875, "step": 892 }, { "epoch": 0.13, "learning_rate": 1.944797142991511e-05, "loss": 0.8138, "step": 893 }, { "epoch": 0.13, "learning_rate": 1.9446387425389706e-05, "loss": 0.8912, "step": 894 }, { "epoch": 0.13, "learning_rate": 1.9444801216222778e-05, "loss": 0.8881, "step": 895 }, { "epoch": 0.13, "learning_rate": 1.944321280278453e-05, "loss": 0.876, "step": 896 }, { "epoch": 0.13, "learning_rate": 1.9441622185445667e-05, "loss": 0.9171, "step": 897 }, { "epoch": 0.13, "learning_rate": 1.944002936457742e-05, "loss": 0.8508, "step": 898 }, { "epoch": 0.13, "learning_rate": 1.943843434055152e-05, "loss": 0.7631, "step": 899 }, { "epoch": 0.13, "learning_rate": 1.9436837113740236e-05, "loss": 0.3005, "step": 900 }, { "epoch": 0.13, "learning_rate": 1.943523768451632e-05, "loss": 0.8157, "step": 901 }, { "epoch": 0.13, "learning_rate": 1.9433636053253068e-05, "loss": 0.8402, "step": 902 }, { "epoch": 0.13, "learning_rate": 1.9432032220324265e-05, "loss": 0.7263, "step": 903 }, { "epoch": 0.13, "learning_rate": 1.943042618610423e-05, "loss": 0.8624, "step": 904 }, { "epoch": 0.13, "learning_rate": 1.942881795096778e-05, "loss": 0.8347, "step": 905 }, { "epoch": 0.14, "learning_rate": 1.9427207515290252e-05, "loss": 0.86, "step": 906 }, { "epoch": 0.14, "learning_rate": 1.94255948794475e-05, "loss": 0.3339, "step": 907 }, { "epoch": 0.14, "learning_rate": 1.9423980043815894e-05, "loss": 0.8514, "step": 908 }, { "epoch": 0.14, "learning_rate": 1.94223630087723e-05, "loss": 0.8117, "step": 909 }, { "epoch": 0.14, "learning_rate": 1.9420743774694117e-05, "loss": 0.8353, "step": 910 }, { "epoch": 0.14, "learning_rate": 1.9419122341959248e-05, "loss": 0.8165, "step": 911 }, { "epoch": 0.14, "learning_rate": 1.941749871094611e-05, "loss": 0.9341, "step": 912 }, { "epoch": 0.14, "learning_rate": 1.9415872882033634e-05, "loss": 0.8659, "step": 913 }, { "epoch": 0.14, "learning_rate": 1.941424485560126e-05, "loss": 0.8766, "step": 914 }, { "epoch": 0.14, "learning_rate": 1.9412614632028947e-05, "loss": 0.9035, "step": 915 }, { "epoch": 0.14, "learning_rate": 1.9410982211697168e-05, "loss": 0.7905, "step": 916 }, { "epoch": 0.14, "learning_rate": 1.94093475949869e-05, "loss": 0.8776, "step": 917 }, { "epoch": 0.14, "learning_rate": 1.9407710782279634e-05, "loss": 0.3421, "step": 918 }, { "epoch": 0.14, "learning_rate": 1.9406071773957383e-05, "loss": 0.821, "step": 919 }, { "epoch": 0.14, "learning_rate": 1.9404430570402665e-05, "loss": 0.9069, "step": 920 }, { "epoch": 0.14, "learning_rate": 1.9402787171998515e-05, "loss": 0.7955, "step": 921 }, { "epoch": 0.14, "learning_rate": 1.940114157912847e-05, "loss": 0.8914, "step": 922 }, { "epoch": 0.14, "learning_rate": 1.9399493792176587e-05, "loss": 0.8522, "step": 923 }, { "epoch": 0.14, "learning_rate": 1.939784381152744e-05, "loss": 0.8807, "step": 924 }, { "epoch": 0.14, "learning_rate": 1.9396191637566102e-05, "loss": 0.8471, "step": 925 }, { "epoch": 0.14, "learning_rate": 1.939453727067817e-05, "loss": 0.8182, "step": 926 }, { "epoch": 0.14, "learning_rate": 1.9392880711249748e-05, "loss": 0.8585, "step": 927 }, { "epoch": 0.14, "learning_rate": 1.939122195966745e-05, "loss": 0.9042, "step": 928 }, { "epoch": 0.14, "learning_rate": 1.93895610163184e-05, "loss": 0.7281, "step": 929 }, { "epoch": 0.14, "learning_rate": 1.938789788159024e-05, "loss": 0.8379, "step": 930 }, { "epoch": 0.14, "learning_rate": 1.938623255587112e-05, "loss": 0.9074, "step": 931 }, { "epoch": 0.14, "learning_rate": 1.9384565039549705e-05, "loss": 0.9338, "step": 932 }, { "epoch": 0.14, "learning_rate": 1.9382895333015157e-05, "loss": 0.7689, "step": 933 }, { "epoch": 0.14, "learning_rate": 1.938122343665717e-05, "loss": 0.7938, "step": 934 }, { "epoch": 0.14, "learning_rate": 1.9379549350865933e-05, "loss": 0.935, "step": 935 }, { "epoch": 0.14, "learning_rate": 1.9377873076032157e-05, "loss": 0.8301, "step": 936 }, { "epoch": 0.14, "learning_rate": 1.937619461254705e-05, "loss": 0.9203, "step": 937 }, { "epoch": 0.14, "learning_rate": 1.937451396080235e-05, "loss": 0.8207, "step": 938 }, { "epoch": 0.14, "learning_rate": 1.9372831121190293e-05, "loss": 0.8556, "step": 939 }, { "epoch": 0.14, "learning_rate": 1.9371146094103623e-05, "loss": 0.8021, "step": 940 }, { "epoch": 0.14, "learning_rate": 1.9369458879935603e-05, "loss": 0.838, "step": 941 }, { "epoch": 0.14, "learning_rate": 1.936776947908e-05, "loss": 0.8174, "step": 942 }, { "epoch": 0.14, "learning_rate": 1.9366077891931095e-05, "loss": 0.7351, "step": 943 }, { "epoch": 0.14, "learning_rate": 1.9364384118883683e-05, "loss": 0.8238, "step": 944 }, { "epoch": 0.14, "learning_rate": 1.936268816033306e-05, "loss": 0.8669, "step": 945 }, { "epoch": 0.14, "learning_rate": 1.936099001667504e-05, "loss": 0.8568, "step": 946 }, { "epoch": 0.14, "learning_rate": 1.9359289688305938e-05, "loss": 0.8995, "step": 947 }, { "epoch": 0.14, "learning_rate": 1.935758717562259e-05, "loss": 0.9226, "step": 948 }, { "epoch": 0.14, "learning_rate": 1.935588247902233e-05, "loss": 0.7809, "step": 949 }, { "epoch": 0.14, "learning_rate": 1.9354175598903014e-05, "loss": 0.804, "step": 950 }, { "epoch": 0.14, "learning_rate": 1.9352466535663e-05, "loss": 0.8094, "step": 951 }, { "epoch": 0.14, "learning_rate": 1.9350755289701156e-05, "loss": 0.8039, "step": 952 }, { "epoch": 0.14, "learning_rate": 1.934904186141686e-05, "loss": 0.7472, "step": 953 }, { "epoch": 0.14, "learning_rate": 1.9347326251210007e-05, "loss": 0.861, "step": 954 }, { "epoch": 0.14, "learning_rate": 1.9345608459480983e-05, "loss": 0.8948, "step": 955 }, { "epoch": 0.14, "learning_rate": 1.93438884866307e-05, "loss": 0.8915, "step": 956 }, { "epoch": 0.14, "learning_rate": 1.934216633306057e-05, "loss": 0.8435, "step": 957 }, { "epoch": 0.14, "learning_rate": 1.9340441999172524e-05, "loss": 0.8977, "step": 958 }, { "epoch": 0.14, "learning_rate": 1.9338715485368987e-05, "loss": 0.9462, "step": 959 }, { "epoch": 0.14, "learning_rate": 1.9336986792052906e-05, "loss": 0.9389, "step": 960 }, { "epoch": 0.14, "learning_rate": 1.9335255919627728e-05, "loss": 0.843, "step": 961 }, { "epoch": 0.14, "learning_rate": 1.9333522868497417e-05, "loss": 0.7772, "step": 962 }, { "epoch": 0.14, "learning_rate": 1.9331787639066434e-05, "loss": 0.8727, "step": 963 }, { "epoch": 0.14, "learning_rate": 1.9330050231739757e-05, "loss": 0.8912, "step": 964 }, { "epoch": 0.14, "learning_rate": 1.9328310646922874e-05, "loss": 0.8291, "step": 965 }, { "epoch": 0.14, "learning_rate": 1.9326568885021773e-05, "loss": 0.7563, "step": 966 }, { "epoch": 0.14, "learning_rate": 1.9324824946442957e-05, "loss": 0.8338, "step": 967 }, { "epoch": 0.14, "learning_rate": 1.9323078831593435e-05, "loss": 0.8361, "step": 968 }, { "epoch": 0.14, "learning_rate": 1.9321330540880718e-05, "loss": 0.7363, "step": 969 }, { "epoch": 0.14, "learning_rate": 1.9319580074712835e-05, "loss": 0.8168, "step": 970 }, { "epoch": 0.14, "learning_rate": 1.9317827433498317e-05, "loss": 0.8727, "step": 971 }, { "epoch": 0.14, "learning_rate": 1.9316072617646203e-05, "loss": 0.7503, "step": 972 }, { "epoch": 0.15, "learning_rate": 1.9314315627566043e-05, "loss": 0.8643, "step": 973 }, { "epoch": 0.15, "learning_rate": 1.931255646366789e-05, "loss": 0.8373, "step": 974 }, { "epoch": 0.15, "learning_rate": 1.9310795126362297e-05, "loss": 0.8092, "step": 975 }, { "epoch": 0.15, "learning_rate": 1.9309031616060346e-05, "loss": 0.7517, "step": 976 }, { "epoch": 0.15, "learning_rate": 1.9307265933173608e-05, "loss": 0.8052, "step": 977 }, { "epoch": 0.15, "learning_rate": 1.9305498078114162e-05, "loss": 0.8091, "step": 978 }, { "epoch": 0.15, "learning_rate": 1.9303728051294607e-05, "loss": 0.3205, "step": 979 }, { "epoch": 0.15, "learning_rate": 1.9301955853128033e-05, "loss": 0.8954, "step": 980 }, { "epoch": 0.15, "learning_rate": 1.9300181484028043e-05, "loss": 0.8002, "step": 981 }, { "epoch": 0.15, "learning_rate": 1.9298404944408755e-05, "loss": 0.8056, "step": 982 }, { "epoch": 0.15, "learning_rate": 1.9296626234684778e-05, "loss": 0.767, "step": 983 }, { "epoch": 0.15, "learning_rate": 1.9294845355271243e-05, "loss": 0.8767, "step": 984 }, { "epoch": 0.15, "learning_rate": 1.9293062306583775e-05, "loss": 0.8967, "step": 985 }, { "epoch": 0.15, "learning_rate": 1.9291277089038507e-05, "loss": 0.7888, "step": 986 }, { "epoch": 0.15, "learning_rate": 1.928948970305209e-05, "loss": 0.8679, "step": 987 }, { "epoch": 0.15, "learning_rate": 1.9287700149041666e-05, "loss": 0.881, "step": 988 }, { "epoch": 0.15, "learning_rate": 1.928590842742489e-05, "loss": 0.8791, "step": 989 }, { "epoch": 0.15, "learning_rate": 1.9284114538619922e-05, "loss": 0.8055, "step": 990 }, { "epoch": 0.15, "learning_rate": 1.9282318483045433e-05, "loss": 0.8217, "step": 991 }, { "epoch": 0.15, "learning_rate": 1.928052026112059e-05, "loss": 0.9172, "step": 992 }, { "epoch": 0.15, "learning_rate": 1.927871987326507e-05, "loss": 0.8155, "step": 993 }, { "epoch": 0.15, "learning_rate": 1.9276917319899057e-05, "loss": 0.8164, "step": 994 }, { "epoch": 0.15, "learning_rate": 1.927511260144324e-05, "loss": 0.8624, "step": 995 }, { "epoch": 0.15, "learning_rate": 1.9273305718318814e-05, "loss": 0.7653, "step": 996 }, { "epoch": 0.15, "learning_rate": 1.9271496670947475e-05, "loss": 0.8997, "step": 997 }, { "epoch": 0.15, "learning_rate": 1.9269685459751427e-05, "loss": 0.3528, "step": 998 }, { "epoch": 0.15, "learning_rate": 1.9267872085153377e-05, "loss": 0.8801, "step": 999 }, { "epoch": 0.15, "learning_rate": 1.9266056547576542e-05, "loss": 0.8367, "step": 1000 }, { "epoch": 0.15, "learning_rate": 1.9264238847444644e-05, "loss": 0.8154, "step": 1001 }, { "epoch": 0.15, "learning_rate": 1.92624189851819e-05, "loss": 0.8329, "step": 1002 }, { "epoch": 0.15, "learning_rate": 1.9260596961213038e-05, "loss": 0.8811, "step": 1003 }, { "epoch": 0.15, "learning_rate": 1.925877277596329e-05, "loss": 0.7903, "step": 1004 }, { "epoch": 0.15, "learning_rate": 1.92569464298584e-05, "loss": 0.7974, "step": 1005 }, { "epoch": 0.15, "learning_rate": 1.9255117923324603e-05, "loss": 0.8686, "step": 1006 }, { "epoch": 0.15, "learning_rate": 1.9253287256788645e-05, "loss": 0.8423, "step": 1007 }, { "epoch": 0.15, "learning_rate": 1.9251454430677772e-05, "loss": 0.8405, "step": 1008 }, { "epoch": 0.15, "learning_rate": 1.9249619445419748e-05, "loss": 0.8122, "step": 1009 }, { "epoch": 0.15, "learning_rate": 1.924778230144282e-05, "loss": 0.7931, "step": 1010 }, { "epoch": 0.15, "learning_rate": 1.924594299917575e-05, "loss": 0.8866, "step": 1011 }, { "epoch": 0.15, "learning_rate": 1.9244101539047807e-05, "loss": 0.8542, "step": 1012 }, { "epoch": 0.15, "learning_rate": 1.9242257921488762e-05, "loss": 0.8389, "step": 1013 }, { "epoch": 0.15, "learning_rate": 1.924041214692888e-05, "loss": 0.8263, "step": 1014 }, { "epoch": 0.15, "learning_rate": 1.9238564215798937e-05, "loss": 0.842, "step": 1015 }, { "epoch": 0.15, "learning_rate": 1.923671412853022e-05, "loss": 0.7969, "step": 1016 }, { "epoch": 0.15, "learning_rate": 1.9234861885554496e-05, "loss": 0.8696, "step": 1017 }, { "epoch": 0.15, "learning_rate": 1.9233007487304065e-05, "loss": 0.7898, "step": 1018 }, { "epoch": 0.15, "learning_rate": 1.9231150934211712e-05, "loss": 0.3576, "step": 1019 }, { "epoch": 0.15, "learning_rate": 1.922929222671072e-05, "loss": 0.8096, "step": 1020 }, { "epoch": 0.15, "learning_rate": 1.9227431365234887e-05, "loss": 0.8036, "step": 1021 }, { "epoch": 0.15, "learning_rate": 1.922556835021851e-05, "loss": 0.3221, "step": 1022 }, { "epoch": 0.15, "learning_rate": 1.922370318209639e-05, "loss": 0.8739, "step": 1023 }, { "epoch": 0.15, "learning_rate": 1.922183586130383e-05, "loss": 0.7954, "step": 1024 }, { "epoch": 0.15, "learning_rate": 1.9219966388276626e-05, "loss": 0.7831, "step": 1025 }, { "epoch": 0.15, "learning_rate": 1.9218094763451092e-05, "loss": 0.7748, "step": 1026 }, { "epoch": 0.15, "learning_rate": 1.921622098726403e-05, "loss": 0.9269, "step": 1027 }, { "epoch": 0.15, "learning_rate": 1.9214345060152756e-05, "loss": 0.8259, "step": 1028 }, { "epoch": 0.15, "learning_rate": 1.921246698255508e-05, "loss": 0.7904, "step": 1029 }, { "epoch": 0.15, "learning_rate": 1.9210586754909316e-05, "loss": 0.8659, "step": 1030 }, { "epoch": 0.15, "learning_rate": 1.9208704377654283e-05, "loss": 0.8622, "step": 1031 }, { "epoch": 0.15, "learning_rate": 1.9206819851229297e-05, "loss": 0.7828, "step": 1032 }, { "epoch": 0.15, "learning_rate": 1.920493317607418e-05, "loss": 0.8515, "step": 1033 }, { "epoch": 0.15, "learning_rate": 1.9203044352629244e-05, "loss": 0.8368, "step": 1034 }, { "epoch": 0.15, "learning_rate": 1.920115338133532e-05, "loss": 0.8629, "step": 1035 }, { "epoch": 0.15, "learning_rate": 1.919926026263373e-05, "loss": 0.8472, "step": 1036 }, { "epoch": 0.15, "learning_rate": 1.91973649969663e-05, "loss": 0.8316, "step": 1037 }, { "epoch": 0.15, "learning_rate": 1.919546758477535e-05, "loss": 0.8308, "step": 1038 }, { "epoch": 0.15, "learning_rate": 1.9193568026503716e-05, "loss": 0.8031, "step": 1039 }, { "epoch": 0.16, "learning_rate": 1.919166632259472e-05, "loss": 0.8558, "step": 1040 }, { "epoch": 0.16, "learning_rate": 1.9189762473492183e-05, "loss": 0.8174, "step": 1041 }, { "epoch": 0.16, "learning_rate": 1.918785647964045e-05, "loss": 0.8307, "step": 1042 }, { "epoch": 0.16, "learning_rate": 1.9185948341484338e-05, "loss": 0.7593, "step": 1043 }, { "epoch": 0.16, "learning_rate": 1.9184038059469186e-05, "loss": 0.8733, "step": 1044 }, { "epoch": 0.16, "learning_rate": 1.918212563404082e-05, "loss": 0.8734, "step": 1045 }, { "epoch": 0.16, "learning_rate": 1.9180211065645565e-05, "loss": 0.8826, "step": 1046 }, { "epoch": 0.16, "learning_rate": 1.9178294354730262e-05, "loss": 0.8087, "step": 1047 }, { "epoch": 0.16, "learning_rate": 1.9176375501742236e-05, "loss": 0.8338, "step": 1048 }, { "epoch": 0.16, "learning_rate": 1.9174454507129324e-05, "loss": 0.8015, "step": 1049 }, { "epoch": 0.16, "learning_rate": 1.917253137133985e-05, "loss": 0.901, "step": 1050 }, { "epoch": 0.16, "learning_rate": 1.917060609482264e-05, "loss": 0.9237, "step": 1051 }, { "epoch": 0.16, "learning_rate": 1.9168678678027036e-05, "loss": 0.8468, "step": 1052 }, { "epoch": 0.16, "learning_rate": 1.9166749121402862e-05, "loss": 0.8448, "step": 1053 }, { "epoch": 0.16, "learning_rate": 1.9164817425400444e-05, "loss": 0.8326, "step": 1054 }, { "epoch": 0.16, "learning_rate": 1.9162883590470614e-05, "loss": 0.8414, "step": 1055 }, { "epoch": 0.16, "learning_rate": 1.9160947617064698e-05, "loss": 0.8363, "step": 1056 }, { "epoch": 0.16, "learning_rate": 1.915900950563452e-05, "loss": 0.8523, "step": 1057 }, { "epoch": 0.16, "learning_rate": 1.915706925663241e-05, "loss": 0.841, "step": 1058 }, { "epoch": 0.16, "learning_rate": 1.915512687051119e-05, "loss": 0.7967, "step": 1059 }, { "epoch": 0.16, "learning_rate": 1.915318234772418e-05, "loss": 0.8057, "step": 1060 }, { "epoch": 0.16, "learning_rate": 1.9151235688725206e-05, "loss": 0.7983, "step": 1061 }, { "epoch": 0.16, "learning_rate": 1.914928689396859e-05, "loss": 0.6932, "step": 1062 }, { "epoch": 0.16, "learning_rate": 1.9147335963909146e-05, "loss": 0.872, "step": 1063 }, { "epoch": 0.16, "learning_rate": 1.9145382899002192e-05, "loss": 0.8072, "step": 1064 }, { "epoch": 0.16, "learning_rate": 1.9143427699703544e-05, "loss": 0.7104, "step": 1065 }, { "epoch": 0.16, "learning_rate": 1.9141470366469517e-05, "loss": 0.7814, "step": 1066 }, { "epoch": 0.16, "learning_rate": 1.913951089975692e-05, "loss": 0.8533, "step": 1067 }, { "epoch": 0.16, "learning_rate": 1.9137549300023063e-05, "loss": 0.8348, "step": 1068 }, { "epoch": 0.16, "learning_rate": 1.9135585567725753e-05, "loss": 0.764, "step": 1069 }, { "epoch": 0.16, "learning_rate": 1.91336197033233e-05, "loss": 0.7534, "step": 1070 }, { "epoch": 0.16, "learning_rate": 1.9131651707274494e-05, "loss": 0.8085, "step": 1071 }, { "epoch": 0.16, "learning_rate": 1.9129681580038647e-05, "loss": 0.8549, "step": 1072 }, { "epoch": 0.16, "learning_rate": 1.9127709322075555e-05, "loss": 0.8002, "step": 1073 }, { "epoch": 0.16, "learning_rate": 1.912573493384551e-05, "loss": 0.821, "step": 1074 }, { "epoch": 0.16, "learning_rate": 1.91237584158093e-05, "loss": 0.9078, "step": 1075 }, { "epoch": 0.16, "learning_rate": 1.9121779768428218e-05, "loss": 0.9555, "step": 1076 }, { "epoch": 0.16, "learning_rate": 1.911979899216405e-05, "loss": 0.8914, "step": 1077 }, { "epoch": 0.16, "learning_rate": 1.911781608747908e-05, "loss": 0.7722, "step": 1078 }, { "epoch": 0.16, "learning_rate": 1.911583105483609e-05, "loss": 0.8868, "step": 1079 }, { "epoch": 0.16, "learning_rate": 1.911384389469834e-05, "loss": 0.8607, "step": 1080 }, { "epoch": 0.16, "learning_rate": 1.9111854607529628e-05, "loss": 0.8067, "step": 1081 }, { "epoch": 0.16, "learning_rate": 1.9109863193794203e-05, "loss": 0.8265, "step": 1082 }, { "epoch": 0.16, "learning_rate": 1.9107869653956838e-05, "loss": 0.7949, "step": 1083 }, { "epoch": 0.16, "learning_rate": 1.910587398848279e-05, "loss": 0.3156, "step": 1084 }, { "epoch": 0.16, "learning_rate": 1.9103876197837822e-05, "loss": 0.7858, "step": 1085 }, { "epoch": 0.16, "learning_rate": 1.9101876282488187e-05, "loss": 0.831, "step": 1086 }, { "epoch": 0.16, "learning_rate": 1.9099874242900632e-05, "loss": 0.7183, "step": 1087 }, { "epoch": 0.16, "learning_rate": 1.9097870079542408e-05, "loss": 0.8884, "step": 1088 }, { "epoch": 0.16, "learning_rate": 1.9095863792881247e-05, "loss": 0.8209, "step": 1089 }, { "epoch": 0.16, "learning_rate": 1.9093855383385388e-05, "loss": 0.8742, "step": 1090 }, { "epoch": 0.16, "learning_rate": 1.9091844851523566e-05, "loss": 0.8717, "step": 1091 }, { "epoch": 0.16, "learning_rate": 1.9089832197765008e-05, "loss": 0.8388, "step": 1092 }, { "epoch": 0.16, "learning_rate": 1.9087817422579435e-05, "loss": 0.8174, "step": 1093 }, { "epoch": 0.16, "learning_rate": 1.9085800526437065e-05, "loss": 0.832, "step": 1094 }, { "epoch": 0.16, "learning_rate": 1.908378150980861e-05, "loss": 0.9067, "step": 1095 }, { "epoch": 0.16, "learning_rate": 1.908176037316528e-05, "loss": 0.7368, "step": 1096 }, { "epoch": 0.16, "learning_rate": 1.9079737116978768e-05, "loss": 0.8263, "step": 1097 }, { "epoch": 0.16, "learning_rate": 1.9077711741721282e-05, "loss": 0.8376, "step": 1098 }, { "epoch": 0.16, "learning_rate": 1.9075684247865514e-05, "loss": 0.333, "step": 1099 }, { "epoch": 0.16, "learning_rate": 1.9073654635884636e-05, "loss": 0.8387, "step": 1100 }, { "epoch": 0.16, "learning_rate": 1.9071622906252343e-05, "loss": 0.8198, "step": 1101 }, { "epoch": 0.16, "learning_rate": 1.9069589059442805e-05, "loss": 0.874, "step": 1102 }, { "epoch": 0.16, "learning_rate": 1.9067553095930688e-05, "loss": 0.8469, "step": 1103 }, { "epoch": 0.16, "learning_rate": 1.9065515016191157e-05, "loss": 0.8828, "step": 1104 }, { "epoch": 0.16, "learning_rate": 1.9063474820699862e-05, "loss": 0.7326, "step": 1105 }, { "epoch": 0.16, "learning_rate": 1.9061432509932965e-05, "loss": 0.7411, "step": 1106 }, { "epoch": 0.17, "learning_rate": 1.90593880843671e-05, "loss": 0.7825, "step": 1107 }, { "epoch": 0.17, "learning_rate": 1.905734154447941e-05, "loss": 0.7756, "step": 1108 }, { "epoch": 0.17, "learning_rate": 1.9055292890747523e-05, "loss": 0.8693, "step": 1109 }, { "epoch": 0.17, "learning_rate": 1.9053242123649562e-05, "loss": 0.8475, "step": 1110 }, { "epoch": 0.17, "learning_rate": 1.905118924366415e-05, "loss": 0.885, "step": 1111 }, { "epoch": 0.17, "learning_rate": 1.9049134251270393e-05, "loss": 0.9603, "step": 1112 }, { "epoch": 0.17, "learning_rate": 1.9047077146947893e-05, "loss": 0.8949, "step": 1113 }, { "epoch": 0.17, "learning_rate": 1.9045017931176754e-05, "loss": 0.884, "step": 1114 }, { "epoch": 0.17, "learning_rate": 1.904295660443756e-05, "loss": 0.8256, "step": 1115 }, { "epoch": 0.17, "learning_rate": 1.9040893167211392e-05, "loss": 0.9156, "step": 1116 }, { "epoch": 0.17, "learning_rate": 1.9038827619979828e-05, "loss": 0.3021, "step": 1117 }, { "epoch": 0.17, "learning_rate": 1.9036759963224934e-05, "loss": 0.3437, "step": 1118 }, { "epoch": 0.17, "learning_rate": 1.9034690197429264e-05, "loss": 0.8738, "step": 1119 }, { "epoch": 0.17, "learning_rate": 1.9032618323075877e-05, "loss": 0.8379, "step": 1120 }, { "epoch": 0.17, "learning_rate": 1.9030544340648314e-05, "loss": 0.8453, "step": 1121 }, { "epoch": 0.17, "learning_rate": 1.902846825063061e-05, "loss": 0.3226, "step": 1122 }, { "epoch": 0.17, "learning_rate": 1.9026390053507292e-05, "loss": 0.8808, "step": 1123 }, { "epoch": 0.17, "learning_rate": 1.9024309749763378e-05, "loss": 0.7538, "step": 1124 }, { "epoch": 0.17, "learning_rate": 1.9022227339884384e-05, "loss": 0.9137, "step": 1125 }, { "epoch": 0.17, "learning_rate": 1.902014282435631e-05, "loss": 0.8561, "step": 1126 }, { "epoch": 0.17, "learning_rate": 1.9018056203665643e-05, "loss": 0.7605, "step": 1127 }, { "epoch": 0.17, "learning_rate": 1.901596747829938e-05, "loss": 0.8454, "step": 1128 }, { "epoch": 0.17, "learning_rate": 1.901387664874499e-05, "loss": 0.8825, "step": 1129 }, { "epoch": 0.17, "learning_rate": 1.9011783715490438e-05, "loss": 0.8016, "step": 1130 }, { "epoch": 0.17, "learning_rate": 1.900968867902419e-05, "loss": 0.7335, "step": 1131 }, { "epoch": 0.17, "learning_rate": 1.9007591539835195e-05, "loss": 0.3344, "step": 1132 }, { "epoch": 0.17, "learning_rate": 1.9005492298412886e-05, "loss": 0.826, "step": 1133 }, { "epoch": 0.17, "learning_rate": 1.9003390955247198e-05, "loss": 0.8707, "step": 1134 }, { "epoch": 0.17, "learning_rate": 1.9001287510828554e-05, "loss": 0.7837, "step": 1135 }, { "epoch": 0.17, "learning_rate": 1.899918196564786e-05, "loss": 0.7889, "step": 1136 }, { "epoch": 0.17, "learning_rate": 1.899707432019652e-05, "loss": 0.8686, "step": 1137 }, { "epoch": 0.17, "learning_rate": 1.899496457496643e-05, "loss": 0.6784, "step": 1138 }, { "epoch": 0.17, "learning_rate": 1.899285273044997e-05, "loss": 0.832, "step": 1139 }, { "epoch": 0.17, "learning_rate": 1.8990738787140008e-05, "loss": 0.8102, "step": 1140 }, { "epoch": 0.17, "learning_rate": 1.898862274552991e-05, "loss": 0.7702, "step": 1141 }, { "epoch": 0.17, "learning_rate": 1.8986504606113527e-05, "loss": 0.867, "step": 1142 }, { "epoch": 0.17, "learning_rate": 1.8984384369385197e-05, "loss": 0.8285, "step": 1143 }, { "epoch": 0.17, "learning_rate": 1.898226203583976e-05, "loss": 0.8338, "step": 1144 }, { "epoch": 0.17, "learning_rate": 1.8980137605972523e-05, "loss": 0.8442, "step": 1145 }, { "epoch": 0.17, "learning_rate": 1.8978011080279306e-05, "loss": 0.841, "step": 1146 }, { "epoch": 0.17, "learning_rate": 1.8975882459256402e-05, "loss": 0.8484, "step": 1147 }, { "epoch": 0.17, "learning_rate": 1.8973751743400598e-05, "loss": 0.8258, "step": 1148 }, { "epoch": 0.17, "learning_rate": 1.897161893320917e-05, "loss": 0.7531, "step": 1149 }, { "epoch": 0.17, "learning_rate": 1.896948402917989e-05, "loss": 0.7454, "step": 1150 }, { "epoch": 0.17, "learning_rate": 1.8967347031811002e-05, "loss": 0.7228, "step": 1151 }, { "epoch": 0.17, "learning_rate": 1.8965207941601257e-05, "loss": 0.8615, "step": 1152 }, { "epoch": 0.17, "learning_rate": 1.896306675904988e-05, "loss": 0.3111, "step": 1153 }, { "epoch": 0.17, "learning_rate": 1.8960923484656592e-05, "loss": 0.8019, "step": 1154 }, { "epoch": 0.17, "learning_rate": 1.8958778118921598e-05, "loss": 0.854, "step": 1155 }, { "epoch": 0.17, "learning_rate": 1.8956630662345595e-05, "loss": 0.8176, "step": 1156 }, { "epoch": 0.17, "learning_rate": 1.8954481115429768e-05, "loss": 0.8684, "step": 1157 }, { "epoch": 0.17, "learning_rate": 1.895232947867579e-05, "loss": 0.8474, "step": 1158 }, { "epoch": 0.17, "learning_rate": 1.895017575258581e-05, "loss": 0.8374, "step": 1159 }, { "epoch": 0.17, "learning_rate": 1.894801993766249e-05, "loss": 0.8621, "step": 1160 }, { "epoch": 0.17, "learning_rate": 1.894586203440895e-05, "loss": 0.7589, "step": 1161 }, { "epoch": 0.17, "learning_rate": 1.8943702043328817e-05, "loss": 0.8519, "step": 1162 }, { "epoch": 0.17, "learning_rate": 1.89415399649262e-05, "loss": 0.8149, "step": 1163 }, { "epoch": 0.17, "learning_rate": 1.893937579970569e-05, "loss": 0.8035, "step": 1164 }, { "epoch": 0.17, "learning_rate": 1.893720954817238e-05, "loss": 0.8929, "step": 1165 }, { "epoch": 0.17, "learning_rate": 1.8935041210831837e-05, "loss": 0.9004, "step": 1166 }, { "epoch": 0.17, "learning_rate": 1.8932870788190108e-05, "loss": 0.9622, "step": 1167 }, { "epoch": 0.17, "learning_rate": 1.8930698280753745e-05, "loss": 0.7449, "step": 1168 }, { "epoch": 0.17, "learning_rate": 1.8928523689029776e-05, "loss": 0.8696, "step": 1169 }, { "epoch": 0.17, "learning_rate": 1.8926347013525717e-05, "loss": 0.8413, "step": 1170 }, { "epoch": 0.17, "learning_rate": 1.892416825474957e-05, "loss": 0.8016, "step": 1171 }, { "epoch": 0.17, "learning_rate": 1.8921987413209825e-05, "loss": 0.813, "step": 1172 }, { "epoch": 0.17, "learning_rate": 1.8919804489415456e-05, "loss": 0.355, "step": 1173 }, { "epoch": 0.18, "learning_rate": 1.8917619483875925e-05, "loss": 0.8139, "step": 1174 }, { "epoch": 0.18, "learning_rate": 1.8915432397101175e-05, "loss": 0.8682, "step": 1175 }, { "epoch": 0.18, "learning_rate": 1.891324322960164e-05, "loss": 0.8194, "step": 1176 }, { "epoch": 0.18, "learning_rate": 1.8911051981888245e-05, "loss": 0.8678, "step": 1177 }, { "epoch": 0.18, "learning_rate": 1.8908858654472383e-05, "loss": 0.2991, "step": 1178 }, { "epoch": 0.18, "learning_rate": 1.8906663247865952e-05, "loss": 0.8742, "step": 1179 }, { "epoch": 0.18, "learning_rate": 1.8904465762581315e-05, "loss": 0.847, "step": 1180 }, { "epoch": 0.18, "learning_rate": 1.890226619913134e-05, "loss": 0.7188, "step": 1181 }, { "epoch": 0.18, "learning_rate": 1.8900064558029375e-05, "loss": 0.844, "step": 1182 }, { "epoch": 0.18, "learning_rate": 1.8897860839789238e-05, "loss": 0.8025, "step": 1183 }, { "epoch": 0.18, "learning_rate": 1.8895655044925248e-05, "loss": 0.8416, "step": 1184 }, { "epoch": 0.18, "learning_rate": 1.8893447173952205e-05, "loss": 0.8867, "step": 1185 }, { "epoch": 0.18, "learning_rate": 1.889123722738539e-05, "loss": 0.849, "step": 1186 }, { "epoch": 0.18, "learning_rate": 1.8889025205740576e-05, "loss": 0.8209, "step": 1187 }, { "epoch": 0.18, "learning_rate": 1.8886811109534005e-05, "loss": 0.8711, "step": 1188 }, { "epoch": 0.18, "learning_rate": 1.888459493928242e-05, "loss": 0.875, "step": 1189 }, { "epoch": 0.18, "learning_rate": 1.888237669550304e-05, "loss": 0.8106, "step": 1190 }, { "epoch": 0.18, "learning_rate": 1.888015637871357e-05, "loss": 0.8434, "step": 1191 }, { "epoch": 0.18, "learning_rate": 1.8877933989432194e-05, "loss": 0.8501, "step": 1192 }, { "epoch": 0.18, "learning_rate": 1.8875709528177583e-05, "loss": 0.917, "step": 1193 }, { "epoch": 0.18, "learning_rate": 1.88734829954689e-05, "loss": 0.8289, "step": 1194 }, { "epoch": 0.18, "learning_rate": 1.8871254391825777e-05, "loss": 0.8847, "step": 1195 }, { "epoch": 0.18, "learning_rate": 1.8869023717768335e-05, "loss": 0.6994, "step": 1196 }, { "epoch": 0.18, "learning_rate": 1.886679097381718e-05, "loss": 0.8732, "step": 1197 }, { "epoch": 0.18, "learning_rate": 1.8864556160493403e-05, "loss": 0.8287, "step": 1198 }, { "epoch": 0.18, "learning_rate": 1.8862319278318576e-05, "loss": 0.9095, "step": 1199 }, { "epoch": 0.18, "learning_rate": 1.8860080327814745e-05, "loss": 0.876, "step": 1200 }, { "epoch": 0.18, "learning_rate": 1.8857839309504454e-05, "loss": 0.7848, "step": 1201 }, { "epoch": 0.18, "learning_rate": 1.885559622391072e-05, "loss": 0.7463, "step": 1202 }, { "epoch": 0.18, "learning_rate": 1.8853351071557046e-05, "loss": 0.7831, "step": 1203 }, { "epoch": 0.18, "learning_rate": 1.885110385296741e-05, "loss": 0.7757, "step": 1204 }, { "epoch": 0.18, "learning_rate": 1.884885456866629e-05, "loss": 0.8253, "step": 1205 }, { "epoch": 0.18, "learning_rate": 1.884660321917862e-05, "loss": 0.7853, "step": 1206 }, { "epoch": 0.18, "learning_rate": 1.884434980502984e-05, "loss": 0.8084, "step": 1207 }, { "epoch": 0.18, "learning_rate": 1.884209432674586e-05, "loss": 0.8151, "step": 1208 }, { "epoch": 0.18, "learning_rate": 1.8839836784853072e-05, "loss": 0.8034, "step": 1209 }, { "epoch": 0.18, "learning_rate": 1.8837577179878357e-05, "loss": 0.8663, "step": 1210 }, { "epoch": 0.18, "learning_rate": 1.8835315512349064e-05, "loss": 0.923, "step": 1211 }, { "epoch": 0.18, "learning_rate": 1.883305178279304e-05, "loss": 0.975, "step": 1212 }, { "epoch": 0.18, "learning_rate": 1.8830785991738597e-05, "loss": 0.8923, "step": 1213 }, { "epoch": 0.18, "learning_rate": 1.882851813971454e-05, "loss": 0.8375, "step": 1214 }, { "epoch": 0.18, "learning_rate": 1.882624822725015e-05, "loss": 0.7437, "step": 1215 }, { "epoch": 0.18, "learning_rate": 1.8823976254875188e-05, "loss": 0.792, "step": 1216 }, { "epoch": 0.18, "learning_rate": 1.88217022231199e-05, "loss": 0.7443, "step": 1217 }, { "epoch": 0.18, "learning_rate": 1.881942613251501e-05, "loss": 0.8668, "step": 1218 }, { "epoch": 0.18, "learning_rate": 1.8817147983591723e-05, "loss": 0.8892, "step": 1219 }, { "epoch": 0.18, "learning_rate": 1.881486777688172e-05, "loss": 0.7659, "step": 1220 }, { "epoch": 0.18, "learning_rate": 1.8812585512917168e-05, "loss": 0.8399, "step": 1221 }, { "epoch": 0.18, "learning_rate": 1.8810301192230713e-05, "loss": 0.9255, "step": 1222 }, { "epoch": 0.18, "learning_rate": 1.880801481535548e-05, "loss": 0.8654, "step": 1223 }, { "epoch": 0.18, "learning_rate": 1.8805726382825075e-05, "loss": 0.8572, "step": 1224 }, { "epoch": 0.18, "learning_rate": 1.8803435895173586e-05, "loss": 0.8471, "step": 1225 }, { "epoch": 0.18, "learning_rate": 1.880114335293557e-05, "loss": 0.8276, "step": 1226 }, { "epoch": 0.18, "learning_rate": 1.8798848756646073e-05, "loss": 0.8322, "step": 1227 }, { "epoch": 0.18, "learning_rate": 1.8796552106840624e-05, "loss": 0.7908, "step": 1228 }, { "epoch": 0.18, "learning_rate": 1.879425340405522e-05, "loss": 0.8288, "step": 1229 }, { "epoch": 0.18, "learning_rate": 1.8791952648826348e-05, "loss": 0.7391, "step": 1230 }, { "epoch": 0.18, "learning_rate": 1.8789649841690966e-05, "loss": 0.8381, "step": 1231 }, { "epoch": 0.18, "learning_rate": 1.878734498318651e-05, "loss": 0.923, "step": 1232 }, { "epoch": 0.18, "learning_rate": 1.8785038073850903e-05, "loss": 0.742, "step": 1233 }, { "epoch": 0.18, "learning_rate": 1.8782729114222544e-05, "loss": 0.7743, "step": 1234 }, { "epoch": 0.18, "learning_rate": 1.8780418104840304e-05, "loss": 0.9036, "step": 1235 }, { "epoch": 0.18, "learning_rate": 1.8778105046243536e-05, "loss": 0.89, "step": 1236 }, { "epoch": 0.18, "learning_rate": 1.8775789938972083e-05, "loss": 0.8086, "step": 1237 }, { "epoch": 0.18, "learning_rate": 1.8773472783566245e-05, "loss": 0.8663, "step": 1238 }, { "epoch": 0.18, "learning_rate": 1.8771153580566813e-05, "loss": 0.8072, "step": 1239 }, { "epoch": 0.18, "learning_rate": 1.876883233051505e-05, "loss": 0.8139, "step": 1240 }, { "epoch": 0.19, "learning_rate": 1.8766509033952708e-05, "loss": 0.9128, "step": 1241 }, { "epoch": 0.19, "learning_rate": 1.8764183691422003e-05, "loss": 0.8017, "step": 1242 }, { "epoch": 0.19, "learning_rate": 1.8761856303465634e-05, "loss": 0.7943, "step": 1243 }, { "epoch": 0.19, "learning_rate": 1.8759526870626782e-05, "loss": 0.853, "step": 1244 }, { "epoch": 0.19, "learning_rate": 1.8757195393449095e-05, "loss": 0.757, "step": 1245 }, { "epoch": 0.19, "learning_rate": 1.8754861872476708e-05, "loss": 0.7563, "step": 1246 }, { "epoch": 0.19, "learning_rate": 1.8752526308254227e-05, "loss": 0.7708, "step": 1247 }, { "epoch": 0.19, "learning_rate": 1.875018870132674e-05, "loss": 0.7711, "step": 1248 }, { "epoch": 0.19, "learning_rate": 1.8747849052239803e-05, "loss": 0.8096, "step": 1249 }, { "epoch": 0.19, "learning_rate": 1.8745507361539457e-05, "loss": 0.3047, "step": 1250 }, { "epoch": 0.19, "learning_rate": 1.874316362977222e-05, "loss": 0.8334, "step": 1251 }, { "epoch": 0.19, "learning_rate": 1.8740817857485075e-05, "loss": 0.7956, "step": 1252 }, { "epoch": 0.19, "learning_rate": 1.8738470045225496e-05, "loss": 0.803, "step": 1253 }, { "epoch": 0.19, "learning_rate": 1.8736120193541424e-05, "loss": 0.7326, "step": 1254 }, { "epoch": 0.19, "learning_rate": 1.873376830298128e-05, "loss": 0.8335, "step": 1255 }, { "epoch": 0.19, "learning_rate": 1.8731414374093954e-05, "loss": 0.8096, "step": 1256 }, { "epoch": 0.19, "learning_rate": 1.8729058407428823e-05, "loss": 0.8194, "step": 1257 }, { "epoch": 0.19, "learning_rate": 1.872670040353573e-05, "loss": 0.9178, "step": 1258 }, { "epoch": 0.19, "learning_rate": 1.8724340362964994e-05, "loss": 0.8893, "step": 1259 }, { "epoch": 0.19, "learning_rate": 1.872197828626742e-05, "loss": 0.889, "step": 1260 }, { "epoch": 0.19, "learning_rate": 1.871961417399427e-05, "loss": 0.7819, "step": 1261 }, { "epoch": 0.19, "learning_rate": 1.8717248026697302e-05, "loss": 0.8336, "step": 1262 }, { "epoch": 0.19, "learning_rate": 1.8714879844928733e-05, "loss": 0.8474, "step": 1263 }, { "epoch": 0.19, "learning_rate": 1.8712509629241263e-05, "loss": 0.8981, "step": 1264 }, { "epoch": 0.19, "learning_rate": 1.871013738018806e-05, "loss": 0.8351, "step": 1265 }, { "epoch": 0.19, "learning_rate": 1.8707763098322767e-05, "loss": 0.8603, "step": 1266 }, { "epoch": 0.19, "learning_rate": 1.870538678419952e-05, "loss": 0.893, "step": 1267 }, { "epoch": 0.19, "learning_rate": 1.8703008438372895e-05, "loss": 0.7856, "step": 1268 }, { "epoch": 0.19, "learning_rate": 1.8700628061397974e-05, "loss": 0.8247, "step": 1269 }, { "epoch": 0.19, "learning_rate": 1.86982456538303e-05, "loss": 0.7992, "step": 1270 }, { "epoch": 0.19, "learning_rate": 1.869586121622588e-05, "loss": 0.7757, "step": 1271 }, { "epoch": 0.19, "learning_rate": 1.869347474914122e-05, "loss": 0.8262, "step": 1272 }, { "epoch": 0.19, "learning_rate": 1.8691086253133272e-05, "loss": 0.8986, "step": 1273 }, { "epoch": 0.19, "learning_rate": 1.868869572875948e-05, "loss": 0.7904, "step": 1274 }, { "epoch": 0.19, "learning_rate": 1.868630317657776e-05, "loss": 0.8365, "step": 1275 }, { "epoch": 0.19, "learning_rate": 1.8683908597146482e-05, "loss": 0.8062, "step": 1276 }, { "epoch": 0.19, "learning_rate": 1.868151199102452e-05, "loss": 0.3062, "step": 1277 }, { "epoch": 0.19, "learning_rate": 1.86791133587712e-05, "loss": 0.7878, "step": 1278 }, { "epoch": 0.19, "learning_rate": 1.8676712700946323e-05, "loss": 0.7405, "step": 1279 }, { "epoch": 0.19, "learning_rate": 1.8674310018110163e-05, "loss": 0.7952, "step": 1280 }, { "epoch": 0.19, "learning_rate": 1.867190531082348e-05, "loss": 0.8222, "step": 1281 }, { "epoch": 0.19, "learning_rate": 1.8669498579647484e-05, "loss": 0.7656, "step": 1282 }, { "epoch": 0.19, "learning_rate": 1.8667089825143877e-05, "loss": 0.7932, "step": 1283 }, { "epoch": 0.19, "learning_rate": 1.866467904787482e-05, "loss": 0.3428, "step": 1284 }, { "epoch": 0.19, "learning_rate": 1.866226624840296e-05, "loss": 0.7756, "step": 1285 }, { "epoch": 0.19, "learning_rate": 1.8659851427291395e-05, "loss": 0.9586, "step": 1286 }, { "epoch": 0.19, "learning_rate": 1.8657434585103714e-05, "loss": 0.8164, "step": 1287 }, { "epoch": 0.19, "learning_rate": 1.865501572240397e-05, "loss": 0.8475, "step": 1288 }, { "epoch": 0.19, "learning_rate": 1.8652594839756685e-05, "loss": 0.9021, "step": 1289 }, { "epoch": 0.19, "learning_rate": 1.8650171937726863e-05, "loss": 0.7628, "step": 1290 }, { "epoch": 0.19, "learning_rate": 1.864774701687997e-05, "loss": 0.8643, "step": 1291 }, { "epoch": 0.19, "learning_rate": 1.8645320077781942e-05, "loss": 0.8392, "step": 1292 }, { "epoch": 0.19, "learning_rate": 1.8642891120999188e-05, "loss": 0.8394, "step": 1293 }, { "epoch": 0.19, "learning_rate": 1.8640460147098594e-05, "loss": 0.8568, "step": 1294 }, { "epoch": 0.19, "learning_rate": 1.863802715664751e-05, "loss": 0.8819, "step": 1295 }, { "epoch": 0.19, "learning_rate": 1.8635592150213754e-05, "loss": 0.8581, "step": 1296 }, { "epoch": 0.19, "learning_rate": 1.8633155128365627e-05, "loss": 0.7694, "step": 1297 }, { "epoch": 0.19, "learning_rate": 1.8630716091671887e-05, "loss": 0.7748, "step": 1298 }, { "epoch": 0.19, "learning_rate": 1.862827504070177e-05, "loss": 0.8492, "step": 1299 }, { "epoch": 0.19, "learning_rate": 1.862583197602498e-05, "loss": 0.9228, "step": 1300 }, { "epoch": 0.19, "learning_rate": 1.862338689821169e-05, "loss": 0.7924, "step": 1301 }, { "epoch": 0.19, "learning_rate": 1.862093980783254e-05, "loss": 0.764, "step": 1302 }, { "epoch": 0.19, "learning_rate": 1.8618490705458648e-05, "loss": 0.8314, "step": 1303 }, { "epoch": 0.19, "learning_rate": 1.8616039591661597e-05, "loss": 0.904, "step": 1304 }, { "epoch": 0.19, "learning_rate": 1.8613586467013437e-05, "loss": 0.8388, "step": 1305 }, { "epoch": 0.19, "learning_rate": 1.861113133208669e-05, "loss": 0.7661, "step": 1306 }, { "epoch": 0.19, "learning_rate": 1.8608674187454347e-05, "loss": 0.8285, "step": 1307 }, { "epoch": 0.2, "learning_rate": 1.8606215033689867e-05, "loss": 0.8325, "step": 1308 }, { "epoch": 0.2, "learning_rate": 1.860375387136718e-05, "loss": 0.8464, "step": 1309 }, { "epoch": 0.2, "learning_rate": 1.8601290701060683e-05, "loss": 0.8305, "step": 1310 }, { "epoch": 0.2, "learning_rate": 1.859882552334524e-05, "loss": 0.8849, "step": 1311 }, { "epoch": 0.2, "learning_rate": 1.8596358338796186e-05, "loss": 0.7474, "step": 1312 }, { "epoch": 0.2, "learning_rate": 1.859388914798933e-05, "loss": 0.7927, "step": 1313 }, { "epoch": 0.2, "learning_rate": 1.8591417951500935e-05, "loss": 0.8223, "step": 1314 }, { "epoch": 0.2, "learning_rate": 1.8588944749907746e-05, "loss": 0.8914, "step": 1315 }, { "epoch": 0.2, "learning_rate": 1.858646954378697e-05, "loss": 0.845, "step": 1316 }, { "epoch": 0.2, "learning_rate": 1.8583992333716275e-05, "loss": 0.7666, "step": 1317 }, { "epoch": 0.2, "learning_rate": 1.858151312027381e-05, "loss": 0.7929, "step": 1318 }, { "epoch": 0.2, "learning_rate": 1.8579031904038184e-05, "loss": 0.8072, "step": 1319 }, { "epoch": 0.2, "learning_rate": 1.8576548685588476e-05, "loss": 0.8737, "step": 1320 }, { "epoch": 0.2, "learning_rate": 1.857406346550423e-05, "loss": 0.7092, "step": 1321 }, { "epoch": 0.2, "learning_rate": 1.8571576244365456e-05, "loss": 0.792, "step": 1322 }, { "epoch": 0.2, "learning_rate": 1.8569087022752636e-05, "loss": 0.8154, "step": 1323 }, { "epoch": 0.2, "learning_rate": 1.8566595801246712e-05, "loss": 0.8512, "step": 1324 }, { "epoch": 0.2, "learning_rate": 1.8564102580429103e-05, "loss": 0.8277, "step": 1325 }, { "epoch": 0.2, "learning_rate": 1.8561607360881685e-05, "loss": 0.8413, "step": 1326 }, { "epoch": 0.2, "learning_rate": 1.8559110143186804e-05, "loss": 0.7693, "step": 1327 }, { "epoch": 0.2, "learning_rate": 1.8556610927927272e-05, "loss": 0.8466, "step": 1328 }, { "epoch": 0.2, "learning_rate": 1.8554109715686367e-05, "loss": 0.7552, "step": 1329 }, { "epoch": 0.2, "learning_rate": 1.8551606507047834e-05, "loss": 0.8276, "step": 1330 }, { "epoch": 0.2, "learning_rate": 1.8549101302595884e-05, "loss": 0.7637, "step": 1331 }, { "epoch": 0.2, "learning_rate": 1.8546594102915194e-05, "loss": 0.8791, "step": 1332 }, { "epoch": 0.2, "learning_rate": 1.85440849085909e-05, "loss": 0.7849, "step": 1333 }, { "epoch": 0.2, "learning_rate": 1.854157372020862e-05, "loss": 0.9083, "step": 1334 }, { "epoch": 0.2, "learning_rate": 1.8539060538354416e-05, "loss": 0.7926, "step": 1335 }, { "epoch": 0.2, "learning_rate": 1.853654536361483e-05, "loss": 0.8872, "step": 1336 }, { "epoch": 0.2, "learning_rate": 1.853402819657687e-05, "loss": 0.769, "step": 1337 }, { "epoch": 0.2, "learning_rate": 1.8531509037828e-05, "loss": 0.8648, "step": 1338 }, { "epoch": 0.2, "learning_rate": 1.8528987887956147e-05, "loss": 0.9216, "step": 1339 }, { "epoch": 0.2, "learning_rate": 1.8526464747549718e-05, "loss": 0.728, "step": 1340 }, { "epoch": 0.2, "learning_rate": 1.852393961719757e-05, "loss": 0.7664, "step": 1341 }, { "epoch": 0.2, "learning_rate": 1.852141249748903e-05, "loss": 0.8596, "step": 1342 }, { "epoch": 0.2, "learning_rate": 1.851888338901389e-05, "loss": 0.8966, "step": 1343 }, { "epoch": 0.2, "learning_rate": 1.851635229236241e-05, "loss": 0.8313, "step": 1344 }, { "epoch": 0.2, "learning_rate": 1.85138192081253e-05, "loss": 0.8142, "step": 1345 }, { "epoch": 0.2, "learning_rate": 1.8511284136893744e-05, "loss": 0.8624, "step": 1346 }, { "epoch": 0.2, "learning_rate": 1.8508747079259393e-05, "loss": 0.858, "step": 1347 }, { "epoch": 0.2, "learning_rate": 1.850620803581436e-05, "loss": 0.8342, "step": 1348 }, { "epoch": 0.2, "learning_rate": 1.850366700715121e-05, "loss": 0.706, "step": 1349 }, { "epoch": 0.2, "learning_rate": 1.8501123993862986e-05, "loss": 0.785, "step": 1350 }, { "epoch": 0.2, "learning_rate": 1.849857899654319e-05, "loss": 0.906, "step": 1351 }, { "epoch": 0.2, "learning_rate": 1.8496032015785775e-05, "loss": 0.7914, "step": 1352 }, { "epoch": 0.2, "learning_rate": 1.8493483052185177e-05, "loss": 0.8005, "step": 1353 }, { "epoch": 0.2, "learning_rate": 1.849093210633628e-05, "loss": 0.8248, "step": 1354 }, { "epoch": 0.2, "learning_rate": 1.8488379178834442e-05, "loss": 0.8297, "step": 1355 }, { "epoch": 0.2, "learning_rate": 1.8485824270275468e-05, "loss": 0.8824, "step": 1356 }, { "epoch": 0.2, "learning_rate": 1.8483267381255638e-05, "loss": 0.8139, "step": 1357 }, { "epoch": 0.2, "learning_rate": 1.8480708512371693e-05, "loss": 0.8696, "step": 1358 }, { "epoch": 0.2, "learning_rate": 1.8478147664220832e-05, "loss": 0.7997, "step": 1359 }, { "epoch": 0.2, "learning_rate": 1.8475584837400718e-05, "loss": 0.8322, "step": 1360 }, { "epoch": 0.2, "learning_rate": 1.8473020032509472e-05, "loss": 0.8041, "step": 1361 }, { "epoch": 0.2, "learning_rate": 1.8470453250145685e-05, "loss": 0.8394, "step": 1362 }, { "epoch": 0.2, "learning_rate": 1.8467884490908403e-05, "loss": 0.7849, "step": 1363 }, { "epoch": 0.2, "learning_rate": 1.8465313755397135e-05, "loss": 0.7214, "step": 1364 }, { "epoch": 0.2, "learning_rate": 1.8462741044211848e-05, "loss": 0.907, "step": 1365 }, { "epoch": 0.2, "learning_rate": 1.8460166357952976e-05, "loss": 0.7939, "step": 1366 }, { "epoch": 0.2, "learning_rate": 1.845758969722141e-05, "loss": 0.7877, "step": 1367 }, { "epoch": 0.2, "learning_rate": 1.8455011062618505e-05, "loss": 0.8252, "step": 1368 }, { "epoch": 0.2, "learning_rate": 1.8452430454746072e-05, "loss": 0.7862, "step": 1369 }, { "epoch": 0.2, "learning_rate": 1.8449847874206387e-05, "loss": 0.762, "step": 1370 }, { "epoch": 0.2, "learning_rate": 1.8447263321602185e-05, "loss": 0.8205, "step": 1371 }, { "epoch": 0.2, "learning_rate": 1.844467679753666e-05, "loss": 0.8035, "step": 1372 }, { "epoch": 0.2, "learning_rate": 1.8442088302613465e-05, "loss": 0.871, "step": 1373 }, { "epoch": 0.2, "learning_rate": 1.8439497837436718e-05, "loss": 0.6683, "step": 1374 }, { "epoch": 0.21, "learning_rate": 1.8436905402610992e-05, "loss": 0.6726, "step": 1375 }, { "epoch": 0.21, "learning_rate": 1.8434310998741323e-05, "loss": 0.7414, "step": 1376 }, { "epoch": 0.21, "learning_rate": 1.8431714626433204e-05, "loss": 0.81, "step": 1377 }, { "epoch": 0.21, "learning_rate": 1.842911628629259e-05, "loss": 0.8208, "step": 1378 }, { "epoch": 0.21, "learning_rate": 1.842651597892589e-05, "loss": 0.8501, "step": 1379 }, { "epoch": 0.21, "learning_rate": 1.8423913704939976e-05, "loss": 0.8294, "step": 1380 }, { "epoch": 0.21, "learning_rate": 1.842130946494218e-05, "loss": 0.9036, "step": 1381 }, { "epoch": 0.21, "learning_rate": 1.8418703259540292e-05, "loss": 0.8597, "step": 1382 }, { "epoch": 0.21, "learning_rate": 1.8416095089342563e-05, "loss": 0.8446, "step": 1383 }, { "epoch": 0.21, "learning_rate": 1.8413484954957693e-05, "loss": 0.8036, "step": 1384 }, { "epoch": 0.21, "learning_rate": 1.8410872856994855e-05, "loss": 0.8467, "step": 1385 }, { "epoch": 0.21, "learning_rate": 1.8408258796063668e-05, "loss": 0.8313, "step": 1386 }, { "epoch": 0.21, "learning_rate": 1.8405642772774213e-05, "loss": 0.853, "step": 1387 }, { "epoch": 0.21, "learning_rate": 1.8403024787737028e-05, "loss": 0.8016, "step": 1388 }, { "epoch": 0.21, "learning_rate": 1.840040484156312e-05, "loss": 0.8004, "step": 1389 }, { "epoch": 0.21, "learning_rate": 1.839778293486393e-05, "loss": 0.8308, "step": 1390 }, { "epoch": 0.21, "learning_rate": 1.839515906825138e-05, "loss": 0.8772, "step": 1391 }, { "epoch": 0.21, "learning_rate": 1.839253324233784e-05, "loss": 0.8624, "step": 1392 }, { "epoch": 0.21, "learning_rate": 1.8389905457736136e-05, "loss": 0.8276, "step": 1393 }, { "epoch": 0.21, "learning_rate": 1.838727571505955e-05, "loss": 0.8582, "step": 1394 }, { "epoch": 0.21, "learning_rate": 1.8384644014921828e-05, "loss": 0.838, "step": 1395 }, { "epoch": 0.21, "learning_rate": 1.8382010357937165e-05, "loss": 0.7385, "step": 1396 }, { "epoch": 0.21, "learning_rate": 1.8379374744720218e-05, "loss": 0.9277, "step": 1397 }, { "epoch": 0.21, "learning_rate": 1.8376737175886098e-05, "loss": 0.8968, "step": 1398 }, { "epoch": 0.21, "learning_rate": 1.837409765205037e-05, "loss": 0.844, "step": 1399 }, { "epoch": 0.21, "learning_rate": 1.8371456173829062e-05, "loss": 0.746, "step": 1400 }, { "epoch": 0.21, "learning_rate": 1.836881274183865e-05, "loss": 0.7687, "step": 1401 }, { "epoch": 0.21, "learning_rate": 1.8366167356696077e-05, "loss": 0.7779, "step": 1402 }, { "epoch": 0.21, "learning_rate": 1.836352001901873e-05, "loss": 0.8088, "step": 1403 }, { "epoch": 0.21, "learning_rate": 1.836087072942446e-05, "loss": 0.8754, "step": 1404 }, { "epoch": 0.21, "learning_rate": 1.835821948853157e-05, "loss": 0.7778, "step": 1405 }, { "epoch": 0.21, "learning_rate": 1.835556629695882e-05, "loss": 0.8229, "step": 1406 }, { "epoch": 0.21, "learning_rate": 1.8352911155325414e-05, "loss": 0.7006, "step": 1407 }, { "epoch": 0.21, "learning_rate": 1.8350254064251034e-05, "loss": 0.827, "step": 1408 }, { "epoch": 0.21, "learning_rate": 1.83475950243558e-05, "loss": 0.7824, "step": 1409 }, { "epoch": 0.21, "learning_rate": 1.834493403626029e-05, "loss": 0.8044, "step": 1410 }, { "epoch": 0.21, "learning_rate": 1.8342271100585535e-05, "loss": 0.8099, "step": 1411 }, { "epoch": 0.21, "learning_rate": 1.833960621795303e-05, "loss": 0.8352, "step": 1412 }, { "epoch": 0.21, "learning_rate": 1.8336939388984713e-05, "loss": 0.9358, "step": 1413 }, { "epoch": 0.21, "learning_rate": 1.8334270614302984e-05, "loss": 0.8115, "step": 1414 }, { "epoch": 0.21, "learning_rate": 1.833159989453069e-05, "loss": 0.8716, "step": 1415 }, { "epoch": 0.21, "learning_rate": 1.8328927230291135e-05, "loss": 0.7123, "step": 1416 }, { "epoch": 0.21, "learning_rate": 1.8326252622208082e-05, "loss": 0.3341, "step": 1417 }, { "epoch": 0.21, "learning_rate": 1.832357607090574e-05, "loss": 0.8211, "step": 1418 }, { "epoch": 0.21, "learning_rate": 1.832089757700878e-05, "loss": 0.8046, "step": 1419 }, { "epoch": 0.21, "learning_rate": 1.8318217141142316e-05, "loss": 0.9029, "step": 1420 }, { "epoch": 0.21, "learning_rate": 1.8315534763931925e-05, "loss": 0.8668, "step": 1421 }, { "epoch": 0.21, "learning_rate": 1.831285044600363e-05, "loss": 0.8426, "step": 1422 }, { "epoch": 0.21, "learning_rate": 1.8310164187983907e-05, "loss": 0.766, "step": 1423 }, { "epoch": 0.21, "learning_rate": 1.830747599049969e-05, "loss": 0.8136, "step": 1424 }, { "epoch": 0.21, "learning_rate": 1.8304785854178365e-05, "loss": 0.6975, "step": 1425 }, { "epoch": 0.21, "learning_rate": 1.8302093779647763e-05, "loss": 0.758, "step": 1426 }, { "epoch": 0.21, "learning_rate": 1.8299399767536178e-05, "loss": 0.8603, "step": 1427 }, { "epoch": 0.21, "learning_rate": 1.8296703818472347e-05, "loss": 0.8314, "step": 1428 }, { "epoch": 0.21, "learning_rate": 1.8294005933085466e-05, "loss": 0.8241, "step": 1429 }, { "epoch": 0.21, "learning_rate": 1.829130611200518e-05, "loss": 0.6981, "step": 1430 }, { "epoch": 0.21, "learning_rate": 1.8288604355861584e-05, "loss": 0.8578, "step": 1431 }, { "epoch": 0.21, "learning_rate": 1.828590066528523e-05, "loss": 0.8858, "step": 1432 }, { "epoch": 0.21, "learning_rate": 1.828319504090711e-05, "loss": 0.9264, "step": 1433 }, { "epoch": 0.21, "learning_rate": 1.8280487483358684e-05, "loss": 0.8608, "step": 1434 }, { "epoch": 0.21, "learning_rate": 1.8277777993271847e-05, "loss": 0.7328, "step": 1435 }, { "epoch": 0.21, "learning_rate": 1.827506657127896e-05, "loss": 0.8576, "step": 1436 }, { "epoch": 0.21, "learning_rate": 1.8272353218012816e-05, "loss": 0.6673, "step": 1437 }, { "epoch": 0.21, "learning_rate": 1.826963793410668e-05, "loss": 0.8158, "step": 1438 }, { "epoch": 0.21, "learning_rate": 1.8266920720194257e-05, "loss": 0.8097, "step": 1439 }, { "epoch": 0.21, "learning_rate": 1.82642015769097e-05, "loss": 0.7996, "step": 1440 }, { "epoch": 0.21, "learning_rate": 1.826148050488761e-05, "loss": 0.3833, "step": 1441 }, { "epoch": 0.22, "learning_rate": 1.8258757504763052e-05, "loss": 0.7679, "step": 1442 }, { "epoch": 0.22, "learning_rate": 1.8256032577171527e-05, "loss": 0.831, "step": 1443 }, { "epoch": 0.22, "learning_rate": 1.8253305722748996e-05, "loss": 0.8525, "step": 1444 }, { "epoch": 0.22, "learning_rate": 1.825057694213186e-05, "loss": 0.7421, "step": 1445 }, { "epoch": 0.22, "learning_rate": 1.8247846235956976e-05, "loss": 0.8398, "step": 1446 }, { "epoch": 0.22, "learning_rate": 1.8245113604861652e-05, "loss": 0.8675, "step": 1447 }, { "epoch": 0.22, "learning_rate": 1.8242379049483637e-05, "loss": 0.8369, "step": 1448 }, { "epoch": 0.22, "learning_rate": 1.8239642570461137e-05, "loss": 0.7644, "step": 1449 }, { "epoch": 0.22, "learning_rate": 1.8236904168432802e-05, "loss": 0.8217, "step": 1450 }, { "epoch": 0.22, "learning_rate": 1.8234163844037738e-05, "loss": 0.864, "step": 1451 }, { "epoch": 0.22, "learning_rate": 1.823142159791549e-05, "loss": 0.7849, "step": 1452 }, { "epoch": 0.22, "learning_rate": 1.8228677430706056e-05, "loss": 0.791, "step": 1453 }, { "epoch": 0.22, "learning_rate": 1.8225931343049884e-05, "loss": 0.8671, "step": 1454 }, { "epoch": 0.22, "learning_rate": 1.822318333558787e-05, "loss": 0.8703, "step": 1455 }, { "epoch": 0.22, "learning_rate": 1.8220433408961354e-05, "loss": 0.8048, "step": 1456 }, { "epoch": 0.22, "learning_rate": 1.8217681563812124e-05, "loss": 0.8313, "step": 1457 }, { "epoch": 0.22, "learning_rate": 1.8214927800782425e-05, "loss": 0.7371, "step": 1458 }, { "epoch": 0.22, "learning_rate": 1.8212172120514938e-05, "loss": 0.8025, "step": 1459 }, { "epoch": 0.22, "learning_rate": 1.82094145236528e-05, "loss": 0.8041, "step": 1460 }, { "epoch": 0.22, "learning_rate": 1.820665501083959e-05, "loss": 0.7995, "step": 1461 }, { "epoch": 0.22, "learning_rate": 1.8203893582719333e-05, "loss": 0.7934, "step": 1462 }, { "epoch": 0.22, "learning_rate": 1.820113023993651e-05, "loss": 0.8183, "step": 1463 }, { "epoch": 0.22, "learning_rate": 1.8198364983136034e-05, "loss": 0.7966, "step": 1464 }, { "epoch": 0.22, "learning_rate": 1.819559781296328e-05, "loss": 0.828, "step": 1465 }, { "epoch": 0.22, "learning_rate": 1.8192828730064067e-05, "loss": 0.8164, "step": 1466 }, { "epoch": 0.22, "learning_rate": 1.8190057735084644e-05, "loss": 0.8006, "step": 1467 }, { "epoch": 0.22, "learning_rate": 1.818728482867173e-05, "loss": 0.4085, "step": 1468 }, { "epoch": 0.22, "learning_rate": 1.8184510011472474e-05, "loss": 0.6685, "step": 1469 }, { "epoch": 0.22, "learning_rate": 1.8181733284134474e-05, "loss": 0.8733, "step": 1470 }, { "epoch": 0.22, "learning_rate": 1.8178954647305777e-05, "loss": 0.8137, "step": 1471 }, { "epoch": 0.22, "learning_rate": 1.8176174101634876e-05, "loss": 0.8065, "step": 1472 }, { "epoch": 0.22, "learning_rate": 1.8173391647770703e-05, "loss": 0.8061, "step": 1473 }, { "epoch": 0.22, "learning_rate": 1.8170607286362643e-05, "loss": 0.8588, "step": 1474 }, { "epoch": 0.22, "learning_rate": 1.816782101806053e-05, "loss": 0.8828, "step": 1475 }, { "epoch": 0.22, "learning_rate": 1.8165032843514624e-05, "loss": 0.7965, "step": 1476 }, { "epoch": 0.22, "learning_rate": 1.8162242763375645e-05, "loss": 0.7488, "step": 1477 }, { "epoch": 0.22, "learning_rate": 1.815945077829476e-05, "loss": 0.7965, "step": 1478 }, { "epoch": 0.22, "learning_rate": 1.8156656888923573e-05, "loss": 0.8067, "step": 1479 }, { "epoch": 0.22, "learning_rate": 1.815386109591413e-05, "loss": 0.7854, "step": 1480 }, { "epoch": 0.22, "learning_rate": 1.8151063399918937e-05, "loss": 0.7537, "step": 1481 }, { "epoch": 0.22, "learning_rate": 1.814826380159092e-05, "loss": 0.7882, "step": 1482 }, { "epoch": 0.22, "learning_rate": 1.8145462301583472e-05, "loss": 0.7828, "step": 1483 }, { "epoch": 0.22, "learning_rate": 1.8142658900550417e-05, "loss": 0.7666, "step": 1484 }, { "epoch": 0.22, "learning_rate": 1.8139853599146025e-05, "loss": 0.8789, "step": 1485 }, { "epoch": 0.22, "learning_rate": 1.813704639802501e-05, "loss": 0.3962, "step": 1486 }, { "epoch": 0.22, "learning_rate": 1.813423729784253e-05, "loss": 0.7802, "step": 1487 }, { "epoch": 0.22, "learning_rate": 1.8131426299254187e-05, "loss": 0.8162, "step": 1488 }, { "epoch": 0.22, "learning_rate": 1.812861340291602e-05, "loss": 0.7579, "step": 1489 }, { "epoch": 0.22, "learning_rate": 1.8125798609484525e-05, "loss": 0.8072, "step": 1490 }, { "epoch": 0.22, "learning_rate": 1.812298191961662e-05, "loss": 0.7303, "step": 1491 }, { "epoch": 0.22, "learning_rate": 1.8120163333969687e-05, "loss": 0.9141, "step": 1492 }, { "epoch": 0.22, "learning_rate": 1.8117342853201538e-05, "loss": 0.7631, "step": 1493 }, { "epoch": 0.22, "learning_rate": 1.8114520477970425e-05, "loss": 0.8126, "step": 1494 }, { "epoch": 0.22, "learning_rate": 1.8111696208935053e-05, "loss": 0.7973, "step": 1495 }, { "epoch": 0.22, "learning_rate": 1.810887004675456e-05, "loss": 0.7434, "step": 1496 }, { "epoch": 0.22, "learning_rate": 1.8106041992088527e-05, "loss": 0.81, "step": 1497 }, { "epoch": 0.22, "learning_rate": 1.810321204559698e-05, "loss": 0.8706, "step": 1498 }, { "epoch": 0.22, "learning_rate": 1.8100380207940387e-05, "loss": 0.7929, "step": 1499 }, { "epoch": 0.22, "learning_rate": 1.8097546479779655e-05, "loss": 0.836, "step": 1500 }, { "epoch": 0.22, "learning_rate": 1.809471086177613e-05, "loss": 0.7825, "step": 1501 }, { "epoch": 0.22, "learning_rate": 1.8091873354591602e-05, "loss": 0.8858, "step": 1502 }, { "epoch": 0.22, "learning_rate": 1.8089033958888306e-05, "loss": 0.8752, "step": 1503 }, { "epoch": 0.22, "learning_rate": 1.8086192675328908e-05, "loss": 0.7778, "step": 1504 }, { "epoch": 0.22, "learning_rate": 1.808334950457652e-05, "loss": 0.7517, "step": 1505 }, { "epoch": 0.22, "learning_rate": 1.80805044472947e-05, "loss": 0.8323, "step": 1506 }, { "epoch": 0.22, "learning_rate": 1.807765750414743e-05, "loss": 0.8018, "step": 1507 }, { "epoch": 0.22, "learning_rate": 1.8074808675799156e-05, "loss": 0.7187, "step": 1508 }, { "epoch": 0.23, "learning_rate": 1.8071957962914744e-05, "loss": 0.76, "step": 1509 }, { "epoch": 0.23, "learning_rate": 1.8069105366159502e-05, "loss": 0.8412, "step": 1510 }, { "epoch": 0.23, "learning_rate": 1.8066250886199192e-05, "loss": 0.8298, "step": 1511 }, { "epoch": 0.23, "learning_rate": 1.8063394523699998e-05, "loss": 0.678, "step": 1512 }, { "epoch": 0.23, "learning_rate": 1.8060536279328555e-05, "loss": 0.7814, "step": 1513 }, { "epoch": 0.23, "learning_rate": 1.805767615375193e-05, "loss": 0.8875, "step": 1514 }, { "epoch": 0.23, "learning_rate": 1.8054814147637633e-05, "loss": 0.8167, "step": 1515 }, { "epoch": 0.23, "learning_rate": 1.805195026165362e-05, "loss": 0.8521, "step": 1516 }, { "epoch": 0.23, "learning_rate": 1.8049084496468263e-05, "loss": 0.8331, "step": 1517 }, { "epoch": 0.23, "learning_rate": 1.80462168527504e-05, "loss": 0.7498, "step": 1518 }, { "epoch": 0.23, "learning_rate": 1.8043347331169292e-05, "loss": 0.7706, "step": 1519 }, { "epoch": 0.23, "learning_rate": 1.804047593239464e-05, "loss": 0.7694, "step": 1520 }, { "epoch": 0.23, "learning_rate": 1.803760265709658e-05, "loss": 0.3014, "step": 1521 }, { "epoch": 0.23, "learning_rate": 1.80347275059457e-05, "loss": 0.8053, "step": 1522 }, { "epoch": 0.23, "learning_rate": 1.803185047961301e-05, "loss": 0.8176, "step": 1523 }, { "epoch": 0.23, "learning_rate": 1.802897157876996e-05, "loss": 0.739, "step": 1524 }, { "epoch": 0.23, "learning_rate": 1.802609080408845e-05, "loss": 0.8052, "step": 1525 }, { "epoch": 0.23, "learning_rate": 1.80232081562408e-05, "loss": 0.6891, "step": 1526 }, { "epoch": 0.23, "learning_rate": 1.8020323635899784e-05, "loss": 0.7489, "step": 1527 }, { "epoch": 0.23, "learning_rate": 1.80174372437386e-05, "loss": 0.3243, "step": 1528 }, { "epoch": 0.23, "learning_rate": 1.8014548980430885e-05, "loss": 0.8301, "step": 1529 }, { "epoch": 0.23, "learning_rate": 1.8011658846650716e-05, "loss": 0.8573, "step": 1530 }, { "epoch": 0.23, "learning_rate": 1.8008766843072614e-05, "loss": 0.8118, "step": 1531 }, { "epoch": 0.23, "learning_rate": 1.800587297037152e-05, "loss": 0.8063, "step": 1532 }, { "epoch": 0.23, "learning_rate": 1.8002977229222817e-05, "loss": 0.3501, "step": 1533 }, { "epoch": 0.23, "learning_rate": 1.800007962030234e-05, "loss": 0.7698, "step": 1534 }, { "epoch": 0.23, "learning_rate": 1.799718014428633e-05, "loss": 0.7643, "step": 1535 }, { "epoch": 0.23, "learning_rate": 1.7994278801851486e-05, "loss": 0.7947, "step": 1536 }, { "epoch": 0.23, "learning_rate": 1.7991375593674944e-05, "loss": 0.7927, "step": 1537 }, { "epoch": 0.23, "learning_rate": 1.7988470520434258e-05, "loss": 0.8656, "step": 1538 }, { "epoch": 0.23, "learning_rate": 1.798556358280743e-05, "loss": 0.766, "step": 1539 }, { "epoch": 0.23, "learning_rate": 1.7982654781472903e-05, "loss": 0.744, "step": 1540 }, { "epoch": 0.23, "learning_rate": 1.7979744117109536e-05, "loss": 0.7431, "step": 1541 }, { "epoch": 0.23, "learning_rate": 1.7976831590396637e-05, "loss": 0.8686, "step": 1542 }, { "epoch": 0.23, "learning_rate": 1.797391720201394e-05, "loss": 0.873, "step": 1543 }, { "epoch": 0.23, "learning_rate": 1.797100095264163e-05, "loss": 0.7955, "step": 1544 }, { "epoch": 0.23, "learning_rate": 1.7968082842960303e-05, "loss": 0.7955, "step": 1545 }, { "epoch": 0.23, "learning_rate": 1.7965162873651007e-05, "loss": 0.8643, "step": 1546 }, { "epoch": 0.23, "learning_rate": 1.7962241045395217e-05, "loss": 0.8735, "step": 1547 }, { "epoch": 0.23, "learning_rate": 1.7959317358874845e-05, "loss": 0.8491, "step": 1548 }, { "epoch": 0.23, "learning_rate": 1.7956391814772232e-05, "loss": 0.8015, "step": 1549 }, { "epoch": 0.23, "learning_rate": 1.795346441377015e-05, "loss": 0.8542, "step": 1550 }, { "epoch": 0.23, "learning_rate": 1.795053515655182e-05, "loss": 0.8495, "step": 1551 }, { "epoch": 0.23, "learning_rate": 1.794760404380088e-05, "loss": 0.7691, "step": 1552 }, { "epoch": 0.23, "learning_rate": 1.79446710762014e-05, "loss": 0.3325, "step": 1553 }, { "epoch": 0.23, "learning_rate": 1.7941736254437903e-05, "loss": 0.7277, "step": 1554 }, { "epoch": 0.23, "learning_rate": 1.793879957919532e-05, "loss": 0.8275, "step": 1555 }, { "epoch": 0.23, "learning_rate": 1.7935861051159034e-05, "loss": 0.7499, "step": 1556 }, { "epoch": 0.23, "learning_rate": 1.7932920671014846e-05, "loss": 0.8006, "step": 1557 }, { "epoch": 0.23, "learning_rate": 1.7929978439449e-05, "loss": 0.8137, "step": 1558 }, { "epoch": 0.23, "learning_rate": 1.7927034357148162e-05, "loss": 0.7248, "step": 1559 }, { "epoch": 0.23, "learning_rate": 1.792408842479944e-05, "loss": 0.7881, "step": 1560 }, { "epoch": 0.23, "learning_rate": 1.792114064309037e-05, "loss": 0.923, "step": 1561 }, { "epoch": 0.23, "learning_rate": 1.7918191012708916e-05, "loss": 0.7498, "step": 1562 }, { "epoch": 0.23, "learning_rate": 1.7915239534343478e-05, "loss": 0.7188, "step": 1563 }, { "epoch": 0.23, "learning_rate": 1.7912286208682887e-05, "loss": 0.8233, "step": 1564 }, { "epoch": 0.23, "learning_rate": 1.79093310364164e-05, "loss": 0.9073, "step": 1565 }, { "epoch": 0.23, "learning_rate": 1.790637401823371e-05, "loss": 0.9018, "step": 1566 }, { "epoch": 0.23, "learning_rate": 1.7903415154824944e-05, "loss": 0.8326, "step": 1567 }, { "epoch": 0.23, "learning_rate": 1.7900454446880648e-05, "loss": 0.7998, "step": 1568 }, { "epoch": 0.23, "learning_rate": 1.789749189509181e-05, "loss": 0.849, "step": 1569 }, { "epoch": 0.23, "learning_rate": 1.789452750014984e-05, "loss": 0.8467, "step": 1570 }, { "epoch": 0.23, "learning_rate": 1.7891561262746595e-05, "loss": 0.802, "step": 1571 }, { "epoch": 0.23, "learning_rate": 1.788859318357433e-05, "loss": 0.8331, "step": 1572 }, { "epoch": 0.23, "learning_rate": 1.7885623263325765e-05, "loss": 0.7877, "step": 1573 }, { "epoch": 0.23, "learning_rate": 1.7882651502694024e-05, "loss": 0.7681, "step": 1574 }, { "epoch": 0.23, "learning_rate": 1.7879677902372674e-05, "loss": 0.7611, "step": 1575 }, { "epoch": 0.24, "learning_rate": 1.7876702463055707e-05, "loss": 0.8775, "step": 1576 }, { "epoch": 0.24, "learning_rate": 1.7873725185437548e-05, "loss": 0.8274, "step": 1577 }, { "epoch": 0.24, "learning_rate": 1.7870746070213042e-05, "loss": 0.7568, "step": 1578 }, { "epoch": 0.24, "learning_rate": 1.786776511807747e-05, "loss": 0.8069, "step": 1579 }, { "epoch": 0.24, "learning_rate": 1.7864782329726544e-05, "loss": 0.8486, "step": 1580 }, { "epoch": 0.24, "learning_rate": 1.7861797705856395e-05, "loss": 0.8243, "step": 1581 }, { "epoch": 0.24, "learning_rate": 1.7858811247163597e-05, "loss": 0.8819, "step": 1582 }, { "epoch": 0.24, "learning_rate": 1.7855822954345137e-05, "loss": 0.8497, "step": 1583 }, { "epoch": 0.24, "learning_rate": 1.7852832828098438e-05, "loss": 0.9083, "step": 1584 }, { "epoch": 0.24, "learning_rate": 1.7849840869121346e-05, "loss": 0.9022, "step": 1585 }, { "epoch": 0.24, "learning_rate": 1.7846847078112147e-05, "loss": 0.8226, "step": 1586 }, { "epoch": 0.24, "learning_rate": 1.7843851455769536e-05, "loss": 0.8348, "step": 1587 }, { "epoch": 0.24, "learning_rate": 1.7840854002792655e-05, "loss": 0.8018, "step": 1588 }, { "epoch": 0.24, "learning_rate": 1.7837854719881052e-05, "loss": 0.7779, "step": 1589 }, { "epoch": 0.24, "learning_rate": 1.7834853607734724e-05, "loss": 0.7816, "step": 1590 }, { "epoch": 0.24, "learning_rate": 1.783185066705408e-05, "loss": 0.801, "step": 1591 }, { "epoch": 0.24, "learning_rate": 1.7828845898539958e-05, "loss": 0.8383, "step": 1592 }, { "epoch": 0.24, "learning_rate": 1.7825839302893627e-05, "loss": 0.7428, "step": 1593 }, { "epoch": 0.24, "learning_rate": 1.7822830880816782e-05, "loss": 0.7957, "step": 1594 }, { "epoch": 0.24, "learning_rate": 1.781982063301154e-05, "loss": 0.8766, "step": 1595 }, { "epoch": 0.24, "learning_rate": 1.7816808560180446e-05, "loss": 0.8779, "step": 1596 }, { "epoch": 0.24, "learning_rate": 1.7813794663026477e-05, "loss": 0.8569, "step": 1597 }, { "epoch": 0.24, "learning_rate": 1.781077894225302e-05, "loss": 0.7618, "step": 1598 }, { "epoch": 0.24, "learning_rate": 1.7807761398563905e-05, "loss": 0.7624, "step": 1599 }, { "epoch": 0.24, "learning_rate": 1.780474203266338e-05, "loss": 0.8283, "step": 1600 }, { "epoch": 0.24, "learning_rate": 1.780172084525612e-05, "loss": 0.8937, "step": 1601 }, { "epoch": 0.24, "learning_rate": 1.7798697837047216e-05, "loss": 0.7476, "step": 1602 }, { "epoch": 0.24, "learning_rate": 1.77956730087422e-05, "loss": 0.3569, "step": 1603 }, { "epoch": 0.24, "learning_rate": 1.7792646361047013e-05, "loss": 0.8539, "step": 1604 }, { "epoch": 0.24, "learning_rate": 1.7789617894668037e-05, "loss": 0.7763, "step": 1605 }, { "epoch": 0.24, "learning_rate": 1.778658761031206e-05, "loss": 0.8776, "step": 1606 }, { "epoch": 0.24, "learning_rate": 1.7783555508686315e-05, "loss": 0.8697, "step": 1607 }, { "epoch": 0.24, "learning_rate": 1.778052159049844e-05, "loss": 0.7867, "step": 1608 }, { "epoch": 0.24, "learning_rate": 1.7777485856456503e-05, "loss": 0.7849, "step": 1609 }, { "epoch": 0.24, "learning_rate": 1.7774448307269e-05, "loss": 0.7232, "step": 1610 }, { "epoch": 0.24, "learning_rate": 1.777140894364485e-05, "loss": 0.7421, "step": 1611 }, { "epoch": 0.24, "learning_rate": 1.776836776629339e-05, "loss": 0.7858, "step": 1612 }, { "epoch": 0.24, "learning_rate": 1.7765324775924392e-05, "loss": 0.7894, "step": 1613 }, { "epoch": 0.24, "learning_rate": 1.7762279973248036e-05, "loss": 0.8459, "step": 1614 }, { "epoch": 0.24, "learning_rate": 1.7759233358974936e-05, "loss": 0.8159, "step": 1615 }, { "epoch": 0.24, "learning_rate": 1.775618493381612e-05, "loss": 0.7653, "step": 1616 }, { "epoch": 0.24, "learning_rate": 1.7753134698483044e-05, "loss": 0.8124, "step": 1617 }, { "epoch": 0.24, "learning_rate": 1.7750082653687596e-05, "loss": 0.8048, "step": 1618 }, { "epoch": 0.24, "learning_rate": 1.7747028800142062e-05, "loss": 0.8063, "step": 1619 }, { "epoch": 0.24, "learning_rate": 1.7743973138559175e-05, "loss": 0.8522, "step": 1620 }, { "epoch": 0.24, "learning_rate": 1.7740915669652074e-05, "loss": 0.851, "step": 1621 }, { "epoch": 0.24, "learning_rate": 1.773785639413433e-05, "loss": 0.8134, "step": 1622 }, { "epoch": 0.24, "learning_rate": 1.773479531271992e-05, "loss": 0.8234, "step": 1623 }, { "epoch": 0.24, "learning_rate": 1.7731732426123268e-05, "loss": 0.8014, "step": 1624 }, { "epoch": 0.24, "learning_rate": 1.7728667735059195e-05, "loss": 0.3422, "step": 1625 }, { "epoch": 0.24, "learning_rate": 1.7725601240242953e-05, "loss": 0.8211, "step": 1626 }, { "epoch": 0.24, "learning_rate": 1.7722532942390222e-05, "loss": 0.8795, "step": 1627 }, { "epoch": 0.24, "learning_rate": 1.771946284221709e-05, "loss": 0.8644, "step": 1628 }, { "epoch": 0.24, "learning_rate": 1.7716390940440066e-05, "loss": 0.8186, "step": 1629 }, { "epoch": 0.24, "learning_rate": 1.7713317237776096e-05, "loss": 0.7875, "step": 1630 }, { "epoch": 0.24, "learning_rate": 1.7710241734942527e-05, "loss": 0.7428, "step": 1631 }, { "epoch": 0.24, "learning_rate": 1.770716443265714e-05, "loss": 0.8216, "step": 1632 }, { "epoch": 0.24, "learning_rate": 1.770408533163812e-05, "loss": 0.7248, "step": 1633 }, { "epoch": 0.24, "learning_rate": 1.7701004432604093e-05, "loss": 0.7423, "step": 1634 }, { "epoch": 0.24, "learning_rate": 1.7697921736274085e-05, "loss": 0.8743, "step": 1635 }, { "epoch": 0.24, "learning_rate": 1.7694837243367558e-05, "loss": 0.8961, "step": 1636 }, { "epoch": 0.24, "learning_rate": 1.769175095460438e-05, "loss": 0.8684, "step": 1637 }, { "epoch": 0.24, "learning_rate": 1.768866287070484e-05, "loss": 0.8922, "step": 1638 }, { "epoch": 0.24, "learning_rate": 1.7685572992389658e-05, "loss": 0.7764, "step": 1639 }, { "epoch": 0.24, "learning_rate": 1.7682481320379957e-05, "loss": 0.816, "step": 1640 }, { "epoch": 0.24, "learning_rate": 1.767938785539729e-05, "loss": 0.7677, "step": 1641 }, { "epoch": 0.24, "learning_rate": 1.767629259816362e-05, "loss": 0.8368, "step": 1642 }, { "epoch": 0.25, "learning_rate": 1.7673195549401333e-05, "loss": 0.8106, "step": 1643 }, { "epoch": 0.25, "learning_rate": 1.7670096709833235e-05, "loss": 0.8188, "step": 1644 }, { "epoch": 0.25, "learning_rate": 1.766699608018255e-05, "loss": 0.8335, "step": 1645 }, { "epoch": 0.25, "learning_rate": 1.7663893661172912e-05, "loss": 0.6557, "step": 1646 }, { "epoch": 0.25, "learning_rate": 1.766078945352838e-05, "loss": 0.8074, "step": 1647 }, { "epoch": 0.25, "learning_rate": 1.765768345797343e-05, "loss": 0.8671, "step": 1648 }, { "epoch": 0.25, "learning_rate": 1.7654575675232948e-05, "loss": 0.8005, "step": 1649 }, { "epoch": 0.25, "learning_rate": 1.765146610603225e-05, "loss": 0.7892, "step": 1650 }, { "epoch": 0.25, "learning_rate": 1.7648354751097057e-05, "loss": 0.8702, "step": 1651 }, { "epoch": 0.25, "learning_rate": 1.7645241611153513e-05, "loss": 0.8092, "step": 1652 }, { "epoch": 0.25, "learning_rate": 1.7642126686928175e-05, "loss": 0.8006, "step": 1653 }, { "epoch": 0.25, "learning_rate": 1.763900997914802e-05, "loss": 0.8348, "step": 1654 }, { "epoch": 0.25, "learning_rate": 1.763589148854044e-05, "loss": 0.7864, "step": 1655 }, { "epoch": 0.25, "learning_rate": 1.763277121583324e-05, "loss": 0.8163, "step": 1656 }, { "epoch": 0.25, "learning_rate": 1.762964916175465e-05, "loss": 0.7586, "step": 1657 }, { "epoch": 0.25, "learning_rate": 1.7626525327033302e-05, "loss": 0.7368, "step": 1658 }, { "epoch": 0.25, "learning_rate": 1.7623399712398255e-05, "loss": 0.7189, "step": 1659 }, { "epoch": 0.25, "learning_rate": 1.7620272318578976e-05, "loss": 0.754, "step": 1660 }, { "epoch": 0.25, "learning_rate": 1.7617143146305357e-05, "loss": 0.736, "step": 1661 }, { "epoch": 0.25, "learning_rate": 1.7614012196307696e-05, "loss": 0.8044, "step": 1662 }, { "epoch": 0.25, "learning_rate": 1.7610879469316704e-05, "loss": 0.8009, "step": 1663 }, { "epoch": 0.25, "learning_rate": 1.7607744966063517e-05, "loss": 0.8053, "step": 1664 }, { "epoch": 0.25, "learning_rate": 1.7604608687279677e-05, "loss": 0.9109, "step": 1665 }, { "epoch": 0.25, "learning_rate": 1.760147063369714e-05, "loss": 0.7537, "step": 1666 }, { "epoch": 0.25, "learning_rate": 1.759833080604829e-05, "loss": 0.8459, "step": 1667 }, { "epoch": 0.25, "learning_rate": 1.75951892050659e-05, "loss": 0.7601, "step": 1668 }, { "epoch": 0.25, "learning_rate": 1.7592045831483188e-05, "loss": 0.7892, "step": 1669 }, { "epoch": 0.25, "learning_rate": 1.7588900686033753e-05, "loss": 0.8425, "step": 1670 }, { "epoch": 0.25, "learning_rate": 1.7585753769451636e-05, "loss": 0.6827, "step": 1671 }, { "epoch": 0.25, "learning_rate": 1.758260508247127e-05, "loss": 0.8297, "step": 1672 }, { "epoch": 0.25, "learning_rate": 1.7579454625827516e-05, "loss": 0.8157, "step": 1673 }, { "epoch": 0.25, "learning_rate": 1.757630240025564e-05, "loss": 0.7702, "step": 1674 }, { "epoch": 0.25, "learning_rate": 1.757314840649132e-05, "loss": 0.7357, "step": 1675 }, { "epoch": 0.25, "learning_rate": 1.7569992645270652e-05, "loss": 0.8668, "step": 1676 }, { "epoch": 0.25, "learning_rate": 1.7566835117330145e-05, "loss": 0.8052, "step": 1677 }, { "epoch": 0.25, "learning_rate": 1.7563675823406712e-05, "loss": 0.8024, "step": 1678 }, { "epoch": 0.25, "learning_rate": 1.7560514764237693e-05, "loss": 0.7796, "step": 1679 }, { "epoch": 0.25, "learning_rate": 1.755735194056082e-05, "loss": 0.8547, "step": 1680 }, { "epoch": 0.25, "learning_rate": 1.755418735311425e-05, "loss": 0.7803, "step": 1681 }, { "epoch": 0.25, "learning_rate": 1.7551021002636552e-05, "loss": 0.8258, "step": 1682 }, { "epoch": 0.25, "learning_rate": 1.7547852889866703e-05, "loss": 0.793, "step": 1683 }, { "epoch": 0.25, "learning_rate": 1.7544683015544093e-05, "loss": 0.8022, "step": 1684 }, { "epoch": 0.25, "learning_rate": 1.7541511380408515e-05, "loss": 0.725, "step": 1685 }, { "epoch": 0.25, "learning_rate": 1.7538337985200188e-05, "loss": 0.8563, "step": 1686 }, { "epoch": 0.25, "learning_rate": 1.753516283065973e-05, "loss": 0.8036, "step": 1687 }, { "epoch": 0.25, "learning_rate": 1.7531985917528172e-05, "loss": 0.7529, "step": 1688 }, { "epoch": 0.25, "learning_rate": 1.7528807246546957e-05, "loss": 0.8267, "step": 1689 }, { "epoch": 0.25, "learning_rate": 1.7525626818457943e-05, "loss": 0.3583, "step": 1690 }, { "epoch": 0.25, "learning_rate": 1.752244463400339e-05, "loss": 0.8338, "step": 1691 }, { "epoch": 0.25, "learning_rate": 1.7519260693925963e-05, "loss": 0.82, "step": 1692 }, { "epoch": 0.25, "learning_rate": 1.751607499896876e-05, "loss": 0.8473, "step": 1693 }, { "epoch": 0.25, "learning_rate": 1.7512887549875257e-05, "loss": 0.8938, "step": 1694 }, { "epoch": 0.25, "learning_rate": 1.750969834738937e-05, "loss": 0.8167, "step": 1695 }, { "epoch": 0.25, "learning_rate": 1.75065073922554e-05, "loss": 0.799, "step": 1696 }, { "epoch": 0.25, "learning_rate": 1.750331468521807e-05, "loss": 0.7959, "step": 1697 }, { "epoch": 0.25, "learning_rate": 1.750012022702251e-05, "loss": 0.7706, "step": 1698 }, { "epoch": 0.25, "learning_rate": 1.7496924018414257e-05, "loss": 0.8108, "step": 1699 }, { "epoch": 0.25, "learning_rate": 1.7493726060139254e-05, "loss": 0.3583, "step": 1700 }, { "epoch": 0.25, "learning_rate": 1.7490526352943856e-05, "loss": 0.9466, "step": 1701 }, { "epoch": 0.25, "learning_rate": 1.748732489757483e-05, "loss": 0.8152, "step": 1702 }, { "epoch": 0.25, "learning_rate": 1.7484121694779342e-05, "loss": 0.8725, "step": 1703 }, { "epoch": 0.25, "learning_rate": 1.7480916745304972e-05, "loss": 0.8007, "step": 1704 }, { "epoch": 0.25, "learning_rate": 1.7477710049899705e-05, "loss": 0.7693, "step": 1705 }, { "epoch": 0.25, "learning_rate": 1.7474501609311937e-05, "loss": 0.8338, "step": 1706 }, { "epoch": 0.25, "learning_rate": 1.7471291424290464e-05, "loss": 0.8183, "step": 1707 }, { "epoch": 0.25, "learning_rate": 1.74680794955845e-05, "loss": 0.8395, "step": 1708 }, { "epoch": 0.25, "learning_rate": 1.746486582394365e-05, "loss": 0.8511, "step": 1709 }, { "epoch": 0.26, "learning_rate": 1.7461650410117946e-05, "loss": 0.8693, "step": 1710 }, { "epoch": 0.26, "learning_rate": 1.745843325485781e-05, "loss": 0.8246, "step": 1711 }, { "epoch": 0.26, "learning_rate": 1.7455214358914077e-05, "loss": 0.7387, "step": 1712 }, { "epoch": 0.26, "learning_rate": 1.745199372303799e-05, "loss": 0.7521, "step": 1713 }, { "epoch": 0.26, "learning_rate": 1.7448771347981198e-05, "loss": 0.8482, "step": 1714 }, { "epoch": 0.26, "learning_rate": 1.7445547234495745e-05, "loss": 0.8795, "step": 1715 }, { "epoch": 0.26, "learning_rate": 1.74423213833341e-05, "loss": 0.7468, "step": 1716 }, { "epoch": 0.26, "learning_rate": 1.7439093795249122e-05, "loss": 0.8024, "step": 1717 }, { "epoch": 0.26, "learning_rate": 1.7435864470994078e-05, "loss": 0.7595, "step": 1718 }, { "epoch": 0.26, "learning_rate": 1.7432633411322645e-05, "loss": 0.9033, "step": 1719 }, { "epoch": 0.26, "learning_rate": 1.7429400616988905e-05, "loss": 0.8024, "step": 1720 }, { "epoch": 0.26, "learning_rate": 1.742616608874734e-05, "loss": 0.7826, "step": 1721 }, { "epoch": 0.26, "learning_rate": 1.7422929827352843e-05, "loss": 0.8384, "step": 1722 }, { "epoch": 0.26, "learning_rate": 1.74196918335607e-05, "loss": 0.7718, "step": 1723 }, { "epoch": 0.26, "learning_rate": 1.7416452108126612e-05, "loss": 0.7543, "step": 1724 }, { "epoch": 0.26, "learning_rate": 1.7413210651806685e-05, "loss": 0.8064, "step": 1725 }, { "epoch": 0.26, "learning_rate": 1.7409967465357422e-05, "loss": 0.8281, "step": 1726 }, { "epoch": 0.26, "learning_rate": 1.740672254953573e-05, "loss": 0.878, "step": 1727 }, { "epoch": 0.26, "learning_rate": 1.740347590509893e-05, "loss": 0.7119, "step": 1728 }, { "epoch": 0.26, "learning_rate": 1.740022753280473e-05, "loss": 0.8516, "step": 1729 }, { "epoch": 0.26, "learning_rate": 1.739697743341126e-05, "loss": 0.8691, "step": 1730 }, { "epoch": 0.26, "learning_rate": 1.739372560767703e-05, "loss": 0.9124, "step": 1731 }, { "epoch": 0.26, "learning_rate": 1.739047205636098e-05, "loss": 0.7957, "step": 1732 }, { "epoch": 0.26, "learning_rate": 1.738721678022243e-05, "loss": 0.7803, "step": 1733 }, { "epoch": 0.26, "learning_rate": 1.7383959780021114e-05, "loss": 0.3282, "step": 1734 }, { "epoch": 0.26, "learning_rate": 1.7380701056517166e-05, "loss": 0.7975, "step": 1735 }, { "epoch": 0.26, "learning_rate": 1.7377440610471123e-05, "loss": 0.7604, "step": 1736 }, { "epoch": 0.26, "learning_rate": 1.737417844264392e-05, "loss": 0.9055, "step": 1737 }, { "epoch": 0.26, "learning_rate": 1.7370914553796902e-05, "loss": 0.7575, "step": 1738 }, { "epoch": 0.26, "learning_rate": 1.73676489446918e-05, "loss": 0.7072, "step": 1739 }, { "epoch": 0.26, "learning_rate": 1.736438161609077e-05, "loss": 0.7539, "step": 1740 }, { "epoch": 0.26, "learning_rate": 1.7361112568756347e-05, "loss": 0.7757, "step": 1741 }, { "epoch": 0.26, "learning_rate": 1.735784180345148e-05, "loss": 0.7785, "step": 1742 }, { "epoch": 0.26, "learning_rate": 1.7354569320939517e-05, "loss": 0.7682, "step": 1743 }, { "epoch": 0.26, "learning_rate": 1.73512951219842e-05, "loss": 0.7328, "step": 1744 }, { "epoch": 0.26, "learning_rate": 1.734801920734968e-05, "loss": 0.701, "step": 1745 }, { "epoch": 0.26, "learning_rate": 1.7344741577800505e-05, "loss": 0.7473, "step": 1746 }, { "epoch": 0.26, "learning_rate": 1.7341462234101624e-05, "loss": 0.837, "step": 1747 }, { "epoch": 0.26, "learning_rate": 1.733818117701838e-05, "loss": 0.6926, "step": 1748 }, { "epoch": 0.26, "learning_rate": 1.7334898407316528e-05, "loss": 0.8027, "step": 1749 }, { "epoch": 0.26, "learning_rate": 1.7331613925762218e-05, "loss": 0.8173, "step": 1750 }, { "epoch": 0.26, "learning_rate": 1.7328327733121987e-05, "loss": 0.8608, "step": 1751 }, { "epoch": 0.26, "learning_rate": 1.732503983016279e-05, "loss": 0.8224, "step": 1752 }, { "epoch": 0.26, "learning_rate": 1.732175021765197e-05, "loss": 0.7856, "step": 1753 }, { "epoch": 0.26, "learning_rate": 1.7318458896357274e-05, "loss": 0.8446, "step": 1754 }, { "epoch": 0.26, "learning_rate": 1.7315165867046842e-05, "loss": 0.3347, "step": 1755 }, { "epoch": 0.26, "learning_rate": 1.731187113048922e-05, "loss": 0.7452, "step": 1756 }, { "epoch": 0.26, "learning_rate": 1.730857468745335e-05, "loss": 0.7781, "step": 1757 }, { "epoch": 0.26, "learning_rate": 1.7305276538708566e-05, "loss": 0.8086, "step": 1758 }, { "epoch": 0.26, "learning_rate": 1.730197668502461e-05, "loss": 0.8817, "step": 1759 }, { "epoch": 0.26, "learning_rate": 1.7298675127171615e-05, "loss": 0.8821, "step": 1760 }, { "epoch": 0.26, "learning_rate": 1.7295371865920113e-05, "loss": 0.818, "step": 1761 }, { "epoch": 0.26, "learning_rate": 1.7292066902041032e-05, "loss": 0.7708, "step": 1762 }, { "epoch": 0.26, "learning_rate": 1.728876023630571e-05, "loss": 0.3318, "step": 1763 }, { "epoch": 0.26, "learning_rate": 1.7285451869485858e-05, "loss": 0.7122, "step": 1764 }, { "epoch": 0.26, "learning_rate": 1.728214180235361e-05, "loss": 0.8915, "step": 1765 }, { "epoch": 0.26, "learning_rate": 1.7278830035681476e-05, "loss": 0.3424, "step": 1766 }, { "epoch": 0.26, "learning_rate": 1.7275516570242378e-05, "loss": 0.74, "step": 1767 }, { "epoch": 0.26, "learning_rate": 1.7272201406809623e-05, "loss": 0.7503, "step": 1768 }, { "epoch": 0.26, "learning_rate": 1.726888454615692e-05, "loss": 0.809, "step": 1769 }, { "epoch": 0.26, "learning_rate": 1.7265565989058374e-05, "loss": 0.8117, "step": 1770 }, { "epoch": 0.26, "learning_rate": 1.7262245736288487e-05, "loss": 0.755, "step": 1771 }, { "epoch": 0.26, "learning_rate": 1.7258923788622156e-05, "loss": 0.8558, "step": 1772 }, { "epoch": 0.26, "learning_rate": 1.7255600146834664e-05, "loss": 0.8232, "step": 1773 }, { "epoch": 0.26, "learning_rate": 1.725227481170171e-05, "loss": 0.8578, "step": 1774 }, { "epoch": 0.26, "learning_rate": 1.7248947783999366e-05, "loss": 0.7305, "step": 1775 }, { "epoch": 0.26, "learning_rate": 1.7245619064504112e-05, "loss": 0.6941, "step": 1776 }, { "epoch": 0.27, "learning_rate": 1.724228865399282e-05, "loss": 0.8532, "step": 1777 }, { "epoch": 0.27, "learning_rate": 1.723895655324276e-05, "loss": 0.8571, "step": 1778 }, { "epoch": 0.27, "learning_rate": 1.723562276303159e-05, "loss": 0.7551, "step": 1779 }, { "epoch": 0.27, "learning_rate": 1.7232287284137367e-05, "loss": 0.7475, "step": 1780 }, { "epoch": 0.27, "learning_rate": 1.7228950117338536e-05, "loss": 0.829, "step": 1781 }, { "epoch": 0.27, "learning_rate": 1.7225611263413945e-05, "loss": 0.7575, "step": 1782 }, { "epoch": 0.27, "learning_rate": 1.7222270723142833e-05, "loss": 0.8788, "step": 1783 }, { "epoch": 0.27, "learning_rate": 1.7218928497304826e-05, "loss": 0.8486, "step": 1784 }, { "epoch": 0.27, "learning_rate": 1.7215584586679947e-05, "loss": 0.782, "step": 1785 }, { "epoch": 0.27, "learning_rate": 1.7212238992048615e-05, "loss": 0.7746, "step": 1786 }, { "epoch": 0.27, "learning_rate": 1.7208891714191644e-05, "loss": 0.8062, "step": 1787 }, { "epoch": 0.27, "learning_rate": 1.7205542753890238e-05, "loss": 0.8001, "step": 1788 }, { "epoch": 0.27, "learning_rate": 1.7202192111925982e-05, "loss": 0.7234, "step": 1789 }, { "epoch": 0.27, "learning_rate": 1.7198839789080873e-05, "loss": 0.771, "step": 1790 }, { "epoch": 0.27, "learning_rate": 1.7195485786137296e-05, "loss": 0.7737, "step": 1791 }, { "epoch": 0.27, "learning_rate": 1.7192130103878013e-05, "loss": 0.7867, "step": 1792 }, { "epoch": 0.27, "learning_rate": 1.7188772743086196e-05, "loss": 0.3084, "step": 1793 }, { "epoch": 0.27, "learning_rate": 1.7185413704545392e-05, "loss": 0.8147, "step": 1794 }, { "epoch": 0.27, "learning_rate": 1.718205298903956e-05, "loss": 0.8206, "step": 1795 }, { "epoch": 0.27, "learning_rate": 1.7178690597353037e-05, "loss": 0.8724, "step": 1796 }, { "epoch": 0.27, "learning_rate": 1.717532653027055e-05, "loss": 0.8112, "step": 1797 }, { "epoch": 0.27, "learning_rate": 1.7171960788577223e-05, "loss": 0.8625, "step": 1798 }, { "epoch": 0.27, "learning_rate": 1.7168593373058566e-05, "loss": 0.7459, "step": 1799 }, { "epoch": 0.27, "learning_rate": 1.7165224284500486e-05, "loss": 0.8339, "step": 1800 }, { "epoch": 0.27, "learning_rate": 1.716185352368927e-05, "loss": 0.8402, "step": 1801 }, { "epoch": 0.27, "learning_rate": 1.715848109141161e-05, "loss": 0.7807, "step": 1802 }, { "epoch": 0.27, "learning_rate": 1.7155106988454572e-05, "loss": 0.7858, "step": 1803 }, { "epoch": 0.27, "learning_rate": 1.715173121560562e-05, "loss": 0.7845, "step": 1804 }, { "epoch": 0.27, "learning_rate": 1.7148353773652618e-05, "loss": 0.7844, "step": 1805 }, { "epoch": 0.27, "learning_rate": 1.71449746633838e-05, "loss": 0.8117, "step": 1806 }, { "epoch": 0.27, "learning_rate": 1.7141593885587796e-05, "loss": 0.8629, "step": 1807 }, { "epoch": 0.27, "learning_rate": 1.713821144105363e-05, "loss": 0.8322, "step": 1808 }, { "epoch": 0.27, "learning_rate": 1.713482733057072e-05, "loss": 0.7956, "step": 1809 }, { "epoch": 0.27, "learning_rate": 1.7131441554928857e-05, "loss": 0.7592, "step": 1810 }, { "epoch": 0.27, "learning_rate": 1.7128054114918228e-05, "loss": 0.7447, "step": 1811 }, { "epoch": 0.27, "learning_rate": 1.7124665011329414e-05, "loss": 0.8136, "step": 1812 }, { "epoch": 0.27, "learning_rate": 1.7121274244953384e-05, "loss": 0.817, "step": 1813 }, { "epoch": 0.27, "learning_rate": 1.7117881816581478e-05, "loss": 0.8292, "step": 1814 }, { "epoch": 0.27, "learning_rate": 1.7114487727005446e-05, "loss": 0.8151, "step": 1815 }, { "epoch": 0.27, "learning_rate": 1.7111091977017412e-05, "loss": 0.7536, "step": 1816 }, { "epoch": 0.27, "learning_rate": 1.7107694567409894e-05, "loss": 0.7856, "step": 1817 }, { "epoch": 0.27, "learning_rate": 1.71042954989758e-05, "loss": 0.8175, "step": 1818 }, { "epoch": 0.27, "learning_rate": 1.710089477250841e-05, "loss": 0.7666, "step": 1819 }, { "epoch": 0.27, "learning_rate": 1.7097492388801413e-05, "loss": 0.8204, "step": 1820 }, { "epoch": 0.27, "learning_rate": 1.7094088348648858e-05, "loss": 0.7613, "step": 1821 }, { "epoch": 0.27, "learning_rate": 1.7090682652845206e-05, "loss": 0.8099, "step": 1822 }, { "epoch": 0.27, "learning_rate": 1.7087275302185295e-05, "loss": 0.8261, "step": 1823 }, { "epoch": 0.27, "learning_rate": 1.7083866297464345e-05, "loss": 0.8216, "step": 1824 }, { "epoch": 0.27, "learning_rate": 1.7080455639477963e-05, "loss": 0.8373, "step": 1825 }, { "epoch": 0.27, "learning_rate": 1.707704332902215e-05, "loss": 0.773, "step": 1826 }, { "epoch": 0.27, "learning_rate": 1.7073629366893278e-05, "loss": 0.7914, "step": 1827 }, { "epoch": 0.27, "learning_rate": 1.7070213753888122e-05, "loss": 0.8642, "step": 1828 }, { "epoch": 0.27, "learning_rate": 1.7066796490803832e-05, "loss": 0.7866, "step": 1829 }, { "epoch": 0.27, "learning_rate": 1.706337757843794e-05, "loss": 0.7806, "step": 1830 }, { "epoch": 0.27, "learning_rate": 1.705995701758837e-05, "loss": 0.7845, "step": 1831 }, { "epoch": 0.27, "learning_rate": 1.705653480905343e-05, "loss": 0.7603, "step": 1832 }, { "epoch": 0.27, "learning_rate": 1.705311095363181e-05, "loss": 0.6938, "step": 1833 }, { "epoch": 0.27, "learning_rate": 1.7049685452122585e-05, "loss": 0.677, "step": 1834 }, { "epoch": 0.27, "learning_rate": 1.7046258305325218e-05, "loss": 0.818, "step": 1835 }, { "epoch": 0.27, "learning_rate": 1.704282951403954e-05, "loss": 0.8114, "step": 1836 }, { "epoch": 0.27, "learning_rate": 1.703939907906579e-05, "loss": 0.8116, "step": 1837 }, { "epoch": 0.27, "learning_rate": 1.7035967001204576e-05, "loss": 0.8433, "step": 1838 }, { "epoch": 0.27, "learning_rate": 1.703253328125689e-05, "loss": 0.84, "step": 1839 }, { "epoch": 0.27, "learning_rate": 1.7029097920024112e-05, "loss": 0.7815, "step": 1840 }, { "epoch": 0.27, "learning_rate": 1.7025660918308e-05, "loss": 0.7211, "step": 1841 }, { "epoch": 0.27, "learning_rate": 1.70222222769107e-05, "loss": 0.7357, "step": 1842 }, { "epoch": 0.27, "learning_rate": 1.701878199663474e-05, "loss": 0.8779, "step": 1843 }, { "epoch": 0.28, "learning_rate": 1.701534007828302e-05, "loss": 0.8241, "step": 1844 }, { "epoch": 0.28, "learning_rate": 1.701189652265884e-05, "loss": 0.319, "step": 1845 }, { "epoch": 0.28, "learning_rate": 1.7008451330565864e-05, "loss": 0.743, "step": 1846 }, { "epoch": 0.28, "learning_rate": 1.7005004502808154e-05, "loss": 0.8585, "step": 1847 }, { "epoch": 0.28, "learning_rate": 1.7001556040190148e-05, "loss": 0.6885, "step": 1848 }, { "epoch": 0.28, "learning_rate": 1.6998105943516657e-05, "loss": 0.8205, "step": 1849 }, { "epoch": 0.28, "learning_rate": 1.6994654213592886e-05, "loss": 0.7218, "step": 1850 }, { "epoch": 0.28, "learning_rate": 1.699120085122441e-05, "loss": 0.831, "step": 1851 }, { "epoch": 0.28, "learning_rate": 1.6987745857217197e-05, "loss": 0.8353, "step": 1852 }, { "epoch": 0.28, "learning_rate": 1.6984289232377586e-05, "loss": 0.8089, "step": 1853 }, { "epoch": 0.28, "learning_rate": 1.69808309775123e-05, "loss": 0.8302, "step": 1854 }, { "epoch": 0.28, "learning_rate": 1.6977371093428444e-05, "loss": 0.7581, "step": 1855 }, { "epoch": 0.28, "learning_rate": 1.6973909580933503e-05, "loss": 0.8487, "step": 1856 }, { "epoch": 0.28, "learning_rate": 1.6970446440835338e-05, "loss": 0.7061, "step": 1857 }, { "epoch": 0.28, "learning_rate": 1.696698167394219e-05, "loss": 0.8117, "step": 1858 }, { "epoch": 0.28, "learning_rate": 1.696351528106269e-05, "loss": 0.7325, "step": 1859 }, { "epoch": 0.28, "learning_rate": 1.6960047263005833e-05, "loss": 0.7522, "step": 1860 }, { "epoch": 0.28, "learning_rate": 1.695657762058101e-05, "loss": 0.7519, "step": 1861 }, { "epoch": 0.28, "learning_rate": 1.6953106354597972e-05, "loss": 0.8167, "step": 1862 }, { "epoch": 0.28, "learning_rate": 1.6949633465866865e-05, "loss": 0.8578, "step": 1863 }, { "epoch": 0.28, "learning_rate": 1.6946158955198203e-05, "loss": 0.7761, "step": 1864 }, { "epoch": 0.28, "learning_rate": 1.6942682823402893e-05, "loss": 0.7303, "step": 1865 }, { "epoch": 0.28, "learning_rate": 1.69392050712922e-05, "loss": 0.7772, "step": 1866 }, { "epoch": 0.28, "learning_rate": 1.6935725699677786e-05, "loss": 0.8034, "step": 1867 }, { "epoch": 0.28, "learning_rate": 1.693224470937168e-05, "loss": 0.8403, "step": 1868 }, { "epoch": 0.28, "learning_rate": 1.692876210118629e-05, "loss": 0.8189, "step": 1869 }, { "epoch": 0.28, "learning_rate": 1.6925277875934402e-05, "loss": 0.3117, "step": 1870 }, { "epoch": 0.28, "learning_rate": 1.692179203442918e-05, "loss": 0.7892, "step": 1871 }, { "epoch": 0.28, "learning_rate": 1.6918304577484174e-05, "loss": 0.7792, "step": 1872 }, { "epoch": 0.28, "learning_rate": 1.6914815505913295e-05, "loss": 0.8192, "step": 1873 }, { "epoch": 0.28, "learning_rate": 1.6911324820530842e-05, "loss": 0.8041, "step": 1874 }, { "epoch": 0.28, "learning_rate": 1.6907832522151487e-05, "loss": 0.8285, "step": 1875 }, { "epoch": 0.28, "learning_rate": 1.6904338611590277e-05, "loss": 0.8488, "step": 1876 }, { "epoch": 0.28, "learning_rate": 1.690084308966264e-05, "loss": 0.7844, "step": 1877 }, { "epoch": 0.28, "learning_rate": 1.689734595718437e-05, "loss": 0.755, "step": 1878 }, { "epoch": 0.28, "learning_rate": 1.6893847214971652e-05, "loss": 0.768, "step": 1879 }, { "epoch": 0.28, "learning_rate": 1.6890346863841034e-05, "loss": 0.7572, "step": 1880 }, { "epoch": 0.28, "learning_rate": 1.688684490460945e-05, "loss": 0.7794, "step": 1881 }, { "epoch": 0.28, "learning_rate": 1.6883341338094194e-05, "loss": 0.8228, "step": 1882 }, { "epoch": 0.28, "learning_rate": 1.687983616511295e-05, "loss": 0.7655, "step": 1883 }, { "epoch": 0.28, "learning_rate": 1.6876329386483772e-05, "loss": 0.7859, "step": 1884 }, { "epoch": 0.28, "learning_rate": 1.687282100302509e-05, "loss": 0.3375, "step": 1885 }, { "epoch": 0.28, "learning_rate": 1.6869311015555697e-05, "loss": 0.7746, "step": 1886 }, { "epoch": 0.28, "learning_rate": 1.6865799424894784e-05, "loss": 0.8126, "step": 1887 }, { "epoch": 0.28, "learning_rate": 1.686228623186189e-05, "loss": 0.3459, "step": 1888 }, { "epoch": 0.28, "learning_rate": 1.685877143727695e-05, "loss": 0.7876, "step": 1889 }, { "epoch": 0.28, "learning_rate": 1.6855255041960255e-05, "loss": 0.6984, "step": 1890 }, { "epoch": 0.28, "learning_rate": 1.6851737046732483e-05, "loss": 0.8723, "step": 1891 }, { "epoch": 0.28, "learning_rate": 1.6848217452414675e-05, "loss": 0.7829, "step": 1892 }, { "epoch": 0.28, "learning_rate": 1.684469625982826e-05, "loss": 0.7873, "step": 1893 }, { "epoch": 0.28, "learning_rate": 1.6841173469795017e-05, "loss": 0.8004, "step": 1894 }, { "epoch": 0.28, "learning_rate": 1.683764908313712e-05, "loss": 0.765, "step": 1895 }, { "epoch": 0.28, "learning_rate": 1.6834123100677104e-05, "loss": 0.7826, "step": 1896 }, { "epoch": 0.28, "learning_rate": 1.6830595523237877e-05, "loss": 0.7483, "step": 1897 }, { "epoch": 0.28, "learning_rate": 1.682706635164273e-05, "loss": 0.7979, "step": 1898 }, { "epoch": 0.28, "learning_rate": 1.6823535586715302e-05, "loss": 0.8156, "step": 1899 }, { "epoch": 0.28, "learning_rate": 1.6820003229279635e-05, "loss": 0.8404, "step": 1900 }, { "epoch": 0.28, "learning_rate": 1.6816469280160114e-05, "loss": 0.7701, "step": 1901 }, { "epoch": 0.28, "learning_rate": 1.681293374018152e-05, "loss": 0.7188, "step": 1902 }, { "epoch": 0.28, "learning_rate": 1.6809396610168984e-05, "loss": 0.8638, "step": 1903 }, { "epoch": 0.28, "learning_rate": 1.6805857890948025e-05, "loss": 0.7502, "step": 1904 }, { "epoch": 0.28, "learning_rate": 1.6802317583344516e-05, "loss": 0.9071, "step": 1905 }, { "epoch": 0.28, "learning_rate": 1.679877568818472e-05, "loss": 0.801, "step": 1906 }, { "epoch": 0.28, "learning_rate": 1.679523220629526e-05, "loss": 0.771, "step": 1907 }, { "epoch": 0.28, "learning_rate": 1.6791687138503126e-05, "loss": 0.8069, "step": 1908 }, { "epoch": 0.28, "learning_rate": 1.6788140485635683e-05, "loss": 0.777, "step": 1909 }, { "epoch": 0.28, "learning_rate": 1.6784592248520666e-05, "loss": 0.8047, "step": 1910 }, { "epoch": 0.29, "learning_rate": 1.6781042427986177e-05, "loss": 0.7777, "step": 1911 }, { "epoch": 0.29, "learning_rate": 1.6777491024860693e-05, "loss": 0.7573, "step": 1912 }, { "epoch": 0.29, "learning_rate": 1.677393803997306e-05, "loss": 0.8233, "step": 1913 }, { "epoch": 0.29, "learning_rate": 1.6770383474152484e-05, "loss": 0.7631, "step": 1914 }, { "epoch": 0.29, "learning_rate": 1.6766827328228544e-05, "loss": 0.7483, "step": 1915 }, { "epoch": 0.29, "learning_rate": 1.6763269603031196e-05, "loss": 0.806, "step": 1916 }, { "epoch": 0.29, "learning_rate": 1.6759710299390753e-05, "loss": 0.7693, "step": 1917 }, { "epoch": 0.29, "learning_rate": 1.6756149418137907e-05, "loss": 0.8321, "step": 1918 }, { "epoch": 0.29, "learning_rate": 1.675258696010371e-05, "loss": 0.8094, "step": 1919 }, { "epoch": 0.29, "learning_rate": 1.6749022926119582e-05, "loss": 0.8684, "step": 1920 }, { "epoch": 0.29, "learning_rate": 1.674545731701732e-05, "loss": 0.7351, "step": 1921 }, { "epoch": 0.29, "learning_rate": 1.674189013362908e-05, "loss": 0.7976, "step": 1922 }, { "epoch": 0.29, "learning_rate": 1.6738321376787383e-05, "loss": 0.7847, "step": 1923 }, { "epoch": 0.29, "learning_rate": 1.6734751047325127e-05, "loss": 0.7113, "step": 1924 }, { "epoch": 0.29, "learning_rate": 1.673117914607557e-05, "loss": 0.6899, "step": 1925 }, { "epoch": 0.29, "learning_rate": 1.672760567387234e-05, "loss": 0.722, "step": 1926 }, { "epoch": 0.29, "learning_rate": 1.6724030631549436e-05, "loss": 0.7929, "step": 1927 }, { "epoch": 0.29, "learning_rate": 1.6720454019941207e-05, "loss": 0.8035, "step": 1928 }, { "epoch": 0.29, "learning_rate": 1.6716875839882384e-05, "loss": 0.8117, "step": 1929 }, { "epoch": 0.29, "learning_rate": 1.671329609220806e-05, "loss": 0.8113, "step": 1930 }, { "epoch": 0.29, "learning_rate": 1.670971477775369e-05, "loss": 0.7916, "step": 1931 }, { "epoch": 0.29, "learning_rate": 1.67061318973551e-05, "loss": 0.7698, "step": 1932 }, { "epoch": 0.29, "learning_rate": 1.6702547451848482e-05, "loss": 0.7708, "step": 1933 }, { "epoch": 0.29, "learning_rate": 1.669896144207039e-05, "loss": 0.7908, "step": 1934 }, { "epoch": 0.29, "learning_rate": 1.6695373868857736e-05, "loss": 0.7157, "step": 1935 }, { "epoch": 0.29, "learning_rate": 1.669178473304781e-05, "loss": 0.8828, "step": 1936 }, { "epoch": 0.29, "learning_rate": 1.6688194035478264e-05, "loss": 0.8109, "step": 1937 }, { "epoch": 0.29, "learning_rate": 1.6684601776987106e-05, "loss": 0.7733, "step": 1938 }, { "epoch": 0.29, "learning_rate": 1.6681007958412715e-05, "loss": 0.8226, "step": 1939 }, { "epoch": 0.29, "learning_rate": 1.6677412580593838e-05, "loss": 0.8134, "step": 1940 }, { "epoch": 0.29, "learning_rate": 1.667381564436957e-05, "loss": 0.7491, "step": 1941 }, { "epoch": 0.29, "learning_rate": 1.6670217150579393e-05, "loss": 0.6976, "step": 1942 }, { "epoch": 0.29, "learning_rate": 1.6666617100063133e-05, "loss": 0.7862, "step": 1943 }, { "epoch": 0.29, "learning_rate": 1.666301549366099e-05, "loss": 0.8327, "step": 1944 }, { "epoch": 0.29, "learning_rate": 1.665941233221352e-05, "loss": 0.7479, "step": 1945 }, { "epoch": 0.29, "learning_rate": 1.665580761656165e-05, "loss": 0.7753, "step": 1946 }, { "epoch": 0.29, "learning_rate": 1.6652201347546657e-05, "loss": 0.8577, "step": 1947 }, { "epoch": 0.29, "learning_rate": 1.6648593526010197e-05, "loss": 0.8091, "step": 1948 }, { "epoch": 0.29, "learning_rate": 1.6644984152794275e-05, "loss": 0.8272, "step": 1949 }, { "epoch": 0.29, "learning_rate": 1.6641373228741267e-05, "loss": 0.8046, "step": 1950 }, { "epoch": 0.29, "learning_rate": 1.6637760754693904e-05, "loss": 0.7851, "step": 1951 }, { "epoch": 0.29, "learning_rate": 1.6634146731495288e-05, "loss": 0.7931, "step": 1952 }, { "epoch": 0.29, "learning_rate": 1.6630531159988865e-05, "loss": 0.3629, "step": 1953 }, { "epoch": 0.29, "learning_rate": 1.662691404101846e-05, "loss": 0.816, "step": 1954 }, { "epoch": 0.29, "learning_rate": 1.6623295375428262e-05, "loss": 0.8036, "step": 1955 }, { "epoch": 0.29, "learning_rate": 1.6619675164062795e-05, "loss": 0.3288, "step": 1956 }, { "epoch": 0.29, "learning_rate": 1.6616053407766973e-05, "loss": 0.789, "step": 1957 }, { "epoch": 0.29, "learning_rate": 1.661243010738605e-05, "loss": 0.7406, "step": 1958 }, { "epoch": 0.29, "learning_rate": 1.6608805263765656e-05, "loss": 0.7236, "step": 1959 }, { "epoch": 0.29, "learning_rate": 1.660517887775177e-05, "loss": 0.7458, "step": 1960 }, { "epoch": 0.29, "learning_rate": 1.6601550950190735e-05, "loss": 0.8251, "step": 1961 }, { "epoch": 0.29, "learning_rate": 1.6597921481929255e-05, "loss": 0.9029, "step": 1962 }, { "epoch": 0.29, "learning_rate": 1.659429047381439e-05, "loss": 0.7835, "step": 1963 }, { "epoch": 0.29, "learning_rate": 1.659065792669356e-05, "loss": 0.7668, "step": 1964 }, { "epoch": 0.29, "learning_rate": 1.6587023841414546e-05, "loss": 0.8132, "step": 1965 }, { "epoch": 0.29, "learning_rate": 1.6583388218825495e-05, "loss": 0.8169, "step": 1966 }, { "epoch": 0.29, "learning_rate": 1.6579751059774894e-05, "loss": 0.8142, "step": 1967 }, { "epoch": 0.29, "learning_rate": 1.657611236511161e-05, "loss": 0.7665, "step": 1968 }, { "epoch": 0.29, "learning_rate": 1.657247213568485e-05, "loss": 0.8395, "step": 1969 }, { "epoch": 0.29, "learning_rate": 1.6568830372344196e-05, "loss": 0.8074, "step": 1970 }, { "epoch": 0.29, "learning_rate": 1.6565187075939576e-05, "loss": 0.8442, "step": 1971 }, { "epoch": 0.29, "learning_rate": 1.6561542247321273e-05, "loss": 0.7503, "step": 1972 }, { "epoch": 0.29, "learning_rate": 1.655789588733994e-05, "loss": 0.7978, "step": 1973 }, { "epoch": 0.29, "learning_rate": 1.6554247996846584e-05, "loss": 0.8002, "step": 1974 }, { "epoch": 0.29, "learning_rate": 1.655059857669256e-05, "loss": 0.8631, "step": 1975 }, { "epoch": 0.29, "learning_rate": 1.6546947627729593e-05, "loss": 0.7695, "step": 1976 }, { "epoch": 0.29, "learning_rate": 1.6543295150809755e-05, "loss": 0.3464, "step": 1977 }, { "epoch": 0.3, "learning_rate": 1.653964114678548e-05, "loss": 0.7748, "step": 1978 }, { "epoch": 0.3, "learning_rate": 1.653598561650955e-05, "loss": 0.7215, "step": 1979 }, { "epoch": 0.3, "learning_rate": 1.6532328560835115e-05, "loss": 0.6618, "step": 1980 }, { "epoch": 0.3, "learning_rate": 1.6528669980615672e-05, "loss": 0.3439, "step": 1981 }, { "epoch": 0.3, "learning_rate": 1.6525009876705084e-05, "loss": 0.8207, "step": 1982 }, { "epoch": 0.3, "learning_rate": 1.652134824995756e-05, "loss": 0.8754, "step": 1983 }, { "epoch": 0.3, "learning_rate": 1.651768510122766e-05, "loss": 0.7405, "step": 1984 }, { "epoch": 0.3, "learning_rate": 1.651402043137032e-05, "loss": 0.7248, "step": 1985 }, { "epoch": 0.3, "learning_rate": 1.6510354241240805e-05, "loss": 0.7358, "step": 1986 }, { "epoch": 0.3, "learning_rate": 1.6506686531694755e-05, "loss": 0.8532, "step": 1987 }, { "epoch": 0.3, "learning_rate": 1.6503017303588153e-05, "loss": 0.8174, "step": 1988 }, { "epoch": 0.3, "learning_rate": 1.6499346557777344e-05, "loss": 0.7996, "step": 1989 }, { "epoch": 0.3, "learning_rate": 1.649567429511902e-05, "loss": 0.7457, "step": 1990 }, { "epoch": 0.3, "learning_rate": 1.6492000516470232e-05, "loss": 0.7909, "step": 1991 }, { "epoch": 0.3, "learning_rate": 1.6488325222688385e-05, "loss": 0.7968, "step": 1992 }, { "epoch": 0.3, "learning_rate": 1.6484648414631236e-05, "loss": 0.7384, "step": 1993 }, { "epoch": 0.3, "learning_rate": 1.6480970093156892e-05, "loss": 0.8613, "step": 1994 }, { "epoch": 0.3, "learning_rate": 1.6477290259123816e-05, "loss": 0.8656, "step": 1995 }, { "epoch": 0.3, "learning_rate": 1.6473608913390836e-05, "loss": 0.8303, "step": 1996 }, { "epoch": 0.3, "learning_rate": 1.6469926056817107e-05, "loss": 0.8067, "step": 1997 }, { "epoch": 0.3, "learning_rate": 1.6466241690262155e-05, "loss": 0.7817, "step": 1998 }, { "epoch": 0.3, "learning_rate": 1.646255581458586e-05, "loss": 0.8789, "step": 1999 }, { "epoch": 0.3, "learning_rate": 1.6458868430648445e-05, "loss": 0.7595, "step": 2000 }, { "epoch": 0.3, "learning_rate": 1.645517953931049e-05, "loss": 0.8557, "step": 2001 }, { "epoch": 0.3, "learning_rate": 1.6451489141432923e-05, "loss": 0.8472, "step": 2002 }, { "epoch": 0.3, "learning_rate": 1.6447797237877027e-05, "loss": 0.8892, "step": 2003 }, { "epoch": 0.3, "learning_rate": 1.6444103829504436e-05, "loss": 0.3384, "step": 2004 }, { "epoch": 0.3, "learning_rate": 1.6440408917177136e-05, "loss": 0.7633, "step": 2005 }, { "epoch": 0.3, "learning_rate": 1.643671250175746e-05, "loss": 0.6829, "step": 2006 }, { "epoch": 0.3, "learning_rate": 1.6433014584108095e-05, "loss": 0.8, "step": 2007 }, { "epoch": 0.3, "learning_rate": 1.642931516509208e-05, "loss": 0.7112, "step": 2008 }, { "epoch": 0.3, "learning_rate": 1.6425614245572804e-05, "loss": 0.7407, "step": 2009 }, { "epoch": 0.3, "learning_rate": 1.6421911826414e-05, "loss": 0.7816, "step": 2010 }, { "epoch": 0.3, "learning_rate": 1.6418207908479757e-05, "loss": 0.8132, "step": 2011 }, { "epoch": 0.3, "learning_rate": 1.6414502492634514e-05, "loss": 0.7915, "step": 2012 }, { "epoch": 0.3, "learning_rate": 1.6410795579743056e-05, "loss": 0.748, "step": 2013 }, { "epoch": 0.3, "learning_rate": 1.6407087170670524e-05, "loss": 0.3373, "step": 2014 }, { "epoch": 0.3, "learning_rate": 1.6403377266282398e-05, "loss": 0.8584, "step": 2015 }, { "epoch": 0.3, "learning_rate": 1.6399665867444516e-05, "loss": 0.8123, "step": 2016 }, { "epoch": 0.3, "learning_rate": 1.639595297502306e-05, "loss": 0.7467, "step": 2017 }, { "epoch": 0.3, "learning_rate": 1.6392238589884562e-05, "loss": 0.7665, "step": 2018 }, { "epoch": 0.3, "learning_rate": 1.6388522712895906e-05, "loss": 0.6938, "step": 2019 }, { "epoch": 0.3, "learning_rate": 1.6384805344924315e-05, "loss": 0.8237, "step": 2020 }, { "epoch": 0.3, "learning_rate": 1.6381086486837367e-05, "loss": 0.8133, "step": 2021 }, { "epoch": 0.3, "learning_rate": 1.637736613950299e-05, "loss": 0.7968, "step": 2022 }, { "epoch": 0.3, "learning_rate": 1.637364430378945e-05, "loss": 0.7802, "step": 2023 }, { "epoch": 0.3, "learning_rate": 1.636992098056537e-05, "loss": 0.8671, "step": 2024 }, { "epoch": 0.3, "learning_rate": 1.6366196170699716e-05, "loss": 0.7239, "step": 2025 }, { "epoch": 0.3, "learning_rate": 1.63624698750618e-05, "loss": 0.7977, "step": 2026 }, { "epoch": 0.3, "learning_rate": 1.6358742094521285e-05, "loss": 0.7627, "step": 2027 }, { "epoch": 0.3, "learning_rate": 1.6355012829948172e-05, "loss": 0.799, "step": 2028 }, { "epoch": 0.3, "learning_rate": 1.635128208221282e-05, "loss": 0.7845, "step": 2029 }, { "epoch": 0.3, "learning_rate": 1.6347549852185926e-05, "loss": 0.7403, "step": 2030 }, { "epoch": 0.3, "learning_rate": 1.6343816140738534e-05, "loss": 0.8134, "step": 2031 }, { "epoch": 0.3, "learning_rate": 1.6340080948742036e-05, "loss": 0.7435, "step": 2032 }, { "epoch": 0.3, "learning_rate": 1.633634427706817e-05, "loss": 0.8298, "step": 2033 }, { "epoch": 0.3, "learning_rate": 1.6332606126589016e-05, "loss": 0.7764, "step": 2034 }, { "epoch": 0.3, "learning_rate": 1.6328866498176992e-05, "loss": 0.7461, "step": 2035 }, { "epoch": 0.3, "learning_rate": 1.6325125392704887e-05, "loss": 0.8491, "step": 2036 }, { "epoch": 0.3, "learning_rate": 1.6321382811045807e-05, "loss": 0.8315, "step": 2037 }, { "epoch": 0.3, "learning_rate": 1.6317638754073214e-05, "loss": 0.32, "step": 2038 }, { "epoch": 0.3, "learning_rate": 1.6313893222660914e-05, "loss": 0.7817, "step": 2039 }, { "epoch": 0.3, "learning_rate": 1.6310146217683053e-05, "loss": 0.8356, "step": 2040 }, { "epoch": 0.3, "learning_rate": 1.630639774001413e-05, "loss": 0.7837, "step": 2041 }, { "epoch": 0.3, "learning_rate": 1.6302647790528983e-05, "loss": 0.3398, "step": 2042 }, { "epoch": 0.3, "learning_rate": 1.629889637010279e-05, "loss": 0.8455, "step": 2043 }, { "epoch": 0.3, "learning_rate": 1.629514347961107e-05, "loss": 0.7445, "step": 2044 }, { "epoch": 0.3, "learning_rate": 1.6291389119929695e-05, "loss": 0.7244, "step": 2045 }, { "epoch": 0.31, "learning_rate": 1.6287633291934872e-05, "loss": 0.8278, "step": 2046 }, { "epoch": 0.31, "learning_rate": 1.628387599650316e-05, "loss": 0.8116, "step": 2047 }, { "epoch": 0.31, "learning_rate": 1.6280117234511446e-05, "loss": 0.358, "step": 2048 }, { "epoch": 0.31, "learning_rate": 1.627635700683697e-05, "loss": 0.7021, "step": 2049 }, { "epoch": 0.31, "learning_rate": 1.6272595314357312e-05, "loss": 0.7055, "step": 2050 }, { "epoch": 0.31, "learning_rate": 1.6268832157950398e-05, "loss": 0.8518, "step": 2051 }, { "epoch": 0.31, "learning_rate": 1.626506753849448e-05, "loss": 0.828, "step": 2052 }, { "epoch": 0.31, "learning_rate": 1.6261301456868168e-05, "loss": 0.9041, "step": 2053 }, { "epoch": 0.31, "learning_rate": 1.625753391395041e-05, "loss": 0.8276, "step": 2054 }, { "epoch": 0.31, "learning_rate": 1.625376491062049e-05, "loss": 0.8236, "step": 2055 }, { "epoch": 0.31, "learning_rate": 1.6249994447758034e-05, "loss": 0.3248, "step": 2056 }, { "epoch": 0.31, "learning_rate": 1.6246222526243017e-05, "loss": 0.744, "step": 2057 }, { "epoch": 0.31, "learning_rate": 1.6242449146955733e-05, "loss": 0.2999, "step": 2058 }, { "epoch": 0.31, "learning_rate": 1.6238674310776846e-05, "loss": 0.7823, "step": 2059 }, { "epoch": 0.31, "learning_rate": 1.6234898018587336e-05, "loss": 0.7852, "step": 2060 }, { "epoch": 0.31, "learning_rate": 1.6231120271268535e-05, "loss": 0.9003, "step": 2061 }, { "epoch": 0.31, "learning_rate": 1.622734106970211e-05, "loss": 0.8868, "step": 2062 }, { "epoch": 0.31, "learning_rate": 1.6223560414770068e-05, "loss": 0.7266, "step": 2063 }, { "epoch": 0.31, "learning_rate": 1.6219778307354758e-05, "loss": 0.3321, "step": 2064 }, { "epoch": 0.31, "learning_rate": 1.6215994748338863e-05, "loss": 0.8113, "step": 2065 }, { "epoch": 0.31, "learning_rate": 1.621220973860541e-05, "loss": 0.7402, "step": 2066 }, { "epoch": 0.31, "learning_rate": 1.6208423279037757e-05, "loss": 0.8334, "step": 2067 }, { "epoch": 0.31, "learning_rate": 1.6204635370519614e-05, "loss": 0.7857, "step": 2068 }, { "epoch": 0.31, "learning_rate": 1.6200846013935008e-05, "loss": 0.8536, "step": 2069 }, { "epoch": 0.31, "learning_rate": 1.619705521016833e-05, "loss": 0.8189, "step": 2070 }, { "epoch": 0.31, "learning_rate": 1.619326296010429e-05, "loss": 0.7878, "step": 2071 }, { "epoch": 0.31, "learning_rate": 1.6189469264627936e-05, "loss": 0.7574, "step": 2072 }, { "epoch": 0.31, "learning_rate": 1.618567412462466e-05, "loss": 0.8473, "step": 2073 }, { "epoch": 0.31, "learning_rate": 1.6181877540980196e-05, "loss": 0.7715, "step": 2074 }, { "epoch": 0.31, "learning_rate": 1.6178079514580606e-05, "loss": 0.7114, "step": 2075 }, { "epoch": 0.31, "learning_rate": 1.6174280046312286e-05, "loss": 0.8703, "step": 2076 }, { "epoch": 0.31, "learning_rate": 1.6170479137061975e-05, "loss": 0.7657, "step": 2077 }, { "epoch": 0.31, "learning_rate": 1.616667678771675e-05, "loss": 0.8073, "step": 2078 }, { "epoch": 0.31, "learning_rate": 1.616287299916402e-05, "loss": 0.699, "step": 2079 }, { "epoch": 0.31, "learning_rate": 1.6159067772291525e-05, "loss": 0.7256, "step": 2080 }, { "epoch": 0.31, "learning_rate": 1.6155261107987357e-05, "loss": 0.8416, "step": 2081 }, { "epoch": 0.31, "learning_rate": 1.6151453007139923e-05, "loss": 0.7869, "step": 2082 }, { "epoch": 0.31, "learning_rate": 1.614764347063798e-05, "loss": 0.777, "step": 2083 }, { "epoch": 0.31, "learning_rate": 1.6143832499370616e-05, "loss": 0.7656, "step": 2084 }, { "epoch": 0.31, "learning_rate": 1.6140020094227248e-05, "loss": 0.7776, "step": 2085 }, { "epoch": 0.31, "learning_rate": 1.6136206256097636e-05, "loss": 0.8365, "step": 2086 }, { "epoch": 0.31, "learning_rate": 1.6132390985871874e-05, "loss": 0.3447, "step": 2087 }, { "epoch": 0.31, "learning_rate": 1.6128574284440385e-05, "loss": 0.8326, "step": 2088 }, { "epoch": 0.31, "learning_rate": 1.6124756152693926e-05, "loss": 0.8497, "step": 2089 }, { "epoch": 0.31, "learning_rate": 1.6120936591523594e-05, "loss": 0.7598, "step": 2090 }, { "epoch": 0.31, "learning_rate": 1.611711560182081e-05, "loss": 0.7284, "step": 2091 }, { "epoch": 0.31, "learning_rate": 1.6113293184477343e-05, "loss": 0.7211, "step": 2092 }, { "epoch": 0.31, "learning_rate": 1.610946934038528e-05, "loss": 0.7723, "step": 2093 }, { "epoch": 0.31, "learning_rate": 1.6105644070437048e-05, "loss": 0.8755, "step": 2094 }, { "epoch": 0.31, "learning_rate": 1.610181737552541e-05, "loss": 0.7911, "step": 2095 }, { "epoch": 0.31, "learning_rate": 1.6097989256543454e-05, "loss": 0.8035, "step": 2096 }, { "epoch": 0.31, "learning_rate": 1.6094159714384603e-05, "loss": 0.843, "step": 2097 }, { "epoch": 0.31, "learning_rate": 1.6090328749942616e-05, "loss": 0.763, "step": 2098 }, { "epoch": 0.31, "learning_rate": 1.608649636411158e-05, "loss": 0.7439, "step": 2099 }, { "epoch": 0.31, "learning_rate": 1.6082662557785918e-05, "loss": 0.8123, "step": 2100 }, { "epoch": 0.31, "learning_rate": 1.607882733186038e-05, "loss": 0.8033, "step": 2101 }, { "epoch": 0.31, "learning_rate": 1.607499068723004e-05, "loss": 0.7873, "step": 2102 }, { "epoch": 0.31, "learning_rate": 1.6071152624790328e-05, "loss": 0.9228, "step": 2103 }, { "epoch": 0.31, "learning_rate": 1.6067313145436973e-05, "loss": 0.3483, "step": 2104 }, { "epoch": 0.31, "learning_rate": 1.6063472250066064e-05, "loss": 0.8527, "step": 2105 }, { "epoch": 0.31, "learning_rate": 1.6059629939573994e-05, "loss": 0.8509, "step": 2106 }, { "epoch": 0.31, "learning_rate": 1.6055786214857508e-05, "loss": 0.7462, "step": 2107 }, { "epoch": 0.31, "learning_rate": 1.6051941076813674e-05, "loss": 0.802, "step": 2108 }, { "epoch": 0.31, "learning_rate": 1.6048094526339877e-05, "loss": 0.7975, "step": 2109 }, { "epoch": 0.31, "learning_rate": 1.6044246564333855e-05, "loss": 0.7702, "step": 2110 }, { "epoch": 0.31, "learning_rate": 1.6040397191693658e-05, "loss": 0.7742, "step": 2111 }, { "epoch": 0.31, "learning_rate": 1.6036546409317664e-05, "loss": 0.8027, "step": 2112 }, { "epoch": 0.32, "learning_rate": 1.6032694218104597e-05, "loss": 0.7505, "step": 2113 }, { "epoch": 0.32, "learning_rate": 1.6028840618953498e-05, "loss": 0.8381, "step": 2114 }, { "epoch": 0.32, "learning_rate": 1.6024985612763728e-05, "loss": 0.8137, "step": 2115 }, { "epoch": 0.32, "learning_rate": 1.6021129200434996e-05, "loss": 0.7733, "step": 2116 }, { "epoch": 0.32, "learning_rate": 1.6017271382867326e-05, "loss": 0.8633, "step": 2117 }, { "epoch": 0.32, "learning_rate": 1.601341216096107e-05, "loss": 0.7801, "step": 2118 }, { "epoch": 0.32, "learning_rate": 1.600955153561692e-05, "loss": 0.8012, "step": 2119 }, { "epoch": 0.32, "learning_rate": 1.6005689507735877e-05, "loss": 0.7884, "step": 2120 }, { "epoch": 0.32, "learning_rate": 1.6001826078219282e-05, "loss": 0.786, "step": 2121 }, { "epoch": 0.32, "learning_rate": 1.5997961247968802e-05, "loss": 0.7596, "step": 2122 }, { "epoch": 0.32, "learning_rate": 1.599409501788643e-05, "loss": 0.8195, "step": 2123 }, { "epoch": 0.32, "learning_rate": 1.5990227388874475e-05, "loss": 0.9119, "step": 2124 }, { "epoch": 0.32, "learning_rate": 1.5986358361835597e-05, "loss": 0.8036, "step": 2125 }, { "epoch": 0.32, "learning_rate": 1.5982487937672756e-05, "loss": 0.802, "step": 2126 }, { "epoch": 0.32, "learning_rate": 1.5978616117289256e-05, "loss": 0.7551, "step": 2127 }, { "epoch": 0.32, "learning_rate": 1.5974742901588713e-05, "loss": 0.8552, "step": 2128 }, { "epoch": 0.32, "learning_rate": 1.5970868291475085e-05, "loss": 0.359, "step": 2129 }, { "epoch": 0.32, "learning_rate": 1.5966992287852637e-05, "loss": 0.8432, "step": 2130 }, { "epoch": 0.32, "learning_rate": 1.596311489162598e-05, "loss": 0.8489, "step": 2131 }, { "epoch": 0.32, "learning_rate": 1.5959236103700028e-05, "loss": 0.8643, "step": 2132 }, { "epoch": 0.32, "learning_rate": 1.5955355924980036e-05, "loss": 0.8304, "step": 2133 }, { "epoch": 0.32, "learning_rate": 1.5951474356371576e-05, "loss": 0.818, "step": 2134 }, { "epoch": 0.32, "learning_rate": 1.5947591398780545e-05, "loss": 0.7775, "step": 2135 }, { "epoch": 0.32, "learning_rate": 1.594370705311317e-05, "loss": 0.7592, "step": 2136 }, { "epoch": 0.32, "learning_rate": 1.5939821320275995e-05, "loss": 0.7563, "step": 2137 }, { "epoch": 0.32, "learning_rate": 1.5935934201175892e-05, "loss": 0.7802, "step": 2138 }, { "epoch": 0.32, "learning_rate": 1.593204569672005e-05, "loss": 0.7138, "step": 2139 }, { "epoch": 0.32, "learning_rate": 1.5928155807815994e-05, "loss": 0.7601, "step": 2140 }, { "epoch": 0.32, "learning_rate": 1.5924264535371556e-05, "loss": 0.7578, "step": 2141 }, { "epoch": 0.32, "learning_rate": 1.5920371880294903e-05, "loss": 0.7473, "step": 2142 }, { "epoch": 0.32, "learning_rate": 1.591647784349452e-05, "loss": 0.78, "step": 2143 }, { "epoch": 0.32, "learning_rate": 1.5912582425879216e-05, "loss": 0.7244, "step": 2144 }, { "epoch": 0.32, "learning_rate": 1.5908685628358125e-05, "loss": 0.7337, "step": 2145 }, { "epoch": 0.32, "learning_rate": 1.5904787451840695e-05, "loss": 0.7823, "step": 2146 }, { "epoch": 0.32, "learning_rate": 1.5900887897236695e-05, "loss": 0.7382, "step": 2147 }, { "epoch": 0.32, "learning_rate": 1.5896986965456233e-05, "loss": 0.8482, "step": 2148 }, { "epoch": 0.32, "learning_rate": 1.5893084657409723e-05, "loss": 0.7858, "step": 2149 }, { "epoch": 0.32, "learning_rate": 1.5889180974007898e-05, "loss": 0.3354, "step": 2150 }, { "epoch": 0.32, "learning_rate": 1.5885275916161824e-05, "loss": 0.8381, "step": 2151 }, { "epoch": 0.32, "learning_rate": 1.5881369484782877e-05, "loss": 0.7949, "step": 2152 }, { "epoch": 0.32, "learning_rate": 1.587746168078276e-05, "loss": 0.3199, "step": 2153 }, { "epoch": 0.32, "learning_rate": 1.5873552505073494e-05, "loss": 0.3491, "step": 2154 }, { "epoch": 0.32, "learning_rate": 1.586964195856742e-05, "loss": 0.8911, "step": 2155 }, { "epoch": 0.32, "learning_rate": 1.58657300421772e-05, "loss": 0.7911, "step": 2156 }, { "epoch": 0.32, "learning_rate": 1.5861816756815812e-05, "loss": 0.6577, "step": 2157 }, { "epoch": 0.32, "learning_rate": 1.5857902103396568e-05, "loss": 0.8039, "step": 2158 }, { "epoch": 0.32, "learning_rate": 1.5853986082833072e-05, "loss": 0.8857, "step": 2159 }, { "epoch": 0.32, "learning_rate": 1.5850068696039277e-05, "loss": 0.7528, "step": 2160 }, { "epoch": 0.32, "learning_rate": 1.5846149943929428e-05, "loss": 0.3271, "step": 2161 }, { "epoch": 0.32, "learning_rate": 1.5842229827418112e-05, "loss": 0.8442, "step": 2162 }, { "epoch": 0.32, "learning_rate": 1.583830834742022e-05, "loss": 0.7838, "step": 2163 }, { "epoch": 0.32, "learning_rate": 1.583438550485096e-05, "loss": 0.7779, "step": 2164 }, { "epoch": 0.32, "learning_rate": 1.583046130062587e-05, "loss": 0.3591, "step": 2165 }, { "epoch": 0.32, "learning_rate": 1.5826535735660797e-05, "loss": 0.7825, "step": 2166 }, { "epoch": 0.32, "learning_rate": 1.582260881087191e-05, "loss": 0.3261, "step": 2167 }, { "epoch": 0.32, "learning_rate": 1.5818680527175688e-05, "loss": 0.7716, "step": 2168 }, { "epoch": 0.32, "learning_rate": 1.5814750885488934e-05, "loss": 0.8079, "step": 2169 }, { "epoch": 0.32, "learning_rate": 1.5810819886728767e-05, "loss": 0.7364, "step": 2170 }, { "epoch": 0.32, "learning_rate": 1.580688753181262e-05, "loss": 0.8118, "step": 2171 }, { "epoch": 0.32, "learning_rate": 1.5802953821658248e-05, "loss": 0.8408, "step": 2172 }, { "epoch": 0.32, "learning_rate": 1.5799018757183712e-05, "loss": 0.8264, "step": 2173 }, { "epoch": 0.32, "learning_rate": 1.57950823393074e-05, "loss": 0.7241, "step": 2174 }, { "epoch": 0.32, "learning_rate": 1.5791144568948008e-05, "loss": 0.6975, "step": 2175 }, { "epoch": 0.32, "learning_rate": 1.5787205447024552e-05, "loss": 0.7515, "step": 2176 }, { "epoch": 0.32, "learning_rate": 1.578326497445637e-05, "loss": 0.7807, "step": 2177 }, { "epoch": 0.32, "learning_rate": 1.57793231521631e-05, "loss": 0.785, "step": 2178 }, { "epoch": 0.32, "learning_rate": 1.57753799810647e-05, "loss": 0.7956, "step": 2179 }, { "epoch": 0.33, "learning_rate": 1.5771435462081452e-05, "loss": 0.7797, "step": 2180 }, { "epoch": 0.33, "learning_rate": 1.5767489596133942e-05, "loss": 0.8353, "step": 2181 }, { "epoch": 0.33, "learning_rate": 1.576354238414308e-05, "loss": 0.7554, "step": 2182 }, { "epoch": 0.33, "learning_rate": 1.5759593827030076e-05, "loss": 0.8116, "step": 2183 }, { "epoch": 0.33, "learning_rate": 1.575564392571647e-05, "loss": 0.7399, "step": 2184 }, { "epoch": 0.33, "learning_rate": 1.5751692681124104e-05, "loss": 0.7634, "step": 2185 }, { "epoch": 0.33, "learning_rate": 1.574774009417514e-05, "loss": 0.7156, "step": 2186 }, { "epoch": 0.33, "learning_rate": 1.574378616579205e-05, "loss": 0.8353, "step": 2187 }, { "epoch": 0.33, "learning_rate": 1.5739830896897618e-05, "loss": 0.686, "step": 2188 }, { "epoch": 0.33, "learning_rate": 1.5735874288414947e-05, "loss": 0.7311, "step": 2189 }, { "epoch": 0.33, "learning_rate": 1.5731916341267445e-05, "loss": 0.786, "step": 2190 }, { "epoch": 0.33, "learning_rate": 1.572795705637884e-05, "loss": 0.7682, "step": 2191 }, { "epoch": 0.33, "learning_rate": 1.5723996434673164e-05, "loss": 0.8501, "step": 2192 }, { "epoch": 0.33, "learning_rate": 1.572003447707477e-05, "loss": 0.8827, "step": 2193 }, { "epoch": 0.33, "learning_rate": 1.571607118450831e-05, "loss": 0.6814, "step": 2194 }, { "epoch": 0.33, "learning_rate": 1.5712106557898764e-05, "loss": 0.7574, "step": 2195 }, { "epoch": 0.33, "learning_rate": 1.570814059817141e-05, "loss": 0.7963, "step": 2196 }, { "epoch": 0.33, "learning_rate": 1.570417330625184e-05, "loss": 0.7816, "step": 2197 }, { "epoch": 0.33, "learning_rate": 1.5700204683065966e-05, "loss": 0.7118, "step": 2198 }, { "epoch": 0.33, "learning_rate": 1.569623472954e-05, "loss": 0.8891, "step": 2199 }, { "epoch": 0.33, "learning_rate": 1.5692263446600467e-05, "loss": 0.3071, "step": 2200 }, { "epoch": 0.33, "learning_rate": 1.5688290835174208e-05, "loss": 0.8455, "step": 2201 }, { "epoch": 0.33, "learning_rate": 1.568431689618836e-05, "loss": 0.7629, "step": 2202 }, { "epoch": 0.33, "learning_rate": 1.5680341630570386e-05, "loss": 0.3111, "step": 2203 }, { "epoch": 0.33, "learning_rate": 1.5676365039248053e-05, "loss": 0.8276, "step": 2204 }, { "epoch": 0.33, "learning_rate": 1.5672387123149433e-05, "loss": 0.7637, "step": 2205 }, { "epoch": 0.33, "learning_rate": 1.566840788320291e-05, "loss": 0.7582, "step": 2206 }, { "epoch": 0.33, "learning_rate": 1.5664427320337177e-05, "loss": 0.816, "step": 2207 }, { "epoch": 0.33, "learning_rate": 1.566044543548124e-05, "loss": 0.7561, "step": 2208 }, { "epoch": 0.33, "learning_rate": 1.5656462229564405e-05, "loss": 0.3204, "step": 2209 }, { "epoch": 0.33, "learning_rate": 1.56524777035163e-05, "loss": 0.3701, "step": 2210 }, { "epoch": 0.33, "learning_rate": 1.564849185826684e-05, "loss": 0.7574, "step": 2211 }, { "epoch": 0.33, "learning_rate": 1.5644504694746273e-05, "loss": 0.7711, "step": 2212 }, { "epoch": 0.33, "learning_rate": 1.5640516213885127e-05, "loss": 0.7909, "step": 2213 }, { "epoch": 0.33, "learning_rate": 1.5636526416614263e-05, "loss": 0.8076, "step": 2214 }, { "epoch": 0.33, "learning_rate": 1.5632535303864832e-05, "loss": 0.671, "step": 2215 }, { "epoch": 0.33, "learning_rate": 1.56285428765683e-05, "loss": 0.7558, "step": 2216 }, { "epoch": 0.33, "learning_rate": 1.5624549135656443e-05, "loss": 0.7399, "step": 2217 }, { "epoch": 0.33, "learning_rate": 1.5620554082061333e-05, "loss": 0.7304, "step": 2218 }, { "epoch": 0.33, "learning_rate": 1.5616557716715357e-05, "loss": 0.8117, "step": 2219 }, { "epoch": 0.33, "learning_rate": 1.5612560040551202e-05, "loss": 0.7835, "step": 2220 }, { "epoch": 0.33, "learning_rate": 1.5608561054501866e-05, "loss": 0.8123, "step": 2221 }, { "epoch": 0.33, "learning_rate": 1.5604560759500654e-05, "loss": 0.8133, "step": 2222 }, { "epoch": 0.33, "learning_rate": 1.5600559156481167e-05, "loss": 0.8163, "step": 2223 }, { "epoch": 0.33, "learning_rate": 1.5596556246377322e-05, "loss": 0.7393, "step": 2224 }, { "epoch": 0.33, "learning_rate": 1.5592552030123337e-05, "loss": 0.8057, "step": 2225 }, { "epoch": 0.33, "learning_rate": 1.5588546508653735e-05, "loss": 0.8172, "step": 2226 }, { "epoch": 0.33, "learning_rate": 1.5584539682903335e-05, "loss": 0.7141, "step": 2227 }, { "epoch": 0.33, "learning_rate": 1.5580531553807277e-05, "loss": 0.3199, "step": 2228 }, { "epoch": 0.33, "learning_rate": 1.5576522122300996e-05, "loss": 0.8156, "step": 2229 }, { "epoch": 0.33, "learning_rate": 1.557251138932023e-05, "loss": 0.7331, "step": 2230 }, { "epoch": 0.33, "learning_rate": 1.556849935580102e-05, "loss": 0.8213, "step": 2231 }, { "epoch": 0.33, "learning_rate": 1.5564486022679715e-05, "loss": 0.7732, "step": 2232 }, { "epoch": 0.33, "learning_rate": 1.556047139089297e-05, "loss": 0.7609, "step": 2233 }, { "epoch": 0.33, "learning_rate": 1.5556455461377735e-05, "loss": 0.8981, "step": 2234 }, { "epoch": 0.33, "learning_rate": 1.5552438235071258e-05, "loss": 0.8051, "step": 2235 }, { "epoch": 0.33, "learning_rate": 1.5548419712911108e-05, "loss": 0.7296, "step": 2236 }, { "epoch": 0.33, "learning_rate": 1.5544399895835144e-05, "loss": 0.7363, "step": 2237 }, { "epoch": 0.33, "learning_rate": 1.5540378784781526e-05, "loss": 0.7549, "step": 2238 }, { "epoch": 0.33, "learning_rate": 1.5536356380688725e-05, "loss": 0.8345, "step": 2239 }, { "epoch": 0.33, "learning_rate": 1.5532332684495505e-05, "loss": 0.8622, "step": 2240 }, { "epoch": 0.33, "learning_rate": 1.552830769714093e-05, "loss": 0.8301, "step": 2241 }, { "epoch": 0.33, "learning_rate": 1.552428141956438e-05, "loss": 0.7236, "step": 2242 }, { "epoch": 0.33, "learning_rate": 1.552025385270552e-05, "loss": 0.8076, "step": 2243 }, { "epoch": 0.33, "learning_rate": 1.5516224997504327e-05, "loss": 0.7962, "step": 2244 }, { "epoch": 0.33, "learning_rate": 1.5512194854901067e-05, "loss": 0.7335, "step": 2245 }, { "epoch": 0.33, "learning_rate": 1.5508163425836314e-05, "loss": 0.7969, "step": 2246 }, { "epoch": 0.34, "learning_rate": 1.550413071125095e-05, "loss": 0.7686, "step": 2247 }, { "epoch": 0.34, "learning_rate": 1.5500096712086138e-05, "loss": 0.756, "step": 2248 }, { "epoch": 0.34, "learning_rate": 1.5496061429283357e-05, "loss": 0.698, "step": 2249 }, { "epoch": 0.34, "learning_rate": 1.549202486378438e-05, "loss": 0.8149, "step": 2250 }, { "epoch": 0.34, "learning_rate": 1.548798701653128e-05, "loss": 0.7283, "step": 2251 }, { "epoch": 0.34, "learning_rate": 1.5483947888466426e-05, "loss": 0.7862, "step": 2252 }, { "epoch": 0.34, "learning_rate": 1.5479907480532487e-05, "loss": 0.7957, "step": 2253 }, { "epoch": 0.34, "learning_rate": 1.547586579367244e-05, "loss": 0.3056, "step": 2254 }, { "epoch": 0.34, "learning_rate": 1.547182282882954e-05, "loss": 0.7651, "step": 2255 }, { "epoch": 0.34, "learning_rate": 1.5467778586947362e-05, "loss": 0.7936, "step": 2256 }, { "epoch": 0.34, "learning_rate": 1.5463733068969765e-05, "loss": 0.8134, "step": 2257 }, { "epoch": 0.34, "learning_rate": 1.5459686275840916e-05, "loss": 0.8166, "step": 2258 }, { "epoch": 0.34, "learning_rate": 1.5455638208505273e-05, "loss": 0.7199, "step": 2259 }, { "epoch": 0.34, "learning_rate": 1.545158886790759e-05, "loss": 0.7735, "step": 2260 }, { "epoch": 0.34, "learning_rate": 1.544753825499292e-05, "loss": 0.7451, "step": 2261 }, { "epoch": 0.34, "learning_rate": 1.5443486370706616e-05, "loss": 0.8302, "step": 2262 }, { "epoch": 0.34, "learning_rate": 1.5439433215994327e-05, "loss": 0.8309, "step": 2263 }, { "epoch": 0.34, "learning_rate": 1.5435378791801992e-05, "loss": 0.7996, "step": 2264 }, { "epoch": 0.34, "learning_rate": 1.5431323099075853e-05, "loss": 0.7176, "step": 2265 }, { "epoch": 0.34, "learning_rate": 1.5427266138762453e-05, "loss": 0.8097, "step": 2266 }, { "epoch": 0.34, "learning_rate": 1.5423207911808617e-05, "loss": 0.8293, "step": 2267 }, { "epoch": 0.34, "learning_rate": 1.541914841916147e-05, "loss": 0.729, "step": 2268 }, { "epoch": 0.34, "learning_rate": 1.5415087661768446e-05, "loss": 0.7874, "step": 2269 }, { "epoch": 0.34, "learning_rate": 1.5411025640577252e-05, "loss": 0.8214, "step": 2270 }, { "epoch": 0.34, "learning_rate": 1.540696235653591e-05, "loss": 0.7842, "step": 2271 }, { "epoch": 0.34, "learning_rate": 1.5402897810592722e-05, "loss": 0.8569, "step": 2272 }, { "epoch": 0.34, "learning_rate": 1.5398832003696292e-05, "loss": 0.8405, "step": 2273 }, { "epoch": 0.34, "learning_rate": 1.5394764936795518e-05, "loss": 0.7641, "step": 2274 }, { "epoch": 0.34, "learning_rate": 1.539069661083959e-05, "loss": 0.7614, "step": 2275 }, { "epoch": 0.34, "learning_rate": 1.538662702677799e-05, "loss": 0.758, "step": 2276 }, { "epoch": 0.34, "learning_rate": 1.5382556185560508e-05, "loss": 0.7573, "step": 2277 }, { "epoch": 0.34, "learning_rate": 1.5378484088137198e-05, "loss": 0.7809, "step": 2278 }, { "epoch": 0.34, "learning_rate": 1.537441073545844e-05, "loss": 0.8315, "step": 2279 }, { "epoch": 0.34, "learning_rate": 1.537033612847488e-05, "loss": 0.7694, "step": 2280 }, { "epoch": 0.34, "learning_rate": 1.536626026813748e-05, "loss": 0.7277, "step": 2281 }, { "epoch": 0.34, "learning_rate": 1.5362183155397478e-05, "loss": 0.8104, "step": 2282 }, { "epoch": 0.34, "learning_rate": 1.5358104791206404e-05, "loss": 0.7952, "step": 2283 }, { "epoch": 0.34, "learning_rate": 1.5354025176516094e-05, "loss": 0.712, "step": 2284 }, { "epoch": 0.34, "learning_rate": 1.534994431227866e-05, "loss": 0.7921, "step": 2285 }, { "epoch": 0.34, "learning_rate": 1.534586219944652e-05, "loss": 0.7037, "step": 2286 }, { "epoch": 0.34, "learning_rate": 1.534177883897237e-05, "loss": 0.8136, "step": 2287 }, { "epoch": 0.34, "learning_rate": 1.5337694231809205e-05, "loss": 0.8305, "step": 2288 }, { "epoch": 0.34, "learning_rate": 1.533360837891032e-05, "loss": 0.7415, "step": 2289 }, { "epoch": 0.34, "learning_rate": 1.5329521281229274e-05, "loss": 0.8014, "step": 2290 }, { "epoch": 0.34, "learning_rate": 1.532543293971994e-05, "loss": 0.766, "step": 2291 }, { "epoch": 0.34, "learning_rate": 1.5321343355336474e-05, "loss": 0.772, "step": 2292 }, { "epoch": 0.34, "learning_rate": 1.5317252529033325e-05, "loss": 0.7949, "step": 2293 }, { "epoch": 0.34, "learning_rate": 1.531316046176522e-05, "loss": 0.7346, "step": 2294 }, { "epoch": 0.34, "learning_rate": 1.5309067154487197e-05, "loss": 0.8507, "step": 2295 }, { "epoch": 0.34, "learning_rate": 1.530497260815456e-05, "loss": 0.6451, "step": 2296 }, { "epoch": 0.34, "learning_rate": 1.530087682372292e-05, "loss": 0.7528, "step": 2297 }, { "epoch": 0.34, "learning_rate": 1.5296779802148168e-05, "loss": 0.7584, "step": 2298 }, { "epoch": 0.34, "learning_rate": 1.5292681544386484e-05, "loss": 0.7844, "step": 2299 }, { "epoch": 0.34, "learning_rate": 1.528858205139434e-05, "loss": 0.7836, "step": 2300 }, { "epoch": 0.34, "learning_rate": 1.5284481324128494e-05, "loss": 0.6876, "step": 2301 }, { "epoch": 0.34, "learning_rate": 1.5280379363545995e-05, "loss": 0.8142, "step": 2302 }, { "epoch": 0.34, "learning_rate": 1.5276276170604175e-05, "loss": 0.7766, "step": 2303 }, { "epoch": 0.34, "learning_rate": 1.5272171746260655e-05, "loss": 0.797, "step": 2304 }, { "epoch": 0.34, "learning_rate": 1.526806609147335e-05, "loss": 0.3299, "step": 2305 }, { "epoch": 0.34, "learning_rate": 1.5263959207200447e-05, "loss": 0.7911, "step": 2306 }, { "epoch": 0.34, "learning_rate": 1.525985109440044e-05, "loss": 0.7713, "step": 2307 }, { "epoch": 0.34, "learning_rate": 1.5255741754032097e-05, "loss": 0.7751, "step": 2308 }, { "epoch": 0.34, "learning_rate": 1.5251631187054465e-05, "loss": 0.7557, "step": 2309 }, { "epoch": 0.34, "learning_rate": 1.52475193944269e-05, "loss": 0.8304, "step": 2310 }, { "epoch": 0.34, "learning_rate": 1.5243406377109024e-05, "loss": 0.7406, "step": 2311 }, { "epoch": 0.34, "learning_rate": 1.5239292136060757e-05, "loss": 0.8448, "step": 2312 }, { "epoch": 0.34, "learning_rate": 1.5235176672242293e-05, "loss": 0.8428, "step": 2313 }, { "epoch": 0.35, "learning_rate": 1.5231059986614124e-05, "loss": 0.7045, "step": 2314 }, { "epoch": 0.35, "learning_rate": 1.5226942080137018e-05, "loss": 0.7603, "step": 2315 }, { "epoch": 0.35, "learning_rate": 1.5222822953772029e-05, "loss": 0.8613, "step": 2316 }, { "epoch": 0.35, "learning_rate": 1.5218702608480503e-05, "loss": 0.7257, "step": 2317 }, { "epoch": 0.35, "learning_rate": 1.5214581045224065e-05, "loss": 0.814, "step": 2318 }, { "epoch": 0.35, "learning_rate": 1.5210458264964616e-05, "loss": 0.6715, "step": 2319 }, { "epoch": 0.35, "learning_rate": 1.5206334268664358e-05, "loss": 0.7786, "step": 2320 }, { "epoch": 0.35, "learning_rate": 1.5202209057285764e-05, "loss": 0.3319, "step": 2321 }, { "epoch": 0.35, "learning_rate": 1.5198082631791603e-05, "loss": 0.3418, "step": 2322 }, { "epoch": 0.35, "learning_rate": 1.5193954993144906e-05, "loss": 0.8008, "step": 2323 }, { "epoch": 0.35, "learning_rate": 1.5189826142309007e-05, "loss": 0.7808, "step": 2324 }, { "epoch": 0.35, "learning_rate": 1.518569608024752e-05, "loss": 0.8562, "step": 2325 }, { "epoch": 0.35, "learning_rate": 1.5181564807924334e-05, "loss": 0.7795, "step": 2326 }, { "epoch": 0.35, "learning_rate": 1.5177432326303628e-05, "loss": 0.7933, "step": 2327 }, { "epoch": 0.35, "learning_rate": 1.5173298636349851e-05, "loss": 0.8431, "step": 2328 }, { "epoch": 0.35, "learning_rate": 1.5169163739027748e-05, "loss": 0.7597, "step": 2329 }, { "epoch": 0.35, "learning_rate": 1.5165027635302346e-05, "loss": 0.846, "step": 2330 }, { "epoch": 0.35, "learning_rate": 1.5160890326138937e-05, "loss": 0.3473, "step": 2331 }, { "epoch": 0.35, "learning_rate": 1.5156751812503117e-05, "loss": 0.3346, "step": 2332 }, { "epoch": 0.35, "learning_rate": 1.5152612095360739e-05, "loss": 0.8508, "step": 2333 }, { "epoch": 0.35, "learning_rate": 1.5148471175677958e-05, "loss": 0.7578, "step": 2334 }, { "epoch": 0.35, "learning_rate": 1.5144329054421199e-05, "loss": 0.6977, "step": 2335 }, { "epoch": 0.35, "learning_rate": 1.514018573255717e-05, "loss": 0.839, "step": 2336 }, { "epoch": 0.35, "learning_rate": 1.5136041211052856e-05, "loss": 0.8698, "step": 2337 }, { "epoch": 0.35, "learning_rate": 1.5131895490875528e-05, "loss": 0.8113, "step": 2338 }, { "epoch": 0.35, "learning_rate": 1.5127748572992729e-05, "loss": 0.7803, "step": 2339 }, { "epoch": 0.35, "learning_rate": 1.512360045837229e-05, "loss": 0.8772, "step": 2340 }, { "epoch": 0.35, "learning_rate": 1.5119451147982318e-05, "loss": 0.7682, "step": 2341 }, { "epoch": 0.35, "learning_rate": 1.511530064279119e-05, "loss": 0.7953, "step": 2342 }, { "epoch": 0.35, "learning_rate": 1.5111148943767577e-05, "loss": 0.7853, "step": 2343 }, { "epoch": 0.35, "learning_rate": 1.5106996051880423e-05, "loss": 0.7776, "step": 2344 }, { "epoch": 0.35, "learning_rate": 1.5102841968098947e-05, "loss": 0.8533, "step": 2345 }, { "epoch": 0.35, "learning_rate": 1.5098686693392646e-05, "loss": 0.8699, "step": 2346 }, { "epoch": 0.35, "learning_rate": 1.50945302287313e-05, "loss": 0.6919, "step": 2347 }, { "epoch": 0.35, "learning_rate": 1.5090372575084961e-05, "loss": 0.7998, "step": 2348 }, { "epoch": 0.35, "learning_rate": 1.5086213733423964e-05, "loss": 0.7542, "step": 2349 }, { "epoch": 0.35, "learning_rate": 1.5082053704718919e-05, "loss": 0.848, "step": 2350 }, { "epoch": 0.35, "learning_rate": 1.507789248994071e-05, "loss": 0.7485, "step": 2351 }, { "epoch": 0.35, "learning_rate": 1.5073730090060505e-05, "loss": 0.7674, "step": 2352 }, { "epoch": 0.35, "learning_rate": 1.5069566506049742e-05, "loss": 0.8736, "step": 2353 }, { "epoch": 0.35, "learning_rate": 1.5065401738880132e-05, "loss": 0.7453, "step": 2354 }, { "epoch": 0.35, "learning_rate": 1.5061235789523676e-05, "loss": 0.7431, "step": 2355 }, { "epoch": 0.35, "learning_rate": 1.5057068658952637e-05, "loss": 0.797, "step": 2356 }, { "epoch": 0.35, "learning_rate": 1.505290034813956e-05, "loss": 0.3806, "step": 2357 }, { "epoch": 0.35, "learning_rate": 1.504873085805727e-05, "loss": 0.769, "step": 2358 }, { "epoch": 0.35, "learning_rate": 1.5044560189678856e-05, "loss": 0.7784, "step": 2359 }, { "epoch": 0.35, "learning_rate": 1.5040388343977688e-05, "loss": 0.7848, "step": 2360 }, { "epoch": 0.35, "learning_rate": 1.503621532192741e-05, "loss": 0.8092, "step": 2361 }, { "epoch": 0.35, "learning_rate": 1.5032041124501947e-05, "loss": 0.8345, "step": 2362 }, { "epoch": 0.35, "learning_rate": 1.5027865752675488e-05, "loss": 0.8129, "step": 2363 }, { "epoch": 0.35, "learning_rate": 1.50236892074225e-05, "loss": 0.7813, "step": 2364 }, { "epoch": 0.35, "learning_rate": 1.5019511489717724e-05, "loss": 0.7074, "step": 2365 }, { "epoch": 0.35, "learning_rate": 1.5015332600536181e-05, "loss": 0.7568, "step": 2366 }, { "epoch": 0.35, "learning_rate": 1.5011152540853156e-05, "loss": 0.749, "step": 2367 }, { "epoch": 0.35, "learning_rate": 1.5006971311644205e-05, "loss": 0.775, "step": 2368 }, { "epoch": 0.35, "learning_rate": 1.5002788913885169e-05, "loss": 0.6954, "step": 2369 }, { "epoch": 0.35, "learning_rate": 1.4998605348552153e-05, "loss": 0.786, "step": 2370 }, { "epoch": 0.35, "learning_rate": 1.4994420616621539e-05, "loss": 0.7495, "step": 2371 }, { "epoch": 0.35, "learning_rate": 1.4990234719069975e-05, "loss": 0.726, "step": 2372 }, { "epoch": 0.35, "learning_rate": 1.498604765687439e-05, "loss": 0.7506, "step": 2373 }, { "epoch": 0.35, "learning_rate": 1.4981859431011976e-05, "loss": 0.8303, "step": 2374 }, { "epoch": 0.35, "learning_rate": 1.4977670042460202e-05, "loss": 0.6937, "step": 2375 }, { "epoch": 0.35, "learning_rate": 1.4973479492196808e-05, "loss": 0.7235, "step": 2376 }, { "epoch": 0.35, "learning_rate": 1.4969287781199802e-05, "loss": 0.824, "step": 2377 }, { "epoch": 0.35, "learning_rate": 1.4965094910447463e-05, "loss": 0.7185, "step": 2378 }, { "epoch": 0.35, "learning_rate": 1.496090088091835e-05, "loss": 0.7403, "step": 2379 }, { "epoch": 0.35, "learning_rate": 1.4956705693591278e-05, "loss": 0.7914, "step": 2380 }, { "epoch": 0.36, "learning_rate": 1.4952509349445344e-05, "loss": 0.7625, "step": 2381 }, { "epoch": 0.36, "learning_rate": 1.4948311849459904e-05, "loss": 0.8039, "step": 2382 }, { "epoch": 0.36, "learning_rate": 1.4944113194614594e-05, "loss": 0.7614, "step": 2383 }, { "epoch": 0.36, "learning_rate": 1.4939913385889315e-05, "loss": 0.7717, "step": 2384 }, { "epoch": 0.36, "learning_rate": 1.4935712424264239e-05, "loss": 0.8097, "step": 2385 }, { "epoch": 0.36, "learning_rate": 1.4931510310719805e-05, "loss": 0.3845, "step": 2386 }, { "epoch": 0.36, "learning_rate": 1.492730704623672e-05, "loss": 0.7508, "step": 2387 }, { "epoch": 0.36, "learning_rate": 1.492310263179596e-05, "loss": 0.8895, "step": 2388 }, { "epoch": 0.36, "learning_rate": 1.4918897068378777e-05, "loss": 0.7414, "step": 2389 }, { "epoch": 0.36, "learning_rate": 1.4914690356966681e-05, "loss": 0.7593, "step": 2390 }, { "epoch": 0.36, "learning_rate": 1.4910482498541453e-05, "loss": 0.7191, "step": 2391 }, { "epoch": 0.36, "learning_rate": 1.490627349408514e-05, "loss": 0.7367, "step": 2392 }, { "epoch": 0.36, "learning_rate": 1.4902063344580065e-05, "loss": 0.7376, "step": 2393 }, { "epoch": 0.36, "learning_rate": 1.4897852051008807e-05, "loss": 0.7883, "step": 2394 }, { "epoch": 0.36, "learning_rate": 1.4893639614354222e-05, "loss": 0.8127, "step": 2395 }, { "epoch": 0.36, "learning_rate": 1.4889426035599426e-05, "loss": 0.7495, "step": 2396 }, { "epoch": 0.36, "learning_rate": 1.4885211315727799e-05, "loss": 0.748, "step": 2397 }, { "epoch": 0.36, "learning_rate": 1.4880995455722997e-05, "loss": 0.7708, "step": 2398 }, { "epoch": 0.36, "learning_rate": 1.4876778456568937e-05, "loss": 0.8003, "step": 2399 }, { "epoch": 0.36, "learning_rate": 1.4872560319249799e-05, "loss": 0.7509, "step": 2400 }, { "epoch": 0.36, "learning_rate": 1.486834104475003e-05, "loss": 0.8205, "step": 2401 }, { "epoch": 0.36, "learning_rate": 1.4864120634054348e-05, "loss": 0.7023, "step": 2402 }, { "epoch": 0.36, "learning_rate": 1.4859899088147731e-05, "loss": 0.7713, "step": 2403 }, { "epoch": 0.36, "learning_rate": 1.4855676408015422e-05, "loss": 0.7468, "step": 2404 }, { "epoch": 0.36, "learning_rate": 1.4851452594642928e-05, "loss": 0.8429, "step": 2405 }, { "epoch": 0.36, "learning_rate": 1.4847227649016022e-05, "loss": 0.702, "step": 2406 }, { "epoch": 0.36, "learning_rate": 1.4843001572120748e-05, "loss": 0.3122, "step": 2407 }, { "epoch": 0.36, "learning_rate": 1.48387743649434e-05, "loss": 0.7694, "step": 2408 }, { "epoch": 0.36, "learning_rate": 1.4834546028470547e-05, "loss": 0.8215, "step": 2409 }, { "epoch": 0.36, "learning_rate": 1.4830316563689015e-05, "loss": 0.7796, "step": 2410 }, { "epoch": 0.36, "learning_rate": 1.4826085971585896e-05, "loss": 0.846, "step": 2411 }, { "epoch": 0.36, "learning_rate": 1.4821854253148546e-05, "loss": 0.716, "step": 2412 }, { "epoch": 0.36, "learning_rate": 1.4817621409364584e-05, "loss": 0.839, "step": 2413 }, { "epoch": 0.36, "learning_rate": 1.4813387441221892e-05, "loss": 0.7875, "step": 2414 }, { "epoch": 0.36, "learning_rate": 1.4809152349708609e-05, "loss": 0.3072, "step": 2415 }, { "epoch": 0.36, "learning_rate": 1.480491613581314e-05, "loss": 0.747, "step": 2416 }, { "epoch": 0.36, "learning_rate": 1.4800678800524157e-05, "loss": 0.7961, "step": 2417 }, { "epoch": 0.36, "learning_rate": 1.4796440344830588e-05, "loss": 0.8153, "step": 2418 }, { "epoch": 0.36, "learning_rate": 1.4792200769721618e-05, "loss": 0.7573, "step": 2419 }, { "epoch": 0.36, "learning_rate": 1.4787960076186702e-05, "loss": 0.8217, "step": 2420 }, { "epoch": 0.36, "learning_rate": 1.4783718265215556e-05, "loss": 0.7712, "step": 2421 }, { "epoch": 0.36, "learning_rate": 1.477947533779815e-05, "loss": 0.688, "step": 2422 }, { "epoch": 0.36, "learning_rate": 1.477523129492472e-05, "loss": 0.787, "step": 2423 }, { "epoch": 0.36, "learning_rate": 1.4770986137585752e-05, "loss": 0.7562, "step": 2424 }, { "epoch": 0.36, "learning_rate": 1.476673986677201e-05, "loss": 0.3842, "step": 2425 }, { "epoch": 0.36, "learning_rate": 1.4762492483474508e-05, "loss": 0.7955, "step": 2426 }, { "epoch": 0.36, "learning_rate": 1.4758243988684515e-05, "loss": 0.8077, "step": 2427 }, { "epoch": 0.36, "learning_rate": 1.4753994383393568e-05, "loss": 0.8725, "step": 2428 }, { "epoch": 0.36, "learning_rate": 1.4749743668593454e-05, "loss": 0.7937, "step": 2429 }, { "epoch": 0.36, "learning_rate": 1.4745491845276235e-05, "loss": 0.7372, "step": 2430 }, { "epoch": 0.36, "learning_rate": 1.4741238914434208e-05, "loss": 0.7232, "step": 2431 }, { "epoch": 0.36, "learning_rate": 1.4736984877059954e-05, "loss": 0.3095, "step": 2432 }, { "epoch": 0.36, "learning_rate": 1.473272973414629e-05, "loss": 0.3557, "step": 2433 }, { "epoch": 0.36, "learning_rate": 1.4728473486686308e-05, "loss": 0.8462, "step": 2434 }, { "epoch": 0.36, "learning_rate": 1.4724216135673346e-05, "loss": 0.7161, "step": 2435 }, { "epoch": 0.36, "learning_rate": 1.4719957682101006e-05, "loss": 0.7922, "step": 2436 }, { "epoch": 0.36, "learning_rate": 1.4715698126963149e-05, "loss": 0.7105, "step": 2437 }, { "epoch": 0.36, "learning_rate": 1.4711437471253882e-05, "loss": 0.7747, "step": 2438 }, { "epoch": 0.36, "learning_rate": 1.470717571596758e-05, "loss": 0.7934, "step": 2439 }, { "epoch": 0.36, "learning_rate": 1.4702912862098876e-05, "loss": 0.8163, "step": 2440 }, { "epoch": 0.36, "learning_rate": 1.469864891064265e-05, "loss": 0.7914, "step": 2441 }, { "epoch": 0.36, "learning_rate": 1.469438386259404e-05, "loss": 0.7304, "step": 2442 }, { "epoch": 0.36, "learning_rate": 1.4690117718948447e-05, "loss": 0.745, "step": 2443 }, { "epoch": 0.36, "learning_rate": 1.4685850480701522e-05, "loss": 0.7925, "step": 2444 }, { "epoch": 0.36, "learning_rate": 1.4681582148849174e-05, "loss": 0.7782, "step": 2445 }, { "epoch": 0.36, "learning_rate": 1.4677312724387563e-05, "loss": 0.814, "step": 2446 }, { "epoch": 0.36, "learning_rate": 1.4673042208313108e-05, "loss": 0.7598, "step": 2447 }, { "epoch": 0.37, "learning_rate": 1.4668770601622487e-05, "loss": 0.642, "step": 2448 }, { "epoch": 0.37, "learning_rate": 1.466449790531262e-05, "loss": 0.8333, "step": 2449 }, { "epoch": 0.37, "learning_rate": 1.466022412038069e-05, "loss": 0.7832, "step": 2450 }, { "epoch": 0.37, "learning_rate": 1.4655949247824134e-05, "loss": 0.7393, "step": 2451 }, { "epoch": 0.37, "learning_rate": 1.4651673288640644e-05, "loss": 0.7345, "step": 2452 }, { "epoch": 0.37, "learning_rate": 1.464739624382816e-05, "loss": 0.6994, "step": 2453 }, { "epoch": 0.37, "learning_rate": 1.464311811438488e-05, "loss": 0.7219, "step": 2454 }, { "epoch": 0.37, "learning_rate": 1.4638838901309252e-05, "loss": 0.7924, "step": 2455 }, { "epoch": 0.37, "learning_rate": 1.4634558605599977e-05, "loss": 0.7852, "step": 2456 }, { "epoch": 0.37, "learning_rate": 1.4630277228256013e-05, "loss": 0.8515, "step": 2457 }, { "epoch": 0.37, "learning_rate": 1.462599477027657e-05, "loss": 0.7505, "step": 2458 }, { "epoch": 0.37, "learning_rate": 1.4621711232661105e-05, "loss": 0.7227, "step": 2459 }, { "epoch": 0.37, "learning_rate": 1.4617426616409328e-05, "loss": 0.7946, "step": 2460 }, { "epoch": 0.37, "learning_rate": 1.4613140922521201e-05, "loss": 0.732, "step": 2461 }, { "epoch": 0.37, "learning_rate": 1.4608854151996948e-05, "loss": 0.7981, "step": 2462 }, { "epoch": 0.37, "learning_rate": 1.4604566305837027e-05, "loss": 0.6653, "step": 2463 }, { "epoch": 0.37, "learning_rate": 1.4600277385042157e-05, "loss": 0.768, "step": 2464 }, { "epoch": 0.37, "learning_rate": 1.4595987390613306e-05, "loss": 0.7975, "step": 2465 }, { "epoch": 0.37, "learning_rate": 1.4591696323551697e-05, "loss": 0.7891, "step": 2466 }, { "epoch": 0.37, "learning_rate": 1.4587404184858796e-05, "loss": 0.7988, "step": 2467 }, { "epoch": 0.37, "learning_rate": 1.4583110975536319e-05, "loss": 0.7808, "step": 2468 }, { "epoch": 0.37, "learning_rate": 1.457881669658624e-05, "loss": 0.7628, "step": 2469 }, { "epoch": 0.37, "learning_rate": 1.4574521349010773e-05, "loss": 0.7759, "step": 2470 }, { "epoch": 0.37, "learning_rate": 1.4570224933812391e-05, "loss": 0.7322, "step": 2471 }, { "epoch": 0.37, "learning_rate": 1.4565927451993807e-05, "loss": 0.6801, "step": 2472 }, { "epoch": 0.37, "learning_rate": 1.4561628904557992e-05, "loss": 0.778, "step": 2473 }, { "epoch": 0.37, "learning_rate": 1.4557329292508156e-05, "loss": 0.7454, "step": 2474 }, { "epoch": 0.37, "learning_rate": 1.4553028616847764e-05, "loss": 0.7985, "step": 2475 }, { "epoch": 0.37, "learning_rate": 1.4548726878580528e-05, "loss": 0.7234, "step": 2476 }, { "epoch": 0.37, "learning_rate": 1.4544424078710406e-05, "loss": 0.7959, "step": 2477 }, { "epoch": 0.37, "learning_rate": 1.454012021824161e-05, "loss": 0.7084, "step": 2478 }, { "epoch": 0.37, "learning_rate": 1.4535815298178586e-05, "loss": 0.3416, "step": 2479 }, { "epoch": 0.37, "learning_rate": 1.4531509319526045e-05, "loss": 0.7521, "step": 2480 }, { "epoch": 0.37, "learning_rate": 1.4527202283288935e-05, "loss": 0.7166, "step": 2481 }, { "epoch": 0.37, "learning_rate": 1.4522894190472448e-05, "loss": 0.6938, "step": 2482 }, { "epoch": 0.37, "learning_rate": 1.4518585042082026e-05, "loss": 0.7066, "step": 2483 }, { "epoch": 0.37, "learning_rate": 1.4514274839123358e-05, "loss": 0.7215, "step": 2484 }, { "epoch": 0.37, "learning_rate": 1.4509963582602389e-05, "loss": 0.7946, "step": 2485 }, { "epoch": 0.37, "learning_rate": 1.4505651273525287e-05, "loss": 0.8281, "step": 2486 }, { "epoch": 0.37, "learning_rate": 1.4501337912898486e-05, "loss": 0.7913, "step": 2487 }, { "epoch": 0.37, "learning_rate": 1.4497023501728653e-05, "loss": 0.7435, "step": 2488 }, { "epoch": 0.37, "learning_rate": 1.4492708041022712e-05, "loss": 0.7319, "step": 2489 }, { "epoch": 0.37, "learning_rate": 1.4488391531787817e-05, "loss": 0.8314, "step": 2490 }, { "epoch": 0.37, "learning_rate": 1.4484073975031386e-05, "loss": 0.7015, "step": 2491 }, { "epoch": 0.37, "learning_rate": 1.4479755371761055e-05, "loss": 0.7375, "step": 2492 }, { "epoch": 0.37, "learning_rate": 1.4475435722984734e-05, "loss": 0.7892, "step": 2493 }, { "epoch": 0.37, "learning_rate": 1.4471115029710554e-05, "loss": 0.3345, "step": 2494 }, { "epoch": 0.37, "learning_rate": 1.4466793292946903e-05, "loss": 0.8799, "step": 2495 }, { "epoch": 0.37, "learning_rate": 1.4462470513702405e-05, "loss": 0.7652, "step": 2496 }, { "epoch": 0.37, "learning_rate": 1.445814669298593e-05, "loss": 0.7207, "step": 2497 }, { "epoch": 0.37, "learning_rate": 1.4453821831806591e-05, "loss": 0.8406, "step": 2498 }, { "epoch": 0.37, "learning_rate": 1.4449495931173747e-05, "loss": 0.7573, "step": 2499 }, { "epoch": 0.37, "learning_rate": 1.4445168992096998e-05, "loss": 0.7616, "step": 2500 }, { "epoch": 0.37, "learning_rate": 1.4440841015586178e-05, "loss": 0.7464, "step": 2501 }, { "epoch": 0.37, "learning_rate": 1.4436512002651375e-05, "loss": 0.8069, "step": 2502 }, { "epoch": 0.37, "learning_rate": 1.4432181954302913e-05, "loss": 0.8023, "step": 2503 }, { "epoch": 0.37, "learning_rate": 1.442785087155136e-05, "loss": 0.7532, "step": 2504 }, { "epoch": 0.37, "learning_rate": 1.4423518755407522e-05, "loss": 0.8128, "step": 2505 }, { "epoch": 0.37, "learning_rate": 1.4419185606882449e-05, "loss": 0.8751, "step": 2506 }, { "epoch": 0.37, "learning_rate": 1.4414851426987432e-05, "loss": 0.824, "step": 2507 }, { "epoch": 0.37, "learning_rate": 1.4410516216734002e-05, "loss": 0.8343, "step": 2508 }, { "epoch": 0.37, "learning_rate": 1.440617997713393e-05, "loss": 0.8264, "step": 2509 }, { "epoch": 0.37, "learning_rate": 1.4401842709199228e-05, "loss": 0.7683, "step": 2510 }, { "epoch": 0.37, "learning_rate": 1.4397504413942145e-05, "loss": 0.875, "step": 2511 }, { "epoch": 0.37, "learning_rate": 1.4393165092375176e-05, "loss": 0.8877, "step": 2512 }, { "epoch": 0.37, "learning_rate": 1.4388824745511047e-05, "loss": 0.8344, "step": 2513 }, { "epoch": 0.37, "learning_rate": 1.4384483374362736e-05, "loss": 0.8303, "step": 2514 }, { "epoch": 0.38, "learning_rate": 1.4380140979943444e-05, "loss": 0.8177, "step": 2515 }, { "epoch": 0.38, "learning_rate": 1.4375797563266624e-05, "loss": 0.7612, "step": 2516 }, { "epoch": 0.38, "learning_rate": 1.4371453125345963e-05, "loss": 0.8319, "step": 2517 }, { "epoch": 0.38, "learning_rate": 1.4367107667195383e-05, "loss": 0.7201, "step": 2518 }, { "epoch": 0.38, "learning_rate": 1.436276118982905e-05, "loss": 0.7708, "step": 2519 }, { "epoch": 0.38, "learning_rate": 1.4358413694261362e-05, "loss": 0.8283, "step": 2520 }, { "epoch": 0.38, "learning_rate": 1.435406518150696e-05, "loss": 0.8118, "step": 2521 }, { "epoch": 0.38, "learning_rate": 1.434971565258072e-05, "loss": 0.7274, "step": 2522 }, { "epoch": 0.38, "learning_rate": 1.4345365108497754e-05, "loss": 0.7777, "step": 2523 }, { "epoch": 0.38, "learning_rate": 1.4341013550273408e-05, "loss": 0.7608, "step": 2524 }, { "epoch": 0.38, "learning_rate": 1.4336660978923275e-05, "loss": 0.8142, "step": 2525 }, { "epoch": 0.38, "learning_rate": 1.4332307395463179e-05, "loss": 0.8284, "step": 2526 }, { "epoch": 0.38, "learning_rate": 1.4327952800909175e-05, "loss": 0.7674, "step": 2527 }, { "epoch": 0.38, "learning_rate": 1.432359719627756e-05, "loss": 0.6804, "step": 2528 }, { "epoch": 0.38, "learning_rate": 1.4319240582584866e-05, "loss": 0.8288, "step": 2529 }, { "epoch": 0.38, "learning_rate": 1.4314882960847858e-05, "loss": 0.7438, "step": 2530 }, { "epoch": 0.38, "learning_rate": 1.4310524332083538e-05, "loss": 0.7565, "step": 2531 }, { "epoch": 0.38, "learning_rate": 1.4306164697309144e-05, "loss": 0.7996, "step": 2532 }, { "epoch": 0.38, "learning_rate": 1.4301804057542148e-05, "loss": 0.6964, "step": 2533 }, { "epoch": 0.38, "learning_rate": 1.4297442413800255e-05, "loss": 0.7757, "step": 2534 }, { "epoch": 0.38, "learning_rate": 1.4293079767101405e-05, "loss": 0.7564, "step": 2535 }, { "epoch": 0.38, "learning_rate": 1.4288716118463775e-05, "loss": 0.7403, "step": 2536 }, { "epoch": 0.38, "learning_rate": 1.4284351468905774e-05, "loss": 0.7837, "step": 2537 }, { "epoch": 0.38, "learning_rate": 1.4279985819446039e-05, "loss": 0.7588, "step": 2538 }, { "epoch": 0.38, "learning_rate": 1.4275619171103447e-05, "loss": 0.7283, "step": 2539 }, { "epoch": 0.38, "learning_rate": 1.4271251524897115e-05, "loss": 0.766, "step": 2540 }, { "epoch": 0.38, "learning_rate": 1.4266882881846374e-05, "loss": 0.7569, "step": 2541 }, { "epoch": 0.38, "learning_rate": 1.42625132429708e-05, "loss": 0.7451, "step": 2542 }, { "epoch": 0.38, "learning_rate": 1.4258142609290203e-05, "loss": 0.7665, "step": 2543 }, { "epoch": 0.38, "learning_rate": 1.4253770981824619e-05, "loss": 0.9155, "step": 2544 }, { "epoch": 0.38, "learning_rate": 1.4249398361594322e-05, "loss": 0.7675, "step": 2545 }, { "epoch": 0.38, "learning_rate": 1.4245024749619809e-05, "loss": 0.7744, "step": 2546 }, { "epoch": 0.38, "learning_rate": 1.4240650146921814e-05, "loss": 0.7553, "step": 2547 }, { "epoch": 0.38, "learning_rate": 1.4236274554521312e-05, "loss": 0.7166, "step": 2548 }, { "epoch": 0.38, "learning_rate": 1.4231897973439488e-05, "loss": 0.8525, "step": 2549 }, { "epoch": 0.38, "learning_rate": 1.422752040469777e-05, "loss": 0.7512, "step": 2550 }, { "epoch": 0.38, "learning_rate": 1.422314184931782e-05, "loss": 0.792, "step": 2551 }, { "epoch": 0.38, "learning_rate": 1.4218762308321524e-05, "loss": 0.8108, "step": 2552 }, { "epoch": 0.38, "learning_rate": 1.4214381782730995e-05, "loss": 0.79, "step": 2553 }, { "epoch": 0.38, "learning_rate": 1.4210000273568587e-05, "loss": 0.7069, "step": 2554 }, { "epoch": 0.38, "learning_rate": 1.4205617781856874e-05, "loss": 0.7588, "step": 2555 }, { "epoch": 0.38, "learning_rate": 1.4201234308618657e-05, "loss": 0.881, "step": 2556 }, { "epoch": 0.38, "learning_rate": 1.4196849854876979e-05, "loss": 0.7344, "step": 2557 }, { "epoch": 0.38, "learning_rate": 1.4192464421655102e-05, "loss": 0.7848, "step": 2558 }, { "epoch": 0.38, "learning_rate": 1.4188078009976515e-05, "loss": 0.798, "step": 2559 }, { "epoch": 0.38, "learning_rate": 1.4183690620864946e-05, "loss": 0.7404, "step": 2560 }, { "epoch": 0.38, "learning_rate": 1.4179302255344333e-05, "loss": 0.7577, "step": 2561 }, { "epoch": 0.38, "learning_rate": 1.4174912914438865e-05, "loss": 0.7182, "step": 2562 }, { "epoch": 0.38, "learning_rate": 1.4170522599172943e-05, "loss": 0.6743, "step": 2563 }, { "epoch": 0.38, "learning_rate": 1.4166131310571194e-05, "loss": 0.7878, "step": 2564 }, { "epoch": 0.38, "learning_rate": 1.4161739049658483e-05, "loss": 0.7467, "step": 2565 }, { "epoch": 0.38, "learning_rate": 1.4157345817459889e-05, "loss": 0.7273, "step": 2566 }, { "epoch": 0.38, "learning_rate": 1.4152951615000734e-05, "loss": 0.8698, "step": 2567 }, { "epoch": 0.38, "learning_rate": 1.4148556443306551e-05, "loss": 0.8341, "step": 2568 }, { "epoch": 0.38, "learning_rate": 1.4144160303403109e-05, "loss": 0.7517, "step": 2569 }, { "epoch": 0.38, "learning_rate": 1.4139763196316398e-05, "loss": 0.8145, "step": 2570 }, { "epoch": 0.38, "learning_rate": 1.4135365123072635e-05, "loss": 0.7398, "step": 2571 }, { "epoch": 0.38, "learning_rate": 1.4130966084698261e-05, "loss": 0.7252, "step": 2572 }, { "epoch": 0.38, "learning_rate": 1.4126566082219948e-05, "loss": 0.7992, "step": 2573 }, { "epoch": 0.38, "learning_rate": 1.4122165116664583e-05, "loss": 0.678, "step": 2574 }, { "epoch": 0.38, "learning_rate": 1.4117763189059288e-05, "loss": 0.713, "step": 2575 }, { "epoch": 0.38, "learning_rate": 1.4113360300431407e-05, "loss": 0.8106, "step": 2576 }, { "epoch": 0.38, "learning_rate": 1.4108956451808502e-05, "loss": 0.7374, "step": 2577 }, { "epoch": 0.38, "learning_rate": 1.4104551644218367e-05, "loss": 0.7865, "step": 2578 }, { "epoch": 0.38, "learning_rate": 1.4100145878689012e-05, "loss": 0.7563, "step": 2579 }, { "epoch": 0.38, "learning_rate": 1.4095739156248678e-05, "loss": 0.8857, "step": 2580 }, { "epoch": 0.38, "learning_rate": 1.4091331477925826e-05, "loss": 0.868, "step": 2581 }, { "epoch": 0.39, "learning_rate": 1.4086922844749142e-05, "loss": 0.725, "step": 2582 }, { "epoch": 0.39, "learning_rate": 1.408251325774753e-05, "loss": 0.8236, "step": 2583 }, { "epoch": 0.39, "learning_rate": 1.4078102717950119e-05, "loss": 0.731, "step": 2584 }, { "epoch": 0.39, "learning_rate": 1.4073691226386266e-05, "loss": 0.7812, "step": 2585 }, { "epoch": 0.39, "learning_rate": 1.4069278784085542e-05, "loss": 0.7937, "step": 2586 }, { "epoch": 0.39, "learning_rate": 1.4064865392077742e-05, "loss": 0.7799, "step": 2587 }, { "epoch": 0.39, "learning_rate": 1.4060451051392887e-05, "loss": 0.6649, "step": 2588 }, { "epoch": 0.39, "learning_rate": 1.4056035763061216e-05, "loss": 0.8174, "step": 2589 }, { "epoch": 0.39, "learning_rate": 1.4051619528113186e-05, "loss": 0.7156, "step": 2590 }, { "epoch": 0.39, "learning_rate": 1.404720234757948e-05, "loss": 0.7829, "step": 2591 }, { "epoch": 0.39, "learning_rate": 1.4042784222491001e-05, "loss": 0.7981, "step": 2592 }, { "epoch": 0.39, "learning_rate": 1.403836515387887e-05, "loss": 0.7715, "step": 2593 }, { "epoch": 0.39, "learning_rate": 1.403394514277443e-05, "loss": 0.8474, "step": 2594 }, { "epoch": 0.39, "learning_rate": 1.4029524190209247e-05, "loss": 0.769, "step": 2595 }, { "epoch": 0.39, "learning_rate": 1.4025102297215099e-05, "loss": 0.7509, "step": 2596 }, { "epoch": 0.39, "learning_rate": 1.4020679464823987e-05, "loss": 0.7489, "step": 2597 }, { "epoch": 0.39, "learning_rate": 1.4016255694068135e-05, "loss": 0.7577, "step": 2598 }, { "epoch": 0.39, "learning_rate": 1.4011830985979984e-05, "loss": 0.7807, "step": 2599 }, { "epoch": 0.39, "learning_rate": 1.400740534159219e-05, "loss": 0.8424, "step": 2600 }, { "epoch": 0.39, "learning_rate": 1.4002978761937632e-05, "loss": 0.7926, "step": 2601 }, { "epoch": 0.39, "learning_rate": 1.3998551248049402e-05, "loss": 0.8511, "step": 2602 }, { "epoch": 0.39, "learning_rate": 1.3994122800960818e-05, "loss": 0.7708, "step": 2603 }, { "epoch": 0.39, "learning_rate": 1.3989693421705414e-05, "loss": 0.7902, "step": 2604 }, { "epoch": 0.39, "learning_rate": 1.3985263111316931e-05, "loss": 0.8129, "step": 2605 }, { "epoch": 0.39, "learning_rate": 1.3980831870829338e-05, "loss": 0.8111, "step": 2606 }, { "epoch": 0.39, "learning_rate": 1.3976399701276823e-05, "loss": 0.7685, "step": 2607 }, { "epoch": 0.39, "learning_rate": 1.3971966603693785e-05, "loss": 0.791, "step": 2608 }, { "epoch": 0.39, "learning_rate": 1.3967532579114832e-05, "loss": 0.3428, "step": 2609 }, { "epoch": 0.39, "learning_rate": 1.3963097628574809e-05, "loss": 0.7554, "step": 2610 }, { "epoch": 0.39, "learning_rate": 1.3958661753108758e-05, "loss": 0.8471, "step": 2611 }, { "epoch": 0.39, "learning_rate": 1.3954224953751945e-05, "loss": 0.7365, "step": 2612 }, { "epoch": 0.39, "learning_rate": 1.3949787231539855e-05, "loss": 0.6537, "step": 2613 }, { "epoch": 0.39, "learning_rate": 1.3945348587508181e-05, "loss": 0.852, "step": 2614 }, { "epoch": 0.39, "learning_rate": 1.394090902269283e-05, "loss": 0.7867, "step": 2615 }, { "epoch": 0.39, "learning_rate": 1.3936468538129935e-05, "loss": 0.7558, "step": 2616 }, { "epoch": 0.39, "learning_rate": 1.3932027134855835e-05, "loss": 0.7628, "step": 2617 }, { "epoch": 0.39, "learning_rate": 1.3927584813907084e-05, "loss": 0.7429, "step": 2618 }, { "epoch": 0.39, "learning_rate": 1.3923141576320451e-05, "loss": 0.8662, "step": 2619 }, { "epoch": 0.39, "learning_rate": 1.3918697423132916e-05, "loss": 0.7373, "step": 2620 }, { "epoch": 0.39, "learning_rate": 1.3914252355381686e-05, "loss": 0.7118, "step": 2621 }, { "epoch": 0.39, "learning_rate": 1.3909806374104165e-05, "loss": 0.7206, "step": 2622 }, { "epoch": 0.39, "learning_rate": 1.3905359480337977e-05, "loss": 0.6687, "step": 2623 }, { "epoch": 0.39, "learning_rate": 1.3900911675120956e-05, "loss": 0.8168, "step": 2624 }, { "epoch": 0.39, "learning_rate": 1.3896462959491154e-05, "loss": 0.7536, "step": 2625 }, { "epoch": 0.39, "learning_rate": 1.3892013334486835e-05, "loss": 0.7158, "step": 2626 }, { "epoch": 0.39, "learning_rate": 1.388756280114647e-05, "loss": 0.8947, "step": 2627 }, { "epoch": 0.39, "learning_rate": 1.3883111360508748e-05, "loss": 0.7818, "step": 2628 }, { "epoch": 0.39, "learning_rate": 1.387865901361256e-05, "loss": 0.76, "step": 2629 }, { "epoch": 0.39, "learning_rate": 1.3874205761497023e-05, "loss": 0.7606, "step": 2630 }, { "epoch": 0.39, "learning_rate": 1.3869751605201454e-05, "loss": 0.8618, "step": 2631 }, { "epoch": 0.39, "learning_rate": 1.3865296545765388e-05, "loss": 0.7582, "step": 2632 }, { "epoch": 0.39, "learning_rate": 1.386084058422856e-05, "loss": 0.3326, "step": 2633 }, { "epoch": 0.39, "learning_rate": 1.3856383721630927e-05, "loss": 0.7944, "step": 2634 }, { "epoch": 0.39, "learning_rate": 1.3851925959012655e-05, "loss": 0.726, "step": 2635 }, { "epoch": 0.39, "learning_rate": 1.3847467297414114e-05, "loss": 0.7768, "step": 2636 }, { "epoch": 0.39, "learning_rate": 1.384300773787589e-05, "loss": 0.8531, "step": 2637 }, { "epoch": 0.39, "learning_rate": 1.3838547281438769e-05, "loss": 0.7101, "step": 2638 }, { "epoch": 0.39, "learning_rate": 1.3834085929143756e-05, "loss": 0.7629, "step": 2639 }, { "epoch": 0.39, "learning_rate": 1.382962368203207e-05, "loss": 0.756, "step": 2640 }, { "epoch": 0.39, "learning_rate": 1.382516054114512e-05, "loss": 0.79, "step": 2641 }, { "epoch": 0.39, "learning_rate": 1.3820696507524538e-05, "loss": 0.3242, "step": 2642 }, { "epoch": 0.39, "learning_rate": 1.381623158221216e-05, "loss": 0.7545, "step": 2643 }, { "epoch": 0.39, "learning_rate": 1.3811765766250036e-05, "loss": 0.7887, "step": 2644 }, { "epoch": 0.39, "learning_rate": 1.3807299060680415e-05, "loss": 0.7565, "step": 2645 }, { "epoch": 0.39, "learning_rate": 1.3802831466545753e-05, "loss": 0.7935, "step": 2646 }, { "epoch": 0.39, "learning_rate": 1.3798362984888724e-05, "loss": 0.7473, "step": 2647 }, { "epoch": 0.39, "learning_rate": 1.3793893616752203e-05, "loss": 0.7347, "step": 2648 }, { "epoch": 0.4, "learning_rate": 1.378942336317927e-05, "loss": 0.7084, "step": 2649 }, { "epoch": 0.4, "learning_rate": 1.3784952225213211e-05, "loss": 0.7516, "step": 2650 }, { "epoch": 0.4, "learning_rate": 1.3780480203897524e-05, "loss": 0.8652, "step": 2651 }, { "epoch": 0.4, "learning_rate": 1.3776007300275906e-05, "loss": 0.8011, "step": 2652 }, { "epoch": 0.4, "learning_rate": 1.3771533515392268e-05, "loss": 0.7116, "step": 2653 }, { "epoch": 0.4, "learning_rate": 1.3767058850290721e-05, "loss": 0.684, "step": 2654 }, { "epoch": 0.4, "learning_rate": 1.3762583306015587e-05, "loss": 0.8078, "step": 2655 }, { "epoch": 0.4, "learning_rate": 1.3758106883611383e-05, "loss": 0.7943, "step": 2656 }, { "epoch": 0.4, "learning_rate": 1.3753629584122839e-05, "loss": 0.8201, "step": 2657 }, { "epoch": 0.4, "learning_rate": 1.3749151408594892e-05, "loss": 0.837, "step": 2658 }, { "epoch": 0.4, "learning_rate": 1.3744672358072673e-05, "loss": 0.7118, "step": 2659 }, { "epoch": 0.4, "learning_rate": 1.3740192433601527e-05, "loss": 0.7291, "step": 2660 }, { "epoch": 0.4, "learning_rate": 1.3735711636226995e-05, "loss": 0.8212, "step": 2661 }, { "epoch": 0.4, "learning_rate": 1.3731229966994836e-05, "loss": 0.671, "step": 2662 }, { "epoch": 0.4, "learning_rate": 1.3726747426950994e-05, "loss": 0.7615, "step": 2663 }, { "epoch": 0.4, "learning_rate": 1.3722264017141628e-05, "loss": 0.7629, "step": 2664 }, { "epoch": 0.4, "learning_rate": 1.3717779738613098e-05, "loss": 0.7533, "step": 2665 }, { "epoch": 0.4, "learning_rate": 1.3713294592411962e-05, "loss": 0.74, "step": 2666 }, { "epoch": 0.4, "learning_rate": 1.3708808579584986e-05, "loss": 0.7316, "step": 2667 }, { "epoch": 0.4, "learning_rate": 1.3704321701179138e-05, "loss": 0.782, "step": 2668 }, { "epoch": 0.4, "learning_rate": 1.3699833958241584e-05, "loss": 0.755, "step": 2669 }, { "epoch": 0.4, "learning_rate": 1.3695345351819694e-05, "loss": 0.7683, "step": 2670 }, { "epoch": 0.4, "learning_rate": 1.3690855882961042e-05, "loss": 0.782, "step": 2671 }, { "epoch": 0.4, "learning_rate": 1.36863655527134e-05, "loss": 0.7945, "step": 2672 }, { "epoch": 0.4, "learning_rate": 1.3681874362124743e-05, "loss": 0.3425, "step": 2673 }, { "epoch": 0.4, "learning_rate": 1.3677382312243244e-05, "loss": 0.7814, "step": 2674 }, { "epoch": 0.4, "learning_rate": 1.3672889404117275e-05, "loss": 0.8159, "step": 2675 }, { "epoch": 0.4, "learning_rate": 1.3668395638795419e-05, "loss": 0.8381, "step": 2676 }, { "epoch": 0.4, "learning_rate": 1.3663901017326451e-05, "loss": 0.8332, "step": 2677 }, { "epoch": 0.4, "learning_rate": 1.3659405540759342e-05, "loss": 0.7312, "step": 2678 }, { "epoch": 0.4, "learning_rate": 1.3654909210143268e-05, "loss": 0.3223, "step": 2679 }, { "epoch": 0.4, "learning_rate": 1.3650412026527602e-05, "loss": 0.8049, "step": 2680 }, { "epoch": 0.4, "learning_rate": 1.3645913990961926e-05, "loss": 0.7054, "step": 2681 }, { "epoch": 0.4, "learning_rate": 1.3641415104496e-05, "loss": 0.8115, "step": 2682 }, { "epoch": 0.4, "learning_rate": 1.3636915368179804e-05, "loss": 0.7644, "step": 2683 }, { "epoch": 0.4, "learning_rate": 1.3632414783063503e-05, "loss": 0.7969, "step": 2684 }, { "epoch": 0.4, "learning_rate": 1.3627913350197469e-05, "loss": 0.757, "step": 2685 }, { "epoch": 0.4, "learning_rate": 1.3623411070632257e-05, "loss": 0.7885, "step": 2686 }, { "epoch": 0.4, "learning_rate": 1.3618907945418643e-05, "loss": 0.7617, "step": 2687 }, { "epoch": 0.4, "learning_rate": 1.3614403975607578e-05, "loss": 0.7326, "step": 2688 }, { "epoch": 0.4, "learning_rate": 1.3609899162250222e-05, "loss": 0.7311, "step": 2689 }, { "epoch": 0.4, "learning_rate": 1.360539350639793e-05, "loss": 0.7503, "step": 2690 }, { "epoch": 0.4, "learning_rate": 1.3600887009102253e-05, "loss": 0.785, "step": 2691 }, { "epoch": 0.4, "learning_rate": 1.3596379671414936e-05, "loss": 0.3233, "step": 2692 }, { "epoch": 0.4, "learning_rate": 1.3591871494387922e-05, "loss": 0.8137, "step": 2693 }, { "epoch": 0.4, "learning_rate": 1.3587362479073352e-05, "loss": 0.7936, "step": 2694 }, { "epoch": 0.4, "learning_rate": 1.3582852626523561e-05, "loss": 0.7641, "step": 2695 }, { "epoch": 0.4, "learning_rate": 1.357834193779108e-05, "loss": 0.8315, "step": 2696 }, { "epoch": 0.4, "learning_rate": 1.357383041392863e-05, "loss": 0.7661, "step": 2697 }, { "epoch": 0.4, "learning_rate": 1.3569318055989135e-05, "loss": 0.8214, "step": 2698 }, { "epoch": 0.4, "learning_rate": 1.3564804865025711e-05, "loss": 0.6998, "step": 2699 }, { "epoch": 0.4, "learning_rate": 1.3560290842091666e-05, "loss": 0.3496, "step": 2700 }, { "epoch": 0.4, "learning_rate": 1.3555775988240499e-05, "loss": 0.7951, "step": 2701 }, { "epoch": 0.4, "learning_rate": 1.3551260304525913e-05, "loss": 0.7664, "step": 2702 }, { "epoch": 0.4, "learning_rate": 1.3546743792001799e-05, "loss": 0.791, "step": 2703 }, { "epoch": 0.4, "learning_rate": 1.3542226451722242e-05, "loss": 0.8038, "step": 2704 }, { "epoch": 0.4, "learning_rate": 1.3537708284741514e-05, "loss": 0.7565, "step": 2705 }, { "epoch": 0.4, "learning_rate": 1.353318929211409e-05, "loss": 0.8088, "step": 2706 }, { "epoch": 0.4, "learning_rate": 1.3528669474894634e-05, "loss": 0.7785, "step": 2707 }, { "epoch": 0.4, "learning_rate": 1.3524148834138e-05, "loss": 0.7482, "step": 2708 }, { "epoch": 0.4, "learning_rate": 1.3519627370899238e-05, "loss": 0.7167, "step": 2709 }, { "epoch": 0.4, "learning_rate": 1.3515105086233586e-05, "loss": 0.6887, "step": 2710 }, { "epoch": 0.4, "learning_rate": 1.3510581981196475e-05, "loss": 0.7229, "step": 2711 }, { "epoch": 0.4, "learning_rate": 1.350605805684353e-05, "loss": 0.7365, "step": 2712 }, { "epoch": 0.4, "learning_rate": 1.3501533314230568e-05, "loss": 0.7505, "step": 2713 }, { "epoch": 0.4, "learning_rate": 1.349700775441359e-05, "loss": 0.7902, "step": 2714 }, { "epoch": 0.4, "learning_rate": 1.349248137844879e-05, "loss": 0.7331, "step": 2715 }, { "epoch": 0.41, "learning_rate": 1.3487954187392558e-05, "loss": 0.8208, "step": 2716 }, { "epoch": 0.41, "learning_rate": 1.3483426182301473e-05, "loss": 0.7963, "step": 2717 }, { "epoch": 0.41, "learning_rate": 1.34788973642323e-05, "loss": 0.7112, "step": 2718 }, { "epoch": 0.41, "learning_rate": 1.3474367734241995e-05, "loss": 0.7941, "step": 2719 }, { "epoch": 0.41, "learning_rate": 1.3469837293387702e-05, "loss": 0.7432, "step": 2720 }, { "epoch": 0.41, "learning_rate": 1.3465306042726755e-05, "loss": 0.7593, "step": 2721 }, { "epoch": 0.41, "learning_rate": 1.3460773983316685e-05, "loss": 0.868, "step": 2722 }, { "epoch": 0.41, "learning_rate": 1.3456241116215201e-05, "loss": 0.7937, "step": 2723 }, { "epoch": 0.41, "learning_rate": 1.3451707442480204e-05, "loss": 0.7986, "step": 2724 }, { "epoch": 0.41, "learning_rate": 1.3447172963169783e-05, "loss": 0.7557, "step": 2725 }, { "epoch": 0.41, "learning_rate": 1.3442637679342218e-05, "loss": 0.6881, "step": 2726 }, { "epoch": 0.41, "learning_rate": 1.3438101592055974e-05, "loss": 0.8455, "step": 2727 }, { "epoch": 0.41, "learning_rate": 1.3433564702369704e-05, "loss": 0.7656, "step": 2728 }, { "epoch": 0.41, "learning_rate": 1.3429027011342246e-05, "loss": 0.7421, "step": 2729 }, { "epoch": 0.41, "learning_rate": 1.3424488520032631e-05, "loss": 0.3263, "step": 2730 }, { "epoch": 0.41, "learning_rate": 1.341994922950007e-05, "loss": 0.7549, "step": 2731 }, { "epoch": 0.41, "learning_rate": 1.3415409140803964e-05, "loss": 0.7355, "step": 2732 }, { "epoch": 0.41, "learning_rate": 1.3410868255003902e-05, "loss": 0.7877, "step": 2733 }, { "epoch": 0.41, "learning_rate": 1.3406326573159652e-05, "loss": 0.765, "step": 2734 }, { "epoch": 0.41, "learning_rate": 1.3401784096331179e-05, "loss": 0.8247, "step": 2735 }, { "epoch": 0.41, "learning_rate": 1.3397240825578623e-05, "loss": 0.6727, "step": 2736 }, { "epoch": 0.41, "learning_rate": 1.3392696761962318e-05, "loss": 0.7769, "step": 2737 }, { "epoch": 0.41, "learning_rate": 1.3388151906542773e-05, "loss": 0.7901, "step": 2738 }, { "epoch": 0.41, "learning_rate": 1.3383606260380686e-05, "loss": 0.6978, "step": 2739 }, { "epoch": 0.41, "learning_rate": 1.3379059824536951e-05, "loss": 0.7977, "step": 2740 }, { "epoch": 0.41, "learning_rate": 1.3374512600072626e-05, "loss": 0.7963, "step": 2741 }, { "epoch": 0.41, "learning_rate": 1.3369964588048967e-05, "loss": 0.7762, "step": 2742 }, { "epoch": 0.41, "learning_rate": 1.3365415789527411e-05, "loss": 0.7251, "step": 2743 }, { "epoch": 0.41, "learning_rate": 1.3360866205569578e-05, "loss": 0.884, "step": 2744 }, { "epoch": 0.41, "learning_rate": 1.3356315837237271e-05, "loss": 0.8138, "step": 2745 }, { "epoch": 0.41, "learning_rate": 1.335176468559247e-05, "loss": 0.3269, "step": 2746 }, { "epoch": 0.41, "learning_rate": 1.3347212751697352e-05, "loss": 0.717, "step": 2747 }, { "epoch": 0.41, "learning_rate": 1.334266003661426e-05, "loss": 0.7858, "step": 2748 }, { "epoch": 0.41, "learning_rate": 1.3338106541405735e-05, "loss": 0.653, "step": 2749 }, { "epoch": 0.41, "learning_rate": 1.3333552267134492e-05, "loss": 0.835, "step": 2750 }, { "epoch": 0.41, "learning_rate": 1.3328997214863426e-05, "loss": 0.8035, "step": 2751 }, { "epoch": 0.41, "learning_rate": 1.3324441385655615e-05, "loss": 0.8255, "step": 2752 }, { "epoch": 0.41, "learning_rate": 1.3319884780574324e-05, "loss": 0.7514, "step": 2753 }, { "epoch": 0.41, "learning_rate": 1.331532740068299e-05, "loss": 0.8307, "step": 2754 }, { "epoch": 0.41, "learning_rate": 1.3310769247045238e-05, "loss": 0.8712, "step": 2755 }, { "epoch": 0.41, "learning_rate": 1.3306210320724869e-05, "loss": 0.3481, "step": 2756 }, { "epoch": 0.41, "learning_rate": 1.3301650622785867e-05, "loss": 0.7908, "step": 2757 }, { "epoch": 0.41, "learning_rate": 1.3297090154292397e-05, "loss": 0.7195, "step": 2758 }, { "epoch": 0.41, "learning_rate": 1.3292528916308802e-05, "loss": 0.7346, "step": 2759 }, { "epoch": 0.41, "learning_rate": 1.3287966909899603e-05, "loss": 0.8007, "step": 2760 }, { "epoch": 0.41, "learning_rate": 1.3283404136129502e-05, "loss": 0.6957, "step": 2761 }, { "epoch": 0.41, "learning_rate": 1.3278840596063382e-05, "loss": 0.7422, "step": 2762 }, { "epoch": 0.41, "learning_rate": 1.3274276290766304e-05, "loss": 0.8133, "step": 2763 }, { "epoch": 0.41, "learning_rate": 1.3269711221303501e-05, "loss": 0.7237, "step": 2764 }, { "epoch": 0.41, "learning_rate": 1.3265145388740397e-05, "loss": 0.8149, "step": 2765 }, { "epoch": 0.41, "learning_rate": 1.326057879414258e-05, "loss": 0.7932, "step": 2766 }, { "epoch": 0.41, "learning_rate": 1.3256011438575832e-05, "loss": 0.7357, "step": 2767 }, { "epoch": 0.41, "learning_rate": 1.3251443323106092e-05, "loss": 0.8383, "step": 2768 }, { "epoch": 0.41, "learning_rate": 1.32468744487995e-05, "loss": 0.8214, "step": 2769 }, { "epoch": 0.41, "learning_rate": 1.324230481672235e-05, "loss": 0.8155, "step": 2770 }, { "epoch": 0.41, "learning_rate": 1.3237734427941135e-05, "loss": 0.7316, "step": 2771 }, { "epoch": 0.41, "learning_rate": 1.3233163283522503e-05, "loss": 0.7117, "step": 2772 }, { "epoch": 0.41, "learning_rate": 1.3228591384533296e-05, "loss": 0.856, "step": 2773 }, { "epoch": 0.41, "learning_rate": 1.3224018732040522e-05, "loss": 0.3351, "step": 2774 }, { "epoch": 0.41, "learning_rate": 1.3219445327111365e-05, "loss": 0.7657, "step": 2775 }, { "epoch": 0.41, "learning_rate": 1.3214871170813193e-05, "loss": 0.8077, "step": 2776 }, { "epoch": 0.41, "learning_rate": 1.321029626421354e-05, "loss": 0.7645, "step": 2777 }, { "epoch": 0.41, "learning_rate": 1.3205720608380121e-05, "loss": 0.8156, "step": 2778 }, { "epoch": 0.41, "learning_rate": 1.320114420438082e-05, "loss": 0.3167, "step": 2779 }, { "epoch": 0.41, "learning_rate": 1.3196567053283702e-05, "loss": 0.7101, "step": 2780 }, { "epoch": 0.41, "learning_rate": 1.3191989156157009e-05, "loss": 0.6639, "step": 2781 }, { "epoch": 0.41, "learning_rate": 1.3187410514069142e-05, "loss": 0.7843, "step": 2782 }, { "epoch": 0.42, "learning_rate": 1.3182831128088688e-05, "loss": 0.7037, "step": 2783 }, { "epoch": 0.42, "learning_rate": 1.317825099928441e-05, "loss": 0.8099, "step": 2784 }, { "epoch": 0.42, "learning_rate": 1.3173670128725235e-05, "loss": 0.318, "step": 2785 }, { "epoch": 0.42, "learning_rate": 1.316908851748027e-05, "loss": 0.8068, "step": 2786 }, { "epoch": 0.42, "learning_rate": 1.3164506166618796e-05, "loss": 0.3451, "step": 2787 }, { "epoch": 0.42, "learning_rate": 1.3159923077210255e-05, "loss": 0.7477, "step": 2788 }, { "epoch": 0.42, "learning_rate": 1.3155339250324275e-05, "loss": 0.8081, "step": 2789 }, { "epoch": 0.42, "learning_rate": 1.3150754687030646e-05, "loss": 0.8174, "step": 2790 }, { "epoch": 0.42, "learning_rate": 1.3146169388399343e-05, "loss": 0.7222, "step": 2791 }, { "epoch": 0.42, "learning_rate": 1.31415833555005e-05, "loss": 0.6752, "step": 2792 }, { "epoch": 0.42, "learning_rate": 1.3136996589404423e-05, "loss": 0.7845, "step": 2793 }, { "epoch": 0.42, "learning_rate": 1.3132409091181595e-05, "loss": 0.7647, "step": 2794 }, { "epoch": 0.42, "learning_rate": 1.312782086190267e-05, "loss": 0.729, "step": 2795 }, { "epoch": 0.42, "learning_rate": 1.3123231902638466e-05, "loss": 0.7016, "step": 2796 }, { "epoch": 0.42, "learning_rate": 1.311864221445998e-05, "loss": 0.8217, "step": 2797 }, { "epoch": 0.42, "learning_rate": 1.311405179843837e-05, "loss": 0.8278, "step": 2798 }, { "epoch": 0.42, "learning_rate": 1.3109460655644973e-05, "loss": 0.9525, "step": 2799 }, { "epoch": 0.42, "learning_rate": 1.3104868787151289e-05, "loss": 0.3117, "step": 2800 }, { "epoch": 0.42, "learning_rate": 1.3100276194028985e-05, "loss": 0.7503, "step": 2801 }, { "epoch": 0.42, "learning_rate": 1.3095682877349906e-05, "loss": 0.7483, "step": 2802 }, { "epoch": 0.42, "learning_rate": 1.3091088838186067e-05, "loss": 0.8163, "step": 2803 }, { "epoch": 0.42, "learning_rate": 1.3086494077609639e-05, "loss": 0.8979, "step": 2804 }, { "epoch": 0.42, "learning_rate": 1.3081898596692968e-05, "loss": 0.7066, "step": 2805 }, { "epoch": 0.42, "learning_rate": 1.307730239650857e-05, "loss": 0.7944, "step": 2806 }, { "epoch": 0.42, "learning_rate": 1.307270547812913e-05, "loss": 0.7758, "step": 2807 }, { "epoch": 0.42, "learning_rate": 1.3068107842627495e-05, "loss": 0.7202, "step": 2808 }, { "epoch": 0.42, "learning_rate": 1.3063509491076683e-05, "loss": 0.7881, "step": 2809 }, { "epoch": 0.42, "learning_rate": 1.3058910424549878e-05, "loss": 0.771, "step": 2810 }, { "epoch": 0.42, "learning_rate": 1.3054310644120432e-05, "loss": 0.8328, "step": 2811 }, { "epoch": 0.42, "learning_rate": 1.3049710150861862e-05, "loss": 0.8203, "step": 2812 }, { "epoch": 0.42, "learning_rate": 1.3045108945847857e-05, "loss": 0.7785, "step": 2813 }, { "epoch": 0.42, "learning_rate": 1.3040507030152265e-05, "loss": 0.7037, "step": 2814 }, { "epoch": 0.42, "learning_rate": 1.30359044048491e-05, "loss": 0.7699, "step": 2815 }, { "epoch": 0.42, "learning_rate": 1.3031301071012541e-05, "loss": 0.7914, "step": 2816 }, { "epoch": 0.42, "learning_rate": 1.3026697029716943e-05, "loss": 0.7558, "step": 2817 }, { "epoch": 0.42, "learning_rate": 1.3022092282036818e-05, "loss": 0.8423, "step": 2818 }, { "epoch": 0.42, "learning_rate": 1.301748682904684e-05, "loss": 0.7567, "step": 2819 }, { "epoch": 0.42, "learning_rate": 1.3012880671821847e-05, "loss": 0.7314, "step": 2820 }, { "epoch": 0.42, "learning_rate": 1.3008273811436848e-05, "loss": 0.8002, "step": 2821 }, { "epoch": 0.42, "learning_rate": 1.300366624896702e-05, "loss": 0.8334, "step": 2822 }, { "epoch": 0.42, "learning_rate": 1.2999057985487689e-05, "loss": 0.8176, "step": 2823 }, { "epoch": 0.42, "learning_rate": 1.2994449022074357e-05, "loss": 0.835, "step": 2824 }, { "epoch": 0.42, "learning_rate": 1.2989839359802683e-05, "loss": 0.7894, "step": 2825 }, { "epoch": 0.42, "learning_rate": 1.2985228999748496e-05, "loss": 0.7586, "step": 2826 }, { "epoch": 0.42, "learning_rate": 1.2980617942987774e-05, "loss": 0.8389, "step": 2827 }, { "epoch": 0.42, "learning_rate": 1.2976006190596678e-05, "loss": 0.7781, "step": 2828 }, { "epoch": 0.42, "learning_rate": 1.297139374365151e-05, "loss": 0.7808, "step": 2829 }, { "epoch": 0.42, "learning_rate": 1.2966780603228748e-05, "loss": 0.7779, "step": 2830 }, { "epoch": 0.42, "learning_rate": 1.2962166770405029e-05, "loss": 0.7475, "step": 2831 }, { "epoch": 0.42, "learning_rate": 1.295755224625715e-05, "loss": 0.8054, "step": 2832 }, { "epoch": 0.42, "learning_rate": 1.2952937031862071e-05, "loss": 0.7877, "step": 2833 }, { "epoch": 0.42, "learning_rate": 1.294832112829691e-05, "loss": 0.7594, "step": 2834 }, { "epoch": 0.42, "learning_rate": 1.2943704536638947e-05, "loss": 0.6977, "step": 2835 }, { "epoch": 0.42, "learning_rate": 1.293908725796563e-05, "loss": 0.7462, "step": 2836 }, { "epoch": 0.42, "learning_rate": 1.2934469293354555e-05, "loss": 0.6677, "step": 2837 }, { "epoch": 0.42, "learning_rate": 1.2929850643883484e-05, "loss": 0.8484, "step": 2838 }, { "epoch": 0.42, "learning_rate": 1.2925231310630337e-05, "loss": 0.7888, "step": 2839 }, { "epoch": 0.42, "learning_rate": 1.2920611294673202e-05, "loss": 0.8234, "step": 2840 }, { "epoch": 0.42, "learning_rate": 1.2915990597090315e-05, "loss": 0.7487, "step": 2841 }, { "epoch": 0.42, "learning_rate": 1.2911369218960077e-05, "loss": 0.6983, "step": 2842 }, { "epoch": 0.42, "learning_rate": 1.2906747161361043e-05, "loss": 0.7466, "step": 2843 }, { "epoch": 0.42, "learning_rate": 1.2902124425371935e-05, "loss": 0.6612, "step": 2844 }, { "epoch": 0.42, "learning_rate": 1.289750101207163e-05, "loss": 0.6675, "step": 2845 }, { "epoch": 0.42, "learning_rate": 1.2892876922539154e-05, "loss": 0.7494, "step": 2846 }, { "epoch": 0.42, "learning_rate": 1.2888252157853707e-05, "loss": 0.3096, "step": 2847 }, { "epoch": 0.42, "learning_rate": 1.2883626719094629e-05, "loss": 0.7583, "step": 2848 }, { "epoch": 0.42, "learning_rate": 1.2879000607341432e-05, "loss": 0.6765, "step": 2849 }, { "epoch": 0.43, "learning_rate": 1.2874373823673779e-05, "loss": 0.7041, "step": 2850 }, { "epoch": 0.43, "learning_rate": 1.286974636917149e-05, "loss": 0.7468, "step": 2851 }, { "epoch": 0.43, "learning_rate": 1.2865118244914538e-05, "loss": 0.8216, "step": 2852 }, { "epoch": 0.43, "learning_rate": 1.286048945198306e-05, "loss": 0.7786, "step": 2853 }, { "epoch": 0.43, "learning_rate": 1.2855859991457346e-05, "loss": 0.776, "step": 2854 }, { "epoch": 0.43, "learning_rate": 1.285122986441784e-05, "loss": 0.759, "step": 2855 }, { "epoch": 0.43, "learning_rate": 1.2846599071945141e-05, "loss": 0.7418, "step": 2856 }, { "epoch": 0.43, "learning_rate": 1.2841967615119998e-05, "loss": 0.3391, "step": 2857 }, { "epoch": 0.43, "learning_rate": 1.2837335495023338e-05, "loss": 0.7815, "step": 2858 }, { "epoch": 0.43, "learning_rate": 1.2832702712736214e-05, "loss": 0.7753, "step": 2859 }, { "epoch": 0.43, "learning_rate": 1.2828069269339849e-05, "loss": 0.7113, "step": 2860 }, { "epoch": 0.43, "learning_rate": 1.282343516591562e-05, "loss": 0.8016, "step": 2861 }, { "epoch": 0.43, "learning_rate": 1.2818800403545051e-05, "loss": 0.7684, "step": 2862 }, { "epoch": 0.43, "learning_rate": 1.2814164983309828e-05, "loss": 0.763, "step": 2863 }, { "epoch": 0.43, "learning_rate": 1.2809528906291785e-05, "loss": 0.7864, "step": 2864 }, { "epoch": 0.43, "learning_rate": 1.2804892173572914e-05, "loss": 0.6676, "step": 2865 }, { "epoch": 0.43, "learning_rate": 1.280025478623535e-05, "loss": 0.7672, "step": 2866 }, { "epoch": 0.43, "learning_rate": 1.2795616745361396e-05, "loss": 0.7479, "step": 2867 }, { "epoch": 0.43, "learning_rate": 1.2790978052033496e-05, "loss": 0.7017, "step": 2868 }, { "epoch": 0.43, "learning_rate": 1.278633870733425e-05, "loss": 0.8532, "step": 2869 }, { "epoch": 0.43, "learning_rate": 1.2781698712346407e-05, "loss": 0.753, "step": 2870 }, { "epoch": 0.43, "learning_rate": 1.2777058068152872e-05, "loss": 0.7881, "step": 2871 }, { "epoch": 0.43, "learning_rate": 1.2772416775836703e-05, "loss": 0.7562, "step": 2872 }, { "epoch": 0.43, "learning_rate": 1.2767774836481104e-05, "loss": 0.7187, "step": 2873 }, { "epoch": 0.43, "learning_rate": 1.2763132251169434e-05, "loss": 0.7763, "step": 2874 }, { "epoch": 0.43, "learning_rate": 1.2758489020985194e-05, "loss": 0.3119, "step": 2875 }, { "epoch": 0.43, "learning_rate": 1.2753845147012051e-05, "loss": 0.7948, "step": 2876 }, { "epoch": 0.43, "learning_rate": 1.2749200630333811e-05, "loss": 0.8685, "step": 2877 }, { "epoch": 0.43, "learning_rate": 1.2744555472034433e-05, "loss": 0.7982, "step": 2878 }, { "epoch": 0.43, "learning_rate": 1.2739909673198022e-05, "loss": 0.6959, "step": 2879 }, { "epoch": 0.43, "learning_rate": 1.2735263234908837e-05, "loss": 0.7697, "step": 2880 }, { "epoch": 0.43, "learning_rate": 1.273061615825129e-05, "loss": 0.875, "step": 2881 }, { "epoch": 0.43, "learning_rate": 1.2725968444309934e-05, "loss": 0.7623, "step": 2882 }, { "epoch": 0.43, "learning_rate": 1.2721320094169473e-05, "loss": 0.8005, "step": 2883 }, { "epoch": 0.43, "learning_rate": 1.2716671108914763e-05, "loss": 0.6963, "step": 2884 }, { "epoch": 0.43, "learning_rate": 1.2712021489630804e-05, "loss": 0.7053, "step": 2885 }, { "epoch": 0.43, "learning_rate": 1.2707371237402744e-05, "loss": 0.8216, "step": 2886 }, { "epoch": 0.43, "learning_rate": 1.2702720353315883e-05, "loss": 0.7719, "step": 2887 }, { "epoch": 0.43, "learning_rate": 1.2698068838455667e-05, "loss": 0.7088, "step": 2888 }, { "epoch": 0.43, "learning_rate": 1.2693416693907683e-05, "loss": 0.8997, "step": 2889 }, { "epoch": 0.43, "learning_rate": 1.2688763920757672e-05, "loss": 0.3254, "step": 2890 }, { "epoch": 0.43, "learning_rate": 1.2684110520091523e-05, "loss": 0.7725, "step": 2891 }, { "epoch": 0.43, "learning_rate": 1.2679456492995268e-05, "loss": 0.7675, "step": 2892 }, { "epoch": 0.43, "learning_rate": 1.2674801840555077e-05, "loss": 0.8745, "step": 2893 }, { "epoch": 0.43, "learning_rate": 1.2670146563857283e-05, "loss": 0.7317, "step": 2894 }, { "epoch": 0.43, "learning_rate": 1.2665490663988355e-05, "loss": 0.776, "step": 2895 }, { "epoch": 0.43, "learning_rate": 1.2660834142034908e-05, "loss": 0.8118, "step": 2896 }, { "epoch": 0.43, "learning_rate": 1.2656176999083698e-05, "loss": 0.7929, "step": 2897 }, { "epoch": 0.43, "learning_rate": 1.2651519236221634e-05, "loss": 0.7363, "step": 2898 }, { "epoch": 0.43, "learning_rate": 1.2646860854535767e-05, "loss": 0.722, "step": 2899 }, { "epoch": 0.43, "learning_rate": 1.2642201855113293e-05, "loss": 0.8097, "step": 2900 }, { "epoch": 0.43, "learning_rate": 1.2637542239041545e-05, "loss": 0.8032, "step": 2901 }, { "epoch": 0.43, "learning_rate": 1.2632882007408011e-05, "loss": 0.7611, "step": 2902 }, { "epoch": 0.43, "learning_rate": 1.2628221161300314e-05, "loss": 0.7906, "step": 2903 }, { "epoch": 0.43, "learning_rate": 1.2623559701806228e-05, "loss": 0.765, "step": 2904 }, { "epoch": 0.43, "learning_rate": 1.2618897630013657e-05, "loss": 0.7056, "step": 2905 }, { "epoch": 0.43, "learning_rate": 1.261423494701067e-05, "loss": 0.7437, "step": 2906 }, { "epoch": 0.43, "learning_rate": 1.2609571653885455e-05, "loss": 0.7492, "step": 2907 }, { "epoch": 0.43, "learning_rate": 1.2604907751726358e-05, "loss": 0.7633, "step": 2908 }, { "epoch": 0.43, "learning_rate": 1.2600243241621862e-05, "loss": 0.7641, "step": 2909 }, { "epoch": 0.43, "learning_rate": 1.259557812466059e-05, "loss": 0.8089, "step": 2910 }, { "epoch": 0.43, "learning_rate": 1.2590912401931306e-05, "loss": 0.8056, "step": 2911 }, { "epoch": 0.43, "learning_rate": 1.2586246074522923e-05, "loss": 0.7905, "step": 2912 }, { "epoch": 0.43, "learning_rate": 1.2581579143524487e-05, "loss": 0.7004, "step": 2913 }, { "epoch": 0.43, "learning_rate": 1.257691161002519e-05, "loss": 0.7128, "step": 2914 }, { "epoch": 0.43, "learning_rate": 1.2572243475114364e-05, "loss": 0.6884, "step": 2915 }, { "epoch": 0.43, "learning_rate": 1.2567574739881474e-05, "loss": 0.6707, "step": 2916 }, { "epoch": 0.44, "learning_rate": 1.2562905405416133e-05, "loss": 0.7277, "step": 2917 }, { "epoch": 0.44, "learning_rate": 1.2558235472808096e-05, "loss": 0.8125, "step": 2918 }, { "epoch": 0.44, "learning_rate": 1.2553564943147247e-05, "loss": 0.7547, "step": 2919 }, { "epoch": 0.44, "learning_rate": 1.2548893817523622e-05, "loss": 0.8229, "step": 2920 }, { "epoch": 0.44, "learning_rate": 1.2544222097027388e-05, "loss": 0.3181, "step": 2921 }, { "epoch": 0.44, "learning_rate": 1.253954978274885e-05, "loss": 0.7272, "step": 2922 }, { "epoch": 0.44, "learning_rate": 1.2534876875778457e-05, "loss": 0.6917, "step": 2923 }, { "epoch": 0.44, "learning_rate": 1.2530203377206793e-05, "loss": 0.7017, "step": 2924 }, { "epoch": 0.44, "learning_rate": 1.2525529288124577e-05, "loss": 0.7304, "step": 2925 }, { "epoch": 0.44, "learning_rate": 1.2520854609622678e-05, "loss": 0.7687, "step": 2926 }, { "epoch": 0.44, "learning_rate": 1.2516179342792087e-05, "loss": 0.7941, "step": 2927 }, { "epoch": 0.44, "learning_rate": 1.2511503488723942e-05, "loss": 0.6825, "step": 2928 }, { "epoch": 0.44, "learning_rate": 1.2506827048509515e-05, "loss": 0.8727, "step": 2929 }, { "epoch": 0.44, "learning_rate": 1.2502150023240214e-05, "loss": 0.7801, "step": 2930 }, { "epoch": 0.44, "learning_rate": 1.2497472414007584e-05, "loss": 0.8312, "step": 2931 }, { "epoch": 0.44, "learning_rate": 1.2492794221903313e-05, "loss": 0.6586, "step": 2932 }, { "epoch": 0.44, "learning_rate": 1.2488115448019213e-05, "loss": 0.8017, "step": 2933 }, { "epoch": 0.44, "learning_rate": 1.248343609344724e-05, "loss": 0.8202, "step": 2934 }, { "epoch": 0.44, "learning_rate": 1.247875615927948e-05, "loss": 0.8345, "step": 2935 }, { "epoch": 0.44, "learning_rate": 1.2474075646608163e-05, "loss": 0.7599, "step": 2936 }, { "epoch": 0.44, "learning_rate": 1.2469394556525649e-05, "loss": 0.7929, "step": 2937 }, { "epoch": 0.44, "learning_rate": 1.2464712890124426e-05, "loss": 0.7743, "step": 2938 }, { "epoch": 0.44, "learning_rate": 1.2460030648497128e-05, "loss": 0.8202, "step": 2939 }, { "epoch": 0.44, "learning_rate": 1.2455347832736518e-05, "loss": 0.7524, "step": 2940 }, { "epoch": 0.44, "learning_rate": 1.2450664443935491e-05, "loss": 0.6405, "step": 2941 }, { "epoch": 0.44, "learning_rate": 1.244598048318708e-05, "loss": 0.7931, "step": 2942 }, { "epoch": 0.44, "learning_rate": 1.2441295951584449e-05, "loss": 0.7663, "step": 2943 }, { "epoch": 0.44, "learning_rate": 1.243661085022089e-05, "loss": 0.7289, "step": 2944 }, { "epoch": 0.44, "learning_rate": 1.2431925180189843e-05, "loss": 0.7193, "step": 2945 }, { "epoch": 0.44, "learning_rate": 1.2427238942584868e-05, "loss": 0.7693, "step": 2946 }, { "epoch": 0.44, "learning_rate": 1.2422552138499659e-05, "loss": 0.7217, "step": 2947 }, { "epoch": 0.44, "learning_rate": 1.2417864769028043e-05, "loss": 0.7226, "step": 2948 }, { "epoch": 0.44, "learning_rate": 1.2413176835263981e-05, "loss": 0.7968, "step": 2949 }, { "epoch": 0.44, "learning_rate": 1.2408488338301568e-05, "loss": 0.7394, "step": 2950 }, { "epoch": 0.44, "learning_rate": 1.2403799279235023e-05, "loss": 0.7396, "step": 2951 }, { "epoch": 0.44, "learning_rate": 1.2399109659158701e-05, "loss": 0.8366, "step": 2952 }, { "epoch": 0.44, "learning_rate": 1.2394419479167085e-05, "loss": 0.6761, "step": 2953 }, { "epoch": 0.44, "learning_rate": 1.23897287403548e-05, "loss": 0.7463, "step": 2954 }, { "epoch": 0.44, "learning_rate": 1.2385037443816583e-05, "loss": 0.7302, "step": 2955 }, { "epoch": 0.44, "learning_rate": 1.2380345590647313e-05, "loss": 0.7783, "step": 2956 }, { "epoch": 0.44, "learning_rate": 1.2375653181941995e-05, "loss": 0.7602, "step": 2957 }, { "epoch": 0.44, "learning_rate": 1.237096021879577e-05, "loss": 0.3463, "step": 2958 }, { "epoch": 0.44, "learning_rate": 1.23662667023039e-05, "loss": 0.3162, "step": 2959 }, { "epoch": 0.44, "learning_rate": 1.2361572633561778e-05, "loss": 0.833, "step": 2960 }, { "epoch": 0.44, "learning_rate": 1.235687801366493e-05, "loss": 0.7742, "step": 2961 }, { "epoch": 0.44, "learning_rate": 1.2352182843709006e-05, "loss": 0.7732, "step": 2962 }, { "epoch": 0.44, "learning_rate": 1.2347487124789789e-05, "loss": 0.7738, "step": 2963 }, { "epoch": 0.44, "learning_rate": 1.2342790858003187e-05, "loss": 0.7716, "step": 2964 }, { "epoch": 0.44, "learning_rate": 1.2338094044445235e-05, "loss": 0.8054, "step": 2965 }, { "epoch": 0.44, "learning_rate": 1.2333396685212095e-05, "loss": 0.7476, "step": 2966 }, { "epoch": 0.44, "learning_rate": 1.2328698781400063e-05, "loss": 0.7185, "step": 2967 }, { "epoch": 0.44, "learning_rate": 1.2324000334105555e-05, "loss": 0.6812, "step": 2968 }, { "epoch": 0.44, "learning_rate": 1.2319301344425121e-05, "loss": 0.343, "step": 2969 }, { "epoch": 0.44, "learning_rate": 1.2314601813455425e-05, "loss": 0.725, "step": 2970 }, { "epoch": 0.44, "learning_rate": 1.2309901742293266e-05, "loss": 0.825, "step": 2971 }, { "epoch": 0.44, "learning_rate": 1.2305201132035575e-05, "loss": 0.7272, "step": 2972 }, { "epoch": 0.44, "learning_rate": 1.2300499983779402e-05, "loss": 0.831, "step": 2973 }, { "epoch": 0.44, "learning_rate": 1.2295798298621914e-05, "loss": 0.7515, "step": 2974 }, { "epoch": 0.44, "learning_rate": 1.2291096077660418e-05, "loss": 0.8949, "step": 2975 }, { "epoch": 0.44, "learning_rate": 1.228639332199234e-05, "loss": 0.3588, "step": 2976 }, { "epoch": 0.44, "learning_rate": 1.228169003271523e-05, "loss": 0.7854, "step": 2977 }, { "epoch": 0.44, "learning_rate": 1.2276986210926767e-05, "loss": 0.8556, "step": 2978 }, { "epoch": 0.44, "learning_rate": 1.2272281857724742e-05, "loss": 0.8182, "step": 2979 }, { "epoch": 0.44, "learning_rate": 1.2267576974207082e-05, "loss": 0.7477, "step": 2980 }, { "epoch": 0.44, "learning_rate": 1.2262871561471844e-05, "loss": 0.8081, "step": 2981 }, { "epoch": 0.44, "learning_rate": 1.2258165620617185e-05, "loss": 0.6697, "step": 2982 }, { "epoch": 0.44, "learning_rate": 1.2253459152741408e-05, "loss": 0.6465, "step": 2983 }, { "epoch": 0.45, "learning_rate": 1.2248752158942926e-05, "loss": 0.7292, "step": 2984 }, { "epoch": 0.45, "learning_rate": 1.2244044640320281e-05, "loss": 0.7119, "step": 2985 }, { "epoch": 0.45, "learning_rate": 1.2239336597972133e-05, "loss": 0.7343, "step": 2986 }, { "epoch": 0.45, "learning_rate": 1.2234628032997271e-05, "loss": 0.8041, "step": 2987 }, { "epoch": 0.45, "learning_rate": 1.22299189464946e-05, "loss": 0.3377, "step": 2988 }, { "epoch": 0.45, "learning_rate": 1.2225209339563144e-05, "loss": 0.7141, "step": 2989 }, { "epoch": 0.45, "learning_rate": 1.222049921330206e-05, "loss": 0.7147, "step": 2990 }, { "epoch": 0.45, "learning_rate": 1.2215788568810613e-05, "loss": 0.7382, "step": 2991 }, { "epoch": 0.45, "learning_rate": 1.22110774071882e-05, "loss": 0.737, "step": 2992 }, { "epoch": 0.45, "learning_rate": 1.220636572953433e-05, "loss": 0.7463, "step": 2993 }, { "epoch": 0.45, "learning_rate": 1.2201653536948636e-05, "loss": 0.6927, "step": 2994 }, { "epoch": 0.45, "learning_rate": 1.2196940830530875e-05, "loss": 0.8307, "step": 2995 }, { "epoch": 0.45, "learning_rate": 1.2192227611380916e-05, "loss": 0.7367, "step": 2996 }, { "epoch": 0.45, "learning_rate": 1.218751388059875e-05, "loss": 0.7472, "step": 2997 }, { "epoch": 0.45, "learning_rate": 1.2182799639284497e-05, "loss": 0.8205, "step": 2998 }, { "epoch": 0.45, "learning_rate": 1.2178084888538384e-05, "loss": 0.8381, "step": 2999 }, { "epoch": 0.45, "learning_rate": 1.2173369629460762e-05, "loss": 0.8145, "step": 3000 }, { "epoch": 0.45, "learning_rate": 1.2168653863152098e-05, "loss": 0.8128, "step": 3001 }, { "epoch": 0.45, "learning_rate": 1.216393759071298e-05, "loss": 0.3286, "step": 3002 }, { "epoch": 0.45, "learning_rate": 1.2159220813244115e-05, "loss": 0.8327, "step": 3003 }, { "epoch": 0.45, "learning_rate": 1.2154503531846324e-05, "loss": 0.8145, "step": 3004 }, { "epoch": 0.45, "learning_rate": 1.214978574762055e-05, "loss": 0.3539, "step": 3005 }, { "epoch": 0.45, "learning_rate": 1.2145067461667854e-05, "loss": 0.7026, "step": 3006 }, { "epoch": 0.45, "learning_rate": 1.2140348675089404e-05, "loss": 0.3447, "step": 3007 }, { "epoch": 0.45, "learning_rate": 1.2135629388986497e-05, "loss": 0.7675, "step": 3008 }, { "epoch": 0.45, "learning_rate": 1.2130909604460542e-05, "loss": 0.7951, "step": 3009 }, { "epoch": 0.45, "learning_rate": 1.2126189322613063e-05, "loss": 0.734, "step": 3010 }, { "epoch": 0.45, "learning_rate": 1.2121468544545701e-05, "loss": 0.6722, "step": 3011 }, { "epoch": 0.45, "learning_rate": 1.2116747271360209e-05, "loss": 0.7596, "step": 3012 }, { "epoch": 0.45, "learning_rate": 1.211202550415847e-05, "loss": 0.7138, "step": 3013 }, { "epoch": 0.45, "learning_rate": 1.2107303244042464e-05, "loss": 0.3368, "step": 3014 }, { "epoch": 0.45, "learning_rate": 1.2102580492114296e-05, "loss": 0.6764, "step": 3015 }, { "epoch": 0.45, "learning_rate": 1.2097857249476179e-05, "loss": 0.7478, "step": 3016 }, { "epoch": 0.45, "learning_rate": 1.209313351723045e-05, "loss": 0.6897, "step": 3017 }, { "epoch": 0.45, "learning_rate": 1.208840929647956e-05, "loss": 0.885, "step": 3018 }, { "epoch": 0.45, "learning_rate": 1.2083684588326058e-05, "loss": 0.6881, "step": 3019 }, { "epoch": 0.45, "learning_rate": 1.207895939387263e-05, "loss": 0.7199, "step": 3020 }, { "epoch": 0.45, "learning_rate": 1.2074233714222054e-05, "loss": 0.6998, "step": 3021 }, { "epoch": 0.45, "learning_rate": 1.2069507550477237e-05, "loss": 0.7144, "step": 3022 }, { "epoch": 0.45, "learning_rate": 1.2064780903741189e-05, "loss": 0.7508, "step": 3023 }, { "epoch": 0.45, "learning_rate": 1.2060053775117043e-05, "loss": 0.7828, "step": 3024 }, { "epoch": 0.45, "learning_rate": 1.2055326165708028e-05, "loss": 0.8001, "step": 3025 }, { "epoch": 0.45, "learning_rate": 1.2050598076617505e-05, "loss": 0.7299, "step": 3026 }, { "epoch": 0.45, "learning_rate": 1.2045869508948931e-05, "loss": 0.8452, "step": 3027 }, { "epoch": 0.45, "learning_rate": 1.2041140463805887e-05, "loss": 0.7269, "step": 3028 }, { "epoch": 0.45, "learning_rate": 1.2036410942292054e-05, "loss": 0.357, "step": 3029 }, { "epoch": 0.45, "learning_rate": 1.2031680945511228e-05, "loss": 0.7865, "step": 3030 }, { "epoch": 0.45, "learning_rate": 1.2026950474567322e-05, "loss": 0.3393, "step": 3031 }, { "epoch": 0.45, "learning_rate": 1.2022219530564357e-05, "loss": 0.7384, "step": 3032 }, { "epoch": 0.45, "learning_rate": 1.201748811460646e-05, "loss": 0.7584, "step": 3033 }, { "epoch": 0.45, "learning_rate": 1.2012756227797865e-05, "loss": 0.79, "step": 3034 }, { "epoch": 0.45, "learning_rate": 1.2008023871242928e-05, "loss": 0.7125, "step": 3035 }, { "epoch": 0.45, "learning_rate": 1.200329104604611e-05, "loss": 0.7784, "step": 3036 }, { "epoch": 0.45, "learning_rate": 1.1998557753311975e-05, "loss": 0.8164, "step": 3037 }, { "epoch": 0.45, "learning_rate": 1.1993823994145202e-05, "loss": 0.6777, "step": 3038 }, { "epoch": 0.45, "learning_rate": 1.1989089769650577e-05, "loss": 0.7315, "step": 3039 }, { "epoch": 0.45, "learning_rate": 1.1984355080932999e-05, "loss": 0.7452, "step": 3040 }, { "epoch": 0.45, "learning_rate": 1.1979619929097465e-05, "loss": 0.7942, "step": 3041 }, { "epoch": 0.45, "learning_rate": 1.197488431524909e-05, "loss": 0.8284, "step": 3042 }, { "epoch": 0.45, "learning_rate": 1.1970148240493098e-05, "loss": 0.754, "step": 3043 }, { "epoch": 0.45, "learning_rate": 1.1965411705934808e-05, "loss": 0.7922, "step": 3044 }, { "epoch": 0.45, "learning_rate": 1.1960674712679659e-05, "loss": 0.748, "step": 3045 }, { "epoch": 0.45, "learning_rate": 1.195593726183319e-05, "loss": 0.7205, "step": 3046 }, { "epoch": 0.45, "learning_rate": 1.1951199354501053e-05, "loss": 0.7787, "step": 3047 }, { "epoch": 0.45, "learning_rate": 1.1946460991789e-05, "loss": 0.7922, "step": 3048 }, { "epoch": 0.45, "learning_rate": 1.1941722174802887e-05, "loss": 0.3282, "step": 3049 }, { "epoch": 0.45, "learning_rate": 1.1936982904648691e-05, "loss": 0.3421, "step": 3050 }, { "epoch": 0.46, "learning_rate": 1.193224318243248e-05, "loss": 0.8001, "step": 3051 }, { "epoch": 0.46, "learning_rate": 1.1927503009260432e-05, "loss": 0.8407, "step": 3052 }, { "epoch": 0.46, "learning_rate": 1.1922762386238826e-05, "loss": 0.6898, "step": 3053 }, { "epoch": 0.46, "learning_rate": 1.1918021314474059e-05, "loss": 0.8172, "step": 3054 }, { "epoch": 0.46, "learning_rate": 1.1913279795072622e-05, "loss": 0.7112, "step": 3055 }, { "epoch": 0.46, "learning_rate": 1.1908537829141107e-05, "loss": 0.7281, "step": 3056 }, { "epoch": 0.46, "learning_rate": 1.1903795417786222e-05, "loss": 0.7469, "step": 3057 }, { "epoch": 0.46, "learning_rate": 1.1899052562114769e-05, "loss": 0.8028, "step": 3058 }, { "epoch": 0.46, "learning_rate": 1.189430926323366e-05, "loss": 0.7686, "step": 3059 }, { "epoch": 0.46, "learning_rate": 1.1889565522249908e-05, "loss": 0.8005, "step": 3060 }, { "epoch": 0.46, "learning_rate": 1.1884821340270629e-05, "loss": 0.752, "step": 3061 }, { "epoch": 0.46, "learning_rate": 1.188007671840304e-05, "loss": 0.7383, "step": 3062 }, { "epoch": 0.46, "learning_rate": 1.1875331657754465e-05, "loss": 0.7279, "step": 3063 }, { "epoch": 0.46, "learning_rate": 1.1870586159432327e-05, "loss": 0.7819, "step": 3064 }, { "epoch": 0.46, "learning_rate": 1.1865840224544157e-05, "loss": 0.8274, "step": 3065 }, { "epoch": 0.46, "learning_rate": 1.1861093854197572e-05, "loss": 0.7118, "step": 3066 }, { "epoch": 0.46, "learning_rate": 1.1856347049500311e-05, "loss": 0.7435, "step": 3067 }, { "epoch": 0.46, "learning_rate": 1.1851599811560206e-05, "loss": 0.7539, "step": 3068 }, { "epoch": 0.46, "learning_rate": 1.1846852141485186e-05, "loss": 0.7754, "step": 3069 }, { "epoch": 0.46, "learning_rate": 1.1842104040383284e-05, "loss": 0.6088, "step": 3070 }, { "epoch": 0.46, "learning_rate": 1.1837355509362634e-05, "loss": 0.318, "step": 3071 }, { "epoch": 0.46, "learning_rate": 1.1832606549531468e-05, "loss": 0.8294, "step": 3072 }, { "epoch": 0.46, "learning_rate": 1.1827857161998126e-05, "loss": 0.792, "step": 3073 }, { "epoch": 0.46, "learning_rate": 1.1823107347871039e-05, "loss": 0.351, "step": 3074 }, { "epoch": 0.46, "learning_rate": 1.1818357108258738e-05, "loss": 0.7584, "step": 3075 }, { "epoch": 0.46, "learning_rate": 1.1813606444269856e-05, "loss": 0.6835, "step": 3076 }, { "epoch": 0.46, "learning_rate": 1.1808855357013132e-05, "loss": 0.7681, "step": 3077 }, { "epoch": 0.46, "learning_rate": 1.1804103847597387e-05, "loss": 0.7449, "step": 3078 }, { "epoch": 0.46, "learning_rate": 1.1799351917131556e-05, "loss": 0.743, "step": 3079 }, { "epoch": 0.46, "learning_rate": 1.1794599566724667e-05, "loss": 0.707, "step": 3080 }, { "epoch": 0.46, "learning_rate": 1.1789846797485842e-05, "loss": 0.6849, "step": 3081 }, { "epoch": 0.46, "learning_rate": 1.1785093610524307e-05, "loss": 0.7837, "step": 3082 }, { "epoch": 0.46, "learning_rate": 1.1780340006949383e-05, "loss": 0.813, "step": 3083 }, { "epoch": 0.46, "learning_rate": 1.1775585987870484e-05, "loss": 0.6658, "step": 3084 }, { "epoch": 0.46, "learning_rate": 1.1770831554397129e-05, "loss": 0.7415, "step": 3085 }, { "epoch": 0.46, "learning_rate": 1.1766076707638926e-05, "loss": 0.769, "step": 3086 }, { "epoch": 0.46, "learning_rate": 1.1761321448705586e-05, "loss": 0.7153, "step": 3087 }, { "epoch": 0.46, "learning_rate": 1.1756565778706917e-05, "loss": 0.7434, "step": 3088 }, { "epoch": 0.46, "learning_rate": 1.1751809698752809e-05, "loss": 0.7942, "step": 3089 }, { "epoch": 0.46, "learning_rate": 1.1747053209953265e-05, "loss": 0.7533, "step": 3090 }, { "epoch": 0.46, "learning_rate": 1.1742296313418377e-05, "loss": 0.7247, "step": 3091 }, { "epoch": 0.46, "learning_rate": 1.1737539010258332e-05, "loss": 0.7587, "step": 3092 }, { "epoch": 0.46, "learning_rate": 1.1732781301583404e-05, "loss": 0.8181, "step": 3093 }, { "epoch": 0.46, "learning_rate": 1.1728023188503974e-05, "loss": 0.7294, "step": 3094 }, { "epoch": 0.46, "learning_rate": 1.1723264672130515e-05, "loss": 0.7226, "step": 3095 }, { "epoch": 0.46, "learning_rate": 1.1718505753573588e-05, "loss": 0.7451, "step": 3096 }, { "epoch": 0.46, "learning_rate": 1.1713746433943851e-05, "loss": 0.7097, "step": 3097 }, { "epoch": 0.46, "learning_rate": 1.1708986714352059e-05, "loss": 0.8388, "step": 3098 }, { "epoch": 0.46, "learning_rate": 1.1704226595909054e-05, "loss": 0.7729, "step": 3099 }, { "epoch": 0.46, "learning_rate": 1.169946607972578e-05, "loss": 0.77, "step": 3100 }, { "epoch": 0.46, "learning_rate": 1.1694705166913259e-05, "loss": 0.7249, "step": 3101 }, { "epoch": 0.46, "learning_rate": 1.1689943858582625e-05, "loss": 0.8196, "step": 3102 }, { "epoch": 0.46, "learning_rate": 1.1685182155845082e-05, "loss": 0.8044, "step": 3103 }, { "epoch": 0.46, "learning_rate": 1.168042005981195e-05, "loss": 0.8155, "step": 3104 }, { "epoch": 0.46, "learning_rate": 1.1675657571594626e-05, "loss": 0.6834, "step": 3105 }, { "epoch": 0.46, "learning_rate": 1.1670894692304597e-05, "loss": 0.7211, "step": 3106 }, { "epoch": 0.46, "learning_rate": 1.1666131423053451e-05, "loss": 0.7437, "step": 3107 }, { "epoch": 0.46, "learning_rate": 1.1661367764952856e-05, "loss": 0.7403, "step": 3108 }, { "epoch": 0.46, "learning_rate": 1.1656603719114583e-05, "loss": 0.6913, "step": 3109 }, { "epoch": 0.46, "learning_rate": 1.1651839286650484e-05, "loss": 0.7297, "step": 3110 }, { "epoch": 0.46, "learning_rate": 1.1647074468672503e-05, "loss": 0.7893, "step": 3111 }, { "epoch": 0.46, "learning_rate": 1.1642309266292674e-05, "loss": 0.7277, "step": 3112 }, { "epoch": 0.46, "learning_rate": 1.1637543680623127e-05, "loss": 0.7511, "step": 3113 }, { "epoch": 0.46, "learning_rate": 1.1632777712776076e-05, "loss": 0.7829, "step": 3114 }, { "epoch": 0.46, "learning_rate": 1.1628011363863819e-05, "loss": 0.7677, "step": 3115 }, { "epoch": 0.46, "learning_rate": 1.1623244634998752e-05, "loss": 0.7298, "step": 3116 }, { "epoch": 0.46, "learning_rate": 1.1618477527293356e-05, "loss": 0.7583, "step": 3117 }, { "epoch": 0.47, "learning_rate": 1.16137100418602e-05, "loss": 0.7454, "step": 3118 }, { "epoch": 0.47, "learning_rate": 1.1608942179811943e-05, "loss": 0.7535, "step": 3119 }, { "epoch": 0.47, "learning_rate": 1.1604173942261332e-05, "loss": 0.7007, "step": 3120 }, { "epoch": 0.47, "learning_rate": 1.1599405330321196e-05, "loss": 0.7611, "step": 3121 }, { "epoch": 0.47, "learning_rate": 1.159463634510446e-05, "loss": 0.7862, "step": 3122 }, { "epoch": 0.47, "learning_rate": 1.1589866987724128e-05, "loss": 0.7145, "step": 3123 }, { "epoch": 0.47, "learning_rate": 1.1585097259293296e-05, "loss": 0.7844, "step": 3124 }, { "epoch": 0.47, "learning_rate": 1.1580327160925149e-05, "loss": 0.7899, "step": 3125 }, { "epoch": 0.47, "learning_rate": 1.1575556693732948e-05, "loss": 0.675, "step": 3126 }, { "epoch": 0.47, "learning_rate": 1.1570785858830051e-05, "loss": 0.7737, "step": 3127 }, { "epoch": 0.47, "learning_rate": 1.1566014657329897e-05, "loss": 0.7646, "step": 3128 }, { "epoch": 0.47, "learning_rate": 1.1561243090346013e-05, "loss": 0.8249, "step": 3129 }, { "epoch": 0.47, "learning_rate": 1.1556471158992002e-05, "loss": 0.7378, "step": 3130 }, { "epoch": 0.47, "learning_rate": 1.1551698864381564e-05, "loss": 0.7465, "step": 3131 }, { "epoch": 0.47, "learning_rate": 1.1546926207628484e-05, "loss": 0.709, "step": 3132 }, { "epoch": 0.47, "learning_rate": 1.1542153189846618e-05, "loss": 0.7618, "step": 3133 }, { "epoch": 0.47, "learning_rate": 1.153737981214992e-05, "loss": 0.6801, "step": 3134 }, { "epoch": 0.47, "learning_rate": 1.153260607565242e-05, "loss": 0.7142, "step": 3135 }, { "epoch": 0.47, "learning_rate": 1.1527831981468243e-05, "loss": 0.7971, "step": 3136 }, { "epoch": 0.47, "learning_rate": 1.1523057530711579e-05, "loss": 0.7298, "step": 3137 }, { "epoch": 0.47, "learning_rate": 1.1518282724496714e-05, "loss": 0.8595, "step": 3138 }, { "epoch": 0.47, "learning_rate": 1.1513507563938017e-05, "loss": 0.6224, "step": 3139 }, { "epoch": 0.47, "learning_rate": 1.1508732050149934e-05, "loss": 0.7716, "step": 3140 }, { "epoch": 0.47, "learning_rate": 1.1503956184246996e-05, "loss": 0.6937, "step": 3141 }, { "epoch": 0.47, "learning_rate": 1.1499179967343822e-05, "loss": 0.6866, "step": 3142 }, { "epoch": 0.47, "learning_rate": 1.1494403400555106e-05, "loss": 0.7914, "step": 3143 }, { "epoch": 0.47, "learning_rate": 1.1489626484995618e-05, "loss": 0.7988, "step": 3144 }, { "epoch": 0.47, "learning_rate": 1.1484849221780224e-05, "loss": 0.7869, "step": 3145 }, { "epoch": 0.47, "learning_rate": 1.1480071612023862e-05, "loss": 0.7726, "step": 3146 }, { "epoch": 0.47, "learning_rate": 1.1475293656841553e-05, "loss": 0.7978, "step": 3147 }, { "epoch": 0.47, "learning_rate": 1.1470515357348395e-05, "loss": 0.8407, "step": 3148 }, { "epoch": 0.47, "learning_rate": 1.1465736714659573e-05, "loss": 0.7928, "step": 3149 }, { "epoch": 0.47, "learning_rate": 1.1460957729890349e-05, "loss": 0.7859, "step": 3150 }, { "epoch": 0.47, "learning_rate": 1.1456178404156064e-05, "loss": 0.7118, "step": 3151 }, { "epoch": 0.47, "learning_rate": 1.145139873857214e-05, "loss": 0.7054, "step": 3152 }, { "epoch": 0.47, "learning_rate": 1.1446618734254069e-05, "loss": 0.865, "step": 3153 }, { "epoch": 0.47, "learning_rate": 1.1441838392317443e-05, "loss": 0.6823, "step": 3154 }, { "epoch": 0.47, "learning_rate": 1.1437057713877914e-05, "loss": 0.7558, "step": 3155 }, { "epoch": 0.47, "learning_rate": 1.143227670005122e-05, "loss": 0.7713, "step": 3156 }, { "epoch": 0.47, "learning_rate": 1.1427495351953172e-05, "loss": 0.7643, "step": 3157 }, { "epoch": 0.47, "learning_rate": 1.142271367069967e-05, "loss": 0.7703, "step": 3158 }, { "epoch": 0.47, "learning_rate": 1.1417931657406683e-05, "loss": 0.7105, "step": 3159 }, { "epoch": 0.47, "learning_rate": 1.1413149313190253e-05, "loss": 0.7288, "step": 3160 }, { "epoch": 0.47, "learning_rate": 1.1408366639166513e-05, "loss": 0.7966, "step": 3161 }, { "epoch": 0.47, "learning_rate": 1.1403583636451663e-05, "loss": 0.8312, "step": 3162 }, { "epoch": 0.47, "learning_rate": 1.1398800306161983e-05, "loss": 0.719, "step": 3163 }, { "epoch": 0.47, "learning_rate": 1.1394016649413827e-05, "loss": 0.6952, "step": 3164 }, { "epoch": 0.47, "learning_rate": 1.1389232667323627e-05, "loss": 0.7239, "step": 3165 }, { "epoch": 0.47, "learning_rate": 1.1384448361007894e-05, "loss": 0.7729, "step": 3166 }, { "epoch": 0.47, "learning_rate": 1.1379663731583205e-05, "loss": 0.7917, "step": 3167 }, { "epoch": 0.47, "learning_rate": 1.1374878780166227e-05, "loss": 0.3766, "step": 3168 }, { "epoch": 0.47, "learning_rate": 1.137009350787369e-05, "loss": 0.8543, "step": 3169 }, { "epoch": 0.47, "learning_rate": 1.1365307915822404e-05, "loss": 0.8214, "step": 3170 }, { "epoch": 0.47, "learning_rate": 1.1360522005129248e-05, "loss": 0.8517, "step": 3171 }, { "epoch": 0.47, "learning_rate": 1.1355735776911184e-05, "loss": 0.754, "step": 3172 }, { "epoch": 0.47, "learning_rate": 1.1350949232285246e-05, "loss": 0.7255, "step": 3173 }, { "epoch": 0.47, "learning_rate": 1.1346162372368536e-05, "loss": 0.8355, "step": 3174 }, { "epoch": 0.47, "learning_rate": 1.1341375198278235e-05, "loss": 0.7234, "step": 3175 }, { "epoch": 0.47, "learning_rate": 1.1336587711131597e-05, "loss": 0.852, "step": 3176 }, { "epoch": 0.47, "learning_rate": 1.1331799912045946e-05, "loss": 0.8035, "step": 3177 }, { "epoch": 0.47, "learning_rate": 1.1327011802138682e-05, "loss": 0.6866, "step": 3178 }, { "epoch": 0.47, "learning_rate": 1.1322223382527275e-05, "loss": 0.8356, "step": 3179 }, { "epoch": 0.47, "learning_rate": 1.1317434654329271e-05, "loss": 0.6432, "step": 3180 }, { "epoch": 0.47, "learning_rate": 1.1312645618662279e-05, "loss": 0.7617, "step": 3181 }, { "epoch": 0.47, "learning_rate": 1.1307856276643993e-05, "loss": 0.3301, "step": 3182 }, { "epoch": 0.47, "learning_rate": 1.130306662939217e-05, "loss": 0.7994, "step": 3183 }, { "epoch": 0.47, "learning_rate": 1.1298276678024642e-05, "loss": 0.81, "step": 3184 }, { "epoch": 0.48, "learning_rate": 1.1293486423659303e-05, "loss": 0.8741, "step": 3185 }, { "epoch": 0.48, "learning_rate": 1.1288695867414131e-05, "loss": 0.7675, "step": 3186 }, { "epoch": 0.48, "learning_rate": 1.1283905010407166e-05, "loss": 0.7429, "step": 3187 }, { "epoch": 0.48, "learning_rate": 1.1279113853756523e-05, "loss": 0.6432, "step": 3188 }, { "epoch": 0.48, "learning_rate": 1.1274322398580378e-05, "loss": 0.6863, "step": 3189 }, { "epoch": 0.48, "learning_rate": 1.1269530645996989e-05, "loss": 0.6826, "step": 3190 }, { "epoch": 0.48, "learning_rate": 1.1264738597124677e-05, "loss": 0.7895, "step": 3191 }, { "epoch": 0.48, "learning_rate": 1.1259946253081828e-05, "loss": 0.7157, "step": 3192 }, { "epoch": 0.48, "learning_rate": 1.1255153614986904e-05, "loss": 0.6704, "step": 3193 }, { "epoch": 0.48, "learning_rate": 1.1250360683958433e-05, "loss": 0.8351, "step": 3194 }, { "epoch": 0.48, "learning_rate": 1.1245567461115012e-05, "loss": 0.8095, "step": 3195 }, { "epoch": 0.48, "learning_rate": 1.1240773947575308e-05, "loss": 0.7542, "step": 3196 }, { "epoch": 0.48, "learning_rate": 1.1235980144458045e-05, "loss": 0.7597, "step": 3197 }, { "epoch": 0.48, "learning_rate": 1.1231186052882032e-05, "loss": 0.7931, "step": 3198 }, { "epoch": 0.48, "learning_rate": 1.122639167396613e-05, "loss": 0.809, "step": 3199 }, { "epoch": 0.48, "learning_rate": 1.122159700882928e-05, "loss": 0.7488, "step": 3200 }, { "epoch": 0.48, "learning_rate": 1.1216802058590473e-05, "loss": 0.8313, "step": 3201 }, { "epoch": 0.48, "learning_rate": 1.1212006824368787e-05, "loss": 0.8621, "step": 3202 }, { "epoch": 0.48, "learning_rate": 1.1207211307283348e-05, "loss": 0.7685, "step": 3203 }, { "epoch": 0.48, "learning_rate": 1.1202415508453359e-05, "loss": 0.6959, "step": 3204 }, { "epoch": 0.48, "learning_rate": 1.119761942899809e-05, "loss": 0.6547, "step": 3205 }, { "epoch": 0.48, "learning_rate": 1.1192823070036867e-05, "loss": 0.6714, "step": 3206 }, { "epoch": 0.48, "learning_rate": 1.1188026432689085e-05, "loss": 0.6929, "step": 3207 }, { "epoch": 0.48, "learning_rate": 1.1183229518074208e-05, "loss": 0.8406, "step": 3208 }, { "epoch": 0.48, "learning_rate": 1.1178432327311763e-05, "loss": 0.7757, "step": 3209 }, { "epoch": 0.48, "learning_rate": 1.1173634861521341e-05, "loss": 0.7441, "step": 3210 }, { "epoch": 0.48, "learning_rate": 1.1168837121822596e-05, "loss": 0.7306, "step": 3211 }, { "epoch": 0.48, "learning_rate": 1.116403910933524e-05, "loss": 0.73, "step": 3212 }, { "epoch": 0.48, "learning_rate": 1.1159240825179064e-05, "loss": 0.7236, "step": 3213 }, { "epoch": 0.48, "learning_rate": 1.1154442270473915e-05, "loss": 0.7142, "step": 3214 }, { "epoch": 0.48, "learning_rate": 1.1149643446339693e-05, "loss": 0.6885, "step": 3215 }, { "epoch": 0.48, "learning_rate": 1.1144844353896378e-05, "loss": 0.829, "step": 3216 }, { "epoch": 0.48, "learning_rate": 1.1140044994263996e-05, "loss": 0.7296, "step": 3217 }, { "epoch": 0.48, "learning_rate": 1.1135245368562653e-05, "loss": 0.746, "step": 3218 }, { "epoch": 0.48, "learning_rate": 1.1130445477912502e-05, "loss": 0.6725, "step": 3219 }, { "epoch": 0.48, "learning_rate": 1.1125645323433765e-05, "loss": 0.8137, "step": 3220 }, { "epoch": 0.48, "learning_rate": 1.1120844906246724e-05, "loss": 0.7803, "step": 3221 }, { "epoch": 0.48, "learning_rate": 1.1116044227471723e-05, "loss": 0.8817, "step": 3222 }, { "epoch": 0.48, "learning_rate": 1.1111243288229165e-05, "loss": 0.7048, "step": 3223 }, { "epoch": 0.48, "learning_rate": 1.1106442089639519e-05, "loss": 0.7187, "step": 3224 }, { "epoch": 0.48, "learning_rate": 1.1101640632823309e-05, "loss": 0.7371, "step": 3225 }, { "epoch": 0.48, "learning_rate": 1.1096838918901119e-05, "loss": 0.7411, "step": 3226 }, { "epoch": 0.48, "learning_rate": 1.1092036948993595e-05, "loss": 0.672, "step": 3227 }, { "epoch": 0.48, "learning_rate": 1.108723472422145e-05, "loss": 0.7395, "step": 3228 }, { "epoch": 0.48, "learning_rate": 1.1082432245705442e-05, "loss": 0.321, "step": 3229 }, { "epoch": 0.48, "learning_rate": 1.1077629514566395e-05, "loss": 0.861, "step": 3230 }, { "epoch": 0.48, "learning_rate": 1.1072826531925196e-05, "loss": 0.7401, "step": 3231 }, { "epoch": 0.48, "learning_rate": 1.106802329890279e-05, "loss": 0.7034, "step": 3232 }, { "epoch": 0.48, "learning_rate": 1.1063219816620175e-05, "loss": 0.7416, "step": 3233 }, { "epoch": 0.48, "learning_rate": 1.1058416086198404e-05, "loss": 0.593, "step": 3234 }, { "epoch": 0.48, "learning_rate": 1.1053612108758602e-05, "loss": 0.7484, "step": 3235 }, { "epoch": 0.48, "learning_rate": 1.1048807885421942e-05, "loss": 0.683, "step": 3236 }, { "epoch": 0.48, "learning_rate": 1.1044003417309655e-05, "loss": 0.7703, "step": 3237 }, { "epoch": 0.48, "learning_rate": 1.1039198705543026e-05, "loss": 0.7768, "step": 3238 }, { "epoch": 0.48, "learning_rate": 1.1034393751243406e-05, "loss": 0.3225, "step": 3239 }, { "epoch": 0.48, "learning_rate": 1.1029588555532195e-05, "loss": 0.7657, "step": 3240 }, { "epoch": 0.48, "learning_rate": 1.102478311953085e-05, "loss": 0.7779, "step": 3241 }, { "epoch": 0.48, "learning_rate": 1.1019977444360892e-05, "loss": 0.7604, "step": 3242 }, { "epoch": 0.48, "learning_rate": 1.1015171531143888e-05, "loss": 0.6946, "step": 3243 }, { "epoch": 0.48, "learning_rate": 1.1010365381001462e-05, "loss": 0.724, "step": 3244 }, { "epoch": 0.48, "learning_rate": 1.1005558995055296e-05, "loss": 0.7585, "step": 3245 }, { "epoch": 0.48, "learning_rate": 1.1000752374427133e-05, "loss": 0.6868, "step": 3246 }, { "epoch": 0.48, "learning_rate": 1.0995945520238758e-05, "loss": 0.7921, "step": 3247 }, { "epoch": 0.48, "learning_rate": 1.0991138433612015e-05, "loss": 0.7491, "step": 3248 }, { "epoch": 0.48, "learning_rate": 1.0986331115668807e-05, "loss": 0.7631, "step": 3249 }, { "epoch": 0.48, "learning_rate": 1.0981523567531092e-05, "loss": 0.7289, "step": 3250 }, { "epoch": 0.48, "learning_rate": 1.0976715790320873e-05, "loss": 0.7307, "step": 3251 }, { "epoch": 0.49, "learning_rate": 1.0971907785160213e-05, "loss": 0.7256, "step": 3252 }, { "epoch": 0.49, "learning_rate": 1.0967099553171225e-05, "loss": 0.6753, "step": 3253 }, { "epoch": 0.49, "learning_rate": 1.0962291095476077e-05, "loss": 0.6996, "step": 3254 }, { "epoch": 0.49, "learning_rate": 1.0957482413196989e-05, "loss": 0.6677, "step": 3255 }, { "epoch": 0.49, "learning_rate": 1.0952673507456235e-05, "loss": 0.7269, "step": 3256 }, { "epoch": 0.49, "learning_rate": 1.0947864379376139e-05, "loss": 0.7724, "step": 3257 }, { "epoch": 0.49, "learning_rate": 1.0943055030079076e-05, "loss": 0.7559, "step": 3258 }, { "epoch": 0.49, "learning_rate": 1.0938245460687474e-05, "loss": 0.726, "step": 3259 }, { "epoch": 0.49, "learning_rate": 1.0933435672323813e-05, "loss": 0.719, "step": 3260 }, { "epoch": 0.49, "learning_rate": 1.0928625666110629e-05, "loss": 0.7695, "step": 3261 }, { "epoch": 0.49, "learning_rate": 1.0923815443170494e-05, "loss": 0.7659, "step": 3262 }, { "epoch": 0.49, "learning_rate": 1.0919005004626044e-05, "loss": 0.7557, "step": 3263 }, { "epoch": 0.49, "learning_rate": 1.0914194351599964e-05, "loss": 0.8005, "step": 3264 }, { "epoch": 0.49, "learning_rate": 1.0909383485214987e-05, "loss": 0.7691, "step": 3265 }, { "epoch": 0.49, "learning_rate": 1.0904572406593891e-05, "loss": 0.7742, "step": 3266 }, { "epoch": 0.49, "learning_rate": 1.0899761116859508e-05, "loss": 0.7523, "step": 3267 }, { "epoch": 0.49, "learning_rate": 1.0894949617134721e-05, "loss": 0.7327, "step": 3268 }, { "epoch": 0.49, "learning_rate": 1.0890137908542461e-05, "loss": 0.7308, "step": 3269 }, { "epoch": 0.49, "learning_rate": 1.0885325992205705e-05, "loss": 0.8291, "step": 3270 }, { "epoch": 0.49, "learning_rate": 1.088051386924748e-05, "loss": 0.8469, "step": 3271 }, { "epoch": 0.49, "learning_rate": 1.087570154079086e-05, "loss": 0.8025, "step": 3272 }, { "epoch": 0.49, "learning_rate": 1.0870889007958973e-05, "loss": 0.7931, "step": 3273 }, { "epoch": 0.49, "learning_rate": 1.0866076271874988e-05, "loss": 0.8257, "step": 3274 }, { "epoch": 0.49, "learning_rate": 1.086126333366212e-05, "loss": 0.6699, "step": 3275 }, { "epoch": 0.49, "learning_rate": 1.085645019444364e-05, "loss": 0.7311, "step": 3276 }, { "epoch": 0.49, "learning_rate": 1.085163685534286e-05, "loss": 0.8081, "step": 3277 }, { "epoch": 0.49, "learning_rate": 1.0846823317483135e-05, "loss": 0.6957, "step": 3278 }, { "epoch": 0.49, "learning_rate": 1.0842009581987876e-05, "loss": 0.8125, "step": 3279 }, { "epoch": 0.49, "learning_rate": 1.0837195649980532e-05, "loss": 0.6781, "step": 3280 }, { "epoch": 0.49, "learning_rate": 1.0832381522584598e-05, "loss": 0.7725, "step": 3281 }, { "epoch": 0.49, "learning_rate": 1.0827567200923622e-05, "loss": 0.7702, "step": 3282 }, { "epoch": 0.49, "learning_rate": 1.0822752686121195e-05, "loss": 0.7908, "step": 3283 }, { "epoch": 0.49, "learning_rate": 1.0817937979300943e-05, "loss": 0.7879, "step": 3284 }, { "epoch": 0.49, "learning_rate": 1.0813123081586547e-05, "loss": 0.7481, "step": 3285 }, { "epoch": 0.49, "learning_rate": 1.0808307994101735e-05, "loss": 0.8184, "step": 3286 }, { "epoch": 0.49, "learning_rate": 1.0803492717970268e-05, "loss": 0.718, "step": 3287 }, { "epoch": 0.49, "learning_rate": 1.0798677254315961e-05, "loss": 0.6918, "step": 3288 }, { "epoch": 0.49, "learning_rate": 1.0793861604262668e-05, "loss": 0.7613, "step": 3289 }, { "epoch": 0.49, "learning_rate": 1.0789045768934285e-05, "loss": 0.7394, "step": 3290 }, { "epoch": 0.49, "learning_rate": 1.078422974945476e-05, "loss": 0.7197, "step": 3291 }, { "epoch": 0.49, "learning_rate": 1.0779413546948074e-05, "loss": 0.7456, "step": 3292 }, { "epoch": 0.49, "learning_rate": 1.0774597162538254e-05, "loss": 0.7532, "step": 3293 }, { "epoch": 0.49, "learning_rate": 1.0769780597349374e-05, "loss": 0.7724, "step": 3294 }, { "epoch": 0.49, "learning_rate": 1.076496385250554e-05, "loss": 0.7132, "step": 3295 }, { "epoch": 0.49, "learning_rate": 1.0760146929130912e-05, "loss": 0.7372, "step": 3296 }, { "epoch": 0.49, "learning_rate": 1.075532982834968e-05, "loss": 0.7841, "step": 3297 }, { "epoch": 0.49, "learning_rate": 1.0750512551286089e-05, "loss": 0.7577, "step": 3298 }, { "epoch": 0.49, "learning_rate": 1.074569509906441e-05, "loss": 0.7565, "step": 3299 }, { "epoch": 0.49, "learning_rate": 1.0740877472808964e-05, "loss": 0.7322, "step": 3300 }, { "epoch": 0.49, "learning_rate": 1.0736059673644115e-05, "loss": 0.7428, "step": 3301 }, { "epoch": 0.49, "learning_rate": 1.0731241702694261e-05, "loss": 0.6918, "step": 3302 }, { "epoch": 0.49, "learning_rate": 1.0726423561083839e-05, "loss": 0.7061, "step": 3303 }, { "epoch": 0.49, "learning_rate": 1.072160524993733e-05, "loss": 0.336, "step": 3304 }, { "epoch": 0.49, "learning_rate": 1.071678677037926e-05, "loss": 0.7753, "step": 3305 }, { "epoch": 0.49, "learning_rate": 1.0711968123534183e-05, "loss": 0.8241, "step": 3306 }, { "epoch": 0.49, "learning_rate": 1.0707149310526697e-05, "loss": 0.7268, "step": 3307 }, { "epoch": 0.49, "learning_rate": 1.0702330332481434e-05, "loss": 0.7557, "step": 3308 }, { "epoch": 0.49, "learning_rate": 1.069751119052308e-05, "loss": 0.7763, "step": 3309 }, { "epoch": 0.49, "learning_rate": 1.0692691885776343e-05, "loss": 0.7922, "step": 3310 }, { "epoch": 0.49, "learning_rate": 1.0687872419365976e-05, "loss": 0.7255, "step": 3311 }, { "epoch": 0.49, "learning_rate": 1.0683052792416764e-05, "loss": 0.7408, "step": 3312 }, { "epoch": 0.49, "learning_rate": 1.0678233006053537e-05, "loss": 0.7558, "step": 3313 }, { "epoch": 0.49, "learning_rate": 1.0673413061401162e-05, "loss": 0.7787, "step": 3314 }, { "epoch": 0.49, "learning_rate": 1.0668592959584533e-05, "loss": 0.7929, "step": 3315 }, { "epoch": 0.49, "learning_rate": 1.0663772701728597e-05, "loss": 0.7366, "step": 3316 }, { "epoch": 0.49, "learning_rate": 1.0658952288958317e-05, "loss": 0.7751, "step": 3317 }, { "epoch": 0.49, "learning_rate": 1.0654131722398714e-05, "loss": 0.7844, "step": 3318 }, { "epoch": 0.5, "learning_rate": 1.0649311003174823e-05, "loss": 0.6896, "step": 3319 }, { "epoch": 0.5, "learning_rate": 1.0644490132411735e-05, "loss": 0.7824, "step": 3320 }, { "epoch": 0.5, "learning_rate": 1.0639669111234565e-05, "loss": 0.7188, "step": 3321 }, { "epoch": 0.5, "learning_rate": 1.063484794076846e-05, "loss": 0.7584, "step": 3322 }, { "epoch": 0.5, "learning_rate": 1.0630026622138614e-05, "loss": 0.7359, "step": 3323 }, { "epoch": 0.5, "learning_rate": 1.0625205156470245e-05, "loss": 0.8129, "step": 3324 }, { "epoch": 0.5, "learning_rate": 1.062038354488861e-05, "loss": 0.7833, "step": 3325 }, { "epoch": 0.5, "learning_rate": 1.0615561788518995e-05, "loss": 0.6621, "step": 3326 }, { "epoch": 0.5, "learning_rate": 1.061073988848673e-05, "loss": 0.7874, "step": 3327 }, { "epoch": 0.5, "learning_rate": 1.0605917845917169e-05, "loss": 0.846, "step": 3328 }, { "epoch": 0.5, "learning_rate": 1.0601095661935705e-05, "loss": 0.6329, "step": 3329 }, { "epoch": 0.5, "learning_rate": 1.0596273337667755e-05, "loss": 0.768, "step": 3330 }, { "epoch": 0.5, "learning_rate": 1.0591450874238782e-05, "loss": 0.8169, "step": 3331 }, { "epoch": 0.5, "learning_rate": 1.0586628272774275e-05, "loss": 0.7096, "step": 3332 }, { "epoch": 0.5, "learning_rate": 1.0581805534399752e-05, "loss": 0.7767, "step": 3333 }, { "epoch": 0.5, "learning_rate": 1.0576982660240764e-05, "loss": 0.3008, "step": 3334 }, { "epoch": 0.5, "learning_rate": 1.05721596514229e-05, "loss": 0.7576, "step": 3335 }, { "epoch": 0.5, "learning_rate": 1.0567336509071773e-05, "loss": 0.7544, "step": 3336 }, { "epoch": 0.5, "learning_rate": 1.0562513234313032e-05, "loss": 0.7367, "step": 3337 }, { "epoch": 0.5, "learning_rate": 1.0557689828272357e-05, "loss": 0.3566, "step": 3338 }, { "epoch": 0.5, "learning_rate": 1.0552866292075455e-05, "loss": 0.7899, "step": 3339 }, { "epoch": 0.5, "learning_rate": 1.054804262684806e-05, "loss": 0.7429, "step": 3340 }, { "epoch": 0.5, "learning_rate": 1.0543218833715946e-05, "loss": 0.688, "step": 3341 }, { "epoch": 0.5, "learning_rate": 1.0538394913804916e-05, "loss": 0.7909, "step": 3342 }, { "epoch": 0.5, "learning_rate": 1.0533570868240794e-05, "loss": 0.6333, "step": 3343 }, { "epoch": 0.5, "learning_rate": 1.0528746698149436e-05, "loss": 0.7776, "step": 3344 }, { "epoch": 0.5, "learning_rate": 1.0523922404656734e-05, "loss": 0.7361, "step": 3345 }, { "epoch": 0.5, "learning_rate": 1.0519097988888602e-05, "loss": 0.774, "step": 3346 }, { "epoch": 0.5, "learning_rate": 1.0514273451970987e-05, "loss": 0.8493, "step": 3347 }, { "epoch": 0.5, "learning_rate": 1.0509448795029855e-05, "loss": 0.7813, "step": 3348 }, { "epoch": 0.5, "learning_rate": 1.0504624019191207e-05, "loss": 0.8293, "step": 3349 }, { "epoch": 0.5, "learning_rate": 1.0499799125581082e-05, "loss": 0.8116, "step": 3350 }, { "epoch": 0.5, "learning_rate": 1.0494974115325527e-05, "loss": 0.7441, "step": 3351 }, { "epoch": 0.5, "learning_rate": 1.0490148989550626e-05, "loss": 0.7341, "step": 3352 }, { "epoch": 0.5, "learning_rate": 1.0485323749382492e-05, "loss": 0.7292, "step": 3353 }, { "epoch": 0.5, "learning_rate": 1.0480498395947259e-05, "loss": 0.7289, "step": 3354 }, { "epoch": 0.5, "learning_rate": 1.047567293037109e-05, "loss": 0.6797, "step": 3355 }, { "epoch": 0.5, "learning_rate": 1.0470847353780178e-05, "loss": 0.609, "step": 3356 }, { "epoch": 0.5, "learning_rate": 1.0466021667300737e-05, "loss": 0.7395, "step": 3357 }, { "epoch": 0.5, "learning_rate": 1.0461195872059002e-05, "loss": 0.7209, "step": 3358 }, { "epoch": 0.5, "learning_rate": 1.045636996918125e-05, "loss": 0.7425, "step": 3359 }, { "epoch": 0.5, "learning_rate": 1.0451543959793764e-05, "loss": 0.7771, "step": 3360 }, { "epoch": 0.5, "learning_rate": 1.0446717845022866e-05, "loss": 0.6901, "step": 3361 }, { "epoch": 0.5, "learning_rate": 1.0441891625994895e-05, "loss": 0.7819, "step": 3362 }, { "epoch": 0.5, "learning_rate": 1.0437065303836212e-05, "loss": 0.7229, "step": 3363 }, { "epoch": 0.5, "learning_rate": 1.0432238879673213e-05, "loss": 0.6976, "step": 3364 }, { "epoch": 0.5, "learning_rate": 1.0427412354632308e-05, "loss": 0.8116, "step": 3365 }, { "epoch": 0.5, "learning_rate": 1.0422585729839933e-05, "loss": 0.763, "step": 3366 }, { "epoch": 0.5, "learning_rate": 1.041775900642255e-05, "loss": 0.7266, "step": 3367 }, { "epoch": 0.5, "learning_rate": 1.0412932185506637e-05, "loss": 0.7262, "step": 3368 }, { "epoch": 0.5, "learning_rate": 1.0408105268218709e-05, "loss": 0.7563, "step": 3369 }, { "epoch": 0.5, "learning_rate": 1.0403278255685288e-05, "loss": 0.7076, "step": 3370 }, { "epoch": 0.5, "learning_rate": 1.0398451149032923e-05, "loss": 0.3511, "step": 3371 }, { "epoch": 0.5, "learning_rate": 1.0393623949388188e-05, "loss": 0.7536, "step": 3372 }, { "epoch": 0.5, "learning_rate": 1.038879665787768e-05, "loss": 0.8221, "step": 3373 }, { "epoch": 0.5, "learning_rate": 1.038396927562801e-05, "loss": 0.846, "step": 3374 }, { "epoch": 0.5, "learning_rate": 1.0379141803765822e-05, "loss": 0.7876, "step": 3375 }, { "epoch": 0.5, "learning_rate": 1.0374314243417765e-05, "loss": 0.8348, "step": 3376 }, { "epoch": 0.5, "learning_rate": 1.0369486595710523e-05, "loss": 0.807, "step": 3377 }, { "epoch": 0.5, "learning_rate": 1.0364658861770791e-05, "loss": 0.7208, "step": 3378 }, { "epoch": 0.5, "learning_rate": 1.0359831042725292e-05, "loss": 0.6774, "step": 3379 }, { "epoch": 0.5, "learning_rate": 1.0355003139700766e-05, "loss": 0.7186, "step": 3380 }, { "epoch": 0.5, "learning_rate": 1.0350175153823963e-05, "loss": 0.7144, "step": 3381 }, { "epoch": 0.5, "learning_rate": 1.0345347086221665e-05, "loss": 0.8199, "step": 3382 }, { "epoch": 0.5, "learning_rate": 1.0340518938020672e-05, "loss": 0.7644, "step": 3383 }, { "epoch": 0.5, "learning_rate": 1.0335690710347794e-05, "loss": 0.8175, "step": 3384 }, { "epoch": 0.5, "learning_rate": 1.0330862404329869e-05, "loss": 0.7512, "step": 3385 }, { "epoch": 0.5, "learning_rate": 1.0326034021093747e-05, "loss": 0.7068, "step": 3386 }, { "epoch": 0.51, "learning_rate": 1.0321205561766304e-05, "loss": 0.7223, "step": 3387 }, { "epoch": 0.51, "learning_rate": 1.031637702747442e-05, "loss": 0.7996, "step": 3388 }, { "epoch": 0.51, "learning_rate": 1.0311548419345005e-05, "loss": 0.8172, "step": 3389 }, { "epoch": 0.51, "learning_rate": 1.030671973850498e-05, "loss": 0.7769, "step": 3390 }, { "epoch": 0.51, "learning_rate": 1.030189098608129e-05, "loss": 0.7663, "step": 3391 }, { "epoch": 0.51, "learning_rate": 1.0297062163200883e-05, "loss": 0.7595, "step": 3392 }, { "epoch": 0.51, "learning_rate": 1.0292233270990739e-05, "loss": 0.7555, "step": 3393 }, { "epoch": 0.51, "learning_rate": 1.0287404310577844e-05, "loss": 0.728, "step": 3394 }, { "epoch": 0.51, "learning_rate": 1.0282575283089203e-05, "loss": 0.8557, "step": 3395 }, { "epoch": 0.51, "learning_rate": 1.0277746189651838e-05, "loss": 0.8182, "step": 3396 }, { "epoch": 0.51, "learning_rate": 1.0272917031392787e-05, "loss": 0.6977, "step": 3397 }, { "epoch": 0.51, "learning_rate": 1.0268087809439096e-05, "loss": 0.6838, "step": 3398 }, { "epoch": 0.51, "learning_rate": 1.0263258524917834e-05, "loss": 0.6486, "step": 3399 }, { "epoch": 0.51, "learning_rate": 1.0258429178956082e-05, "loss": 0.7793, "step": 3400 }, { "epoch": 0.51, "learning_rate": 1.0253599772680936e-05, "loss": 0.6667, "step": 3401 }, { "epoch": 0.51, "learning_rate": 1.0248770307219504e-05, "loss": 0.7831, "step": 3402 }, { "epoch": 0.51, "learning_rate": 1.024394078369891e-05, "loss": 0.7029, "step": 3403 }, { "epoch": 0.51, "learning_rate": 1.0239111203246284e-05, "loss": 0.6927, "step": 3404 }, { "epoch": 0.51, "learning_rate": 1.0234281566988785e-05, "loss": 0.8166, "step": 3405 }, { "epoch": 0.51, "learning_rate": 1.0229451876053574e-05, "loss": 0.7269, "step": 3406 }, { "epoch": 0.51, "learning_rate": 1.0224622131567822e-05, "loss": 0.7524, "step": 3407 }, { "epoch": 0.51, "learning_rate": 1.0219792334658721e-05, "loss": 0.7577, "step": 3408 }, { "epoch": 0.51, "learning_rate": 1.0214962486453465e-05, "loss": 0.7789, "step": 3409 }, { "epoch": 0.51, "learning_rate": 1.0210132588079278e-05, "loss": 0.7602, "step": 3410 }, { "epoch": 0.51, "learning_rate": 1.0205302640663372e-05, "loss": 0.7693, "step": 3411 }, { "epoch": 0.51, "learning_rate": 1.0200472645332993e-05, "loss": 0.8065, "step": 3412 }, { "epoch": 0.51, "learning_rate": 1.0195642603215377e-05, "loss": 0.7372, "step": 3413 }, { "epoch": 0.51, "learning_rate": 1.0190812515437792e-05, "loss": 0.7449, "step": 3414 }, { "epoch": 0.51, "learning_rate": 1.0185982383127496e-05, "loss": 0.7527, "step": 3415 }, { "epoch": 0.51, "learning_rate": 1.0181152207411776e-05, "loss": 0.7102, "step": 3416 }, { "epoch": 0.51, "learning_rate": 1.0176321989417914e-05, "loss": 0.7472, "step": 3417 }, { "epoch": 0.51, "learning_rate": 1.0171491730273216e-05, "loss": 0.7298, "step": 3418 }, { "epoch": 0.51, "learning_rate": 1.0166661431104981e-05, "loss": 0.7384, "step": 3419 }, { "epoch": 0.51, "learning_rate": 1.0161831093040535e-05, "loss": 0.7439, "step": 3420 }, { "epoch": 0.51, "learning_rate": 1.0157000717207202e-05, "loss": 0.8038, "step": 3421 }, { "epoch": 0.51, "learning_rate": 1.0152170304732314e-05, "loss": 0.7247, "step": 3422 }, { "epoch": 0.51, "learning_rate": 1.0147339856743216e-05, "loss": 0.6826, "step": 3423 }, { "epoch": 0.51, "learning_rate": 1.0142509374367266e-05, "loss": 0.7561, "step": 3424 }, { "epoch": 0.51, "learning_rate": 1.013767885873182e-05, "loss": 0.697, "step": 3425 }, { "epoch": 0.51, "learning_rate": 1.0132848310964245e-05, "loss": 0.7262, "step": 3426 }, { "epoch": 0.51, "learning_rate": 1.0128017732191917e-05, "loss": 0.7764, "step": 3427 }, { "epoch": 0.51, "learning_rate": 1.012318712354222e-05, "loss": 0.7332, "step": 3428 }, { "epoch": 0.51, "learning_rate": 1.0118356486142545e-05, "loss": 0.7336, "step": 3429 }, { "epoch": 0.51, "learning_rate": 1.0113525821120282e-05, "loss": 0.7124, "step": 3430 }, { "epoch": 0.51, "learning_rate": 1.0108695129602841e-05, "loss": 0.7307, "step": 3431 }, { "epoch": 0.51, "learning_rate": 1.0103864412717632e-05, "loss": 0.7472, "step": 3432 }, { "epoch": 0.51, "learning_rate": 1.0099033671592064e-05, "loss": 0.6886, "step": 3433 }, { "epoch": 0.51, "learning_rate": 1.0094202907353558e-05, "loss": 0.815, "step": 3434 }, { "epoch": 0.51, "learning_rate": 1.0089372121129549e-05, "loss": 0.685, "step": 3435 }, { "epoch": 0.51, "learning_rate": 1.0084541314047455e-05, "loss": 0.3185, "step": 3436 }, { "epoch": 0.51, "learning_rate": 1.007971048723472e-05, "loss": 0.6826, "step": 3437 }, { "epoch": 0.51, "learning_rate": 1.0074879641818784e-05, "loss": 0.7834, "step": 3438 }, { "epoch": 0.51, "learning_rate": 1.0070048778927095e-05, "loss": 0.6617, "step": 3439 }, { "epoch": 0.51, "learning_rate": 1.0065217899687095e-05, "loss": 0.7407, "step": 3440 }, { "epoch": 0.51, "learning_rate": 1.0060387005226238e-05, "loss": 0.8016, "step": 3441 }, { "epoch": 0.51, "learning_rate": 1.0055556096671988e-05, "loss": 0.801, "step": 3442 }, { "epoch": 0.51, "learning_rate": 1.0050725175151799e-05, "loss": 0.6526, "step": 3443 }, { "epoch": 0.51, "learning_rate": 1.004589424179313e-05, "loss": 0.6756, "step": 3444 }, { "epoch": 0.51, "learning_rate": 1.0041063297723453e-05, "loss": 0.6481, "step": 3445 }, { "epoch": 0.51, "learning_rate": 1.0036232344070236e-05, "loss": 0.7908, "step": 3446 }, { "epoch": 0.51, "learning_rate": 1.0031401381960945e-05, "loss": 0.3425, "step": 3447 }, { "epoch": 0.51, "learning_rate": 1.0026570412523056e-05, "loss": 0.7124, "step": 3448 }, { "epoch": 0.51, "learning_rate": 1.0021739436884042e-05, "loss": 0.7389, "step": 3449 }, { "epoch": 0.51, "learning_rate": 1.0016908456171375e-05, "loss": 0.7085, "step": 3450 }, { "epoch": 0.51, "learning_rate": 1.0012077471512536e-05, "loss": 0.7466, "step": 3451 }, { "epoch": 0.51, "learning_rate": 1.0007246484035e-05, "loss": 0.7036, "step": 3452 }, { "epoch": 0.51, "learning_rate": 1.0002415494866249e-05, "loss": 0.7146, "step": 3453 }, { "epoch": 0.52, "learning_rate": 9.997584505133755e-06, "loss": 0.3275, "step": 3454 }, { "epoch": 0.52, "learning_rate": 9.992753515965e-06, "loss": 0.7653, "step": 3455 }, { "epoch": 0.52, "learning_rate": 9.987922528487466e-06, "loss": 0.6868, "step": 3456 }, { "epoch": 0.52, "learning_rate": 9.983091543828628e-06, "loss": 0.7109, "step": 3457 }, { "epoch": 0.52, "learning_rate": 9.978260563115961e-06, "loss": 0.7528, "step": 3458 }, { "epoch": 0.52, "learning_rate": 9.973429587476947e-06, "loss": 0.6872, "step": 3459 }, { "epoch": 0.52, "learning_rate": 9.96859861803906e-06, "loss": 0.7425, "step": 3460 }, { "epoch": 0.52, "learning_rate": 9.963767655929766e-06, "loss": 0.785, "step": 3461 }, { "epoch": 0.52, "learning_rate": 9.958936702276549e-06, "loss": 0.6904, "step": 3462 }, { "epoch": 0.52, "learning_rate": 9.954105758206873e-06, "loss": 0.8018, "step": 3463 }, { "epoch": 0.52, "learning_rate": 9.949274824848206e-06, "loss": 0.7754, "step": 3464 }, { "epoch": 0.52, "learning_rate": 9.944443903328016e-06, "loss": 0.7409, "step": 3465 }, { "epoch": 0.52, "learning_rate": 9.939612994773764e-06, "loss": 0.7341, "step": 3466 }, { "epoch": 0.52, "learning_rate": 9.934782100312908e-06, "loss": 0.7511, "step": 3467 }, { "epoch": 0.52, "learning_rate": 9.929951221072912e-06, "loss": 0.7576, "step": 3468 }, { "epoch": 0.52, "learning_rate": 9.925120358181215e-06, "loss": 0.7393, "step": 3469 }, { "epoch": 0.52, "learning_rate": 9.920289512765281e-06, "loss": 0.653, "step": 3470 }, { "epoch": 0.52, "learning_rate": 9.915458685952548e-06, "loss": 0.7495, "step": 3471 }, { "epoch": 0.52, "learning_rate": 9.910627878870456e-06, "loss": 0.7933, "step": 3472 }, { "epoch": 0.52, "learning_rate": 9.905797092646444e-06, "loss": 0.7197, "step": 3473 }, { "epoch": 0.52, "learning_rate": 9.90096632840794e-06, "loss": 0.6811, "step": 3474 }, { "epoch": 0.52, "learning_rate": 9.896135587282373e-06, "loss": 0.7295, "step": 3475 }, { "epoch": 0.52, "learning_rate": 9.891304870397159e-06, "loss": 0.7607, "step": 3476 }, { "epoch": 0.52, "learning_rate": 9.886474178879718e-06, "loss": 0.7858, "step": 3477 }, { "epoch": 0.52, "learning_rate": 9.881643513857458e-06, "loss": 0.7868, "step": 3478 }, { "epoch": 0.52, "learning_rate": 9.876812876457782e-06, "loss": 0.7201, "step": 3479 }, { "epoch": 0.52, "learning_rate": 9.871982267808086e-06, "loss": 0.767, "step": 3480 }, { "epoch": 0.52, "learning_rate": 9.867151689035758e-06, "loss": 0.7169, "step": 3481 }, { "epoch": 0.52, "learning_rate": 9.862321141268183e-06, "loss": 0.3102, "step": 3482 }, { "epoch": 0.52, "learning_rate": 9.857490625632737e-06, "loss": 0.7124, "step": 3483 }, { "epoch": 0.52, "learning_rate": 9.852660143256782e-06, "loss": 0.8745, "step": 3484 }, { "epoch": 0.52, "learning_rate": 9.84782969526769e-06, "loss": 0.769, "step": 3485 }, { "epoch": 0.52, "learning_rate": 9.8429992827928e-06, "loss": 0.6903, "step": 3486 }, { "epoch": 0.52, "learning_rate": 9.838168906959467e-06, "loss": 0.3231, "step": 3487 }, { "epoch": 0.52, "learning_rate": 9.83333856889502e-06, "loss": 0.7604, "step": 3488 }, { "epoch": 0.52, "learning_rate": 9.828508269726791e-06, "loss": 0.7001, "step": 3489 }, { "epoch": 0.52, "learning_rate": 9.82367801058209e-06, "loss": 0.788, "step": 3490 }, { "epoch": 0.52, "learning_rate": 9.818847792588226e-06, "loss": 0.7252, "step": 3491 }, { "epoch": 0.52, "learning_rate": 9.814017616872506e-06, "loss": 0.6455, "step": 3492 }, { "epoch": 0.52, "learning_rate": 9.809187484562213e-06, "loss": 0.7208, "step": 3493 }, { "epoch": 0.52, "learning_rate": 9.804357396784627e-06, "loss": 0.7354, "step": 3494 }, { "epoch": 0.52, "learning_rate": 9.79952735466701e-06, "loss": 0.7645, "step": 3495 }, { "epoch": 0.52, "learning_rate": 9.79469735933663e-06, "loss": 0.7298, "step": 3496 }, { "epoch": 0.52, "learning_rate": 9.789867411920727e-06, "loss": 0.3356, "step": 3497 }, { "epoch": 0.52, "learning_rate": 9.785037513546535e-06, "loss": 0.762, "step": 3498 }, { "epoch": 0.52, "learning_rate": 9.780207665341284e-06, "loss": 0.7628, "step": 3499 }, { "epoch": 0.52, "learning_rate": 9.77537786843218e-06, "loss": 0.6386, "step": 3500 }, { "epoch": 0.52, "learning_rate": 9.77054812394643e-06, "loss": 0.698, "step": 3501 }, { "epoch": 0.52, "learning_rate": 9.765718433011218e-06, "loss": 0.7248, "step": 3502 }, { "epoch": 0.52, "learning_rate": 9.76088879675372e-06, "loss": 0.7028, "step": 3503 }, { "epoch": 0.52, "learning_rate": 9.756059216301096e-06, "loss": 0.7112, "step": 3504 }, { "epoch": 0.52, "learning_rate": 9.7512296927805e-06, "loss": 0.7601, "step": 3505 }, { "epoch": 0.52, "learning_rate": 9.746400227319064e-06, "loss": 0.7699, "step": 3506 }, { "epoch": 0.52, "learning_rate": 9.74157082104392e-06, "loss": 0.3163, "step": 3507 }, { "epoch": 0.52, "learning_rate": 9.736741475082169e-06, "loss": 0.7329, "step": 3508 }, { "epoch": 0.52, "learning_rate": 9.731912190560907e-06, "loss": 0.6705, "step": 3509 }, { "epoch": 0.52, "learning_rate": 9.727082968607218e-06, "loss": 0.7447, "step": 3510 }, { "epoch": 0.52, "learning_rate": 9.722253810348165e-06, "loss": 0.7536, "step": 3511 }, { "epoch": 0.52, "learning_rate": 9.717424716910799e-06, "loss": 0.7209, "step": 3512 }, { "epoch": 0.52, "learning_rate": 9.712595689422158e-06, "loss": 0.8062, "step": 3513 }, { "epoch": 0.52, "learning_rate": 9.707766729009263e-06, "loss": 0.7291, "step": 3514 }, { "epoch": 0.52, "learning_rate": 9.702937836799118e-06, "loss": 0.6229, "step": 3515 }, { "epoch": 0.52, "learning_rate": 9.698109013918716e-06, "loss": 0.748, "step": 3516 }, { "epoch": 0.52, "learning_rate": 9.693280261495023e-06, "loss": 0.3356, "step": 3517 }, { "epoch": 0.52, "learning_rate": 9.688451580654998e-06, "loss": 0.7497, "step": 3518 }, { "epoch": 0.52, "learning_rate": 9.683622972525583e-06, "loss": 0.7359, "step": 3519 }, { "epoch": 0.52, "learning_rate": 9.678794438233701e-06, "loss": 0.7371, "step": 3520 }, { "epoch": 0.53, "learning_rate": 9.673965978906252e-06, "loss": 0.7694, "step": 3521 }, { "epoch": 0.53, "learning_rate": 9.669137595670133e-06, "loss": 0.7377, "step": 3522 }, { "epoch": 0.53, "learning_rate": 9.664309289652208e-06, "loss": 0.6496, "step": 3523 }, { "epoch": 0.53, "learning_rate": 9.659481061979333e-06, "loss": 0.7975, "step": 3524 }, { "epoch": 0.53, "learning_rate": 9.654652913778338e-06, "loss": 0.7137, "step": 3525 }, { "epoch": 0.53, "learning_rate": 9.64982484617604e-06, "loss": 0.829, "step": 3526 }, { "epoch": 0.53, "learning_rate": 9.64499686029924e-06, "loss": 0.7769, "step": 3527 }, { "epoch": 0.53, "learning_rate": 9.640168957274708e-06, "loss": 0.78, "step": 3528 }, { "epoch": 0.53, "learning_rate": 9.635341138229209e-06, "loss": 0.6783, "step": 3529 }, { "epoch": 0.53, "learning_rate": 9.63051340428948e-06, "loss": 0.77, "step": 3530 }, { "epoch": 0.53, "learning_rate": 9.625685756582238e-06, "loss": 0.716, "step": 3531 }, { "epoch": 0.53, "learning_rate": 9.620858196234183e-06, "loss": 0.782, "step": 3532 }, { "epoch": 0.53, "learning_rate": 9.616030724371991e-06, "loss": 0.7486, "step": 3533 }, { "epoch": 0.53, "learning_rate": 9.611203342122327e-06, "loss": 0.6765, "step": 3534 }, { "epoch": 0.53, "learning_rate": 9.606376050611814e-06, "loss": 0.7536, "step": 3535 }, { "epoch": 0.53, "learning_rate": 9.601548850967082e-06, "loss": 0.716, "step": 3536 }, { "epoch": 0.53, "learning_rate": 9.596721744314717e-06, "loss": 0.7135, "step": 3537 }, { "epoch": 0.53, "learning_rate": 9.591894731781295e-06, "loss": 0.8136, "step": 3538 }, { "epoch": 0.53, "learning_rate": 9.587067814493365e-06, "loss": 0.7215, "step": 3539 }, { "epoch": 0.53, "learning_rate": 9.582240993577454e-06, "loss": 0.7486, "step": 3540 }, { "epoch": 0.53, "learning_rate": 9.577414270160072e-06, "loss": 0.7643, "step": 3541 }, { "epoch": 0.53, "learning_rate": 9.572587645367697e-06, "loss": 0.7506, "step": 3542 }, { "epoch": 0.53, "learning_rate": 9.567761120326788e-06, "loss": 0.7222, "step": 3543 }, { "epoch": 0.53, "learning_rate": 9.56293469616379e-06, "loss": 0.8289, "step": 3544 }, { "epoch": 0.53, "learning_rate": 9.558108374005107e-06, "loss": 0.7624, "step": 3545 }, { "epoch": 0.53, "learning_rate": 9.553282154977135e-06, "loss": 0.65, "step": 3546 }, { "epoch": 0.53, "learning_rate": 9.548456040206238e-06, "loss": 0.7474, "step": 3547 }, { "epoch": 0.53, "learning_rate": 9.543630030818755e-06, "loss": 0.6814, "step": 3548 }, { "epoch": 0.53, "learning_rate": 9.538804127941e-06, "loss": 0.7673, "step": 3549 }, { "epoch": 0.53, "learning_rate": 9.533978332699267e-06, "loss": 0.7497, "step": 3550 }, { "epoch": 0.53, "learning_rate": 9.529152646219823e-06, "loss": 0.733, "step": 3551 }, { "epoch": 0.53, "learning_rate": 9.524327069628912e-06, "loss": 0.7961, "step": 3552 }, { "epoch": 0.53, "learning_rate": 9.519501604052746e-06, "loss": 0.7446, "step": 3553 }, { "epoch": 0.53, "learning_rate": 9.514676250617511e-06, "loss": 0.7503, "step": 3554 }, { "epoch": 0.53, "learning_rate": 9.509851010449377e-06, "loss": 0.7051, "step": 3555 }, { "epoch": 0.53, "learning_rate": 9.505025884674478e-06, "loss": 0.7211, "step": 3556 }, { "epoch": 0.53, "learning_rate": 9.500200874418923e-06, "loss": 0.8372, "step": 3557 }, { "epoch": 0.53, "learning_rate": 9.495375980808791e-06, "loss": 0.6762, "step": 3558 }, { "epoch": 0.53, "learning_rate": 9.490551204970148e-06, "loss": 0.805, "step": 3559 }, { "epoch": 0.53, "learning_rate": 9.485726548029017e-06, "loss": 0.7325, "step": 3560 }, { "epoch": 0.53, "learning_rate": 9.4809020111114e-06, "loss": 0.6988, "step": 3561 }, { "epoch": 0.53, "learning_rate": 9.476077595343268e-06, "loss": 0.3347, "step": 3562 }, { "epoch": 0.53, "learning_rate": 9.471253301850565e-06, "loss": 0.3305, "step": 3563 }, { "epoch": 0.53, "learning_rate": 9.466429131759211e-06, "loss": 0.7971, "step": 3564 }, { "epoch": 0.53, "learning_rate": 9.461605086195084e-06, "loss": 0.731, "step": 3565 }, { "epoch": 0.53, "learning_rate": 9.456781166284054e-06, "loss": 0.7767, "step": 3566 }, { "epoch": 0.53, "learning_rate": 9.451957373151943e-06, "loss": 0.6662, "step": 3567 }, { "epoch": 0.53, "learning_rate": 9.44713370792455e-06, "loss": 0.7227, "step": 3568 }, { "epoch": 0.53, "learning_rate": 9.442310171727646e-06, "loss": 0.7603, "step": 3569 }, { "epoch": 0.53, "learning_rate": 9.43748676568697e-06, "loss": 0.6844, "step": 3570 }, { "epoch": 0.53, "learning_rate": 9.432663490928232e-06, "loss": 0.7248, "step": 3571 }, { "epoch": 0.53, "learning_rate": 9.427840348577101e-06, "loss": 0.7888, "step": 3572 }, { "epoch": 0.53, "learning_rate": 9.423017339759238e-06, "loss": 0.7235, "step": 3573 }, { "epoch": 0.53, "learning_rate": 9.418194465600251e-06, "loss": 0.7471, "step": 3574 }, { "epoch": 0.53, "learning_rate": 9.413371727225728e-06, "loss": 0.7834, "step": 3575 }, { "epoch": 0.53, "learning_rate": 9.40854912576122e-06, "loss": 0.8072, "step": 3576 }, { "epoch": 0.53, "learning_rate": 9.403726662332247e-06, "loss": 0.8002, "step": 3577 }, { "epoch": 0.53, "learning_rate": 9.3989043380643e-06, "loss": 0.7893, "step": 3578 }, { "epoch": 0.53, "learning_rate": 9.394082154082835e-06, "loss": 0.7374, "step": 3579 }, { "epoch": 0.53, "learning_rate": 9.38926011151327e-06, "loss": 0.7554, "step": 3580 }, { "epoch": 0.53, "learning_rate": 9.384438211481006e-06, "loss": 0.7389, "step": 3581 }, { "epoch": 0.53, "learning_rate": 9.379616455111393e-06, "loss": 0.7496, "step": 3582 }, { "epoch": 0.53, "learning_rate": 9.374794843529757e-06, "loss": 0.7111, "step": 3583 }, { "epoch": 0.53, "learning_rate": 9.36997337786139e-06, "loss": 0.7339, "step": 3584 }, { "epoch": 0.53, "learning_rate": 9.365152059231544e-06, "loss": 0.7284, "step": 3585 }, { "epoch": 0.53, "learning_rate": 9.36033088876544e-06, "loss": 0.6714, "step": 3586 }, { "epoch": 0.53, "learning_rate": 9.355509867588265e-06, "loss": 0.7663, "step": 3587 }, { "epoch": 0.54, "learning_rate": 9.350688996825178e-06, "loss": 0.8185, "step": 3588 }, { "epoch": 0.54, "learning_rate": 9.345868277601292e-06, "loss": 0.779, "step": 3589 }, { "epoch": 0.54, "learning_rate": 9.341047711041687e-06, "loss": 0.6827, "step": 3590 }, { "epoch": 0.54, "learning_rate": 9.336227298271408e-06, "loss": 0.7041, "step": 3591 }, { "epoch": 0.54, "learning_rate": 9.331407040415468e-06, "loss": 0.721, "step": 3592 }, { "epoch": 0.54, "learning_rate": 9.326586938598843e-06, "loss": 0.7516, "step": 3593 }, { "epoch": 0.54, "learning_rate": 9.321766993946466e-06, "loss": 0.7654, "step": 3594 }, { "epoch": 0.54, "learning_rate": 9.31694720758324e-06, "loss": 0.7144, "step": 3595 }, { "epoch": 0.54, "learning_rate": 9.312127580634027e-06, "loss": 0.7776, "step": 3596 }, { "epoch": 0.54, "learning_rate": 9.307308114223658e-06, "loss": 0.7614, "step": 3597 }, { "epoch": 0.54, "learning_rate": 9.302488809476923e-06, "loss": 0.333, "step": 3598 }, { "epoch": 0.54, "learning_rate": 9.297669667518569e-06, "loss": 0.816, "step": 3599 }, { "epoch": 0.54, "learning_rate": 9.292850689473308e-06, "loss": 0.7762, "step": 3600 }, { "epoch": 0.54, "learning_rate": 9.288031876465822e-06, "loss": 0.7165, "step": 3601 }, { "epoch": 0.54, "learning_rate": 9.28321322962074e-06, "loss": 0.7254, "step": 3602 }, { "epoch": 0.54, "learning_rate": 9.27839475006267e-06, "loss": 0.8159, "step": 3603 }, { "epoch": 0.54, "learning_rate": 9.273576438916164e-06, "loss": 0.7593, "step": 3604 }, { "epoch": 0.54, "learning_rate": 9.268758297305742e-06, "loss": 0.7316, "step": 3605 }, { "epoch": 0.54, "learning_rate": 9.263940326355886e-06, "loss": 0.729, "step": 3606 }, { "epoch": 0.54, "learning_rate": 9.259122527191037e-06, "loss": 0.7542, "step": 3607 }, { "epoch": 0.54, "learning_rate": 9.254304900935593e-06, "loss": 0.7372, "step": 3608 }, { "epoch": 0.54, "learning_rate": 9.249487448713914e-06, "loss": 0.7007, "step": 3609 }, { "epoch": 0.54, "learning_rate": 9.24467017165032e-06, "loss": 0.8029, "step": 3610 }, { "epoch": 0.54, "learning_rate": 9.239853070869091e-06, "loss": 0.723, "step": 3611 }, { "epoch": 0.54, "learning_rate": 9.235036147494463e-06, "loss": 0.3643, "step": 3612 }, { "epoch": 0.54, "learning_rate": 9.230219402650629e-06, "loss": 0.6618, "step": 3613 }, { "epoch": 0.54, "learning_rate": 9.225402837461748e-06, "loss": 0.6578, "step": 3614 }, { "epoch": 0.54, "learning_rate": 9.22058645305193e-06, "loss": 0.6337, "step": 3615 }, { "epoch": 0.54, "learning_rate": 9.215770250545245e-06, "loss": 0.7587, "step": 3616 }, { "epoch": 0.54, "learning_rate": 9.210954231065715e-06, "loss": 0.7732, "step": 3617 }, { "epoch": 0.54, "learning_rate": 9.206138395737336e-06, "loss": 0.683, "step": 3618 }, { "epoch": 0.54, "learning_rate": 9.201322745684042e-06, "loss": 0.6903, "step": 3619 }, { "epoch": 0.54, "learning_rate": 9.196507282029735e-06, "loss": 0.6914, "step": 3620 }, { "epoch": 0.54, "learning_rate": 9.19169200589827e-06, "loss": 0.731, "step": 3621 }, { "epoch": 0.54, "learning_rate": 9.186876918413455e-06, "loss": 0.7626, "step": 3622 }, { "epoch": 0.54, "learning_rate": 9.18206202069906e-06, "loss": 0.7049, "step": 3623 }, { "epoch": 0.54, "learning_rate": 9.177247313878807e-06, "loss": 0.7335, "step": 3624 }, { "epoch": 0.54, "learning_rate": 9.172432799076378e-06, "loss": 0.7309, "step": 3625 }, { "epoch": 0.54, "learning_rate": 9.167618477415404e-06, "loss": 0.7519, "step": 3626 }, { "epoch": 0.54, "learning_rate": 9.162804350019471e-06, "loss": 0.6583, "step": 3627 }, { "epoch": 0.54, "learning_rate": 9.157990418012127e-06, "loss": 0.7535, "step": 3628 }, { "epoch": 0.54, "learning_rate": 9.153176682516867e-06, "loss": 0.8513, "step": 3629 }, { "epoch": 0.54, "learning_rate": 9.148363144657145e-06, "loss": 0.8304, "step": 3630 }, { "epoch": 0.54, "learning_rate": 9.143549805556363e-06, "loss": 0.6396, "step": 3631 }, { "epoch": 0.54, "learning_rate": 9.13873666633788e-06, "loss": 0.8164, "step": 3632 }, { "epoch": 0.54, "learning_rate": 9.133923728125014e-06, "loss": 0.7274, "step": 3633 }, { "epoch": 0.54, "learning_rate": 9.12911099204103e-06, "loss": 0.7775, "step": 3634 }, { "epoch": 0.54, "learning_rate": 9.124298459209144e-06, "loss": 0.7172, "step": 3635 }, { "epoch": 0.54, "learning_rate": 9.119486130752524e-06, "loss": 0.8458, "step": 3636 }, { "epoch": 0.54, "learning_rate": 9.1146740077943e-06, "loss": 0.732, "step": 3637 }, { "epoch": 0.54, "learning_rate": 9.109862091457544e-06, "loss": 0.7597, "step": 3638 }, { "epoch": 0.54, "learning_rate": 9.10505038286528e-06, "loss": 0.6111, "step": 3639 }, { "epoch": 0.54, "learning_rate": 9.100238883140493e-06, "loss": 0.72, "step": 3640 }, { "epoch": 0.54, "learning_rate": 9.095427593406112e-06, "loss": 0.813, "step": 3641 }, { "epoch": 0.54, "learning_rate": 9.090616514785015e-06, "loss": 0.3409, "step": 3642 }, { "epoch": 0.54, "learning_rate": 9.085805648400037e-06, "loss": 0.7087, "step": 3643 }, { "epoch": 0.54, "learning_rate": 9.08099499537396e-06, "loss": 0.7825, "step": 3644 }, { "epoch": 0.54, "learning_rate": 9.076184556829511e-06, "loss": 0.7281, "step": 3645 }, { "epoch": 0.54, "learning_rate": 9.071374333889373e-06, "loss": 0.7067, "step": 3646 }, { "epoch": 0.54, "learning_rate": 9.066564327676187e-06, "loss": 0.7285, "step": 3647 }, { "epoch": 0.54, "learning_rate": 9.061754539312528e-06, "loss": 0.7338, "step": 3648 }, { "epoch": 0.54, "learning_rate": 9.05694496992093e-06, "loss": 0.7813, "step": 3649 }, { "epoch": 0.54, "learning_rate": 9.052135620623864e-06, "loss": 0.7909, "step": 3650 }, { "epoch": 0.54, "learning_rate": 9.047326492543769e-06, "loss": 0.7771, "step": 3651 }, { "epoch": 0.54, "learning_rate": 9.042517586803015e-06, "loss": 0.7936, "step": 3652 }, { "epoch": 0.54, "learning_rate": 9.037708904523928e-06, "loss": 0.76, "step": 3653 }, { "epoch": 0.54, "learning_rate": 9.032900446828777e-06, "loss": 0.7736, "step": 3654 }, { "epoch": 0.55, "learning_rate": 9.028092214839789e-06, "loss": 0.6963, "step": 3655 }, { "epoch": 0.55, "learning_rate": 9.023284209679128e-06, "loss": 0.3142, "step": 3656 }, { "epoch": 0.55, "learning_rate": 9.018476432468912e-06, "loss": 0.7917, "step": 3657 }, { "epoch": 0.55, "learning_rate": 9.013668884331195e-06, "loss": 0.7783, "step": 3658 }, { "epoch": 0.55, "learning_rate": 9.008861566387988e-06, "loss": 0.7836, "step": 3659 }, { "epoch": 0.55, "learning_rate": 9.004054479761248e-06, "loss": 0.7389, "step": 3660 }, { "epoch": 0.55, "learning_rate": 8.999247625572868e-06, "loss": 0.7894, "step": 3661 }, { "epoch": 0.55, "learning_rate": 8.994441004944704e-06, "loss": 0.3225, "step": 3662 }, { "epoch": 0.55, "learning_rate": 8.989634618998541e-06, "loss": 0.8071, "step": 3663 }, { "epoch": 0.55, "learning_rate": 8.984828468856115e-06, "loss": 0.7127, "step": 3664 }, { "epoch": 0.55, "learning_rate": 8.98002255563911e-06, "loss": 0.3119, "step": 3665 }, { "epoch": 0.55, "learning_rate": 8.975216880469152e-06, "loss": 0.6805, "step": 3666 }, { "epoch": 0.55, "learning_rate": 8.970411444467808e-06, "loss": 0.7364, "step": 3667 }, { "epoch": 0.55, "learning_rate": 8.9656062487566e-06, "loss": 0.7417, "step": 3668 }, { "epoch": 0.55, "learning_rate": 8.960801294456974e-06, "loss": 0.8441, "step": 3669 }, { "epoch": 0.55, "learning_rate": 8.955996582690349e-06, "loss": 0.7458, "step": 3670 }, { "epoch": 0.55, "learning_rate": 8.95119211457806e-06, "loss": 0.7173, "step": 3671 }, { "epoch": 0.55, "learning_rate": 8.9463878912414e-06, "loss": 0.7838, "step": 3672 }, { "epoch": 0.55, "learning_rate": 8.941583913801598e-06, "loss": 0.7351, "step": 3673 }, { "epoch": 0.55, "learning_rate": 8.93678018337983e-06, "loss": 0.7782, "step": 3674 }, { "epoch": 0.55, "learning_rate": 8.931976701097215e-06, "loss": 0.7603, "step": 3675 }, { "epoch": 0.55, "learning_rate": 8.927173468074804e-06, "loss": 0.3308, "step": 3676 }, { "epoch": 0.55, "learning_rate": 8.922370485433609e-06, "loss": 0.6977, "step": 3677 }, { "epoch": 0.55, "learning_rate": 8.917567754294561e-06, "loss": 0.3301, "step": 3678 }, { "epoch": 0.55, "learning_rate": 8.912765275778554e-06, "loss": 0.6784, "step": 3679 }, { "epoch": 0.55, "learning_rate": 8.907963051006407e-06, "loss": 0.6744, "step": 3680 }, { "epoch": 0.55, "learning_rate": 8.903161081098885e-06, "loss": 0.7628, "step": 3681 }, { "epoch": 0.55, "learning_rate": 8.898359367176696e-06, "loss": 0.7483, "step": 3682 }, { "epoch": 0.55, "learning_rate": 8.893557910360483e-06, "loss": 0.6537, "step": 3683 }, { "epoch": 0.55, "learning_rate": 8.888756711770835e-06, "loss": 0.7074, "step": 3684 }, { "epoch": 0.55, "learning_rate": 8.88395577252828e-06, "loss": 0.7442, "step": 3685 }, { "epoch": 0.55, "learning_rate": 8.87915509375328e-06, "loss": 0.7749, "step": 3686 }, { "epoch": 0.55, "learning_rate": 8.874354676566239e-06, "loss": 0.7039, "step": 3687 }, { "epoch": 0.55, "learning_rate": 8.869554522087502e-06, "loss": 0.7638, "step": 3688 }, { "epoch": 0.55, "learning_rate": 8.86475463143735e-06, "loss": 0.694, "step": 3689 }, { "epoch": 0.55, "learning_rate": 8.859955005736006e-06, "loss": 0.7448, "step": 3690 }, { "epoch": 0.55, "learning_rate": 8.855155646103627e-06, "loss": 0.7582, "step": 3691 }, { "epoch": 0.55, "learning_rate": 8.850356553660308e-06, "loss": 0.7862, "step": 3692 }, { "epoch": 0.55, "learning_rate": 8.845557729526089e-06, "loss": 0.3284, "step": 3693 }, { "epoch": 0.55, "learning_rate": 8.840759174820937e-06, "loss": 0.3183, "step": 3694 }, { "epoch": 0.55, "learning_rate": 8.835960890664762e-06, "loss": 0.6832, "step": 3695 }, { "epoch": 0.55, "learning_rate": 8.83116287817741e-06, "loss": 0.6526, "step": 3696 }, { "epoch": 0.55, "learning_rate": 8.826365138478664e-06, "loss": 0.7894, "step": 3697 }, { "epoch": 0.55, "learning_rate": 8.821567672688237e-06, "loss": 0.7759, "step": 3698 }, { "epoch": 0.55, "learning_rate": 8.816770481925793e-06, "loss": 0.5961, "step": 3699 }, { "epoch": 0.55, "learning_rate": 8.811973567310917e-06, "loss": 0.6547, "step": 3700 }, { "epoch": 0.55, "learning_rate": 8.807176929963136e-06, "loss": 0.7637, "step": 3701 }, { "epoch": 0.55, "learning_rate": 8.802380571001914e-06, "loss": 0.7326, "step": 3702 }, { "epoch": 0.55, "learning_rate": 8.797584491546643e-06, "loss": 0.7662, "step": 3703 }, { "epoch": 0.55, "learning_rate": 8.792788692716655e-06, "loss": 0.7891, "step": 3704 }, { "epoch": 0.55, "learning_rate": 8.787993175631218e-06, "loss": 0.6353, "step": 3705 }, { "epoch": 0.55, "learning_rate": 8.783197941409527e-06, "loss": 0.742, "step": 3706 }, { "epoch": 0.55, "learning_rate": 8.778402991170725e-06, "loss": 0.7269, "step": 3707 }, { "epoch": 0.55, "learning_rate": 8.773608326033873e-06, "loss": 0.3229, "step": 3708 }, { "epoch": 0.55, "learning_rate": 8.76881394711797e-06, "loss": 0.772, "step": 3709 }, { "epoch": 0.55, "learning_rate": 8.764019855541957e-06, "loss": 0.7644, "step": 3710 }, { "epoch": 0.55, "learning_rate": 8.759226052424697e-06, "loss": 0.7279, "step": 3711 }, { "epoch": 0.55, "learning_rate": 8.754432538884993e-06, "loss": 0.6535, "step": 3712 }, { "epoch": 0.55, "learning_rate": 8.749639316041569e-06, "loss": 0.7484, "step": 3713 }, { "epoch": 0.55, "learning_rate": 8.744846385013097e-06, "loss": 0.702, "step": 3714 }, { "epoch": 0.55, "learning_rate": 8.740053746918175e-06, "loss": 0.7289, "step": 3715 }, { "epoch": 0.55, "learning_rate": 8.735261402875328e-06, "loss": 0.7015, "step": 3716 }, { "epoch": 0.55, "learning_rate": 8.730469354003015e-06, "loss": 0.729, "step": 3717 }, { "epoch": 0.55, "learning_rate": 8.725677601419624e-06, "loss": 0.6206, "step": 3718 }, { "epoch": 0.55, "learning_rate": 8.720886146243482e-06, "loss": 0.7354, "step": 3719 }, { "epoch": 0.55, "learning_rate": 8.716094989592834e-06, "loss": 0.7781, "step": 3720 }, { "epoch": 0.55, "learning_rate": 8.71130413258587e-06, "loss": 0.6656, "step": 3721 }, { "epoch": 0.56, "learning_rate": 8.7065135763407e-06, "loss": 0.8379, "step": 3722 }, { "epoch": 0.56, "learning_rate": 8.701723321975361e-06, "loss": 0.7964, "step": 3723 }, { "epoch": 0.56, "learning_rate": 8.696933370607831e-06, "loss": 0.6916, "step": 3724 }, { "epoch": 0.56, "learning_rate": 8.69214372335601e-06, "loss": 0.6891, "step": 3725 }, { "epoch": 0.56, "learning_rate": 8.687354381337724e-06, "loss": 0.8333, "step": 3726 }, { "epoch": 0.56, "learning_rate": 8.682565345670735e-06, "loss": 0.7779, "step": 3727 }, { "epoch": 0.56, "learning_rate": 8.677776617472725e-06, "loss": 0.7296, "step": 3728 }, { "epoch": 0.56, "learning_rate": 8.67298819786132e-06, "loss": 0.7145, "step": 3729 }, { "epoch": 0.56, "learning_rate": 8.668200087954056e-06, "loss": 0.3315, "step": 3730 }, { "epoch": 0.56, "learning_rate": 8.663412288868407e-06, "loss": 0.7202, "step": 3731 }, { "epoch": 0.56, "learning_rate": 8.658624801721766e-06, "loss": 0.7434, "step": 3732 }, { "epoch": 0.56, "learning_rate": 8.653837627631468e-06, "loss": 0.7522, "step": 3733 }, { "epoch": 0.56, "learning_rate": 8.64905076771476e-06, "loss": 0.7247, "step": 3734 }, { "epoch": 0.56, "learning_rate": 8.644264223088816e-06, "loss": 0.7767, "step": 3735 }, { "epoch": 0.56, "learning_rate": 8.639477994870755e-06, "loss": 0.6467, "step": 3736 }, { "epoch": 0.56, "learning_rate": 8.634692084177601e-06, "loss": 0.7645, "step": 3737 }, { "epoch": 0.56, "learning_rate": 8.629906492126313e-06, "loss": 0.7065, "step": 3738 }, { "epoch": 0.56, "learning_rate": 8.625121219833777e-06, "loss": 0.7858, "step": 3739 }, { "epoch": 0.56, "learning_rate": 8.620336268416797e-06, "loss": 0.6959, "step": 3740 }, { "epoch": 0.56, "learning_rate": 8.61555163899211e-06, "loss": 0.7407, "step": 3741 }, { "epoch": 0.56, "learning_rate": 8.610767332676378e-06, "loss": 0.6732, "step": 3742 }, { "epoch": 0.56, "learning_rate": 8.605983350586175e-06, "loss": 0.7568, "step": 3743 }, { "epoch": 0.56, "learning_rate": 8.60119969383802e-06, "loss": 0.7431, "step": 3744 }, { "epoch": 0.56, "learning_rate": 8.59641636354834e-06, "loss": 0.6931, "step": 3745 }, { "epoch": 0.56, "learning_rate": 8.591633360833489e-06, "loss": 0.7485, "step": 3746 }, { "epoch": 0.56, "learning_rate": 8.58685068680975e-06, "loss": 0.6975, "step": 3747 }, { "epoch": 0.56, "learning_rate": 8.582068342593324e-06, "loss": 0.7328, "step": 3748 }, { "epoch": 0.56, "learning_rate": 8.577286329300333e-06, "loss": 0.7815, "step": 3749 }, { "epoch": 0.56, "learning_rate": 8.57250464804683e-06, "loss": 0.6389, "step": 3750 }, { "epoch": 0.56, "learning_rate": 8.567723299948782e-06, "loss": 0.7523, "step": 3751 }, { "epoch": 0.56, "learning_rate": 8.562942286122087e-06, "loss": 0.7746, "step": 3752 }, { "epoch": 0.56, "learning_rate": 8.55816160768256e-06, "loss": 0.7591, "step": 3753 }, { "epoch": 0.56, "learning_rate": 8.553381265745933e-06, "loss": 0.3013, "step": 3754 }, { "epoch": 0.56, "learning_rate": 8.548601261427865e-06, "loss": 0.7843, "step": 3755 }, { "epoch": 0.56, "learning_rate": 8.54382159584394e-06, "loss": 0.7656, "step": 3756 }, { "epoch": 0.56, "learning_rate": 8.539042270109651e-06, "loss": 0.7942, "step": 3757 }, { "epoch": 0.56, "learning_rate": 8.534263285340427e-06, "loss": 0.6897, "step": 3758 }, { "epoch": 0.56, "learning_rate": 8.529484642651608e-06, "loss": 0.7458, "step": 3759 }, { "epoch": 0.56, "learning_rate": 8.524706343158449e-06, "loss": 0.3664, "step": 3760 }, { "epoch": 0.56, "learning_rate": 8.519928387976141e-06, "loss": 0.7381, "step": 3761 }, { "epoch": 0.56, "learning_rate": 8.51515077821978e-06, "loss": 0.6372, "step": 3762 }, { "epoch": 0.56, "learning_rate": 8.510373515004386e-06, "loss": 0.3243, "step": 3763 }, { "epoch": 0.56, "learning_rate": 8.505596599444901e-06, "loss": 0.3141, "step": 3764 }, { "epoch": 0.56, "learning_rate": 8.50082003265618e-06, "loss": 0.6929, "step": 3765 }, { "epoch": 0.56, "learning_rate": 8.496043815753004e-06, "loss": 0.749, "step": 3766 }, { "epoch": 0.56, "learning_rate": 8.49126794985007e-06, "loss": 0.769, "step": 3767 }, { "epoch": 0.56, "learning_rate": 8.486492436061986e-06, "loss": 0.6416, "step": 3768 }, { "epoch": 0.56, "learning_rate": 8.481717275503289e-06, "loss": 0.7577, "step": 3769 }, { "epoch": 0.56, "learning_rate": 8.476942469288426e-06, "loss": 0.6627, "step": 3770 }, { "epoch": 0.56, "learning_rate": 8.472168018531764e-06, "loss": 0.317, "step": 3771 }, { "epoch": 0.56, "learning_rate": 8.467393924347577e-06, "loss": 0.7345, "step": 3772 }, { "epoch": 0.56, "learning_rate": 8.462620187850082e-06, "loss": 0.7149, "step": 3773 }, { "epoch": 0.56, "learning_rate": 8.457846810153383e-06, "loss": 0.7581, "step": 3774 }, { "epoch": 0.56, "learning_rate": 8.45307379237152e-06, "loss": 0.7058, "step": 3775 }, { "epoch": 0.56, "learning_rate": 8.448301135618438e-06, "loss": 0.7199, "step": 3776 }, { "epoch": 0.56, "learning_rate": 8.443528841008002e-06, "loss": 0.7562, "step": 3777 }, { "epoch": 0.56, "learning_rate": 8.438756909653994e-06, "loss": 0.7802, "step": 3778 }, { "epoch": 0.56, "learning_rate": 8.433985342670108e-06, "loss": 0.3122, "step": 3779 }, { "epoch": 0.56, "learning_rate": 8.42921414116995e-06, "loss": 0.7344, "step": 3780 }, { "epoch": 0.56, "learning_rate": 8.424443306267054e-06, "loss": 0.7703, "step": 3781 }, { "epoch": 0.56, "learning_rate": 8.419672839074853e-06, "loss": 0.7224, "step": 3782 }, { "epoch": 0.56, "learning_rate": 8.414902740706706e-06, "loss": 0.6697, "step": 3783 }, { "epoch": 0.56, "learning_rate": 8.410133012275877e-06, "loss": 0.6825, "step": 3784 }, { "epoch": 0.56, "learning_rate": 8.405363654895546e-06, "loss": 0.672, "step": 3785 }, { "epoch": 0.56, "learning_rate": 8.400594669678808e-06, "loss": 0.7536, "step": 3786 }, { "epoch": 0.56, "learning_rate": 8.395826057738668e-06, "loss": 0.3439, "step": 3787 }, { "epoch": 0.56, "learning_rate": 8.391057820188058e-06, "loss": 0.7224, "step": 3788 }, { "epoch": 0.57, "learning_rate": 8.386289958139802e-06, "loss": 0.8039, "step": 3789 }, { "epoch": 0.57, "learning_rate": 8.381522472706648e-06, "loss": 0.7144, "step": 3790 }, { "epoch": 0.57, "learning_rate": 8.376755365001252e-06, "loss": 0.7149, "step": 3791 }, { "epoch": 0.57, "learning_rate": 8.371988636136186e-06, "loss": 0.7866, "step": 3792 }, { "epoch": 0.57, "learning_rate": 8.367222287223931e-06, "loss": 0.6817, "step": 3793 }, { "epoch": 0.57, "learning_rate": 8.362456319376874e-06, "loss": 0.7747, "step": 3794 }, { "epoch": 0.57, "learning_rate": 8.357690733707327e-06, "loss": 0.8329, "step": 3795 }, { "epoch": 0.57, "learning_rate": 8.352925531327499e-06, "loss": 0.7464, "step": 3796 }, { "epoch": 0.57, "learning_rate": 8.34816071334952e-06, "loss": 0.7344, "step": 3797 }, { "epoch": 0.57, "learning_rate": 8.34339628088542e-06, "loss": 0.3118, "step": 3798 }, { "epoch": 0.57, "learning_rate": 8.338632235047148e-06, "loss": 0.7833, "step": 3799 }, { "epoch": 0.57, "learning_rate": 8.333868576946554e-06, "loss": 0.6763, "step": 3800 }, { "epoch": 0.57, "learning_rate": 8.329105307695406e-06, "loss": 0.3535, "step": 3801 }, { "epoch": 0.57, "learning_rate": 8.324342428405376e-06, "loss": 0.8266, "step": 3802 }, { "epoch": 0.57, "learning_rate": 8.31957994018805e-06, "loss": 0.7476, "step": 3803 }, { "epoch": 0.57, "learning_rate": 8.31481784415492e-06, "loss": 0.6849, "step": 3804 }, { "epoch": 0.57, "learning_rate": 8.31005614141738e-06, "loss": 0.7448, "step": 3805 }, { "epoch": 0.57, "learning_rate": 8.305294833086743e-06, "loss": 0.7037, "step": 3806 }, { "epoch": 0.57, "learning_rate": 8.300533920274226e-06, "loss": 0.8196, "step": 3807 }, { "epoch": 0.57, "learning_rate": 8.29577340409095e-06, "loss": 0.6946, "step": 3808 }, { "epoch": 0.57, "learning_rate": 8.291013285647943e-06, "loss": 0.7356, "step": 3809 }, { "epoch": 0.57, "learning_rate": 8.286253566056149e-06, "loss": 0.6855, "step": 3810 }, { "epoch": 0.57, "learning_rate": 8.281494246426413e-06, "loss": 0.8043, "step": 3811 }, { "epoch": 0.57, "learning_rate": 8.276735327869487e-06, "loss": 0.7226, "step": 3812 }, { "epoch": 0.57, "learning_rate": 8.271976811496028e-06, "loss": 0.758, "step": 3813 }, { "epoch": 0.57, "learning_rate": 8.267218698416599e-06, "loss": 0.6884, "step": 3814 }, { "epoch": 0.57, "learning_rate": 8.262460989741675e-06, "loss": 0.3032, "step": 3815 }, { "epoch": 0.57, "learning_rate": 8.257703686581626e-06, "loss": 0.6587, "step": 3816 }, { "epoch": 0.57, "learning_rate": 8.252946790046735e-06, "loss": 0.7564, "step": 3817 }, { "epoch": 0.57, "learning_rate": 8.248190301247193e-06, "loss": 0.7633, "step": 3818 }, { "epoch": 0.57, "learning_rate": 8.243434221293086e-06, "loss": 0.7584, "step": 3819 }, { "epoch": 0.57, "learning_rate": 8.238678551294415e-06, "loss": 0.7872, "step": 3820 }, { "epoch": 0.57, "learning_rate": 8.233923292361076e-06, "loss": 0.7526, "step": 3821 }, { "epoch": 0.57, "learning_rate": 8.229168445602878e-06, "loss": 0.6907, "step": 3822 }, { "epoch": 0.57, "learning_rate": 8.22441401212952e-06, "loss": 0.749, "step": 3823 }, { "epoch": 0.57, "learning_rate": 8.219659993050619e-06, "loss": 0.6481, "step": 3824 }, { "epoch": 0.57, "learning_rate": 8.214906389475695e-06, "loss": 0.7155, "step": 3825 }, { "epoch": 0.57, "learning_rate": 8.21015320251416e-06, "loss": 0.781, "step": 3826 }, { "epoch": 0.57, "learning_rate": 8.205400433275336e-06, "loss": 0.7022, "step": 3827 }, { "epoch": 0.57, "learning_rate": 8.200648082868446e-06, "loss": 0.7838, "step": 3828 }, { "epoch": 0.57, "learning_rate": 8.195896152402616e-06, "loss": 0.7964, "step": 3829 }, { "epoch": 0.57, "learning_rate": 8.191144642986875e-06, "loss": 0.8436, "step": 3830 }, { "epoch": 0.57, "learning_rate": 8.186393555730144e-06, "loss": 0.698, "step": 3831 }, { "epoch": 0.57, "learning_rate": 8.181642891741265e-06, "loss": 0.7452, "step": 3832 }, { "epoch": 0.57, "learning_rate": 8.176892652128965e-06, "loss": 0.6619, "step": 3833 }, { "epoch": 0.57, "learning_rate": 8.172142838001876e-06, "loss": 0.8181, "step": 3834 }, { "epoch": 0.57, "learning_rate": 8.167393450468533e-06, "loss": 0.7481, "step": 3835 }, { "epoch": 0.57, "learning_rate": 8.16264449063737e-06, "loss": 0.811, "step": 3836 }, { "epoch": 0.57, "learning_rate": 8.15789595961672e-06, "loss": 0.7663, "step": 3837 }, { "epoch": 0.57, "learning_rate": 8.153147858514818e-06, "loss": 0.6522, "step": 3838 }, { "epoch": 0.57, "learning_rate": 8.148400188439794e-06, "loss": 0.6937, "step": 3839 }, { "epoch": 0.57, "learning_rate": 8.143652950499689e-06, "loss": 0.6376, "step": 3840 }, { "epoch": 0.57, "learning_rate": 8.13890614580243e-06, "loss": 0.7776, "step": 3841 }, { "epoch": 0.57, "learning_rate": 8.134159775455848e-06, "loss": 0.7451, "step": 3842 }, { "epoch": 0.57, "learning_rate": 8.129413840567675e-06, "loss": 0.7373, "step": 3843 }, { "epoch": 0.57, "learning_rate": 8.124668342245539e-06, "loss": 0.6964, "step": 3844 }, { "epoch": 0.57, "learning_rate": 8.119923281596963e-06, "loss": 0.6658, "step": 3845 }, { "epoch": 0.57, "learning_rate": 8.115178659729373e-06, "loss": 0.7102, "step": 3846 }, { "epoch": 0.57, "learning_rate": 8.110434477750093e-06, "loss": 0.7292, "step": 3847 }, { "epoch": 0.57, "learning_rate": 8.105690736766342e-06, "loss": 0.7384, "step": 3848 }, { "epoch": 0.57, "learning_rate": 8.100947437885233e-06, "loss": 0.7151, "step": 3849 }, { "epoch": 0.57, "learning_rate": 8.096204582213781e-06, "loss": 0.7688, "step": 3850 }, { "epoch": 0.57, "learning_rate": 8.091462170858897e-06, "loss": 0.7214, "step": 3851 }, { "epoch": 0.57, "learning_rate": 8.086720204927383e-06, "loss": 0.7981, "step": 3852 }, { "epoch": 0.57, "learning_rate": 8.081978685525946e-06, "loss": 0.81, "step": 3853 }, { "epoch": 0.57, "learning_rate": 8.077237613761175e-06, "loss": 0.7423, "step": 3854 }, { "epoch": 0.57, "learning_rate": 8.072496990739571e-06, "loss": 0.69, "step": 3855 }, { "epoch": 0.58, "learning_rate": 8.067756817567523e-06, "loss": 0.7225, "step": 3856 }, { "epoch": 0.58, "learning_rate": 8.06301709535131e-06, "loss": 0.7698, "step": 3857 }, { "epoch": 0.58, "learning_rate": 8.058277825197115e-06, "loss": 0.7905, "step": 3858 }, { "epoch": 0.58, "learning_rate": 8.053539008211004e-06, "loss": 0.6997, "step": 3859 }, { "epoch": 0.58, "learning_rate": 8.04880064549895e-06, "loss": 0.8118, "step": 3860 }, { "epoch": 0.58, "learning_rate": 8.044062738166809e-06, "loss": 0.7988, "step": 3861 }, { "epoch": 0.58, "learning_rate": 8.039325287320343e-06, "loss": 0.6794, "step": 3862 }, { "epoch": 0.58, "learning_rate": 8.034588294065194e-06, "loss": 0.7033, "step": 3863 }, { "epoch": 0.58, "learning_rate": 8.029851759506903e-06, "loss": 0.8085, "step": 3864 }, { "epoch": 0.58, "learning_rate": 8.025115684750911e-06, "loss": 0.706, "step": 3865 }, { "epoch": 0.58, "learning_rate": 8.020380070902538e-06, "loss": 0.7222, "step": 3866 }, { "epoch": 0.58, "learning_rate": 8.015644919067008e-06, "loss": 0.667, "step": 3867 }, { "epoch": 0.58, "learning_rate": 8.010910230349423e-06, "loss": 0.6694, "step": 3868 }, { "epoch": 0.58, "learning_rate": 8.0061760058548e-06, "loss": 0.7097, "step": 3869 }, { "epoch": 0.58, "learning_rate": 8.001442246688027e-06, "loss": 0.7461, "step": 3870 }, { "epoch": 0.58, "learning_rate": 7.996708953953894e-06, "loss": 0.7675, "step": 3871 }, { "epoch": 0.58, "learning_rate": 7.991976128757073e-06, "loss": 0.7399, "step": 3872 }, { "epoch": 0.58, "learning_rate": 7.987243772202138e-06, "loss": 0.6724, "step": 3873 }, { "epoch": 0.58, "learning_rate": 7.982511885393547e-06, "loss": 0.6911, "step": 3874 }, { "epoch": 0.58, "learning_rate": 7.977780469435648e-06, "loss": 0.7108, "step": 3875 }, { "epoch": 0.58, "learning_rate": 7.973049525432678e-06, "loss": 0.7339, "step": 3876 }, { "epoch": 0.58, "learning_rate": 7.968319054488774e-06, "loss": 0.7919, "step": 3877 }, { "epoch": 0.58, "learning_rate": 7.96358905770795e-06, "loss": 0.756, "step": 3878 }, { "epoch": 0.58, "learning_rate": 7.958859536194116e-06, "loss": 0.7893, "step": 3879 }, { "epoch": 0.58, "learning_rate": 7.95413049105107e-06, "loss": 0.7499, "step": 3880 }, { "epoch": 0.58, "learning_rate": 7.9494019233825e-06, "loss": 0.6436, "step": 3881 }, { "epoch": 0.58, "learning_rate": 7.944673834291974e-06, "loss": 0.7483, "step": 3882 }, { "epoch": 0.58, "learning_rate": 7.939946224882959e-06, "loss": 0.3508, "step": 3883 }, { "epoch": 0.58, "learning_rate": 7.93521909625881e-06, "loss": 0.7627, "step": 3884 }, { "epoch": 0.58, "learning_rate": 7.930492449522765e-06, "loss": 0.7015, "step": 3885 }, { "epoch": 0.58, "learning_rate": 7.925766285777949e-06, "loss": 0.7229, "step": 3886 }, { "epoch": 0.58, "learning_rate": 7.921040606127374e-06, "loss": 0.72, "step": 3887 }, { "epoch": 0.58, "learning_rate": 7.916315411673945e-06, "loss": 0.7682, "step": 3888 }, { "epoch": 0.58, "learning_rate": 7.911590703520446e-06, "loss": 0.8115, "step": 3889 }, { "epoch": 0.58, "learning_rate": 7.90686648276955e-06, "loss": 0.6478, "step": 3890 }, { "epoch": 0.58, "learning_rate": 7.902142750523824e-06, "loss": 0.7672, "step": 3891 }, { "epoch": 0.58, "learning_rate": 7.897419507885709e-06, "loss": 0.7827, "step": 3892 }, { "epoch": 0.58, "learning_rate": 7.892696755957541e-06, "loss": 0.7067, "step": 3893 }, { "epoch": 0.58, "learning_rate": 7.887974495841536e-06, "loss": 0.3336, "step": 3894 }, { "epoch": 0.58, "learning_rate": 7.883252728639794e-06, "loss": 0.7533, "step": 3895 }, { "epoch": 0.58, "learning_rate": 7.878531455454304e-06, "loss": 0.5986, "step": 3896 }, { "epoch": 0.58, "learning_rate": 7.873810677386942e-06, "loss": 0.3389, "step": 3897 }, { "epoch": 0.58, "learning_rate": 7.86909039553946e-06, "loss": 0.3219, "step": 3898 }, { "epoch": 0.58, "learning_rate": 7.864370611013505e-06, "loss": 0.7704, "step": 3899 }, { "epoch": 0.58, "learning_rate": 7.8596513249106e-06, "loss": 0.7776, "step": 3900 }, { "epoch": 0.58, "learning_rate": 7.85493253833215e-06, "loss": 0.6532, "step": 3901 }, { "epoch": 0.58, "learning_rate": 7.850214252379452e-06, "loss": 0.7835, "step": 3902 }, { "epoch": 0.58, "learning_rate": 7.845496468153679e-06, "loss": 0.7029, "step": 3903 }, { "epoch": 0.58, "learning_rate": 7.840779186755888e-06, "loss": 0.8575, "step": 3904 }, { "epoch": 0.58, "learning_rate": 7.836062409287022e-06, "loss": 0.6385, "step": 3905 }, { "epoch": 0.58, "learning_rate": 7.831346136847903e-06, "loss": 0.6856, "step": 3906 }, { "epoch": 0.58, "learning_rate": 7.826630370539241e-06, "loss": 0.7363, "step": 3907 }, { "epoch": 0.58, "learning_rate": 7.821915111461618e-06, "loss": 0.6649, "step": 3908 }, { "epoch": 0.58, "learning_rate": 7.817200360715506e-06, "loss": 0.773, "step": 3909 }, { "epoch": 0.58, "learning_rate": 7.81248611940125e-06, "loss": 0.7299, "step": 3910 }, { "epoch": 0.58, "learning_rate": 7.807772388619089e-06, "loss": 0.689, "step": 3911 }, { "epoch": 0.58, "learning_rate": 7.80305916946913e-06, "loss": 0.6885, "step": 3912 }, { "epoch": 0.58, "learning_rate": 7.798346463051366e-06, "loss": 0.3182, "step": 3913 }, { "epoch": 0.58, "learning_rate": 7.793634270465673e-06, "loss": 0.7677, "step": 3914 }, { "epoch": 0.58, "learning_rate": 7.788922592811801e-06, "loss": 0.7033, "step": 3915 }, { "epoch": 0.58, "learning_rate": 7.784211431189389e-06, "loss": 0.713, "step": 3916 }, { "epoch": 0.58, "learning_rate": 7.779500786697944e-06, "loss": 0.663, "step": 3917 }, { "epoch": 0.58, "learning_rate": 7.774790660436857e-06, "loss": 0.6845, "step": 3918 }, { "epoch": 0.58, "learning_rate": 7.770081053505404e-06, "loss": 0.3501, "step": 3919 }, { "epoch": 0.58, "learning_rate": 7.765371967002729e-06, "loss": 0.7565, "step": 3920 }, { "epoch": 0.58, "learning_rate": 7.760663402027868e-06, "loss": 0.7225, "step": 3921 }, { "epoch": 0.58, "learning_rate": 7.755955359679724e-06, "loss": 0.6615, "step": 3922 }, { "epoch": 0.59, "learning_rate": 7.751247841057077e-06, "loss": 0.6842, "step": 3923 }, { "epoch": 0.59, "learning_rate": 7.746540847258597e-06, "loss": 0.846, "step": 3924 }, { "epoch": 0.59, "learning_rate": 7.74183437938282e-06, "loss": 0.7125, "step": 3925 }, { "epoch": 0.59, "learning_rate": 7.737128438528163e-06, "loss": 0.6514, "step": 3926 }, { "epoch": 0.59, "learning_rate": 7.732423025792918e-06, "loss": 0.621, "step": 3927 }, { "epoch": 0.59, "learning_rate": 7.727718142275262e-06, "loss": 0.6927, "step": 3928 }, { "epoch": 0.59, "learning_rate": 7.723013789073237e-06, "loss": 0.7026, "step": 3929 }, { "epoch": 0.59, "learning_rate": 7.718309967284771e-06, "loss": 0.7863, "step": 3930 }, { "epoch": 0.59, "learning_rate": 7.713606678007663e-06, "loss": 0.8299, "step": 3931 }, { "epoch": 0.59, "learning_rate": 7.708903922339584e-06, "loss": 0.7528, "step": 3932 }, { "epoch": 0.59, "learning_rate": 7.70420170137809e-06, "loss": 0.721, "step": 3933 }, { "epoch": 0.59, "learning_rate": 7.699500016220604e-06, "loss": 0.7421, "step": 3934 }, { "epoch": 0.59, "learning_rate": 7.694798867964423e-06, "loss": 0.7717, "step": 3935 }, { "epoch": 0.59, "learning_rate": 7.690098257706734e-06, "loss": 0.7392, "step": 3936 }, { "epoch": 0.59, "learning_rate": 7.685398186544577e-06, "loss": 0.7585, "step": 3937 }, { "epoch": 0.59, "learning_rate": 7.680698655574884e-06, "loss": 0.7655, "step": 3938 }, { "epoch": 0.59, "learning_rate": 7.675999665894446e-06, "loss": 0.693, "step": 3939 }, { "epoch": 0.59, "learning_rate": 7.67130121859994e-06, "loss": 0.7323, "step": 3940 }, { "epoch": 0.59, "learning_rate": 7.666603314787908e-06, "loss": 0.7223, "step": 3941 }, { "epoch": 0.59, "learning_rate": 7.661905955554768e-06, "loss": 0.7536, "step": 3942 }, { "epoch": 0.59, "learning_rate": 7.657209141996815e-06, "loss": 0.7293, "step": 3943 }, { "epoch": 0.59, "learning_rate": 7.652512875210213e-06, "loss": 0.7587, "step": 3944 }, { "epoch": 0.59, "learning_rate": 7.647817156290997e-06, "loss": 0.8, "step": 3945 }, { "epoch": 0.59, "learning_rate": 7.643121986335073e-06, "loss": 0.656, "step": 3946 }, { "epoch": 0.59, "learning_rate": 7.638427366438225e-06, "loss": 0.7972, "step": 3947 }, { "epoch": 0.59, "learning_rate": 7.633733297696104e-06, "loss": 0.6518, "step": 3948 }, { "epoch": 0.59, "learning_rate": 7.629039781204235e-06, "loss": 0.7946, "step": 3949 }, { "epoch": 0.59, "learning_rate": 7.624346818058005e-06, "loss": 0.7235, "step": 3950 }, { "epoch": 0.59, "learning_rate": 7.619654409352688e-06, "loss": 0.6828, "step": 3951 }, { "epoch": 0.59, "learning_rate": 7.614962556183418e-06, "loss": 0.7235, "step": 3952 }, { "epoch": 0.59, "learning_rate": 7.6102712596452035e-06, "loss": 0.7422, "step": 3953 }, { "epoch": 0.59, "learning_rate": 7.605580520832916e-06, "loss": 0.6599, "step": 3954 }, { "epoch": 0.59, "learning_rate": 7.600890340841302e-06, "loss": 0.7095, "step": 3955 }, { "epoch": 0.59, "learning_rate": 7.596200720764981e-06, "loss": 0.7621, "step": 3956 }, { "epoch": 0.59, "learning_rate": 7.591511661698433e-06, "loss": 0.7497, "step": 3957 }, { "epoch": 0.59, "learning_rate": 7.5868231647360205e-06, "loss": 0.7788, "step": 3958 }, { "epoch": 0.59, "learning_rate": 7.582135230971961e-06, "loss": 0.7184, "step": 3959 }, { "epoch": 0.59, "learning_rate": 7.577447861500344e-06, "loss": 0.738, "step": 3960 }, { "epoch": 0.59, "learning_rate": 7.5727610574151345e-06, "loss": 0.7123, "step": 3961 }, { "epoch": 0.59, "learning_rate": 7.56807481981016e-06, "loss": 0.754, "step": 3962 }, { "epoch": 0.59, "learning_rate": 7.563389149779113e-06, "loss": 0.7309, "step": 3963 }, { "epoch": 0.59, "learning_rate": 7.558704048415555e-06, "loss": 0.7264, "step": 3964 }, { "epoch": 0.59, "learning_rate": 7.554019516812921e-06, "loss": 0.3456, "step": 3965 }, { "epoch": 0.59, "learning_rate": 7.5493355560645106e-06, "loss": 0.7942, "step": 3966 }, { "epoch": 0.59, "learning_rate": 7.5446521672634855e-06, "loss": 0.7378, "step": 3967 }, { "epoch": 0.59, "learning_rate": 7.539969351502876e-06, "loss": 0.8063, "step": 3968 }, { "epoch": 0.59, "learning_rate": 7.5352871098755765e-06, "loss": 0.7937, "step": 3969 }, { "epoch": 0.59, "learning_rate": 7.530605443474357e-06, "loss": 0.7254, "step": 3970 }, { "epoch": 0.59, "learning_rate": 7.525924353391842e-06, "loss": 0.7385, "step": 3971 }, { "epoch": 0.59, "learning_rate": 7.521243840720522e-06, "loss": 0.6832, "step": 3972 }, { "epoch": 0.59, "learning_rate": 7.5165639065527655e-06, "loss": 0.7647, "step": 3973 }, { "epoch": 0.59, "learning_rate": 7.51188455198079e-06, "loss": 0.7255, "step": 3974 }, { "epoch": 0.59, "learning_rate": 7.507205778096691e-06, "loss": 0.7404, "step": 3975 }, { "epoch": 0.59, "learning_rate": 7.502527585992419e-06, "loss": 0.7026, "step": 3976 }, { "epoch": 0.59, "learning_rate": 7.497849976759792e-06, "loss": 0.8202, "step": 3977 }, { "epoch": 0.59, "learning_rate": 7.493172951490491e-06, "loss": 0.7103, "step": 3978 }, { "epoch": 0.59, "learning_rate": 7.488496511276059e-06, "loss": 0.7334, "step": 3979 }, { "epoch": 0.59, "learning_rate": 7.4838206572079145e-06, "loss": 0.7113, "step": 3980 }, { "epoch": 0.59, "learning_rate": 7.479145390377325e-06, "loss": 0.7375, "step": 3981 }, { "epoch": 0.59, "learning_rate": 7.474470711875424e-06, "loss": 0.8134, "step": 3982 }, { "epoch": 0.59, "learning_rate": 7.469796622793211e-06, "loss": 0.7398, "step": 3983 }, { "epoch": 0.59, "learning_rate": 7.465123124221547e-06, "loss": 0.3428, "step": 3984 }, { "epoch": 0.59, "learning_rate": 7.460450217251155e-06, "loss": 0.6861, "step": 3985 }, { "epoch": 0.59, "learning_rate": 7.455777902972616e-06, "loss": 0.7493, "step": 3986 }, { "epoch": 0.59, "learning_rate": 7.45110618247638e-06, "loss": 0.7793, "step": 3987 }, { "epoch": 0.59, "learning_rate": 7.4464350568527535e-06, "loss": 0.7061, "step": 3988 }, { "epoch": 0.59, "learning_rate": 7.441764527191908e-06, "loss": 0.6614, "step": 3989 }, { "epoch": 0.6, "learning_rate": 7.437094594583872e-06, "loss": 0.7289, "step": 3990 }, { "epoch": 0.6, "learning_rate": 7.4324252601185295e-06, "loss": 0.7245, "step": 3991 }, { "epoch": 0.6, "learning_rate": 7.427756524885641e-06, "loss": 0.7791, "step": 3992 }, { "epoch": 0.6, "learning_rate": 7.423088389974813e-06, "loss": 0.7787, "step": 3993 }, { "epoch": 0.6, "learning_rate": 7.418420856475512e-06, "loss": 0.7683, "step": 3994 }, { "epoch": 0.6, "learning_rate": 7.413753925477078e-06, "loss": 0.6989, "step": 3995 }, { "epoch": 0.6, "learning_rate": 7.409087598068696e-06, "loss": 0.7707, "step": 3996 }, { "epoch": 0.6, "learning_rate": 7.404421875339413e-06, "loss": 0.78, "step": 3997 }, { "epoch": 0.6, "learning_rate": 7.399756758378142e-06, "loss": 0.7588, "step": 3998 }, { "epoch": 0.6, "learning_rate": 7.395092248273645e-06, "loss": 0.6536, "step": 3999 }, { "epoch": 0.6, "learning_rate": 7.3904283461145465e-06, "loss": 0.7542, "step": 4000 }, { "epoch": 0.6, "learning_rate": 7.385765052989332e-06, "loss": 0.69, "step": 4001 }, { "epoch": 0.6, "learning_rate": 7.381102369986342e-06, "loss": 0.6348, "step": 4002 }, { "epoch": 0.6, "learning_rate": 7.376440298193776e-06, "loss": 0.7253, "step": 4003 }, { "epoch": 0.6, "learning_rate": 7.37177883869969e-06, "loss": 0.6783, "step": 4004 }, { "epoch": 0.6, "learning_rate": 7.367117992591992e-06, "loss": 0.6944, "step": 4005 }, { "epoch": 0.6, "learning_rate": 7.362457760958459e-06, "loss": 0.6563, "step": 4006 }, { "epoch": 0.6, "learning_rate": 7.357798144886712e-06, "loss": 0.6093, "step": 4007 }, { "epoch": 0.6, "learning_rate": 7.353139145464237e-06, "loss": 0.8213, "step": 4008 }, { "epoch": 0.6, "learning_rate": 7.348480763778367e-06, "loss": 0.7342, "step": 4009 }, { "epoch": 0.6, "learning_rate": 7.3438230009163045e-06, "loss": 0.7003, "step": 4010 }, { "epoch": 0.6, "learning_rate": 7.339165857965095e-06, "loss": 0.7829, "step": 4011 }, { "epoch": 0.6, "learning_rate": 7.334509336011646e-06, "loss": 0.8264, "step": 4012 }, { "epoch": 0.6, "learning_rate": 7.329853436142719e-06, "loss": 0.7377, "step": 4013 }, { "epoch": 0.6, "learning_rate": 7.325198159444924e-06, "loss": 0.6729, "step": 4014 }, { "epoch": 0.6, "learning_rate": 7.320543507004738e-06, "loss": 0.6831, "step": 4015 }, { "epoch": 0.6, "learning_rate": 7.315889479908476e-06, "loss": 0.7392, "step": 4016 }, { "epoch": 0.6, "learning_rate": 7.311236079242327e-06, "loss": 0.3835, "step": 4017 }, { "epoch": 0.6, "learning_rate": 7.30658330609232e-06, "loss": 0.7367, "step": 4018 }, { "epoch": 0.6, "learning_rate": 7.301931161544336e-06, "loss": 0.63, "step": 4019 }, { "epoch": 0.6, "learning_rate": 7.2972796466841185e-06, "loss": 0.7807, "step": 4020 }, { "epoch": 0.6, "learning_rate": 7.292628762597259e-06, "loss": 0.7146, "step": 4021 }, { "epoch": 0.6, "learning_rate": 7.287978510369201e-06, "loss": 0.7087, "step": 4022 }, { "epoch": 0.6, "learning_rate": 7.28332889108524e-06, "loss": 0.7337, "step": 4023 }, { "epoch": 0.6, "learning_rate": 7.2786799058305265e-06, "loss": 0.7129, "step": 4024 }, { "epoch": 0.6, "learning_rate": 7.274031555690068e-06, "loss": 0.7119, "step": 4025 }, { "epoch": 0.6, "learning_rate": 7.2693838417487114e-06, "loss": 0.6199, "step": 4026 }, { "epoch": 0.6, "learning_rate": 7.264736765091166e-06, "loss": 0.7433, "step": 4027 }, { "epoch": 0.6, "learning_rate": 7.260090326801983e-06, "loss": 0.7132, "step": 4028 }, { "epoch": 0.6, "learning_rate": 7.255444527965574e-06, "loss": 0.6813, "step": 4029 }, { "epoch": 0.6, "learning_rate": 7.250799369666195e-06, "loss": 0.6957, "step": 4030 }, { "epoch": 0.6, "learning_rate": 7.24615485298795e-06, "loss": 0.7385, "step": 4031 }, { "epoch": 0.6, "learning_rate": 7.241510979014807e-06, "loss": 0.6672, "step": 4032 }, { "epoch": 0.6, "learning_rate": 7.23686774883057e-06, "loss": 0.7508, "step": 4033 }, { "epoch": 0.6, "learning_rate": 7.232225163518898e-06, "loss": 0.7327, "step": 4034 }, { "epoch": 0.6, "learning_rate": 7.2275832241633e-06, "loss": 0.7959, "step": 4035 }, { "epoch": 0.6, "learning_rate": 7.222941931847132e-06, "loss": 0.6873, "step": 4036 }, { "epoch": 0.6, "learning_rate": 7.218301287653597e-06, "loss": 0.6796, "step": 4037 }, { "epoch": 0.6, "learning_rate": 7.2136612926657525e-06, "loss": 0.6753, "step": 4038 }, { "epoch": 0.6, "learning_rate": 7.209021947966506e-06, "loss": 0.7456, "step": 4039 }, { "epoch": 0.6, "learning_rate": 7.204383254638605e-06, "loss": 0.7223, "step": 4040 }, { "epoch": 0.6, "learning_rate": 7.199745213764652e-06, "loss": 0.772, "step": 4041 }, { "epoch": 0.6, "learning_rate": 7.19510782642709e-06, "loss": 0.7244, "step": 4042 }, { "epoch": 0.6, "learning_rate": 7.190471093708218e-06, "loss": 0.6768, "step": 4043 }, { "epoch": 0.6, "learning_rate": 7.185835016690176e-06, "loss": 0.8275, "step": 4044 }, { "epoch": 0.6, "learning_rate": 7.181199596454954e-06, "loss": 0.6888, "step": 4045 }, { "epoch": 0.6, "learning_rate": 7.176564834084383e-06, "loss": 0.6686, "step": 4046 }, { "epoch": 0.6, "learning_rate": 7.171930730660151e-06, "loss": 0.6781, "step": 4047 }, { "epoch": 0.6, "learning_rate": 7.167297287263788e-06, "loss": 0.7066, "step": 4048 }, { "epoch": 0.6, "learning_rate": 7.1626645049766654e-06, "loss": 0.7513, "step": 4049 }, { "epoch": 0.6, "learning_rate": 7.1580323848800025e-06, "loss": 0.3492, "step": 4050 }, { "epoch": 0.6, "learning_rate": 7.153400928054865e-06, "loss": 0.7047, "step": 4051 }, { "epoch": 0.6, "learning_rate": 7.148770135582165e-06, "loss": 0.3194, "step": 4052 }, { "epoch": 0.6, "learning_rate": 7.1441400085426536e-06, "loss": 0.6425, "step": 4053 }, { "epoch": 0.6, "learning_rate": 7.139510548016939e-06, "loss": 0.621, "step": 4054 }, { "epoch": 0.6, "learning_rate": 7.134881755085463e-06, "loss": 0.7585, "step": 4055 }, { "epoch": 0.6, "learning_rate": 7.130253630828513e-06, "loss": 0.6731, "step": 4056 }, { "epoch": 0.61, "learning_rate": 7.125626176326224e-06, "loss": 0.7743, "step": 4057 }, { "epoch": 0.61, "learning_rate": 7.120999392658572e-06, "loss": 0.7411, "step": 4058 }, { "epoch": 0.61, "learning_rate": 7.116373280905375e-06, "loss": 0.6654, "step": 4059 }, { "epoch": 0.61, "learning_rate": 7.111747842146299e-06, "loss": 0.6681, "step": 4060 }, { "epoch": 0.61, "learning_rate": 7.107123077460847e-06, "loss": 0.7905, "step": 4061 }, { "epoch": 0.61, "learning_rate": 7.102498987928373e-06, "loss": 0.685, "step": 4062 }, { "epoch": 0.61, "learning_rate": 7.0978755746280656e-06, "loss": 0.6471, "step": 4063 }, { "epoch": 0.61, "learning_rate": 7.09325283863896e-06, "loss": 0.7301, "step": 4064 }, { "epoch": 0.61, "learning_rate": 7.0886307810399265e-06, "loss": 0.6372, "step": 4065 }, { "epoch": 0.61, "learning_rate": 7.084009402909688e-06, "loss": 0.637, "step": 4066 }, { "epoch": 0.61, "learning_rate": 7.079388705326802e-06, "loss": 0.7586, "step": 4067 }, { "epoch": 0.61, "learning_rate": 7.074768689369663e-06, "loss": 0.7919, "step": 4068 }, { "epoch": 0.61, "learning_rate": 7.0701493561165206e-06, "loss": 0.5761, "step": 4069 }, { "epoch": 0.61, "learning_rate": 7.065530706645447e-06, "loss": 0.7258, "step": 4070 }, { "epoch": 0.61, "learning_rate": 7.060912742034371e-06, "loss": 0.6596, "step": 4071 }, { "epoch": 0.61, "learning_rate": 7.0562954633610536e-06, "loss": 0.7059, "step": 4072 }, { "epoch": 0.61, "learning_rate": 7.0516788717030914e-06, "loss": 0.7442, "step": 4073 }, { "epoch": 0.61, "learning_rate": 7.047062968137932e-06, "loss": 0.6382, "step": 4074 }, { "epoch": 0.61, "learning_rate": 7.042447753742849e-06, "loss": 0.7214, "step": 4075 }, { "epoch": 0.61, "learning_rate": 7.037833229594973e-06, "loss": 0.6928, "step": 4076 }, { "epoch": 0.61, "learning_rate": 7.033219396771255e-06, "loss": 0.7493, "step": 4077 }, { "epoch": 0.61, "learning_rate": 7.028606256348494e-06, "loss": 0.701, "step": 4078 }, { "epoch": 0.61, "learning_rate": 7.0239938094033265e-06, "loss": 0.7871, "step": 4079 }, { "epoch": 0.61, "learning_rate": 7.019382057012228e-06, "loss": 0.7513, "step": 4080 }, { "epoch": 0.61, "learning_rate": 7.01477100025151e-06, "loss": 0.7658, "step": 4081 }, { "epoch": 0.61, "learning_rate": 7.010160640197319e-06, "loss": 0.7213, "step": 4082 }, { "epoch": 0.61, "learning_rate": 7.005550977925645e-06, "loss": 0.7361, "step": 4083 }, { "epoch": 0.61, "learning_rate": 7.000942014512312e-06, "loss": 0.7075, "step": 4084 }, { "epoch": 0.61, "learning_rate": 6.996333751032982e-06, "loss": 0.735, "step": 4085 }, { "epoch": 0.61, "learning_rate": 6.991726188563154e-06, "loss": 0.5961, "step": 4086 }, { "epoch": 0.61, "learning_rate": 6.987119328178157e-06, "loss": 0.6597, "step": 4087 }, { "epoch": 0.61, "learning_rate": 6.982513170953167e-06, "loss": 0.6258, "step": 4088 }, { "epoch": 0.61, "learning_rate": 6.977907717963188e-06, "loss": 0.7392, "step": 4089 }, { "epoch": 0.61, "learning_rate": 6.973302970283056e-06, "loss": 0.6982, "step": 4090 }, { "epoch": 0.61, "learning_rate": 6.968698928987459e-06, "loss": 0.6979, "step": 4091 }, { "epoch": 0.61, "learning_rate": 6.964095595150903e-06, "loss": 0.7656, "step": 4092 }, { "epoch": 0.61, "learning_rate": 6.959492969847739e-06, "loss": 0.6699, "step": 4093 }, { "epoch": 0.61, "learning_rate": 6.954891054152145e-06, "loss": 0.7281, "step": 4094 }, { "epoch": 0.61, "learning_rate": 6.95028984913814e-06, "loss": 0.6849, "step": 4095 }, { "epoch": 0.61, "learning_rate": 6.94568935587957e-06, "loss": 0.7441, "step": 4096 }, { "epoch": 0.61, "learning_rate": 6.941089575450126e-06, "loss": 0.7357, "step": 4097 }, { "epoch": 0.61, "learning_rate": 6.9364905089233196e-06, "loss": 0.7151, "step": 4098 }, { "epoch": 0.61, "learning_rate": 6.931892157372508e-06, "loss": 0.7195, "step": 4099 }, { "epoch": 0.61, "learning_rate": 6.927294521870874e-06, "loss": 0.8076, "step": 4100 }, { "epoch": 0.61, "learning_rate": 6.922697603491432e-06, "loss": 0.7281, "step": 4101 }, { "epoch": 0.61, "learning_rate": 6.918101403307036e-06, "loss": 0.3014, "step": 4102 }, { "epoch": 0.61, "learning_rate": 6.913505922390367e-06, "loss": 0.7383, "step": 4103 }, { "epoch": 0.61, "learning_rate": 6.908911161813938e-06, "loss": 0.7355, "step": 4104 }, { "epoch": 0.61, "learning_rate": 6.904317122650093e-06, "loss": 0.654, "step": 4105 }, { "epoch": 0.61, "learning_rate": 6.899723805971015e-06, "loss": 0.7782, "step": 4106 }, { "epoch": 0.61, "learning_rate": 6.8951312128487156e-06, "loss": 0.7189, "step": 4107 }, { "epoch": 0.61, "learning_rate": 6.890539344355032e-06, "loss": 0.6913, "step": 4108 }, { "epoch": 0.61, "learning_rate": 6.885948201561634e-06, "loss": 0.6372, "step": 4109 }, { "epoch": 0.61, "learning_rate": 6.881357785540024e-06, "loss": 0.7637, "step": 4110 }, { "epoch": 0.61, "learning_rate": 6.876768097361537e-06, "loss": 0.756, "step": 4111 }, { "epoch": 0.61, "learning_rate": 6.872179138097331e-06, "loss": 0.7055, "step": 4112 }, { "epoch": 0.61, "learning_rate": 6.867590908818406e-06, "loss": 0.7252, "step": 4113 }, { "epoch": 0.61, "learning_rate": 6.86300341059558e-06, "loss": 0.6812, "step": 4114 }, { "epoch": 0.61, "learning_rate": 6.858416644499503e-06, "loss": 0.7066, "step": 4115 }, { "epoch": 0.61, "learning_rate": 6.853830611600658e-06, "loss": 0.776, "step": 4116 }, { "epoch": 0.61, "learning_rate": 6.849245312969355e-06, "loss": 0.6441, "step": 4117 }, { "epoch": 0.61, "learning_rate": 6.844660749675731e-06, "loss": 0.8657, "step": 4118 }, { "epoch": 0.61, "learning_rate": 6.840076922789749e-06, "loss": 0.6877, "step": 4119 }, { "epoch": 0.61, "learning_rate": 6.835493833381206e-06, "loss": 0.7689, "step": 4120 }, { "epoch": 0.61, "learning_rate": 6.83091148251973e-06, "loss": 0.6932, "step": 4121 }, { "epoch": 0.61, "learning_rate": 6.826329871274766e-06, "loss": 0.7614, "step": 4122 }, { "epoch": 0.61, "learning_rate": 6.821749000715594e-06, "loss": 0.7938, "step": 4123 }, { "epoch": 0.62, "learning_rate": 6.817168871911313e-06, "loss": 0.7082, "step": 4124 }, { "epoch": 0.62, "learning_rate": 6.812589485930863e-06, "loss": 0.7059, "step": 4125 }, { "epoch": 0.62, "learning_rate": 6.808010843842998e-06, "loss": 0.8373, "step": 4126 }, { "epoch": 0.62, "learning_rate": 6.8034329467162975e-06, "loss": 0.8226, "step": 4127 }, { "epoch": 0.62, "learning_rate": 6.798855795619181e-06, "loss": 0.7248, "step": 4128 }, { "epoch": 0.62, "learning_rate": 6.794279391619881e-06, "loss": 0.6714, "step": 4129 }, { "epoch": 0.62, "learning_rate": 6.7897037357864625e-06, "loss": 0.7283, "step": 4130 }, { "epoch": 0.62, "learning_rate": 6.785128829186811e-06, "loss": 0.7597, "step": 4131 }, { "epoch": 0.62, "learning_rate": 6.78055467288864e-06, "loss": 0.7786, "step": 4132 }, { "epoch": 0.62, "learning_rate": 6.775981267959482e-06, "loss": 0.7158, "step": 4133 }, { "epoch": 0.62, "learning_rate": 6.771408615466709e-06, "loss": 0.7538, "step": 4134 }, { "epoch": 0.62, "learning_rate": 6.766836716477499e-06, "loss": 0.7994, "step": 4135 }, { "epoch": 0.62, "learning_rate": 6.7622655720588685e-06, "loss": 0.3284, "step": 4136 }, { "epoch": 0.62, "learning_rate": 6.75769518327765e-06, "loss": 0.6656, "step": 4137 }, { "epoch": 0.62, "learning_rate": 6.7531255512005035e-06, "loss": 0.7584, "step": 4138 }, { "epoch": 0.62, "learning_rate": 6.7485566768939095e-06, "loss": 0.7458, "step": 4139 }, { "epoch": 0.62, "learning_rate": 6.743988561424175e-06, "loss": 0.744, "step": 4140 }, { "epoch": 0.62, "learning_rate": 6.739421205857423e-06, "loss": 0.6469, "step": 4141 }, { "epoch": 0.62, "learning_rate": 6.734854611259607e-06, "loss": 0.7321, "step": 4142 }, { "epoch": 0.62, "learning_rate": 6.7302887786965e-06, "loss": 0.7002, "step": 4143 }, { "epoch": 0.62, "learning_rate": 6.7257237092337e-06, "loss": 0.657, "step": 4144 }, { "epoch": 0.62, "learning_rate": 6.7211594039366214e-06, "loss": 0.7584, "step": 4145 }, { "epoch": 0.62, "learning_rate": 6.7165958638704984e-06, "loss": 0.7807, "step": 4146 }, { "epoch": 0.62, "learning_rate": 6.7120330901004e-06, "loss": 0.7763, "step": 4147 }, { "epoch": 0.62, "learning_rate": 6.707471083691202e-06, "loss": 0.7143, "step": 4148 }, { "epoch": 0.62, "learning_rate": 6.702909845707603e-06, "loss": 0.7341, "step": 4149 }, { "epoch": 0.62, "learning_rate": 6.698349377214133e-06, "loss": 0.7873, "step": 4150 }, { "epoch": 0.62, "learning_rate": 6.693789679275133e-06, "loss": 0.8117, "step": 4151 }, { "epoch": 0.62, "learning_rate": 6.689230752954764e-06, "loss": 0.7885, "step": 4152 }, { "epoch": 0.62, "learning_rate": 6.684672599317012e-06, "loss": 0.7436, "step": 4153 }, { "epoch": 0.62, "learning_rate": 6.68011521942568e-06, "loss": 0.7207, "step": 4154 }, { "epoch": 0.62, "learning_rate": 6.675558614344386e-06, "loss": 0.6837, "step": 4155 }, { "epoch": 0.62, "learning_rate": 6.671002785136578e-06, "loss": 0.6836, "step": 4156 }, { "epoch": 0.62, "learning_rate": 6.666447732865509e-06, "loss": 0.7305, "step": 4157 }, { "epoch": 0.62, "learning_rate": 6.661893458594265e-06, "loss": 0.6223, "step": 4158 }, { "epoch": 0.62, "learning_rate": 6.657339963385741e-06, "loss": 0.6743, "step": 4159 }, { "epoch": 0.62, "learning_rate": 6.652787248302652e-06, "loss": 0.7545, "step": 4160 }, { "epoch": 0.62, "learning_rate": 6.648235314407534e-06, "loss": 0.7465, "step": 4161 }, { "epoch": 0.62, "learning_rate": 6.643684162762736e-06, "loss": 0.6588, "step": 4162 }, { "epoch": 0.62, "learning_rate": 6.639133794430427e-06, "loss": 0.6141, "step": 4163 }, { "epoch": 0.62, "learning_rate": 6.63458421047259e-06, "loss": 0.7373, "step": 4164 }, { "epoch": 0.62, "learning_rate": 6.630035411951035e-06, "loss": 0.7061, "step": 4165 }, { "epoch": 0.62, "learning_rate": 6.625487399927376e-06, "loss": 0.6944, "step": 4166 }, { "epoch": 0.62, "learning_rate": 6.620940175463053e-06, "loss": 0.7733, "step": 4167 }, { "epoch": 0.62, "learning_rate": 6.616393739619316e-06, "loss": 0.7628, "step": 4168 }, { "epoch": 0.62, "learning_rate": 6.611848093457232e-06, "loss": 0.7374, "step": 4169 }, { "epoch": 0.62, "learning_rate": 6.607303238037687e-06, "loss": 0.7901, "step": 4170 }, { "epoch": 0.62, "learning_rate": 6.602759174421381e-06, "loss": 0.8177, "step": 4171 }, { "epoch": 0.62, "learning_rate": 6.5982159036688235e-06, "loss": 0.765, "step": 4172 }, { "epoch": 0.62, "learning_rate": 6.59367342684035e-06, "loss": 0.7809, "step": 4173 }, { "epoch": 0.62, "learning_rate": 6.5891317449961015e-06, "loss": 0.7028, "step": 4174 }, { "epoch": 0.62, "learning_rate": 6.5845908591960394e-06, "loss": 0.718, "step": 4175 }, { "epoch": 0.62, "learning_rate": 6.580050770499935e-06, "loss": 0.6587, "step": 4176 }, { "epoch": 0.62, "learning_rate": 6.575511479967375e-06, "loss": 0.7176, "step": 4177 }, { "epoch": 0.62, "learning_rate": 6.570972988657758e-06, "loss": 0.6307, "step": 4178 }, { "epoch": 0.62, "learning_rate": 6.566435297630296e-06, "loss": 0.7461, "step": 4179 }, { "epoch": 0.62, "learning_rate": 6.561898407944026e-06, "loss": 0.7376, "step": 4180 }, { "epoch": 0.62, "learning_rate": 6.557362320657783e-06, "loss": 0.8132, "step": 4181 }, { "epoch": 0.62, "learning_rate": 6.552827036830218e-06, "loss": 0.767, "step": 4182 }, { "epoch": 0.62, "learning_rate": 6.5482925575197975e-06, "loss": 0.8729, "step": 4183 }, { "epoch": 0.62, "learning_rate": 6.543758883784802e-06, "loss": 0.63, "step": 4184 }, { "epoch": 0.62, "learning_rate": 6.539226016683318e-06, "loss": 0.7244, "step": 4185 }, { "epoch": 0.62, "learning_rate": 6.534693957273244e-06, "loss": 0.7319, "step": 4186 }, { "epoch": 0.62, "learning_rate": 6.530162706612302e-06, "loss": 0.7193, "step": 4187 }, { "epoch": 0.62, "learning_rate": 6.5256322657580075e-06, "loss": 0.7384, "step": 4188 }, { "epoch": 0.62, "learning_rate": 6.521102635767702e-06, "loss": 0.7217, "step": 4189 }, { "epoch": 0.62, "learning_rate": 6.516573817698528e-06, "loss": 0.7557, "step": 4190 }, { "epoch": 0.63, "learning_rate": 6.512045812607444e-06, "loss": 0.762, "step": 4191 }, { "epoch": 0.63, "learning_rate": 6.507518621551212e-06, "loss": 0.3482, "step": 4192 }, { "epoch": 0.63, "learning_rate": 6.502992245586416e-06, "loss": 0.748, "step": 4193 }, { "epoch": 0.63, "learning_rate": 6.498466685769434e-06, "loss": 0.7071, "step": 4194 }, { "epoch": 0.63, "learning_rate": 6.49394194315647e-06, "loss": 0.7604, "step": 4195 }, { "epoch": 0.63, "learning_rate": 6.489418018803527e-06, "loss": 0.7621, "step": 4196 }, { "epoch": 0.63, "learning_rate": 6.484894913766417e-06, "loss": 0.6943, "step": 4197 }, { "epoch": 0.63, "learning_rate": 6.480372629100765e-06, "loss": 0.8111, "step": 4198 }, { "epoch": 0.63, "learning_rate": 6.475851165862004e-06, "loss": 0.7706, "step": 4199 }, { "epoch": 0.63, "learning_rate": 6.471330525105371e-06, "loss": 0.7398, "step": 4200 }, { "epoch": 0.63, "learning_rate": 6.466810707885911e-06, "loss": 0.6155, "step": 4201 }, { "epoch": 0.63, "learning_rate": 6.462291715258488e-06, "loss": 0.8176, "step": 4202 }, { "epoch": 0.63, "learning_rate": 6.457773548277761e-06, "loss": 0.703, "step": 4203 }, { "epoch": 0.63, "learning_rate": 6.453256207998203e-06, "loss": 0.6558, "step": 4204 }, { "epoch": 0.63, "learning_rate": 6.448739695474089e-06, "loss": 0.6335, "step": 4205 }, { "epoch": 0.63, "learning_rate": 6.444224011759503e-06, "loss": 0.8573, "step": 4206 }, { "epoch": 0.63, "learning_rate": 6.439709157908341e-06, "loss": 0.7447, "step": 4207 }, { "epoch": 0.63, "learning_rate": 6.435195134974295e-06, "loss": 0.8075, "step": 4208 }, { "epoch": 0.63, "learning_rate": 6.4306819440108656e-06, "loss": 0.6027, "step": 4209 }, { "epoch": 0.63, "learning_rate": 6.426169586071373e-06, "loss": 0.7442, "step": 4210 }, { "epoch": 0.63, "learning_rate": 6.421658062208922e-06, "loss": 0.7614, "step": 4211 }, { "epoch": 0.63, "learning_rate": 6.417147373476441e-06, "loss": 0.686, "step": 4212 }, { "epoch": 0.63, "learning_rate": 6.4126375209266515e-06, "loss": 0.6778, "step": 4213 }, { "epoch": 0.63, "learning_rate": 6.408128505612081e-06, "loss": 0.673, "step": 4214 }, { "epoch": 0.63, "learning_rate": 6.403620328585069e-06, "loss": 0.8048, "step": 4215 }, { "epoch": 0.63, "learning_rate": 6.399112990897749e-06, "loss": 0.7183, "step": 4216 }, { "epoch": 0.63, "learning_rate": 6.394606493602071e-06, "loss": 0.6614, "step": 4217 }, { "epoch": 0.63, "learning_rate": 6.39010083774978e-06, "loss": 0.7107, "step": 4218 }, { "epoch": 0.63, "learning_rate": 6.385596024392426e-06, "loss": 0.7258, "step": 4219 }, { "epoch": 0.63, "learning_rate": 6.381092054581359e-06, "loss": 0.7525, "step": 4220 }, { "epoch": 0.63, "learning_rate": 6.3765889293677445e-06, "loss": 0.6277, "step": 4221 }, { "epoch": 0.63, "learning_rate": 6.372086649802538e-06, "loss": 0.7177, "step": 4222 }, { "epoch": 0.63, "learning_rate": 6.367585216936498e-06, "loss": 0.7738, "step": 4223 }, { "epoch": 0.63, "learning_rate": 6.363084631820198e-06, "loss": 0.6968, "step": 4224 }, { "epoch": 0.63, "learning_rate": 6.358584895504001e-06, "loss": 0.6584, "step": 4225 }, { "epoch": 0.63, "learning_rate": 6.354086009038079e-06, "loss": 0.3104, "step": 4226 }, { "epoch": 0.63, "learning_rate": 6.3495879734724e-06, "loss": 0.7359, "step": 4227 }, { "epoch": 0.63, "learning_rate": 6.345090789856735e-06, "loss": 0.6552, "step": 4228 }, { "epoch": 0.63, "learning_rate": 6.340594459240662e-06, "loss": 0.2941, "step": 4229 }, { "epoch": 0.63, "learning_rate": 6.336098982673553e-06, "loss": 0.7604, "step": 4230 }, { "epoch": 0.63, "learning_rate": 6.331604361204579e-06, "loss": 0.6381, "step": 4231 }, { "epoch": 0.63, "learning_rate": 6.327110595882725e-06, "loss": 0.7887, "step": 4232 }, { "epoch": 0.63, "learning_rate": 6.32261768775676e-06, "loss": 0.7722, "step": 4233 }, { "epoch": 0.63, "learning_rate": 6.31812563787526e-06, "loss": 0.7525, "step": 4234 }, { "epoch": 0.63, "learning_rate": 6.313634447286603e-06, "loss": 0.3271, "step": 4235 }, { "epoch": 0.63, "learning_rate": 6.309144117038961e-06, "loss": 0.6836, "step": 4236 }, { "epoch": 0.63, "learning_rate": 6.3046546481803085e-06, "loss": 0.6992, "step": 4237 }, { "epoch": 0.63, "learning_rate": 6.300166041758419e-06, "loss": 0.7807, "step": 4238 }, { "epoch": 0.63, "learning_rate": 6.295678298820864e-06, "loss": 0.7345, "step": 4239 }, { "epoch": 0.63, "learning_rate": 6.291191420415016e-06, "loss": 0.6877, "step": 4240 }, { "epoch": 0.63, "learning_rate": 6.286705407588042e-06, "loss": 0.7383, "step": 4241 }, { "epoch": 0.63, "learning_rate": 6.282220261386905e-06, "loss": 0.7591, "step": 4242 }, { "epoch": 0.63, "learning_rate": 6.277735982858375e-06, "loss": 0.6869, "step": 4243 }, { "epoch": 0.63, "learning_rate": 6.273252573049009e-06, "loss": 0.7693, "step": 4244 }, { "epoch": 0.63, "learning_rate": 6.2687700330051695e-06, "loss": 0.7043, "step": 4245 }, { "epoch": 0.63, "learning_rate": 6.264288363773004e-06, "loss": 0.7378, "step": 4246 }, { "epoch": 0.63, "learning_rate": 6.259807566398475e-06, "loss": 0.2896, "step": 4247 }, { "epoch": 0.63, "learning_rate": 6.255327641927329e-06, "loss": 0.7393, "step": 4248 }, { "epoch": 0.63, "learning_rate": 6.250848591405112e-06, "loss": 0.7996, "step": 4249 }, { "epoch": 0.63, "learning_rate": 6.2463704158771636e-06, "loss": 0.7008, "step": 4250 }, { "epoch": 0.63, "learning_rate": 6.24189311638862e-06, "loss": 0.7405, "step": 4251 }, { "epoch": 0.63, "learning_rate": 6.2374166939844175e-06, "loss": 0.659, "step": 4252 }, { "epoch": 0.63, "learning_rate": 6.232941149709278e-06, "loss": 0.6779, "step": 4253 }, { "epoch": 0.63, "learning_rate": 6.228466484607734e-06, "loss": 0.6158, "step": 4254 }, { "epoch": 0.63, "learning_rate": 6.223992699724097e-06, "loss": 0.7158, "step": 4255 }, { "epoch": 0.63, "learning_rate": 6.21951979610248e-06, "loss": 0.6813, "step": 4256 }, { "epoch": 0.63, "learning_rate": 6.215047774786793e-06, "loss": 0.7256, "step": 4257 }, { "epoch": 0.64, "learning_rate": 6.210576636820736e-06, "loss": 0.7857, "step": 4258 }, { "epoch": 0.64, "learning_rate": 6.206106383247802e-06, "loss": 0.3366, "step": 4259 }, { "epoch": 0.64, "learning_rate": 6.201637015111276e-06, "loss": 0.3097, "step": 4260 }, { "epoch": 0.64, "learning_rate": 6.197168533454247e-06, "loss": 0.6541, "step": 4261 }, { "epoch": 0.64, "learning_rate": 6.192700939319588e-06, "loss": 0.6978, "step": 4262 }, { "epoch": 0.64, "learning_rate": 6.188234233749966e-06, "loss": 0.7311, "step": 4263 }, { "epoch": 0.64, "learning_rate": 6.183768417787841e-06, "loss": 0.7141, "step": 4264 }, { "epoch": 0.64, "learning_rate": 6.179303492475465e-06, "loss": 0.7695, "step": 4265 }, { "epoch": 0.64, "learning_rate": 6.1748394588548844e-06, "loss": 0.7427, "step": 4266 }, { "epoch": 0.64, "learning_rate": 6.170376317967936e-06, "loss": 0.7758, "step": 4267 }, { "epoch": 0.64, "learning_rate": 6.165914070856243e-06, "loss": 0.3252, "step": 4268 }, { "epoch": 0.64, "learning_rate": 6.161452718561234e-06, "loss": 0.7805, "step": 4269 }, { "epoch": 0.64, "learning_rate": 6.156992262124113e-06, "loss": 0.8545, "step": 4270 }, { "epoch": 0.64, "learning_rate": 6.152532702585889e-06, "loss": 0.6903, "step": 4271 }, { "epoch": 0.64, "learning_rate": 6.1480740409873485e-06, "loss": 0.7061, "step": 4272 }, { "epoch": 0.64, "learning_rate": 6.143616278369076e-06, "loss": 0.738, "step": 4273 }, { "epoch": 0.64, "learning_rate": 6.139159415771444e-06, "loss": 0.6541, "step": 4274 }, { "epoch": 0.64, "learning_rate": 6.134703454234614e-06, "loss": 0.7479, "step": 4275 }, { "epoch": 0.64, "learning_rate": 6.130248394798546e-06, "loss": 0.7218, "step": 4276 }, { "epoch": 0.64, "learning_rate": 6.125794238502978e-06, "loss": 0.6557, "step": 4277 }, { "epoch": 0.64, "learning_rate": 6.121340986387443e-06, "loss": 0.7133, "step": 4278 }, { "epoch": 0.64, "learning_rate": 6.116888639491256e-06, "loss": 0.7237, "step": 4279 }, { "epoch": 0.64, "learning_rate": 6.112437198853533e-06, "loss": 0.6691, "step": 4280 }, { "epoch": 0.64, "learning_rate": 6.107986665513168e-06, "loss": 0.7808, "step": 4281 }, { "epoch": 0.64, "learning_rate": 6.103537040508848e-06, "loss": 0.7251, "step": 4282 }, { "epoch": 0.64, "learning_rate": 6.099088324879046e-06, "loss": 0.6638, "step": 4283 }, { "epoch": 0.64, "learning_rate": 6.094640519662025e-06, "loss": 0.7564, "step": 4284 }, { "epoch": 0.64, "learning_rate": 6.090193625895837e-06, "loss": 0.6829, "step": 4285 }, { "epoch": 0.64, "learning_rate": 6.085747644618316e-06, "loss": 0.7458, "step": 4286 }, { "epoch": 0.64, "learning_rate": 6.081302576867085e-06, "loss": 0.6515, "step": 4287 }, { "epoch": 0.64, "learning_rate": 6.076858423679552e-06, "loss": 0.6583, "step": 4288 }, { "epoch": 0.64, "learning_rate": 6.0724151860929215e-06, "loss": 0.7526, "step": 4289 }, { "epoch": 0.64, "learning_rate": 6.067972865144167e-06, "loss": 0.7665, "step": 4290 }, { "epoch": 0.64, "learning_rate": 6.063531461870067e-06, "loss": 0.7614, "step": 4291 }, { "epoch": 0.64, "learning_rate": 6.059090977307171e-06, "loss": 0.7083, "step": 4292 }, { "epoch": 0.64, "learning_rate": 6.054651412491823e-06, "loss": 0.7462, "step": 4293 }, { "epoch": 0.64, "learning_rate": 6.050212768460149e-06, "loss": 0.7777, "step": 4294 }, { "epoch": 0.64, "learning_rate": 6.045775046248057e-06, "loss": 0.6573, "step": 4295 }, { "epoch": 0.64, "learning_rate": 6.041338246891245e-06, "loss": 0.7116, "step": 4296 }, { "epoch": 0.64, "learning_rate": 6.036902371425193e-06, "loss": 0.3183, "step": 4297 }, { "epoch": 0.64, "learning_rate": 6.0324674208851675e-06, "loss": 0.7813, "step": 4298 }, { "epoch": 0.64, "learning_rate": 6.028033396306219e-06, "loss": 0.7228, "step": 4299 }, { "epoch": 0.64, "learning_rate": 6.023600298723179e-06, "loss": 0.7724, "step": 4300 }, { "epoch": 0.64, "learning_rate": 6.019168129170664e-06, "loss": 0.7004, "step": 4301 }, { "epoch": 0.64, "learning_rate": 6.014736888683073e-06, "loss": 0.71, "step": 4302 }, { "epoch": 0.64, "learning_rate": 6.010306578294591e-06, "loss": 0.731, "step": 4303 }, { "epoch": 0.64, "learning_rate": 6.005877199039185e-06, "loss": 0.7634, "step": 4304 }, { "epoch": 0.64, "learning_rate": 6.001448751950598e-06, "loss": 0.6668, "step": 4305 }, { "epoch": 0.64, "learning_rate": 5.997021238062373e-06, "loss": 0.7267, "step": 4306 }, { "epoch": 0.64, "learning_rate": 5.992594658407812e-06, "loss": 0.7718, "step": 4307 }, { "epoch": 0.64, "learning_rate": 5.98816901402002e-06, "loss": 0.7285, "step": 4308 }, { "epoch": 0.64, "learning_rate": 5.983744305931869e-06, "loss": 0.8057, "step": 4309 }, { "epoch": 0.64, "learning_rate": 5.979320535176016e-06, "loss": 0.7009, "step": 4310 }, { "epoch": 0.64, "learning_rate": 5.974897702784906e-06, "loss": 0.8263, "step": 4311 }, { "epoch": 0.64, "learning_rate": 5.970475809790755e-06, "loss": 0.7547, "step": 4312 }, { "epoch": 0.64, "learning_rate": 5.96605485722557e-06, "loss": 0.6237, "step": 4313 }, { "epoch": 0.64, "learning_rate": 5.961634846121133e-06, "loss": 0.664, "step": 4314 }, { "epoch": 0.64, "learning_rate": 5.957215777509001e-06, "loss": 0.8119, "step": 4315 }, { "epoch": 0.64, "learning_rate": 5.9527976524205234e-06, "loss": 0.7494, "step": 4316 }, { "epoch": 0.64, "learning_rate": 5.948380471886819e-06, "loss": 0.7427, "step": 4317 }, { "epoch": 0.64, "learning_rate": 5.943964236938791e-06, "loss": 0.6406, "step": 4318 }, { "epoch": 0.64, "learning_rate": 5.9395489486071165e-06, "loss": 0.7781, "step": 4319 }, { "epoch": 0.64, "learning_rate": 5.93513460792226e-06, "loss": 0.6507, "step": 4320 }, { "epoch": 0.64, "learning_rate": 5.930721215914459e-06, "loss": 0.7171, "step": 4321 }, { "epoch": 0.64, "learning_rate": 5.926308773613736e-06, "loss": 0.6223, "step": 4322 }, { "epoch": 0.64, "learning_rate": 5.9218972820498836e-06, "loss": 0.774, "step": 4323 }, { "epoch": 0.64, "learning_rate": 5.917486742252473e-06, "loss": 0.644, "step": 4324 }, { "epoch": 0.65, "learning_rate": 5.913077155250861e-06, "loss": 0.6142, "step": 4325 }, { "epoch": 0.65, "learning_rate": 5.908668522074177e-06, "loss": 0.7208, "step": 4326 }, { "epoch": 0.65, "learning_rate": 5.9042608437513215e-06, "loss": 0.6226, "step": 4327 }, { "epoch": 0.65, "learning_rate": 5.899854121310991e-06, "loss": 0.3162, "step": 4328 }, { "epoch": 0.65, "learning_rate": 5.895448355781634e-06, "loss": 0.6907, "step": 4329 }, { "epoch": 0.65, "learning_rate": 5.8910435481915e-06, "loss": 0.6396, "step": 4330 }, { "epoch": 0.65, "learning_rate": 5.886639699568595e-06, "loss": 0.7153, "step": 4331 }, { "epoch": 0.65, "learning_rate": 5.882236810940714e-06, "loss": 0.6846, "step": 4332 }, { "epoch": 0.65, "learning_rate": 5.8778348833354186e-06, "loss": 0.7216, "step": 4333 }, { "epoch": 0.65, "learning_rate": 5.8734339177800535e-06, "loss": 0.7345, "step": 4334 }, { "epoch": 0.65, "learning_rate": 5.86903391530174e-06, "loss": 0.7654, "step": 4335 }, { "epoch": 0.65, "learning_rate": 5.864634876927368e-06, "loss": 0.6564, "step": 4336 }, { "epoch": 0.65, "learning_rate": 5.860236803683606e-06, "loss": 0.7036, "step": 4337 }, { "epoch": 0.65, "learning_rate": 5.855839696596892e-06, "loss": 0.7452, "step": 4338 }, { "epoch": 0.65, "learning_rate": 5.85144355669345e-06, "loss": 0.7886, "step": 4339 }, { "epoch": 0.65, "learning_rate": 5.84704838499927e-06, "loss": 0.6783, "step": 4340 }, { "epoch": 0.65, "learning_rate": 5.842654182540115e-06, "loss": 0.677, "step": 4341 }, { "epoch": 0.65, "learning_rate": 5.838260950341521e-06, "loss": 0.7302, "step": 4342 }, { "epoch": 0.65, "learning_rate": 5.8338686894288074e-06, "loss": 0.7811, "step": 4343 }, { "epoch": 0.65, "learning_rate": 5.829477400827062e-06, "loss": 0.6884, "step": 4344 }, { "epoch": 0.65, "learning_rate": 5.8250870855611364e-06, "loss": 0.793, "step": 4345 }, { "epoch": 0.65, "learning_rate": 5.8206977446556665e-06, "loss": 0.6864, "step": 4346 }, { "epoch": 0.65, "learning_rate": 5.816309379135061e-06, "loss": 0.7396, "step": 4347 }, { "epoch": 0.65, "learning_rate": 5.811921990023488e-06, "loss": 0.7808, "step": 4348 }, { "epoch": 0.65, "learning_rate": 5.807535578344901e-06, "loss": 0.7626, "step": 4349 }, { "epoch": 0.65, "learning_rate": 5.803150145123022e-06, "loss": 0.6905, "step": 4350 }, { "epoch": 0.65, "learning_rate": 5.798765691381343e-06, "loss": 0.6684, "step": 4351 }, { "epoch": 0.65, "learning_rate": 5.794382218143132e-06, "loss": 0.3125, "step": 4352 }, { "epoch": 0.65, "learning_rate": 5.7899997264314164e-06, "loss": 0.7478, "step": 4353 }, { "epoch": 0.65, "learning_rate": 5.7856182172690054e-06, "loss": 0.7468, "step": 4354 }, { "epoch": 0.65, "learning_rate": 5.7812376916784804e-06, "loss": 0.7306, "step": 4355 }, { "epoch": 0.65, "learning_rate": 5.776858150682183e-06, "loss": 0.6926, "step": 4356 }, { "epoch": 0.65, "learning_rate": 5.7724795953022315e-06, "loss": 0.8281, "step": 4357 }, { "epoch": 0.65, "learning_rate": 5.768102026560513e-06, "loss": 0.7665, "step": 4358 }, { "epoch": 0.65, "learning_rate": 5.76372544547869e-06, "loss": 0.7437, "step": 4359 }, { "epoch": 0.65, "learning_rate": 5.759349853078187e-06, "loss": 0.6961, "step": 4360 }, { "epoch": 0.65, "learning_rate": 5.754975250380195e-06, "loss": 0.7523, "step": 4361 }, { "epoch": 0.65, "learning_rate": 5.750601638405686e-06, "loss": 0.7164, "step": 4362 }, { "epoch": 0.65, "learning_rate": 5.746229018175385e-06, "loss": 0.696, "step": 4363 }, { "epoch": 0.65, "learning_rate": 5.741857390709797e-06, "loss": 0.7006, "step": 4364 }, { "epoch": 0.65, "learning_rate": 5.737486757029202e-06, "loss": 0.7353, "step": 4365 }, { "epoch": 0.65, "learning_rate": 5.73311711815363e-06, "loss": 0.8015, "step": 4366 }, { "epoch": 0.65, "learning_rate": 5.728748475102892e-06, "loss": 0.6757, "step": 4367 }, { "epoch": 0.65, "learning_rate": 5.724380828896554e-06, "loss": 0.3342, "step": 4368 }, { "epoch": 0.65, "learning_rate": 5.720014180553964e-06, "loss": 0.7442, "step": 4369 }, { "epoch": 0.65, "learning_rate": 5.715648531094233e-06, "loss": 0.7336, "step": 4370 }, { "epoch": 0.65, "learning_rate": 5.7112838815362245e-06, "loss": 0.7574, "step": 4371 }, { "epoch": 0.65, "learning_rate": 5.706920232898599e-06, "loss": 0.782, "step": 4372 }, { "epoch": 0.65, "learning_rate": 5.7025575861997485e-06, "loss": 0.6894, "step": 4373 }, { "epoch": 0.65, "learning_rate": 5.698195942457853e-06, "loss": 0.6504, "step": 4374 }, { "epoch": 0.65, "learning_rate": 5.6938353026908595e-06, "loss": 0.7471, "step": 4375 }, { "epoch": 0.65, "learning_rate": 5.689475667916466e-06, "loss": 0.6706, "step": 4376 }, { "epoch": 0.65, "learning_rate": 5.685117039152146e-06, "loss": 0.7985, "step": 4377 }, { "epoch": 0.65, "learning_rate": 5.6807594174151405e-06, "loss": 0.7552, "step": 4378 }, { "epoch": 0.65, "learning_rate": 5.676402803722441e-06, "loss": 0.5967, "step": 4379 }, { "epoch": 0.65, "learning_rate": 5.672047199090829e-06, "loss": 0.7297, "step": 4380 }, { "epoch": 0.65, "learning_rate": 5.667692604536823e-06, "loss": 0.6798, "step": 4381 }, { "epoch": 0.65, "learning_rate": 5.663339021076725e-06, "loss": 0.7344, "step": 4382 }, { "epoch": 0.65, "learning_rate": 5.658986449726596e-06, "loss": 0.7185, "step": 4383 }, { "epoch": 0.65, "learning_rate": 5.654634891502252e-06, "loss": 0.6335, "step": 4384 }, { "epoch": 0.65, "learning_rate": 5.650284347419284e-06, "loss": 0.766, "step": 4385 }, { "epoch": 0.65, "learning_rate": 5.645934818493042e-06, "loss": 0.3237, "step": 4386 }, { "epoch": 0.65, "learning_rate": 5.641586305738637e-06, "loss": 0.6833, "step": 4387 }, { "epoch": 0.65, "learning_rate": 5.637238810170953e-06, "loss": 0.3245, "step": 4388 }, { "epoch": 0.65, "learning_rate": 5.632892332804618e-06, "loss": 0.7238, "step": 4389 }, { "epoch": 0.65, "learning_rate": 5.628546874654037e-06, "loss": 0.6883, "step": 4390 }, { "epoch": 0.65, "learning_rate": 5.624202436733378e-06, "loss": 0.6131, "step": 4391 }, { "epoch": 0.66, "learning_rate": 5.619859020056557e-06, "loss": 0.7632, "step": 4392 }, { "epoch": 0.66, "learning_rate": 5.615516625637271e-06, "loss": 0.7345, "step": 4393 }, { "epoch": 0.66, "learning_rate": 5.611175254488952e-06, "loss": 0.6943, "step": 4394 }, { "epoch": 0.66, "learning_rate": 5.60683490762483e-06, "loss": 0.3363, "step": 4395 }, { "epoch": 0.66, "learning_rate": 5.6024955860578574e-06, "loss": 0.74, "step": 4396 }, { "epoch": 0.66, "learning_rate": 5.598157290800774e-06, "loss": 0.717, "step": 4397 }, { "epoch": 0.66, "learning_rate": 5.593820022866075e-06, "loss": 0.7136, "step": 4398 }, { "epoch": 0.66, "learning_rate": 5.589483783266001e-06, "loss": 0.7323, "step": 4399 }, { "epoch": 0.66, "learning_rate": 5.585148573012569e-06, "loss": 0.5751, "step": 4400 }, { "epoch": 0.66, "learning_rate": 5.580814393117552e-06, "loss": 0.711, "step": 4401 }, { "epoch": 0.66, "learning_rate": 5.5764812445924776e-06, "loss": 0.768, "step": 4402 }, { "epoch": 0.66, "learning_rate": 5.572149128448644e-06, "loss": 0.726, "step": 4403 }, { "epoch": 0.66, "learning_rate": 5.567818045697089e-06, "loss": 0.6925, "step": 4404 }, { "epoch": 0.66, "learning_rate": 5.563487997348627e-06, "loss": 0.6381, "step": 4405 }, { "epoch": 0.66, "learning_rate": 5.559158984413827e-06, "loss": 0.8194, "step": 4406 }, { "epoch": 0.66, "learning_rate": 5.5548310079030075e-06, "loss": 0.7741, "step": 4407 }, { "epoch": 0.66, "learning_rate": 5.550504068826255e-06, "loss": 0.6937, "step": 4408 }, { "epoch": 0.66, "learning_rate": 5.54617816819341e-06, "loss": 0.681, "step": 4409 }, { "epoch": 0.66, "learning_rate": 5.541853307014071e-06, "loss": 0.7616, "step": 4410 }, { "epoch": 0.66, "learning_rate": 5.5375294862975995e-06, "loss": 0.3203, "step": 4411 }, { "epoch": 0.66, "learning_rate": 5.5332067070531e-06, "loss": 0.767, "step": 4412 }, { "epoch": 0.66, "learning_rate": 5.528884970289447e-06, "loss": 0.7602, "step": 4413 }, { "epoch": 0.66, "learning_rate": 5.524564277015271e-06, "loss": 0.6516, "step": 4414 }, { "epoch": 0.66, "learning_rate": 5.520244628238947e-06, "loss": 0.7675, "step": 4415 }, { "epoch": 0.66, "learning_rate": 5.515926024968619e-06, "loss": 0.8116, "step": 4416 }, { "epoch": 0.66, "learning_rate": 5.5116084682121814e-06, "loss": 0.7356, "step": 4417 }, { "epoch": 0.66, "learning_rate": 5.5072919589772884e-06, "loss": 0.7822, "step": 4418 }, { "epoch": 0.66, "learning_rate": 5.5029764982713484e-06, "loss": 0.7781, "step": 4419 }, { "epoch": 0.66, "learning_rate": 5.4986620871015175e-06, "loss": 0.7323, "step": 4420 }, { "epoch": 0.66, "learning_rate": 5.494348726474719e-06, "loss": 0.7542, "step": 4421 }, { "epoch": 0.66, "learning_rate": 5.490036417397617e-06, "loss": 0.7557, "step": 4422 }, { "epoch": 0.66, "learning_rate": 5.485725160876638e-06, "loss": 0.7902, "step": 4423 }, { "epoch": 0.66, "learning_rate": 5.481414957917977e-06, "loss": 0.7635, "step": 4424 }, { "epoch": 0.66, "learning_rate": 5.4771058095275565e-06, "loss": 0.8266, "step": 4425 }, { "epoch": 0.66, "learning_rate": 5.4727977167110716e-06, "loss": 0.769, "step": 4426 }, { "epoch": 0.66, "learning_rate": 5.468490680473957e-06, "loss": 0.6921, "step": 4427 }, { "epoch": 0.66, "learning_rate": 5.464184701821416e-06, "loss": 0.6814, "step": 4428 }, { "epoch": 0.66, "learning_rate": 5.459879781758397e-06, "loss": 0.7313, "step": 4429 }, { "epoch": 0.66, "learning_rate": 5.455575921289597e-06, "loss": 0.668, "step": 4430 }, { "epoch": 0.66, "learning_rate": 5.451273121419476e-06, "loss": 0.2908, "step": 4431 }, { "epoch": 0.66, "learning_rate": 5.446971383152239e-06, "loss": 0.6285, "step": 4432 }, { "epoch": 0.66, "learning_rate": 5.442670707491846e-06, "loss": 0.6527, "step": 4433 }, { "epoch": 0.66, "learning_rate": 5.438371095442013e-06, "loss": 0.6105, "step": 4434 }, { "epoch": 0.66, "learning_rate": 5.434072548006195e-06, "loss": 0.6459, "step": 4435 }, { "epoch": 0.66, "learning_rate": 5.429775066187611e-06, "loss": 0.7271, "step": 4436 }, { "epoch": 0.66, "learning_rate": 5.425478650989231e-06, "loss": 0.7263, "step": 4437 }, { "epoch": 0.66, "learning_rate": 5.421183303413762e-06, "loss": 0.6172, "step": 4438 }, { "epoch": 0.66, "learning_rate": 5.416889024463685e-06, "loss": 0.6921, "step": 4439 }, { "epoch": 0.66, "learning_rate": 5.412595815141208e-06, "loss": 0.8207, "step": 4440 }, { "epoch": 0.66, "learning_rate": 5.408303676448303e-06, "loss": 0.7355, "step": 4441 }, { "epoch": 0.66, "learning_rate": 5.4040126093866965e-06, "loss": 0.6949, "step": 4442 }, { "epoch": 0.66, "learning_rate": 5.3997226149578454e-06, "loss": 0.7541, "step": 4443 }, { "epoch": 0.66, "learning_rate": 5.395433694162976e-06, "loss": 0.7213, "step": 4444 }, { "epoch": 0.66, "learning_rate": 5.391145848003055e-06, "loss": 0.7898, "step": 4445 }, { "epoch": 0.66, "learning_rate": 5.386859077478797e-06, "loss": 0.6994, "step": 4446 }, { "epoch": 0.66, "learning_rate": 5.382573383590677e-06, "loss": 0.7669, "step": 4447 }, { "epoch": 0.66, "learning_rate": 5.378288767338897e-06, "loss": 0.7443, "step": 4448 }, { "epoch": 0.66, "learning_rate": 5.374005229723434e-06, "loss": 0.7728, "step": 4449 }, { "epoch": 0.66, "learning_rate": 5.369722771743988e-06, "loss": 0.7471, "step": 4450 }, { "epoch": 0.66, "learning_rate": 5.365441394400025e-06, "loss": 0.7272, "step": 4451 }, { "epoch": 0.66, "learning_rate": 5.361161098690755e-06, "loss": 0.7871, "step": 4452 }, { "epoch": 0.66, "learning_rate": 5.356881885615122e-06, "loss": 0.6505, "step": 4453 }, { "epoch": 0.66, "learning_rate": 5.352603756171843e-06, "loss": 0.8056, "step": 4454 }, { "epoch": 0.66, "learning_rate": 5.348326711359357e-06, "loss": 0.7352, "step": 4455 }, { "epoch": 0.66, "learning_rate": 5.344050752175866e-06, "loss": 0.7135, "step": 4456 }, { "epoch": 0.66, "learning_rate": 5.3397758796193144e-06, "loss": 0.3292, "step": 4457 }, { "epoch": 0.66, "learning_rate": 5.335502094687385e-06, "loss": 0.6754, "step": 4458 }, { "epoch": 0.67, "learning_rate": 5.331229398377517e-06, "loss": 0.7297, "step": 4459 }, { "epoch": 0.67, "learning_rate": 5.326957791686892e-06, "loss": 0.7296, "step": 4460 }, { "epoch": 0.67, "learning_rate": 5.322687275612437e-06, "loss": 0.3322, "step": 4461 }, { "epoch": 0.67, "learning_rate": 5.31841785115083e-06, "loss": 0.7798, "step": 4462 }, { "epoch": 0.67, "learning_rate": 5.31414951929848e-06, "loss": 0.7287, "step": 4463 }, { "epoch": 0.67, "learning_rate": 5.309882281051555e-06, "loss": 0.6928, "step": 4464 }, { "epoch": 0.67, "learning_rate": 5.305616137405964e-06, "loss": 0.3098, "step": 4465 }, { "epoch": 0.67, "learning_rate": 5.301351089357356e-06, "loss": 0.6601, "step": 4466 }, { "epoch": 0.67, "learning_rate": 5.297087137901127e-06, "loss": 0.7222, "step": 4467 }, { "epoch": 0.67, "learning_rate": 5.292824284032421e-06, "loss": 0.3144, "step": 4468 }, { "epoch": 0.67, "learning_rate": 5.288562528746119e-06, "loss": 0.6721, "step": 4469 }, { "epoch": 0.67, "learning_rate": 5.284301873036857e-06, "loss": 0.7735, "step": 4470 }, { "epoch": 0.67, "learning_rate": 5.280042317898995e-06, "loss": 0.7641, "step": 4471 }, { "epoch": 0.67, "learning_rate": 5.275783864326656e-06, "loss": 0.7406, "step": 4472 }, { "epoch": 0.67, "learning_rate": 5.271526513313697e-06, "loss": 0.7223, "step": 4473 }, { "epoch": 0.67, "learning_rate": 5.267270265853712e-06, "loss": 0.7794, "step": 4474 }, { "epoch": 0.67, "learning_rate": 5.26301512294005e-06, "loss": 0.6788, "step": 4475 }, { "epoch": 0.67, "learning_rate": 5.2587610855657915e-06, "loss": 0.711, "step": 4476 }, { "epoch": 0.67, "learning_rate": 5.254508154723768e-06, "loss": 0.7486, "step": 4477 }, { "epoch": 0.67, "learning_rate": 5.250256331406547e-06, "loss": 0.7679, "step": 4478 }, { "epoch": 0.67, "learning_rate": 5.246005616606435e-06, "loss": 0.6623, "step": 4479 }, { "epoch": 0.67, "learning_rate": 5.241756011315491e-06, "loss": 0.3081, "step": 4480 }, { "epoch": 0.67, "learning_rate": 5.237507516525497e-06, "loss": 0.7316, "step": 4481 }, { "epoch": 0.67, "learning_rate": 5.233260133227991e-06, "loss": 0.639, "step": 4482 }, { "epoch": 0.67, "learning_rate": 5.22901386241425e-06, "loss": 0.6324, "step": 4483 }, { "epoch": 0.67, "learning_rate": 5.224768705075284e-06, "loss": 0.6742, "step": 4484 }, { "epoch": 0.67, "learning_rate": 5.220524662201853e-06, "loss": 0.7063, "step": 4485 }, { "epoch": 0.67, "learning_rate": 5.216281734784445e-06, "loss": 0.7249, "step": 4486 }, { "epoch": 0.67, "learning_rate": 5.212039923813297e-06, "loss": 0.708, "step": 4487 }, { "epoch": 0.67, "learning_rate": 5.207799230278385e-06, "loss": 0.7553, "step": 4488 }, { "epoch": 0.67, "learning_rate": 5.203559655169417e-06, "loss": 0.6924, "step": 4489 }, { "epoch": 0.67, "learning_rate": 5.199321199475844e-06, "loss": 0.7979, "step": 4490 }, { "epoch": 0.67, "learning_rate": 5.195083864186861e-06, "loss": 0.7444, "step": 4491 }, { "epoch": 0.67, "learning_rate": 5.190847650291393e-06, "loss": 0.6441, "step": 4492 }, { "epoch": 0.67, "learning_rate": 5.186612558778112e-06, "loss": 0.7267, "step": 4493 }, { "epoch": 0.67, "learning_rate": 5.182378590635419e-06, "loss": 0.7105, "step": 4494 }, { "epoch": 0.67, "learning_rate": 5.1781457468514555e-06, "loss": 0.7518, "step": 4495 }, { "epoch": 0.67, "learning_rate": 5.1739140284141095e-06, "loss": 0.7546, "step": 4496 }, { "epoch": 0.67, "learning_rate": 5.169683436310986e-06, "loss": 0.7525, "step": 4497 }, { "epoch": 0.67, "learning_rate": 5.165453971529457e-06, "loss": 0.7083, "step": 4498 }, { "epoch": 0.67, "learning_rate": 5.161225635056602e-06, "loss": 0.7355, "step": 4499 }, { "epoch": 0.67, "learning_rate": 5.156998427879252e-06, "loss": 0.6631, "step": 4500 }, { "epoch": 0.67, "learning_rate": 5.152772350983979e-06, "loss": 0.6978, "step": 4501 }, { "epoch": 0.67, "learning_rate": 5.148547405357074e-06, "loss": 0.6899, "step": 4502 }, { "epoch": 0.67, "learning_rate": 5.144323591984583e-06, "loss": 0.7212, "step": 4503 }, { "epoch": 0.67, "learning_rate": 5.140100911852274e-06, "loss": 0.7227, "step": 4504 }, { "epoch": 0.67, "learning_rate": 5.135879365945651e-06, "loss": 0.6526, "step": 4505 }, { "epoch": 0.67, "learning_rate": 5.131658955249972e-06, "loss": 0.8453, "step": 4506 }, { "epoch": 0.67, "learning_rate": 5.127439680750204e-06, "loss": 0.6104, "step": 4507 }, { "epoch": 0.67, "learning_rate": 5.123221543431068e-06, "loss": 0.6257, "step": 4508 }, { "epoch": 0.67, "learning_rate": 5.119004544277006e-06, "loss": 0.6793, "step": 4509 }, { "epoch": 0.67, "learning_rate": 5.114788684272203e-06, "loss": 0.7361, "step": 4510 }, { "epoch": 0.67, "learning_rate": 5.11057396440058e-06, "loss": 0.7308, "step": 4511 }, { "epoch": 0.67, "learning_rate": 5.106360385645778e-06, "loss": 0.7567, "step": 4512 }, { "epoch": 0.67, "learning_rate": 5.102147948991194e-06, "loss": 0.6656, "step": 4513 }, { "epoch": 0.67, "learning_rate": 5.097936655419937e-06, "loss": 0.667, "step": 4514 }, { "epoch": 0.67, "learning_rate": 5.09372650591486e-06, "loss": 0.7107, "step": 4515 }, { "epoch": 0.67, "learning_rate": 5.089517501458553e-06, "loss": 0.7138, "step": 4516 }, { "epoch": 0.67, "learning_rate": 5.085309643033323e-06, "loss": 0.6916, "step": 4517 }, { "epoch": 0.67, "learning_rate": 5.081102931621224e-06, "loss": 0.6122, "step": 4518 }, { "epoch": 0.67, "learning_rate": 5.076897368204039e-06, "loss": 0.7449, "step": 4519 }, { "epoch": 0.67, "learning_rate": 5.072692953763281e-06, "loss": 0.6995, "step": 4520 }, { "epoch": 0.67, "learning_rate": 5.068489689280199e-06, "loss": 0.6515, "step": 4521 }, { "epoch": 0.67, "learning_rate": 5.064287575735763e-06, "loss": 0.7245, "step": 4522 }, { "epoch": 0.67, "learning_rate": 5.060086614110685e-06, "loss": 0.334, "step": 4523 }, { "epoch": 0.67, "learning_rate": 5.055886805385411e-06, "loss": 0.7337, "step": 4524 }, { "epoch": 0.67, "learning_rate": 5.051688150540099e-06, "loss": 0.6815, "step": 4525 }, { "epoch": 0.68, "learning_rate": 5.04749065055466e-06, "loss": 0.6534, "step": 4526 }, { "epoch": 0.68, "learning_rate": 5.043294306408722e-06, "loss": 0.7039, "step": 4527 }, { "epoch": 0.68, "learning_rate": 5.039099119081649e-06, "loss": 0.7611, "step": 4528 }, { "epoch": 0.68, "learning_rate": 5.034905089552537e-06, "loss": 0.6895, "step": 4529 }, { "epoch": 0.68, "learning_rate": 5.0307122188002e-06, "loss": 0.7036, "step": 4530 }, { "epoch": 0.68, "learning_rate": 5.026520507803194e-06, "loss": 0.6722, "step": 4531 }, { "epoch": 0.68, "learning_rate": 5.022329957539802e-06, "loss": 0.7503, "step": 4532 }, { "epoch": 0.68, "learning_rate": 5.018140568988028e-06, "loss": 0.7938, "step": 4533 }, { "epoch": 0.68, "learning_rate": 5.0139523431256135e-06, "loss": 0.7868, "step": 4534 }, { "epoch": 0.68, "learning_rate": 5.009765280930025e-06, "loss": 0.6788, "step": 4535 }, { "epoch": 0.68, "learning_rate": 5.005579383378467e-06, "loss": 0.6398, "step": 4536 }, { "epoch": 0.68, "learning_rate": 5.00139465144785e-06, "loss": 0.6796, "step": 4537 }, { "epoch": 0.68, "learning_rate": 4.997211086114834e-06, "loss": 0.7432, "step": 4538 }, { "epoch": 0.68, "learning_rate": 4.993028688355801e-06, "loss": 0.6785, "step": 4539 }, { "epoch": 0.68, "learning_rate": 4.988847459146851e-06, "loss": 0.6683, "step": 4540 }, { "epoch": 0.68, "learning_rate": 4.984667399463822e-06, "loss": 0.6839, "step": 4541 }, { "epoch": 0.68, "learning_rate": 4.9804885102822755e-06, "loss": 0.7603, "step": 4542 }, { "epoch": 0.68, "learning_rate": 4.9763107925775e-06, "loss": 0.7724, "step": 4543 }, { "epoch": 0.68, "learning_rate": 4.972134247324516e-06, "loss": 0.6533, "step": 4544 }, { "epoch": 0.68, "learning_rate": 4.967958875498054e-06, "loss": 0.7353, "step": 4545 }, { "epoch": 0.68, "learning_rate": 4.963784678072589e-06, "loss": 0.7686, "step": 4546 }, { "epoch": 0.68, "learning_rate": 4.9596116560223175e-06, "loss": 0.6866, "step": 4547 }, { "epoch": 0.68, "learning_rate": 4.9554398103211485e-06, "loss": 0.8038, "step": 4548 }, { "epoch": 0.68, "learning_rate": 4.9512691419427325e-06, "loss": 0.7788, "step": 4549 }, { "epoch": 0.68, "learning_rate": 4.94709965186044e-06, "loss": 0.6656, "step": 4550 }, { "epoch": 0.68, "learning_rate": 4.942931341047363e-06, "loss": 0.7742, "step": 4551 }, { "epoch": 0.68, "learning_rate": 4.938764210476328e-06, "loss": 0.6629, "step": 4552 }, { "epoch": 0.68, "learning_rate": 4.934598261119871e-06, "loss": 0.7742, "step": 4553 }, { "epoch": 0.68, "learning_rate": 4.930433493950262e-06, "loss": 0.7415, "step": 4554 }, { "epoch": 0.68, "learning_rate": 4.9262699099395e-06, "loss": 0.7267, "step": 4555 }, { "epoch": 0.68, "learning_rate": 4.922107510059288e-06, "loss": 0.7053, "step": 4556 }, { "epoch": 0.68, "learning_rate": 4.917946295281083e-06, "loss": 0.6524, "step": 4557 }, { "epoch": 0.68, "learning_rate": 4.913786266576037e-06, "loss": 0.7282, "step": 4558 }, { "epoch": 0.68, "learning_rate": 4.909627424915039e-06, "loss": 0.3431, "step": 4559 }, { "epoch": 0.68, "learning_rate": 4.905469771268705e-06, "loss": 0.7296, "step": 4560 }, { "epoch": 0.68, "learning_rate": 4.901313306607357e-06, "loss": 0.7053, "step": 4561 }, { "epoch": 0.68, "learning_rate": 4.8971580319010595e-06, "loss": 0.7104, "step": 4562 }, { "epoch": 0.68, "learning_rate": 4.89300394811958e-06, "loss": 0.6729, "step": 4563 }, { "epoch": 0.68, "learning_rate": 4.888851056232421e-06, "loss": 0.6652, "step": 4564 }, { "epoch": 0.68, "learning_rate": 4.884699357208812e-06, "loss": 0.6254, "step": 4565 }, { "epoch": 0.68, "learning_rate": 4.880548852017686e-06, "loss": 0.7015, "step": 4566 }, { "epoch": 0.68, "learning_rate": 4.8763995416277145e-06, "loss": 0.3293, "step": 4567 }, { "epoch": 0.68, "learning_rate": 4.872251427007274e-06, "loss": 0.6906, "step": 4568 }, { "epoch": 0.68, "learning_rate": 4.868104509124475e-06, "loss": 0.7654, "step": 4569 }, { "epoch": 0.68, "learning_rate": 4.863958788947148e-06, "loss": 0.6767, "step": 4570 }, { "epoch": 0.68, "learning_rate": 4.85981426744283e-06, "loss": 0.3398, "step": 4571 }, { "epoch": 0.68, "learning_rate": 4.855670945578803e-06, "loss": 0.7366, "step": 4572 }, { "epoch": 0.68, "learning_rate": 4.851528824322044e-06, "loss": 0.7056, "step": 4573 }, { "epoch": 0.68, "learning_rate": 4.847387904639262e-06, "loss": 0.6834, "step": 4574 }, { "epoch": 0.68, "learning_rate": 4.84324818749689e-06, "loss": 0.7335, "step": 4575 }, { "epoch": 0.68, "learning_rate": 4.8391096738610655e-06, "loss": 0.7083, "step": 4576 }, { "epoch": 0.68, "learning_rate": 4.834972364697657e-06, "loss": 0.8165, "step": 4577 }, { "epoch": 0.68, "learning_rate": 4.830836260972256e-06, "loss": 0.7988, "step": 4578 }, { "epoch": 0.68, "learning_rate": 4.82670136365015e-06, "loss": 0.7096, "step": 4579 }, { "epoch": 0.68, "learning_rate": 4.822567673696378e-06, "loss": 0.7513, "step": 4580 }, { "epoch": 0.68, "learning_rate": 4.818435192075668e-06, "loss": 0.8143, "step": 4581 }, { "epoch": 0.68, "learning_rate": 4.81430391975248e-06, "loss": 0.7351, "step": 4582 }, { "epoch": 0.68, "learning_rate": 4.810173857690995e-06, "loss": 0.7051, "step": 4583 }, { "epoch": 0.68, "learning_rate": 4.806045006855098e-06, "loss": 0.7793, "step": 4584 }, { "epoch": 0.68, "learning_rate": 4.8019173682084066e-06, "loss": 0.5837, "step": 4585 }, { "epoch": 0.68, "learning_rate": 4.7977909427142356e-06, "loss": 0.6663, "step": 4586 }, { "epoch": 0.68, "learning_rate": 4.7936657313356426e-06, "loss": 0.6923, "step": 4587 }, { "epoch": 0.68, "learning_rate": 4.789541735035388e-06, "loss": 0.7937, "step": 4588 }, { "epoch": 0.68, "learning_rate": 4.78541895477594e-06, "loss": 0.747, "step": 4589 }, { "epoch": 0.68, "learning_rate": 4.781297391519501e-06, "loss": 0.7769, "step": 4590 }, { "epoch": 0.68, "learning_rate": 4.777177046227974e-06, "loss": 0.7079, "step": 4591 }, { "epoch": 0.68, "learning_rate": 4.7730579198629856e-06, "loss": 0.7145, "step": 4592 }, { "epoch": 0.69, "learning_rate": 4.768940013385877e-06, "loss": 0.7152, "step": 4593 }, { "epoch": 0.69, "learning_rate": 4.7648233277577075e-06, "loss": 0.7286, "step": 4594 }, { "epoch": 0.69, "learning_rate": 4.760707863939247e-06, "loss": 0.7186, "step": 4595 }, { "epoch": 0.69, "learning_rate": 4.756593622890977e-06, "loss": 0.7516, "step": 4596 }, { "epoch": 0.69, "learning_rate": 4.752480605573101e-06, "loss": 0.7151, "step": 4597 }, { "epoch": 0.69, "learning_rate": 4.748368812945539e-06, "loss": 0.7093, "step": 4598 }, { "epoch": 0.69, "learning_rate": 4.74425824596791e-06, "loss": 0.645, "step": 4599 }, { "epoch": 0.69, "learning_rate": 4.740148905599562e-06, "loss": 0.628, "step": 4600 }, { "epoch": 0.69, "learning_rate": 4.736040792799553e-06, "loss": 0.6875, "step": 4601 }, { "epoch": 0.69, "learning_rate": 4.731933908526652e-06, "loss": 0.788, "step": 4602 }, { "epoch": 0.69, "learning_rate": 4.727828253739347e-06, "loss": 0.7348, "step": 4603 }, { "epoch": 0.69, "learning_rate": 4.723723829395828e-06, "loss": 0.5851, "step": 4604 }, { "epoch": 0.69, "learning_rate": 4.719620636454007e-06, "loss": 0.7873, "step": 4605 }, { "epoch": 0.69, "learning_rate": 4.715518675871509e-06, "loss": 0.7017, "step": 4606 }, { "epoch": 0.69, "learning_rate": 4.711417948605663e-06, "loss": 0.6896, "step": 4607 }, { "epoch": 0.69, "learning_rate": 4.707318455613519e-06, "loss": 0.7398, "step": 4608 }, { "epoch": 0.69, "learning_rate": 4.703220197851834e-06, "loss": 0.6339, "step": 4609 }, { "epoch": 0.69, "learning_rate": 4.69912317627708e-06, "loss": 0.7205, "step": 4610 }, { "epoch": 0.69, "learning_rate": 4.695027391845443e-06, "loss": 0.7571, "step": 4611 }, { "epoch": 0.69, "learning_rate": 4.690932845512806e-06, "loss": 0.594, "step": 4612 }, { "epoch": 0.69, "learning_rate": 4.6868395382347796e-06, "loss": 0.7293, "step": 4613 }, { "epoch": 0.69, "learning_rate": 4.682747470966682e-06, "loss": 0.6699, "step": 4614 }, { "epoch": 0.69, "learning_rate": 4.67865664466353e-06, "loss": 0.8225, "step": 4615 }, { "epoch": 0.69, "learning_rate": 4.6745670602800634e-06, "loss": 0.7083, "step": 4616 }, { "epoch": 0.69, "learning_rate": 4.67047871877073e-06, "loss": 0.6787, "step": 4617 }, { "epoch": 0.69, "learning_rate": 4.666391621089688e-06, "loss": 0.6985, "step": 4618 }, { "epoch": 0.69, "learning_rate": 4.662305768190796e-06, "loss": 0.5925, "step": 4619 }, { "epoch": 0.69, "learning_rate": 4.6582211610276325e-06, "loss": 0.5452, "step": 4620 }, { "epoch": 0.69, "learning_rate": 4.654137800553487e-06, "loss": 0.6438, "step": 4621 }, { "epoch": 0.69, "learning_rate": 4.650055687721344e-06, "loss": 0.6615, "step": 4622 }, { "epoch": 0.69, "learning_rate": 4.6459748234839115e-06, "loss": 0.6691, "step": 4623 }, { "epoch": 0.69, "learning_rate": 4.641895208793599e-06, "loss": 0.7534, "step": 4624 }, { "epoch": 0.69, "learning_rate": 4.637816844602526e-06, "loss": 0.68, "step": 4625 }, { "epoch": 0.69, "learning_rate": 4.633739731862524e-06, "loss": 0.75, "step": 4626 }, { "epoch": 0.69, "learning_rate": 4.6296638715251204e-06, "loss": 0.6719, "step": 4627 }, { "epoch": 0.69, "learning_rate": 4.6255892645415635e-06, "loss": 0.7189, "step": 4628 }, { "epoch": 0.69, "learning_rate": 4.621515911862805e-06, "loss": 0.7322, "step": 4629 }, { "epoch": 0.69, "learning_rate": 4.617443814439494e-06, "loss": 0.7403, "step": 4630 }, { "epoch": 0.69, "learning_rate": 4.61337297322201e-06, "loss": 0.3397, "step": 4631 }, { "epoch": 0.69, "learning_rate": 4.609303389160412e-06, "loss": 0.6829, "step": 4632 }, { "epoch": 0.69, "learning_rate": 4.605235063204483e-06, "loss": 0.7411, "step": 4633 }, { "epoch": 0.69, "learning_rate": 4.601167996303713e-06, "loss": 0.7191, "step": 4634 }, { "epoch": 0.69, "learning_rate": 4.597102189407282e-06, "loss": 0.6815, "step": 4635 }, { "epoch": 0.69, "learning_rate": 4.593037643464093e-06, "loss": 0.7051, "step": 4636 }, { "epoch": 0.69, "learning_rate": 4.588974359422752e-06, "loss": 0.6064, "step": 4637 }, { "epoch": 0.69, "learning_rate": 4.584912338231554e-06, "loss": 0.7539, "step": 4638 }, { "epoch": 0.69, "learning_rate": 4.58085158083853e-06, "loss": 0.2862, "step": 4639 }, { "epoch": 0.69, "learning_rate": 4.5767920881913855e-06, "loss": 0.7152, "step": 4640 }, { "epoch": 0.69, "learning_rate": 4.572733861237547e-06, "loss": 0.6994, "step": 4641 }, { "epoch": 0.69, "learning_rate": 4.568676900924147e-06, "loss": 0.6985, "step": 4642 }, { "epoch": 0.69, "learning_rate": 4.56462120819801e-06, "loss": 0.3322, "step": 4643 }, { "epoch": 0.69, "learning_rate": 4.56056678400568e-06, "loss": 0.7774, "step": 4644 }, { "epoch": 0.69, "learning_rate": 4.556513629293384e-06, "loss": 0.6626, "step": 4645 }, { "epoch": 0.69, "learning_rate": 4.55246174500708e-06, "loss": 0.7155, "step": 4646 }, { "epoch": 0.69, "learning_rate": 4.548411132092413e-06, "loss": 0.2885, "step": 4647 }, { "epoch": 0.69, "learning_rate": 4.544361791494728e-06, "loss": 0.7111, "step": 4648 }, { "epoch": 0.69, "learning_rate": 4.540313724159087e-06, "loss": 0.7141, "step": 4649 }, { "epoch": 0.69, "learning_rate": 4.536266931030236e-06, "loss": 0.6636, "step": 4650 }, { "epoch": 0.69, "learning_rate": 4.532221413052641e-06, "loss": 0.718, "step": 4651 }, { "epoch": 0.69, "learning_rate": 4.528177171170466e-06, "loss": 0.6768, "step": 4652 }, { "epoch": 0.69, "learning_rate": 4.524134206327564e-06, "loss": 0.7793, "step": 4653 }, { "epoch": 0.69, "learning_rate": 4.5200925194675145e-06, "loss": 0.8127, "step": 4654 }, { "epoch": 0.69, "learning_rate": 4.516052111533577e-06, "loss": 0.672, "step": 4655 }, { "epoch": 0.69, "learning_rate": 4.512012983468721e-06, "loss": 0.6684, "step": 4656 }, { "epoch": 0.69, "learning_rate": 4.507975136215622e-06, "loss": 0.6587, "step": 4657 }, { "epoch": 0.69, "learning_rate": 4.503938570716645e-06, "loss": 0.7359, "step": 4658 }, { "epoch": 0.69, "learning_rate": 4.4999032879138635e-06, "loss": 0.7615, "step": 4659 }, { "epoch": 0.7, "learning_rate": 4.495869288749053e-06, "loss": 0.7478, "step": 4660 }, { "epoch": 0.7, "learning_rate": 4.491836574163685e-06, "loss": 0.7739, "step": 4661 }, { "epoch": 0.7, "learning_rate": 4.487805145098938e-06, "loss": 0.6384, "step": 4662 }, { "epoch": 0.7, "learning_rate": 4.483775002495677e-06, "loss": 0.7092, "step": 4663 }, { "epoch": 0.7, "learning_rate": 4.479746147294481e-06, "loss": 0.6585, "step": 4664 }, { "epoch": 0.7, "learning_rate": 4.475718580435624e-06, "loss": 0.6906, "step": 4665 }, { "epoch": 0.7, "learning_rate": 4.471692302859072e-06, "loss": 0.6853, "step": 4666 }, { "epoch": 0.7, "learning_rate": 4.467667315504499e-06, "loss": 0.7571, "step": 4667 }, { "epoch": 0.7, "learning_rate": 4.463643619311278e-06, "loss": 0.6739, "step": 4668 }, { "epoch": 0.7, "learning_rate": 4.4596212152184735e-06, "loss": 0.6591, "step": 4669 }, { "epoch": 0.7, "learning_rate": 4.45560010416486e-06, "loss": 0.7647, "step": 4670 }, { "epoch": 0.7, "learning_rate": 4.451580287088894e-06, "loss": 0.6736, "step": 4671 }, { "epoch": 0.7, "learning_rate": 4.447561764928746e-06, "loss": 0.7421, "step": 4672 }, { "epoch": 0.7, "learning_rate": 4.443544538622272e-06, "loss": 0.7878, "step": 4673 }, { "epoch": 0.7, "learning_rate": 4.439528609107032e-06, "loss": 0.6213, "step": 4674 }, { "epoch": 0.7, "learning_rate": 4.435513977320284e-06, "loss": 0.696, "step": 4675 }, { "epoch": 0.7, "learning_rate": 4.431500644198979e-06, "loss": 0.692, "step": 4676 }, { "epoch": 0.7, "learning_rate": 4.427488610679773e-06, "loss": 0.6804, "step": 4677 }, { "epoch": 0.7, "learning_rate": 4.4234778776990065e-06, "loss": 0.7746, "step": 4678 }, { "epoch": 0.7, "learning_rate": 4.419468446192723e-06, "loss": 0.7359, "step": 4679 }, { "epoch": 0.7, "learning_rate": 4.41546031709667e-06, "loss": 0.7826, "step": 4680 }, { "epoch": 0.7, "learning_rate": 4.411453491346271e-06, "loss": 0.6376, "step": 4681 }, { "epoch": 0.7, "learning_rate": 4.407447969876666e-06, "loss": 0.7854, "step": 4682 }, { "epoch": 0.7, "learning_rate": 4.403443753622678e-06, "loss": 0.6901, "step": 4683 }, { "epoch": 0.7, "learning_rate": 4.399440843518833e-06, "loss": 0.5859, "step": 4684 }, { "epoch": 0.7, "learning_rate": 4.395439240499351e-06, "loss": 0.8154, "step": 4685 }, { "epoch": 0.7, "learning_rate": 4.391438945498135e-06, "loss": 0.6529, "step": 4686 }, { "epoch": 0.7, "learning_rate": 4.387439959448801e-06, "loss": 0.8181, "step": 4687 }, { "epoch": 0.7, "learning_rate": 4.383442283284649e-06, "loss": 0.6776, "step": 4688 }, { "epoch": 0.7, "learning_rate": 4.379445917938667e-06, "loss": 0.7157, "step": 4689 }, { "epoch": 0.7, "learning_rate": 4.37545086434356e-06, "loss": 0.2931, "step": 4690 }, { "epoch": 0.7, "learning_rate": 4.3714571234317005e-06, "loss": 0.7869, "step": 4691 }, { "epoch": 0.7, "learning_rate": 4.3674646961351695e-06, "loss": 0.7313, "step": 4692 }, { "epoch": 0.7, "learning_rate": 4.363473583385742e-06, "loss": 0.7274, "step": 4693 }, { "epoch": 0.7, "learning_rate": 4.359483786114876e-06, "loss": 0.7213, "step": 4694 }, { "epoch": 0.7, "learning_rate": 4.355495305253731e-06, "loss": 0.7722, "step": 4695 }, { "epoch": 0.7, "learning_rate": 4.351508141733162e-06, "loss": 0.5766, "step": 4696 }, { "epoch": 0.7, "learning_rate": 4.3475222964837e-06, "loss": 0.6615, "step": 4697 }, { "epoch": 0.7, "learning_rate": 4.343537770435594e-06, "loss": 0.7349, "step": 4698 }, { "epoch": 0.7, "learning_rate": 4.339554564518761e-06, "loss": 0.3126, "step": 4699 }, { "epoch": 0.7, "learning_rate": 4.335572679662823e-06, "loss": 0.7054, "step": 4700 }, { "epoch": 0.7, "learning_rate": 4.331592116797095e-06, "loss": 0.7498, "step": 4701 }, { "epoch": 0.7, "learning_rate": 4.327612876850572e-06, "loss": 0.7342, "step": 4702 }, { "epoch": 0.7, "learning_rate": 4.3236349607519545e-06, "loss": 0.7474, "step": 4703 }, { "epoch": 0.7, "learning_rate": 4.319658369429615e-06, "loss": 0.8475, "step": 4704 }, { "epoch": 0.7, "learning_rate": 4.315683103811644e-06, "loss": 0.7121, "step": 4705 }, { "epoch": 0.7, "learning_rate": 4.311709164825797e-06, "loss": 0.6417, "step": 4706 }, { "epoch": 0.7, "learning_rate": 4.307736553399534e-06, "loss": 0.7987, "step": 4707 }, { "epoch": 0.7, "learning_rate": 4.303765270460003e-06, "loss": 0.6662, "step": 4708 }, { "epoch": 0.7, "learning_rate": 4.299795316934035e-06, "loss": 0.3497, "step": 4709 }, { "epoch": 0.7, "learning_rate": 4.295826693748159e-06, "loss": 0.7781, "step": 4710 }, { "epoch": 0.7, "learning_rate": 4.291859401828596e-06, "loss": 0.837, "step": 4711 }, { "epoch": 0.7, "learning_rate": 4.287893442101237e-06, "loss": 0.6388, "step": 4712 }, { "epoch": 0.7, "learning_rate": 4.283928815491692e-06, "loss": 0.659, "step": 4713 }, { "epoch": 0.7, "learning_rate": 4.279965522925234e-06, "loss": 0.3332, "step": 4714 }, { "epoch": 0.7, "learning_rate": 4.2760035653268365e-06, "loss": 0.7118, "step": 4715 }, { "epoch": 0.7, "learning_rate": 4.272042943621164e-06, "loss": 0.757, "step": 4716 }, { "epoch": 0.7, "learning_rate": 4.268083658732557e-06, "loss": 0.658, "step": 4717 }, { "epoch": 0.7, "learning_rate": 4.264125711585054e-06, "loss": 0.7324, "step": 4718 }, { "epoch": 0.7, "learning_rate": 4.260169103102383e-06, "loss": 0.7037, "step": 4719 }, { "epoch": 0.7, "learning_rate": 4.256213834207951e-06, "loss": 0.7262, "step": 4720 }, { "epoch": 0.7, "learning_rate": 4.252259905824864e-06, "loss": 0.6831, "step": 4721 }, { "epoch": 0.7, "learning_rate": 4.248307318875898e-06, "loss": 0.7336, "step": 4722 }, { "epoch": 0.7, "learning_rate": 4.244356074283531e-06, "loss": 0.7721, "step": 4723 }, { "epoch": 0.7, "learning_rate": 4.240406172969927e-06, "loss": 0.7675, "step": 4724 }, { "epoch": 0.7, "learning_rate": 4.236457615856925e-06, "loss": 0.6882, "step": 4725 }, { "epoch": 0.7, "learning_rate": 4.232510403866059e-06, "loss": 0.7908, "step": 4726 }, { "epoch": 0.7, "learning_rate": 4.228564537918549e-06, "loss": 0.7089, "step": 4727 }, { "epoch": 0.71, "learning_rate": 4.2246200189353e-06, "loss": 0.7208, "step": 4728 }, { "epoch": 0.71, "learning_rate": 4.2206768478369055e-06, "loss": 0.7821, "step": 4729 }, { "epoch": 0.71, "learning_rate": 4.216735025543633e-06, "loss": 0.6713, "step": 4730 }, { "epoch": 0.71, "learning_rate": 4.21279455297545e-06, "loss": 0.6223, "step": 4731 }, { "epoch": 0.71, "learning_rate": 4.208855431051995e-06, "loss": 0.6142, "step": 4732 }, { "epoch": 0.71, "learning_rate": 4.204917660692605e-06, "loss": 0.717, "step": 4733 }, { "epoch": 0.71, "learning_rate": 4.200981242816291e-06, "loss": 0.773, "step": 4734 }, { "epoch": 0.71, "learning_rate": 4.197046178341755e-06, "loss": 0.647, "step": 4735 }, { "epoch": 0.71, "learning_rate": 4.193112468187383e-06, "loss": 0.6075, "step": 4736 }, { "epoch": 0.71, "learning_rate": 4.189180113271235e-06, "loss": 0.7867, "step": 4737 }, { "epoch": 0.71, "learning_rate": 4.185249114511066e-06, "loss": 0.7076, "step": 4738 }, { "epoch": 0.71, "learning_rate": 4.181319472824316e-06, "loss": 0.2929, "step": 4739 }, { "epoch": 0.71, "learning_rate": 4.1773911891280935e-06, "loss": 0.8087, "step": 4740 }, { "epoch": 0.71, "learning_rate": 4.173464264339204e-06, "loss": 0.7427, "step": 4741 }, { "epoch": 0.71, "learning_rate": 4.169538699374131e-06, "loss": 0.6653, "step": 4742 }, { "epoch": 0.71, "learning_rate": 4.165614495149041e-06, "loss": 0.7939, "step": 4743 }, { "epoch": 0.71, "learning_rate": 4.1616916525797865e-06, "loss": 0.7112, "step": 4744 }, { "epoch": 0.71, "learning_rate": 4.157770172581892e-06, "loss": 0.7847, "step": 4745 }, { "epoch": 0.71, "learning_rate": 4.153850056070574e-06, "loss": 0.7038, "step": 4746 }, { "epoch": 0.71, "learning_rate": 4.149931303960731e-06, "loss": 0.7252, "step": 4747 }, { "epoch": 0.71, "learning_rate": 4.146013917166931e-06, "loss": 0.7589, "step": 4748 }, { "epoch": 0.71, "learning_rate": 4.142097896603436e-06, "loss": 0.3187, "step": 4749 }, { "epoch": 0.71, "learning_rate": 4.138183243184187e-06, "loss": 0.7071, "step": 4750 }, { "epoch": 0.71, "learning_rate": 4.134269957822802e-06, "loss": 0.7312, "step": 4751 }, { "epoch": 0.71, "learning_rate": 4.130358041432585e-06, "loss": 0.7682, "step": 4752 }, { "epoch": 0.71, "learning_rate": 4.12644749492651e-06, "loss": 0.7182, "step": 4753 }, { "epoch": 0.71, "learning_rate": 4.122538319217243e-06, "loss": 0.7343, "step": 4754 }, { "epoch": 0.71, "learning_rate": 4.118630515217128e-06, "loss": 0.7011, "step": 4755 }, { "epoch": 0.71, "learning_rate": 4.114724083838177e-06, "loss": 0.7221, "step": 4756 }, { "epoch": 0.71, "learning_rate": 4.110819025992105e-06, "loss": 0.6599, "step": 4757 }, { "epoch": 0.71, "learning_rate": 4.10691534259028e-06, "loss": 0.7284, "step": 4758 }, { "epoch": 0.71, "learning_rate": 4.103013034543769e-06, "loss": 0.6048, "step": 4759 }, { "epoch": 0.71, "learning_rate": 4.099112102763305e-06, "loss": 0.7237, "step": 4760 }, { "epoch": 0.71, "learning_rate": 4.0952125481593095e-06, "loss": 0.6994, "step": 4761 }, { "epoch": 0.71, "learning_rate": 4.0913143716418806e-06, "loss": 0.7498, "step": 4762 }, { "epoch": 0.71, "learning_rate": 4.087417574120782e-06, "loss": 0.7299, "step": 4763 }, { "epoch": 0.71, "learning_rate": 4.083522156505482e-06, "loss": 0.7153, "step": 4764 }, { "epoch": 0.71, "learning_rate": 4.079628119705099e-06, "loss": 0.6885, "step": 4765 }, { "epoch": 0.71, "learning_rate": 4.0757354646284465e-06, "loss": 0.6407, "step": 4766 }, { "epoch": 0.71, "learning_rate": 4.071844192184011e-06, "loss": 0.6678, "step": 4767 }, { "epoch": 0.71, "learning_rate": 4.067954303279952e-06, "loss": 0.6974, "step": 4768 }, { "epoch": 0.71, "learning_rate": 4.064065798824111e-06, "loss": 0.6543, "step": 4769 }, { "epoch": 0.71, "learning_rate": 4.060178679724008e-06, "loss": 0.7187, "step": 4770 }, { "epoch": 0.71, "learning_rate": 4.056292946886829e-06, "loss": 0.7397, "step": 4771 }, { "epoch": 0.71, "learning_rate": 4.0524086012194575e-06, "loss": 0.6944, "step": 4772 }, { "epoch": 0.71, "learning_rate": 4.048525643628427e-06, "loss": 0.709, "step": 4773 }, { "epoch": 0.71, "learning_rate": 4.044644075019966e-06, "loss": 0.6564, "step": 4774 }, { "epoch": 0.71, "learning_rate": 4.040763896299977e-06, "loss": 0.6997, "step": 4775 }, { "epoch": 0.71, "learning_rate": 4.036885108374024e-06, "loss": 0.7199, "step": 4776 }, { "epoch": 0.71, "learning_rate": 4.033007712147364e-06, "loss": 0.7465, "step": 4777 }, { "epoch": 0.71, "learning_rate": 4.0291317085249185e-06, "loss": 0.7823, "step": 4778 }, { "epoch": 0.71, "learning_rate": 4.025257098411287e-06, "loss": 0.6766, "step": 4779 }, { "epoch": 0.71, "learning_rate": 4.0213838827107495e-06, "loss": 0.714, "step": 4780 }, { "epoch": 0.71, "learning_rate": 4.017512062327247e-06, "loss": 0.7526, "step": 4781 }, { "epoch": 0.71, "learning_rate": 4.013641638164405e-06, "loss": 0.3313, "step": 4782 }, { "epoch": 0.71, "learning_rate": 4.009772611125528e-06, "loss": 0.7358, "step": 4783 }, { "epoch": 0.71, "learning_rate": 4.005904982113577e-06, "loss": 0.7336, "step": 4784 }, { "epoch": 0.71, "learning_rate": 4.002038752031205e-06, "loss": 0.6675, "step": 4785 }, { "epoch": 0.71, "learning_rate": 3.9981739217807194e-06, "loss": 0.797, "step": 4786 }, { "epoch": 0.71, "learning_rate": 3.994310492264124e-06, "loss": 0.7021, "step": 4787 }, { "epoch": 0.71, "learning_rate": 3.990448464383085e-06, "loss": 0.6597, "step": 4788 }, { "epoch": 0.71, "learning_rate": 3.98658783903893e-06, "loss": 0.67, "step": 4789 }, { "epoch": 0.71, "learning_rate": 3.982728617132679e-06, "loss": 0.7604, "step": 4790 }, { "epoch": 0.71, "learning_rate": 3.978870799565008e-06, "loss": 0.7401, "step": 4791 }, { "epoch": 0.71, "learning_rate": 3.975014387236274e-06, "loss": 0.7077, "step": 4792 }, { "epoch": 0.71, "learning_rate": 3.971159381046506e-06, "loss": 0.6894, "step": 4793 }, { "epoch": 0.71, "learning_rate": 3.967305781895402e-06, "loss": 0.7166, "step": 4794 }, { "epoch": 0.72, "learning_rate": 3.9634535906823365e-06, "loss": 0.3004, "step": 4795 }, { "epoch": 0.72, "learning_rate": 3.959602808306347e-06, "loss": 0.3163, "step": 4796 }, { "epoch": 0.72, "learning_rate": 3.955753435666147e-06, "loss": 0.8229, "step": 4797 }, { "epoch": 0.72, "learning_rate": 3.951905473660126e-06, "loss": 0.6975, "step": 4798 }, { "epoch": 0.72, "learning_rate": 3.948058923186331e-06, "loss": 0.741, "step": 4799 }, { "epoch": 0.72, "learning_rate": 3.944213785142494e-06, "loss": 0.6733, "step": 4800 }, { "epoch": 0.72, "learning_rate": 3.9403700604260066e-06, "loss": 0.7182, "step": 4801 }, { "epoch": 0.72, "learning_rate": 3.9365277499339395e-06, "loss": 0.3076, "step": 4802 }, { "epoch": 0.72, "learning_rate": 3.932686854563029e-06, "loss": 0.7746, "step": 4803 }, { "epoch": 0.72, "learning_rate": 3.928847375209677e-06, "loss": 0.6432, "step": 4804 }, { "epoch": 0.72, "learning_rate": 3.925009312769961e-06, "loss": 0.6877, "step": 4805 }, { "epoch": 0.72, "learning_rate": 3.921172668139628e-06, "loss": 0.5824, "step": 4806 }, { "epoch": 0.72, "learning_rate": 3.9173374422140866e-06, "loss": 0.7406, "step": 4807 }, { "epoch": 0.72, "learning_rate": 3.913503635888421e-06, "loss": 0.6319, "step": 4808 }, { "epoch": 0.72, "learning_rate": 3.909671250057385e-06, "loss": 0.6432, "step": 4809 }, { "epoch": 0.72, "learning_rate": 3.9058402856153985e-06, "loss": 0.7, "step": 4810 }, { "epoch": 0.72, "learning_rate": 3.902010743456551e-06, "loss": 0.7484, "step": 4811 }, { "epoch": 0.72, "learning_rate": 3.898182624474593e-06, "loss": 0.7496, "step": 4812 }, { "epoch": 0.72, "learning_rate": 3.894355929562956e-06, "loss": 0.7677, "step": 4813 }, { "epoch": 0.72, "learning_rate": 3.890530659614724e-06, "loss": 0.7063, "step": 4814 }, { "epoch": 0.72, "learning_rate": 3.886706815522657e-06, "loss": 0.7573, "step": 4815 }, { "epoch": 0.72, "learning_rate": 3.882884398179191e-06, "loss": 0.6764, "step": 4816 }, { "epoch": 0.72, "learning_rate": 3.879063408476409e-06, "loss": 0.3134, "step": 4817 }, { "epoch": 0.72, "learning_rate": 3.875243847306078e-06, "loss": 0.3596, "step": 4818 }, { "epoch": 0.72, "learning_rate": 3.871425715559619e-06, "loss": 0.7022, "step": 4819 }, { "epoch": 0.72, "learning_rate": 3.867609014128127e-06, "loss": 0.6926, "step": 4820 }, { "epoch": 0.72, "learning_rate": 3.863793743902366e-06, "loss": 0.7658, "step": 4821 }, { "epoch": 0.72, "learning_rate": 3.859979905772756e-06, "loss": 0.6269, "step": 4822 }, { "epoch": 0.72, "learning_rate": 3.8561675006293885e-06, "loss": 0.7523, "step": 4823 }, { "epoch": 0.72, "learning_rate": 3.852356529362021e-06, "loss": 0.7322, "step": 4824 }, { "epoch": 0.72, "learning_rate": 3.848546992860078e-06, "loss": 0.7225, "step": 4825 }, { "epoch": 0.72, "learning_rate": 3.844738892012647e-06, "loss": 0.6318, "step": 4826 }, { "epoch": 0.72, "learning_rate": 3.840932227708476e-06, "loss": 0.7445, "step": 4827 }, { "epoch": 0.72, "learning_rate": 3.837127000835983e-06, "loss": 0.6488, "step": 4828 }, { "epoch": 0.72, "learning_rate": 3.833323212283254e-06, "loss": 0.7369, "step": 4829 }, { "epoch": 0.72, "learning_rate": 3.829520862938025e-06, "loss": 0.7339, "step": 4830 }, { "epoch": 0.72, "learning_rate": 3.825719953687717e-06, "loss": 0.6995, "step": 4831 }, { "epoch": 0.72, "learning_rate": 3.821920485419397e-06, "loss": 0.7396, "step": 4832 }, { "epoch": 0.72, "learning_rate": 3.818122459019803e-06, "loss": 0.6644, "step": 4833 }, { "epoch": 0.72, "learning_rate": 3.814325875375342e-06, "loss": 0.309, "step": 4834 }, { "epoch": 0.72, "learning_rate": 3.810530735372069e-06, "loss": 0.7265, "step": 4835 }, { "epoch": 0.72, "learning_rate": 3.806737039895716e-06, "loss": 0.6943, "step": 4836 }, { "epoch": 0.72, "learning_rate": 3.8029447898316718e-06, "loss": 0.6786, "step": 4837 }, { "epoch": 0.72, "learning_rate": 3.7991539860649917e-06, "loss": 0.6876, "step": 4838 }, { "epoch": 0.72, "learning_rate": 3.7953646294803926e-06, "loss": 0.7159, "step": 4839 }, { "epoch": 0.72, "learning_rate": 3.7915767209622443e-06, "loss": 0.742, "step": 4840 }, { "epoch": 0.72, "learning_rate": 3.787790261394596e-06, "loss": 0.6408, "step": 4841 }, { "epoch": 0.72, "learning_rate": 3.7840052516611403e-06, "loss": 0.6473, "step": 4842 }, { "epoch": 0.72, "learning_rate": 3.780221692645245e-06, "loss": 0.6778, "step": 4843 }, { "epoch": 0.72, "learning_rate": 3.7764395852299363e-06, "loss": 0.6898, "step": 4844 }, { "epoch": 0.72, "learning_rate": 3.77265893029789e-06, "loss": 0.7693, "step": 4845 }, { "epoch": 0.72, "learning_rate": 3.768879728731468e-06, "loss": 0.6562, "step": 4846 }, { "epoch": 0.72, "learning_rate": 3.7651019814126656e-06, "loss": 0.6966, "step": 4847 }, { "epoch": 0.72, "learning_rate": 3.7613256892231553e-06, "loss": 0.7463, "step": 4848 }, { "epoch": 0.72, "learning_rate": 3.7575508530442695e-06, "loss": 0.8103, "step": 4849 }, { "epoch": 0.72, "learning_rate": 3.75377747375699e-06, "loss": 0.6975, "step": 4850 }, { "epoch": 0.72, "learning_rate": 3.7500055522419676e-06, "loss": 0.7132, "step": 4851 }, { "epoch": 0.72, "learning_rate": 3.7462350893795118e-06, "loss": 0.6498, "step": 4852 }, { "epoch": 0.72, "learning_rate": 3.742466086049591e-06, "loss": 0.6667, "step": 4853 }, { "epoch": 0.72, "learning_rate": 3.7386985431318345e-06, "loss": 0.6623, "step": 4854 }, { "epoch": 0.72, "learning_rate": 3.7349324615055236e-06, "loss": 0.6392, "step": 4855 }, { "epoch": 0.72, "learning_rate": 3.731167842049607e-06, "loss": 0.7864, "step": 4856 }, { "epoch": 0.72, "learning_rate": 3.7274046856426905e-06, "loss": 0.7728, "step": 4857 }, { "epoch": 0.72, "learning_rate": 3.7236429931630325e-06, "loss": 0.7127, "step": 4858 }, { "epoch": 0.72, "learning_rate": 3.7198827654885574e-06, "loss": 0.6397, "step": 4859 }, { "epoch": 0.72, "learning_rate": 3.7161240034968427e-06, "loss": 0.7164, "step": 4860 }, { "epoch": 0.72, "learning_rate": 3.7123667080651273e-06, "loss": 0.6272, "step": 4861 }, { "epoch": 0.73, "learning_rate": 3.7086108800703093e-06, "loss": 0.6995, "step": 4862 }, { "epoch": 0.73, "learning_rate": 3.704856520388933e-06, "loss": 0.7168, "step": 4863 }, { "epoch": 0.73, "learning_rate": 3.7011036298972137e-06, "loss": 0.7106, "step": 4864 }, { "epoch": 0.73, "learning_rate": 3.6973522094710203e-06, "loss": 0.6475, "step": 4865 }, { "epoch": 0.73, "learning_rate": 3.69360225998587e-06, "loss": 0.724, "step": 4866 }, { "epoch": 0.73, "learning_rate": 3.689853782316947e-06, "loss": 0.7008, "step": 4867 }, { "epoch": 0.73, "learning_rate": 3.686106777339088e-06, "loss": 0.7448, "step": 4868 }, { "epoch": 0.73, "learning_rate": 3.6823612459267875e-06, "loss": 0.7181, "step": 4869 }, { "epoch": 0.73, "learning_rate": 3.6786171889541965e-06, "loss": 0.7469, "step": 4870 }, { "epoch": 0.73, "learning_rate": 3.674874607295115e-06, "loss": 0.7438, "step": 4871 }, { "epoch": 0.73, "learning_rate": 3.67113350182301e-06, "loss": 0.8025, "step": 4872 }, { "epoch": 0.73, "learning_rate": 3.6673938734109913e-06, "loss": 0.8007, "step": 4873 }, { "epoch": 0.73, "learning_rate": 3.6636557229318335e-06, "loss": 0.7179, "step": 4874 }, { "epoch": 0.73, "learning_rate": 3.6599190512579652e-06, "loss": 0.7255, "step": 4875 }, { "epoch": 0.73, "learning_rate": 3.656183859261466e-06, "loss": 0.6802, "step": 4876 }, { "epoch": 0.73, "learning_rate": 3.6524501478140773e-06, "loss": 0.6821, "step": 4877 }, { "epoch": 0.73, "learning_rate": 3.648717917787181e-06, "loss": 0.7096, "step": 4878 }, { "epoch": 0.73, "learning_rate": 3.644987170051828e-06, "loss": 0.6979, "step": 4879 }, { "epoch": 0.73, "learning_rate": 3.64125790547872e-06, "loss": 0.7224, "step": 4880 }, { "epoch": 0.73, "learning_rate": 3.6375301249382032e-06, "loss": 0.6976, "step": 4881 }, { "epoch": 0.73, "learning_rate": 3.6338038293002864e-06, "loss": 0.7123, "step": 4882 }, { "epoch": 0.73, "learning_rate": 3.6300790194346312e-06, "loss": 0.7553, "step": 4883 }, { "epoch": 0.73, "learning_rate": 3.6263556962105516e-06, "loss": 0.7257, "step": 4884 }, { "epoch": 0.73, "learning_rate": 3.6226338604970146e-06, "loss": 0.6623, "step": 4885 }, { "epoch": 0.73, "learning_rate": 3.618913513162635e-06, "loss": 0.6832, "step": 4886 }, { "epoch": 0.73, "learning_rate": 3.615194655075688e-06, "loss": 0.6457, "step": 4887 }, { "epoch": 0.73, "learning_rate": 3.6114772871040994e-06, "loss": 0.6911, "step": 4888 }, { "epoch": 0.73, "learning_rate": 3.6077614101154366e-06, "loss": 0.651, "step": 4889 }, { "epoch": 0.73, "learning_rate": 3.604047024976942e-06, "loss": 0.6607, "step": 4890 }, { "epoch": 0.73, "learning_rate": 3.6003341325554853e-06, "loss": 0.6452, "step": 4891 }, { "epoch": 0.73, "learning_rate": 3.596622733717603e-06, "loss": 0.7502, "step": 4892 }, { "epoch": 0.73, "learning_rate": 3.5929128293294804e-06, "loss": 0.6532, "step": 4893 }, { "epoch": 0.73, "learning_rate": 3.5892044202569456e-06, "loss": 0.6573, "step": 4894 }, { "epoch": 0.73, "learning_rate": 3.585497507365492e-06, "loss": 0.6737, "step": 4895 }, { "epoch": 0.73, "learning_rate": 3.5817920915202476e-06, "loss": 0.6786, "step": 4896 }, { "epoch": 0.73, "learning_rate": 3.578088173586002e-06, "loss": 0.7275, "step": 4897 }, { "epoch": 0.73, "learning_rate": 3.5743857544271996e-06, "loss": 0.3177, "step": 4898 }, { "epoch": 0.73, "learning_rate": 3.5706848349079204e-06, "loss": 0.7505, "step": 4899 }, { "epoch": 0.73, "learning_rate": 3.5669854158919092e-06, "loss": 0.7956, "step": 4900 }, { "epoch": 0.73, "learning_rate": 3.563287498242545e-06, "loss": 0.6778, "step": 4901 }, { "epoch": 0.73, "learning_rate": 3.5595910828228673e-06, "loss": 0.6517, "step": 4902 }, { "epoch": 0.73, "learning_rate": 3.5558961704955697e-06, "loss": 0.6819, "step": 4903 }, { "epoch": 0.73, "learning_rate": 3.552202762122975e-06, "loss": 0.7404, "step": 4904 }, { "epoch": 0.73, "learning_rate": 3.548510858567081e-06, "loss": 0.7771, "step": 4905 }, { "epoch": 0.73, "learning_rate": 3.5448204606895132e-06, "loss": 0.629, "step": 4906 }, { "epoch": 0.73, "learning_rate": 3.541131569351556e-06, "loss": 0.7399, "step": 4907 }, { "epoch": 0.73, "learning_rate": 3.5374441854141427e-06, "loss": 0.706, "step": 4908 }, { "epoch": 0.73, "learning_rate": 3.533758309737847e-06, "loss": 0.6916, "step": 4909 }, { "epoch": 0.73, "learning_rate": 3.5300739431828968e-06, "loss": 0.7924, "step": 4910 }, { "epoch": 0.73, "learning_rate": 3.5263910866091678e-06, "loss": 0.6732, "step": 4911 }, { "epoch": 0.73, "learning_rate": 3.5227097408761814e-06, "loss": 0.659, "step": 4912 }, { "epoch": 0.73, "learning_rate": 3.519029906843112e-06, "loss": 0.7444, "step": 4913 }, { "epoch": 0.73, "learning_rate": 3.515351585368767e-06, "loss": 0.6535, "step": 4914 }, { "epoch": 0.73, "learning_rate": 3.5116747773116154e-06, "loss": 0.7249, "step": 4915 }, { "epoch": 0.73, "learning_rate": 3.5079994835297713e-06, "loss": 0.5871, "step": 4916 }, { "epoch": 0.73, "learning_rate": 3.504325704880983e-06, "loss": 0.6262, "step": 4917 }, { "epoch": 0.73, "learning_rate": 3.5006534422226592e-06, "loss": 0.6689, "step": 4918 }, { "epoch": 0.73, "learning_rate": 3.4969826964118482e-06, "loss": 0.7322, "step": 4919 }, { "epoch": 0.73, "learning_rate": 3.493313468305246e-06, "loss": 0.7817, "step": 4920 }, { "epoch": 0.73, "learning_rate": 3.489645758759198e-06, "loss": 0.6832, "step": 4921 }, { "epoch": 0.73, "learning_rate": 3.4859795686296836e-06, "loss": 0.6971, "step": 4922 }, { "epoch": 0.73, "learning_rate": 3.4823148987723388e-06, "loss": 0.7038, "step": 4923 }, { "epoch": 0.73, "learning_rate": 3.4786517500424456e-06, "loss": 0.3342, "step": 4924 }, { "epoch": 0.73, "learning_rate": 3.474990123294918e-06, "loss": 0.6609, "step": 4925 }, { "epoch": 0.73, "learning_rate": 3.471330019384328e-06, "loss": 0.6914, "step": 4926 }, { "epoch": 0.73, "learning_rate": 3.467671439164887e-06, "loss": 0.7085, "step": 4927 }, { "epoch": 0.73, "learning_rate": 3.464014383490455e-06, "loss": 0.7417, "step": 4928 }, { "epoch": 0.74, "learning_rate": 3.460358853214526e-06, "loss": 0.6771, "step": 4929 }, { "epoch": 0.74, "learning_rate": 3.4567048491902466e-06, "loss": 0.6534, "step": 4930 }, { "epoch": 0.74, "learning_rate": 3.4530523722704102e-06, "loss": 0.6584, "step": 4931 }, { "epoch": 0.74, "learning_rate": 3.4494014233074413e-06, "loss": 0.7206, "step": 4932 }, { "epoch": 0.74, "learning_rate": 3.4457520031534186e-06, "loss": 0.6771, "step": 4933 }, { "epoch": 0.74, "learning_rate": 3.4421041126600607e-06, "loss": 0.7073, "step": 4934 }, { "epoch": 0.74, "learning_rate": 3.438457752678729e-06, "loss": 0.3205, "step": 4935 }, { "epoch": 0.74, "learning_rate": 3.434812924060431e-06, "loss": 0.715, "step": 4936 }, { "epoch": 0.74, "learning_rate": 3.431169627655807e-06, "loss": 0.7396, "step": 4937 }, { "epoch": 0.74, "learning_rate": 3.42752786431515e-06, "loss": 0.6851, "step": 4938 }, { "epoch": 0.74, "learning_rate": 3.4238876348883955e-06, "loss": 0.7372, "step": 4939 }, { "epoch": 0.74, "learning_rate": 3.420248940225109e-06, "loss": 0.737, "step": 4940 }, { "epoch": 0.74, "learning_rate": 3.416611781174509e-06, "loss": 0.7388, "step": 4941 }, { "epoch": 0.74, "learning_rate": 3.4129761585854547e-06, "loss": 0.7985, "step": 4942 }, { "epoch": 0.74, "learning_rate": 3.409342073306443e-06, "loss": 0.6627, "step": 4943 }, { "epoch": 0.74, "learning_rate": 3.4057095261856166e-06, "loss": 0.776, "step": 4944 }, { "epoch": 0.74, "learning_rate": 3.4020785180707495e-06, "loss": 0.6755, "step": 4945 }, { "epoch": 0.74, "learning_rate": 3.398449049809267e-06, "loss": 0.7014, "step": 4946 }, { "epoch": 0.74, "learning_rate": 3.3948211222482343e-06, "loss": 0.6729, "step": 4947 }, { "epoch": 0.74, "learning_rate": 3.3911947362343433e-06, "loss": 0.7009, "step": 4948 }, { "epoch": 0.74, "learning_rate": 3.387569892613951e-06, "loss": 0.6869, "step": 4949 }, { "epoch": 0.74, "learning_rate": 3.3839465922330293e-06, "loss": 0.8044, "step": 4950 }, { "epoch": 0.74, "learning_rate": 3.3803248359372056e-06, "loss": 0.7527, "step": 4951 }, { "epoch": 0.74, "learning_rate": 3.376704624571744e-06, "loss": 0.6804, "step": 4952 }, { "epoch": 0.74, "learning_rate": 3.373085958981539e-06, "loss": 0.7862, "step": 4953 }, { "epoch": 0.74, "learning_rate": 3.36946884001114e-06, "loss": 0.789, "step": 4954 }, { "epoch": 0.74, "learning_rate": 3.3658532685047187e-06, "loss": 0.6681, "step": 4955 }, { "epoch": 0.74, "learning_rate": 3.362239245306095e-06, "loss": 0.721, "step": 4956 }, { "epoch": 0.74, "learning_rate": 3.3586267712587353e-06, "loss": 0.7041, "step": 4957 }, { "epoch": 0.74, "learning_rate": 3.3550158472057257e-06, "loss": 0.7906, "step": 4958 }, { "epoch": 0.74, "learning_rate": 3.351406473989808e-06, "loss": 0.6912, "step": 4959 }, { "epoch": 0.74, "learning_rate": 3.3477986524533457e-06, "loss": 0.7448, "step": 4960 }, { "epoch": 0.74, "learning_rate": 3.344192383438355e-06, "loss": 0.6967, "step": 4961 }, { "epoch": 0.74, "learning_rate": 3.3405876677864846e-06, "loss": 0.7203, "step": 4962 }, { "epoch": 0.74, "learning_rate": 3.3369845063390104e-06, "loss": 0.7109, "step": 4963 }, { "epoch": 0.74, "learning_rate": 3.333382899936869e-06, "loss": 0.662, "step": 4964 }, { "epoch": 0.74, "learning_rate": 3.3297828494206084e-06, "loss": 0.6771, "step": 4965 }, { "epoch": 0.74, "learning_rate": 3.32618435563043e-06, "loss": 0.6837, "step": 4966 }, { "epoch": 0.74, "learning_rate": 3.322587419406168e-06, "loss": 0.6826, "step": 4967 }, { "epoch": 0.74, "learning_rate": 3.318992041587288e-06, "loss": 0.6451, "step": 4968 }, { "epoch": 0.74, "learning_rate": 3.3153982230128966e-06, "loss": 0.6942, "step": 4969 }, { "epoch": 0.74, "learning_rate": 3.3118059645217416e-06, "loss": 0.705, "step": 4970 }, { "epoch": 0.74, "learning_rate": 3.3082152669521896e-06, "loss": 0.7078, "step": 4971 }, { "epoch": 0.74, "learning_rate": 3.3046261311422678e-06, "loss": 0.7266, "step": 4972 }, { "epoch": 0.74, "learning_rate": 3.3010385579296147e-06, "loss": 0.6836, "step": 4973 }, { "epoch": 0.74, "learning_rate": 3.2974525481515185e-06, "loss": 0.307, "step": 4974 }, { "epoch": 0.74, "learning_rate": 3.2938681026449017e-06, "loss": 0.7581, "step": 4975 }, { "epoch": 0.74, "learning_rate": 3.2902852222463134e-06, "loss": 0.6835, "step": 4976 }, { "epoch": 0.74, "learning_rate": 3.286703907791947e-06, "loss": 0.6613, "step": 4977 }, { "epoch": 0.74, "learning_rate": 3.283124160117619e-06, "loss": 0.6285, "step": 4978 }, { "epoch": 0.74, "learning_rate": 3.2795459800587947e-06, "loss": 0.6631, "step": 4979 }, { "epoch": 0.74, "learning_rate": 3.275969368450569e-06, "loss": 0.7399, "step": 4980 }, { "epoch": 0.74, "learning_rate": 3.272394326127659e-06, "loss": 0.6914, "step": 4981 }, { "epoch": 0.74, "learning_rate": 3.268820853924433e-06, "loss": 0.7147, "step": 4982 }, { "epoch": 0.74, "learning_rate": 3.2652489526748767e-06, "loss": 0.6907, "step": 4983 }, { "epoch": 0.74, "learning_rate": 3.2616786232126206e-06, "loss": 0.6343, "step": 4984 }, { "epoch": 0.74, "learning_rate": 3.2581098663709244e-06, "loss": 0.6311, "step": 4985 }, { "epoch": 0.74, "learning_rate": 3.254542682982682e-06, "loss": 0.7123, "step": 4986 }, { "epoch": 0.74, "learning_rate": 3.2509770738804213e-06, "loss": 0.6612, "step": 4987 }, { "epoch": 0.74, "learning_rate": 3.2474130398962943e-06, "loss": 0.7285, "step": 4988 }, { "epoch": 0.74, "learning_rate": 3.243850581862096e-06, "loss": 0.6753, "step": 4989 }, { "epoch": 0.74, "learning_rate": 3.2402897006092514e-06, "loss": 0.7082, "step": 4990 }, { "epoch": 0.74, "learning_rate": 3.236730396968809e-06, "loss": 0.656, "step": 4991 }, { "epoch": 0.74, "learning_rate": 3.2331726717714584e-06, "loss": 0.2934, "step": 4992 }, { "epoch": 0.74, "learning_rate": 3.2296165258475197e-06, "loss": 0.7098, "step": 4993 }, { "epoch": 0.74, "learning_rate": 3.2260619600269416e-06, "loss": 0.7233, "step": 4994 }, { "epoch": 0.74, "learning_rate": 3.222508975139307e-06, "loss": 0.7569, "step": 4995 }, { "epoch": 0.75, "learning_rate": 3.2189575720138234e-06, "loss": 0.7545, "step": 4996 }, { "epoch": 0.75, "learning_rate": 3.215407751479337e-06, "loss": 0.7061, "step": 4997 }, { "epoch": 0.75, "learning_rate": 3.211859514364323e-06, "loss": 0.7318, "step": 4998 }, { "epoch": 0.75, "learning_rate": 3.2083128614968795e-06, "loss": 0.7143, "step": 4999 }, { "epoch": 0.75, "learning_rate": 3.204767793704744e-06, "loss": 0.6771, "step": 5000 }, { "epoch": 0.75, "learning_rate": 3.201224311815281e-06, "loss": 0.6823, "step": 5001 }, { "epoch": 0.75, "learning_rate": 3.197682416655484e-06, "loss": 0.6504, "step": 5002 }, { "epoch": 0.75, "learning_rate": 3.194142109051982e-06, "loss": 0.3279, "step": 5003 }, { "epoch": 0.75, "learning_rate": 3.1906033898310197e-06, "loss": 0.6465, "step": 5004 }, { "epoch": 0.75, "learning_rate": 3.1870662598184833e-06, "loss": 0.6832, "step": 5005 }, { "epoch": 0.75, "learning_rate": 3.183530719839889e-06, "loss": 0.6526, "step": 5006 }, { "epoch": 0.75, "learning_rate": 3.1799967707203706e-06, "loss": 0.7011, "step": 5007 }, { "epoch": 0.75, "learning_rate": 3.1764644132846988e-06, "loss": 0.6463, "step": 5008 }, { "epoch": 0.75, "learning_rate": 3.1729336483572736e-06, "loss": 0.732, "step": 5009 }, { "epoch": 0.75, "learning_rate": 3.169404476762121e-06, "loss": 0.6995, "step": 5010 }, { "epoch": 0.75, "learning_rate": 3.1658768993228985e-06, "loss": 0.6461, "step": 5011 }, { "epoch": 0.75, "learning_rate": 3.162350916862882e-06, "loss": 0.7204, "step": 5012 }, { "epoch": 0.75, "learning_rate": 3.158826530204986e-06, "loss": 0.3216, "step": 5013 }, { "epoch": 0.75, "learning_rate": 3.1553037401717445e-06, "loss": 0.6847, "step": 5014 }, { "epoch": 0.75, "learning_rate": 3.1517825475853247e-06, "loss": 0.7267, "step": 5015 }, { "epoch": 0.75, "learning_rate": 3.148262953267518e-06, "loss": 0.7118, "step": 5016 }, { "epoch": 0.75, "learning_rate": 3.144744958039745e-06, "loss": 0.6127, "step": 5017 }, { "epoch": 0.75, "learning_rate": 3.141228562723054e-06, "loss": 0.7057, "step": 5018 }, { "epoch": 0.75, "learning_rate": 3.137713768138112e-06, "loss": 0.7874, "step": 5019 }, { "epoch": 0.75, "learning_rate": 3.1342005751052198e-06, "loss": 0.7068, "step": 5020 }, { "epoch": 0.75, "learning_rate": 3.1306889844443054e-06, "loss": 0.7299, "step": 5021 }, { "epoch": 0.75, "learning_rate": 3.1271789969749133e-06, "loss": 0.7524, "step": 5022 }, { "epoch": 0.75, "learning_rate": 3.123670613516231e-06, "loss": 0.6612, "step": 5023 }, { "epoch": 0.75, "learning_rate": 3.1201638348870524e-06, "loss": 0.72, "step": 5024 }, { "epoch": 0.75, "learning_rate": 3.116658661905808e-06, "loss": 0.7393, "step": 5025 }, { "epoch": 0.75, "learning_rate": 3.1131550953905567e-06, "loss": 0.6631, "step": 5026 }, { "epoch": 0.75, "learning_rate": 3.109653136158969e-06, "loss": 0.712, "step": 5027 }, { "epoch": 0.75, "learning_rate": 3.1061527850283503e-06, "loss": 0.67, "step": 5028 }, { "epoch": 0.75, "learning_rate": 3.1026540428156338e-06, "loss": 0.7928, "step": 5029 }, { "epoch": 0.75, "learning_rate": 3.099156910337363e-06, "loss": 0.6505, "step": 5030 }, { "epoch": 0.75, "learning_rate": 3.0956613884097255e-06, "loss": 0.6885, "step": 5031 }, { "epoch": 0.75, "learning_rate": 3.0921674778485143e-06, "loss": 0.6703, "step": 5032 }, { "epoch": 0.75, "learning_rate": 3.088675179469157e-06, "loss": 0.6288, "step": 5033 }, { "epoch": 0.75, "learning_rate": 3.085184494086706e-06, "loss": 0.6329, "step": 5034 }, { "epoch": 0.75, "learning_rate": 3.0816954225158267e-06, "loss": 0.6959, "step": 5035 }, { "epoch": 0.75, "learning_rate": 3.078207965570822e-06, "loss": 0.6681, "step": 5036 }, { "epoch": 0.75, "learning_rate": 3.0747221240656e-06, "loss": 0.2924, "step": 5037 }, { "epoch": 0.75, "learning_rate": 3.0712378988137128e-06, "loss": 0.775, "step": 5038 }, { "epoch": 0.75, "learning_rate": 3.067755290628324e-06, "loss": 0.8104, "step": 5039 }, { "epoch": 0.75, "learning_rate": 3.064274300322215e-06, "loss": 0.7612, "step": 5040 }, { "epoch": 0.75, "learning_rate": 3.060794928707802e-06, "loss": 0.7985, "step": 5041 }, { "epoch": 0.75, "learning_rate": 3.057317176597111e-06, "loss": 0.6234, "step": 5042 }, { "epoch": 0.75, "learning_rate": 3.0538410448017973e-06, "loss": 0.6848, "step": 5043 }, { "epoch": 0.75, "learning_rate": 3.0503665341331413e-06, "loss": 0.7109, "step": 5044 }, { "epoch": 0.75, "learning_rate": 3.0468936454020303e-06, "loss": 0.6831, "step": 5045 }, { "epoch": 0.75, "learning_rate": 3.0434223794189954e-06, "loss": 0.6732, "step": 5046 }, { "epoch": 0.75, "learning_rate": 3.0399527369941683e-06, "loss": 0.7603, "step": 5047 }, { "epoch": 0.75, "learning_rate": 3.0364847189373126e-06, "loss": 0.6771, "step": 5048 }, { "epoch": 0.75, "learning_rate": 3.0330183260578128e-06, "loss": 0.7259, "step": 5049 }, { "epoch": 0.75, "learning_rate": 3.029553559164666e-06, "loss": 0.688, "step": 5050 }, { "epoch": 0.75, "learning_rate": 3.0260904190664995e-06, "loss": 0.7179, "step": 5051 }, { "epoch": 0.75, "learning_rate": 3.0226289065715563e-06, "loss": 0.6445, "step": 5052 }, { "epoch": 0.75, "learning_rate": 3.0191690224877003e-06, "loss": 0.7951, "step": 5053 }, { "epoch": 0.75, "learning_rate": 3.015710767622417e-06, "loss": 0.7896, "step": 5054 }, { "epoch": 0.75, "learning_rate": 3.0122541427828056e-06, "loss": 0.7479, "step": 5055 }, { "epoch": 0.75, "learning_rate": 3.0087991487755906e-06, "loss": 0.6209, "step": 5056 }, { "epoch": 0.75, "learning_rate": 3.005345786407119e-06, "loss": 0.7461, "step": 5057 }, { "epoch": 0.75, "learning_rate": 3.0018940564833454e-06, "loss": 0.662, "step": 5058 }, { "epoch": 0.75, "learning_rate": 2.998443959809855e-06, "loss": 0.6636, "step": 5059 }, { "epoch": 0.75, "learning_rate": 2.9949954971918447e-06, "loss": 0.5582, "step": 5060 }, { "epoch": 0.75, "learning_rate": 2.9915486694341344e-06, "loss": 0.6387, "step": 5061 }, { "epoch": 0.75, "learning_rate": 2.988103477341164e-06, "loss": 0.6876, "step": 5062 }, { "epoch": 0.76, "learning_rate": 2.9846599217169814e-06, "loss": 0.6794, "step": 5063 }, { "epoch": 0.76, "learning_rate": 2.981218003365265e-06, "loss": 0.814, "step": 5064 }, { "epoch": 0.76, "learning_rate": 2.9777777230893013e-06, "loss": 0.7643, "step": 5065 }, { "epoch": 0.76, "learning_rate": 2.9743390816920003e-06, "loss": 0.8271, "step": 5066 }, { "epoch": 0.76, "learning_rate": 2.9709020799758893e-06, "loss": 0.7565, "step": 5067 }, { "epoch": 0.76, "learning_rate": 2.9674667187431106e-06, "loss": 0.8127, "step": 5068 }, { "epoch": 0.76, "learning_rate": 2.9640329987954274e-06, "loss": 0.6729, "step": 5069 }, { "epoch": 0.76, "learning_rate": 2.9606009209342126e-06, "loss": 0.6493, "step": 5070 }, { "epoch": 0.76, "learning_rate": 2.9571704859604623e-06, "loss": 0.7477, "step": 5071 }, { "epoch": 0.76, "learning_rate": 2.9537416946747897e-06, "loss": 0.666, "step": 5072 }, { "epoch": 0.76, "learning_rate": 2.9503145478774176e-06, "loss": 0.7527, "step": 5073 }, { "epoch": 0.76, "learning_rate": 2.946889046368191e-06, "loss": 0.7323, "step": 5074 }, { "epoch": 0.76, "learning_rate": 2.9434651909465695e-06, "loss": 0.6771, "step": 5075 }, { "epoch": 0.76, "learning_rate": 2.9400429824116293e-06, "loss": 0.6117, "step": 5076 }, { "epoch": 0.76, "learning_rate": 2.936622421562063e-06, "loss": 0.7237, "step": 5077 }, { "epoch": 0.76, "learning_rate": 2.9332035091961708e-06, "loss": 0.6997, "step": 5078 }, { "epoch": 0.76, "learning_rate": 2.929786246111879e-06, "loss": 0.6223, "step": 5079 }, { "epoch": 0.76, "learning_rate": 2.926370633106724e-06, "loss": 0.6692, "step": 5080 }, { "epoch": 0.76, "learning_rate": 2.9229566709778554e-06, "loss": 0.686, "step": 5081 }, { "epoch": 0.76, "learning_rate": 2.9195443605220397e-06, "loss": 0.6781, "step": 5082 }, { "epoch": 0.76, "learning_rate": 2.9161337025356573e-06, "loss": 0.6988, "step": 5083 }, { "epoch": 0.76, "learning_rate": 2.912724697814706e-06, "loss": 0.6719, "step": 5084 }, { "epoch": 0.76, "learning_rate": 2.9093173471547954e-06, "loss": 0.7202, "step": 5085 }, { "epoch": 0.76, "learning_rate": 2.9059116513511443e-06, "loss": 0.7733, "step": 5086 }, { "epoch": 0.76, "learning_rate": 2.902507611198592e-06, "loss": 0.6575, "step": 5087 }, { "epoch": 0.76, "learning_rate": 2.899105227491592e-06, "loss": 0.7813, "step": 5088 }, { "epoch": 0.76, "learning_rate": 2.8957045010242004e-06, "loss": 0.662, "step": 5089 }, { "epoch": 0.76, "learning_rate": 2.8923054325901046e-06, "loss": 0.7237, "step": 5090 }, { "epoch": 0.76, "learning_rate": 2.888908022982587e-06, "loss": 0.695, "step": 5091 }, { "epoch": 0.76, "learning_rate": 2.885512272994555e-06, "loss": 0.5631, "step": 5092 }, { "epoch": 0.76, "learning_rate": 2.8821181834185252e-06, "loss": 0.3179, "step": 5093 }, { "epoch": 0.76, "learning_rate": 2.8787257550466207e-06, "loss": 0.7686, "step": 5094 }, { "epoch": 0.76, "learning_rate": 2.8753349886705885e-06, "loss": 0.79, "step": 5095 }, { "epoch": 0.76, "learning_rate": 2.8719458850817716e-06, "loss": 0.6885, "step": 5096 }, { "epoch": 0.76, "learning_rate": 2.868558445071148e-06, "loss": 0.6948, "step": 5097 }, { "epoch": 0.76, "learning_rate": 2.8651726694292835e-06, "loss": 0.3103, "step": 5098 }, { "epoch": 0.76, "learning_rate": 2.86178855894637e-06, "loss": 0.7728, "step": 5099 }, { "epoch": 0.76, "learning_rate": 2.858406114412209e-06, "loss": 0.7302, "step": 5100 }, { "epoch": 0.76, "learning_rate": 2.855025336616206e-06, "loss": 0.6644, "step": 5101 }, { "epoch": 0.76, "learning_rate": 2.8516462263473853e-06, "loss": 0.7754, "step": 5102 }, { "epoch": 0.76, "learning_rate": 2.8482687843943813e-06, "loss": 0.82, "step": 5103 }, { "epoch": 0.76, "learning_rate": 2.8448930115454297e-06, "loss": 0.7278, "step": 5104 }, { "epoch": 0.76, "learning_rate": 2.841518908588394e-06, "loss": 0.6119, "step": 5105 }, { "epoch": 0.76, "learning_rate": 2.8381464763107315e-06, "loss": 0.7484, "step": 5106 }, { "epoch": 0.76, "learning_rate": 2.8347757154995157e-06, "loss": 0.6763, "step": 5107 }, { "epoch": 0.76, "learning_rate": 2.8314066269414363e-06, "loss": 0.6547, "step": 5108 }, { "epoch": 0.76, "learning_rate": 2.8280392114227795e-06, "loss": 0.7062, "step": 5109 }, { "epoch": 0.76, "learning_rate": 2.824673469729451e-06, "loss": 0.6364, "step": 5110 }, { "epoch": 0.76, "learning_rate": 2.8213094026469633e-06, "loss": 0.7354, "step": 5111 }, { "epoch": 0.76, "learning_rate": 2.8179470109604378e-06, "loss": 0.6857, "step": 5112 }, { "epoch": 0.76, "learning_rate": 2.814586295454609e-06, "loss": 0.8208, "step": 5113 }, { "epoch": 0.76, "learning_rate": 2.811227256913809e-06, "loss": 0.6211, "step": 5114 }, { "epoch": 0.76, "learning_rate": 2.8078698961219896e-06, "loss": 0.6131, "step": 5115 }, { "epoch": 0.76, "learning_rate": 2.804514213862709e-06, "loss": 0.7226, "step": 5116 }, { "epoch": 0.76, "learning_rate": 2.801160210919127e-06, "loss": 0.7274, "step": 5117 }, { "epoch": 0.76, "learning_rate": 2.797807888074022e-06, "loss": 0.6603, "step": 5118 }, { "epoch": 0.76, "learning_rate": 2.7944572461097664e-06, "loss": 0.7613, "step": 5119 }, { "epoch": 0.76, "learning_rate": 2.791108285808355e-06, "loss": 0.6753, "step": 5120 }, { "epoch": 0.76, "learning_rate": 2.787761007951386e-06, "loss": 0.6868, "step": 5121 }, { "epoch": 0.76, "learning_rate": 2.7844154133200564e-06, "loss": 0.6968, "step": 5122 }, { "epoch": 0.76, "learning_rate": 2.781071502695181e-06, "loss": 0.6727, "step": 5123 }, { "epoch": 0.76, "learning_rate": 2.777729276857172e-06, "loss": 0.7437, "step": 5124 }, { "epoch": 0.76, "learning_rate": 2.774388736586057e-06, "loss": 0.7174, "step": 5125 }, { "epoch": 0.76, "learning_rate": 2.771049882661465e-06, "loss": 0.6342, "step": 5126 }, { "epoch": 0.76, "learning_rate": 2.7677127158626347e-06, "loss": 0.6755, "step": 5127 }, { "epoch": 0.76, "learning_rate": 2.764377236968413e-06, "loss": 0.7261, "step": 5128 }, { "epoch": 0.76, "learning_rate": 2.761043446757242e-06, "loss": 0.7087, "step": 5129 }, { "epoch": 0.77, "learning_rate": 2.7577113460071813e-06, "loss": 0.6517, "step": 5130 }, { "epoch": 0.77, "learning_rate": 2.7543809354958927e-06, "loss": 0.6814, "step": 5131 }, { "epoch": 0.77, "learning_rate": 2.751052216000639e-06, "loss": 0.3309, "step": 5132 }, { "epoch": 0.77, "learning_rate": 2.7477251882982956e-06, "loss": 0.6892, "step": 5133 }, { "epoch": 0.77, "learning_rate": 2.744399853165337e-06, "loss": 0.7722, "step": 5134 }, { "epoch": 0.77, "learning_rate": 2.741076211377848e-06, "loss": 0.6351, "step": 5135 }, { "epoch": 0.77, "learning_rate": 2.7377542637115162e-06, "loss": 0.724, "step": 5136 }, { "epoch": 0.77, "learning_rate": 2.734434010941628e-06, "loss": 0.6934, "step": 5137 }, { "epoch": 0.77, "learning_rate": 2.731115453843083e-06, "loss": 0.7268, "step": 5138 }, { "epoch": 0.77, "learning_rate": 2.7277985931903828e-06, "loss": 0.7182, "step": 5139 }, { "epoch": 0.77, "learning_rate": 2.724483429757627e-06, "loss": 0.7133, "step": 5140 }, { "epoch": 0.77, "learning_rate": 2.7211699643185265e-06, "loss": 0.6964, "step": 5141 }, { "epoch": 0.77, "learning_rate": 2.7178581976463934e-06, "loss": 0.685, "step": 5142 }, { "epoch": 0.77, "learning_rate": 2.7145481305141417e-06, "loss": 0.6488, "step": 5143 }, { "epoch": 0.77, "learning_rate": 2.7112397636942954e-06, "loss": 0.7455, "step": 5144 }, { "epoch": 0.77, "learning_rate": 2.707933097958968e-06, "loss": 0.6153, "step": 5145 }, { "epoch": 0.77, "learning_rate": 2.7046281340798897e-06, "loss": 0.7898, "step": 5146 }, { "epoch": 0.77, "learning_rate": 2.7013248728283905e-06, "loss": 0.7269, "step": 5147 }, { "epoch": 0.77, "learning_rate": 2.6980233149753897e-06, "loss": 0.6628, "step": 5148 }, { "epoch": 0.77, "learning_rate": 2.6947234612914353e-06, "loss": 0.7352, "step": 5149 }, { "epoch": 0.77, "learning_rate": 2.6914253125466514e-06, "loss": 0.7387, "step": 5150 }, { "epoch": 0.77, "learning_rate": 2.688128869510782e-06, "loss": 0.7003, "step": 5151 }, { "epoch": 0.77, "learning_rate": 2.6848341329531592e-06, "loss": 0.7094, "step": 5152 }, { "epoch": 0.77, "learning_rate": 2.6815411036427284e-06, "loss": 0.742, "step": 5153 }, { "epoch": 0.77, "learning_rate": 2.6782497823480336e-06, "loss": 0.6941, "step": 5154 }, { "epoch": 0.77, "learning_rate": 2.674960169837214e-06, "loss": 0.8702, "step": 5155 }, { "epoch": 0.77, "learning_rate": 2.671672266878016e-06, "loss": 0.6347, "step": 5156 }, { "epoch": 0.77, "learning_rate": 2.6683860742377853e-06, "loss": 0.6535, "step": 5157 }, { "epoch": 0.77, "learning_rate": 2.6651015926834713e-06, "loss": 0.7444, "step": 5158 }, { "epoch": 0.77, "learning_rate": 2.661818822981622e-06, "loss": 0.6149, "step": 5159 }, { "epoch": 0.77, "learning_rate": 2.6585377658983802e-06, "loss": 0.6981, "step": 5160 }, { "epoch": 0.77, "learning_rate": 2.655258422199497e-06, "loss": 0.7479, "step": 5161 }, { "epoch": 0.77, "learning_rate": 2.6519807926503248e-06, "loss": 0.6903, "step": 5162 }, { "epoch": 0.77, "learning_rate": 2.648704878015801e-06, "loss": 0.7253, "step": 5163 }, { "epoch": 0.77, "learning_rate": 2.6454306790604867e-06, "loss": 0.6351, "step": 5164 }, { "epoch": 0.77, "learning_rate": 2.642158196548521e-06, "loss": 0.3016, "step": 5165 }, { "epoch": 0.77, "learning_rate": 2.638887431243654e-06, "loss": 0.3322, "step": 5166 }, { "epoch": 0.77, "learning_rate": 2.635618383909234e-06, "loss": 0.6875, "step": 5167 }, { "epoch": 0.77, "learning_rate": 2.632351055308201e-06, "loss": 0.7519, "step": 5168 }, { "epoch": 0.77, "learning_rate": 2.6290854462031023e-06, "loss": 0.6983, "step": 5169 }, { "epoch": 0.77, "learning_rate": 2.625821557356081e-06, "loss": 0.7074, "step": 5170 }, { "epoch": 0.77, "learning_rate": 2.622559389528877e-06, "loss": 0.7382, "step": 5171 }, { "epoch": 0.77, "learning_rate": 2.619298943482835e-06, "loss": 0.6909, "step": 5172 }, { "epoch": 0.77, "learning_rate": 2.616040219978887e-06, "loss": 0.6908, "step": 5173 }, { "epoch": 0.77, "learning_rate": 2.6127832197775716e-06, "loss": 0.7342, "step": 5174 }, { "epoch": 0.77, "learning_rate": 2.609527943639024e-06, "loss": 0.6591, "step": 5175 }, { "epoch": 0.77, "learning_rate": 2.6062743923229718e-06, "loss": 0.7152, "step": 5176 }, { "epoch": 0.77, "learning_rate": 2.603022566588748e-06, "loss": 0.6688, "step": 5177 }, { "epoch": 0.77, "learning_rate": 2.5997724671952706e-06, "loss": 0.7204, "step": 5178 }, { "epoch": 0.77, "learning_rate": 2.5965240949010715e-06, "loss": 0.773, "step": 5179 }, { "epoch": 0.77, "learning_rate": 2.5932774504642723e-06, "loss": 0.6927, "step": 5180 }, { "epoch": 0.77, "learning_rate": 2.5900325346425815e-06, "loss": 0.7802, "step": 5181 }, { "epoch": 0.77, "learning_rate": 2.5867893481933194e-06, "loss": 0.7282, "step": 5182 }, { "epoch": 0.77, "learning_rate": 2.58354789187339e-06, "loss": 0.7214, "step": 5183 }, { "epoch": 0.77, "learning_rate": 2.5803081664393038e-06, "loss": 0.7142, "step": 5184 }, { "epoch": 0.77, "learning_rate": 2.5770701726471604e-06, "loss": 0.7155, "step": 5185 }, { "epoch": 0.77, "learning_rate": 2.573833911252659e-06, "loss": 0.8057, "step": 5186 }, { "epoch": 0.77, "learning_rate": 2.5705993830110965e-06, "loss": 0.6665, "step": 5187 }, { "epoch": 0.77, "learning_rate": 2.567366588677356e-06, "loss": 0.7536, "step": 5188 }, { "epoch": 0.77, "learning_rate": 2.5641355290059235e-06, "loss": 0.6861, "step": 5189 }, { "epoch": 0.77, "learning_rate": 2.5609062047508824e-06, "loss": 0.7208, "step": 5190 }, { "epoch": 0.77, "learning_rate": 2.557678616665903e-06, "loss": 0.5832, "step": 5191 }, { "epoch": 0.77, "learning_rate": 2.554452765504256e-06, "loss": 0.7048, "step": 5192 }, { "epoch": 0.77, "learning_rate": 2.551228652018806e-06, "loss": 0.7225, "step": 5193 }, { "epoch": 0.77, "learning_rate": 2.548006276962011e-06, "loss": 0.7693, "step": 5194 }, { "epoch": 0.77, "learning_rate": 2.544785641085926e-06, "loss": 0.7242, "step": 5195 }, { "epoch": 0.77, "learning_rate": 2.5415667451421943e-06, "loss": 0.6981, "step": 5196 }, { "epoch": 0.78, "learning_rate": 2.5383495898820574e-06, "loss": 0.7507, "step": 5197 }, { "epoch": 0.78, "learning_rate": 2.5351341760563543e-06, "loss": 0.7694, "step": 5198 }, { "epoch": 0.78, "learning_rate": 2.531920504415507e-06, "loss": 0.7828, "step": 5199 }, { "epoch": 0.78, "learning_rate": 2.5287085757095387e-06, "loss": 0.7415, "step": 5200 }, { "epoch": 0.78, "learning_rate": 2.5254983906880657e-06, "loss": 0.6142, "step": 5201 }, { "epoch": 0.78, "learning_rate": 2.522289950100295e-06, "loss": 0.3234, "step": 5202 }, { "epoch": 0.78, "learning_rate": 2.5190832546950305e-06, "loss": 0.6899, "step": 5203 }, { "epoch": 0.78, "learning_rate": 2.515878305220659e-06, "loss": 0.7749, "step": 5204 }, { "epoch": 0.78, "learning_rate": 2.512675102425174e-06, "loss": 0.7723, "step": 5205 }, { "epoch": 0.78, "learning_rate": 2.5094736470561465e-06, "loss": 0.7634, "step": 5206 }, { "epoch": 0.78, "learning_rate": 2.5062739398607473e-06, "loss": 0.7244, "step": 5207 }, { "epoch": 0.78, "learning_rate": 2.5030759815857464e-06, "loss": 0.5851, "step": 5208 }, { "epoch": 0.78, "learning_rate": 2.4998797729774916e-06, "loss": 0.6369, "step": 5209 }, { "epoch": 0.78, "learning_rate": 2.496685314781934e-06, "loss": 0.6827, "step": 5210 }, { "epoch": 0.78, "learning_rate": 2.4934926077446033e-06, "loss": 0.2898, "step": 5211 }, { "epoch": 0.78, "learning_rate": 2.4903016526106326e-06, "loss": 0.6906, "step": 5212 }, { "epoch": 0.78, "learning_rate": 2.487112450124746e-06, "loss": 0.6864, "step": 5213 }, { "epoch": 0.78, "learning_rate": 2.4839250010312454e-06, "loss": 0.6844, "step": 5214 }, { "epoch": 0.78, "learning_rate": 2.480739306074038e-06, "loss": 0.6936, "step": 5215 }, { "epoch": 0.78, "learning_rate": 2.4775553659966147e-06, "loss": 0.6397, "step": 5216 }, { "epoch": 0.78, "learning_rate": 2.474373181542058e-06, "loss": 0.702, "step": 5217 }, { "epoch": 0.78, "learning_rate": 2.4711927534530445e-06, "loss": 0.6352, "step": 5218 }, { "epoch": 0.78, "learning_rate": 2.468014082471831e-06, "loss": 0.6827, "step": 5219 }, { "epoch": 0.78, "learning_rate": 2.4648371693402728e-06, "loss": 0.7215, "step": 5220 }, { "epoch": 0.78, "learning_rate": 2.461662014799816e-06, "loss": 0.6255, "step": 5221 }, { "epoch": 0.78, "learning_rate": 2.458488619591485e-06, "loss": 0.7155, "step": 5222 }, { "epoch": 0.78, "learning_rate": 2.455316984455911e-06, "loss": 0.6001, "step": 5223 }, { "epoch": 0.78, "learning_rate": 2.452147110133298e-06, "loss": 0.7038, "step": 5224 }, { "epoch": 0.78, "learning_rate": 2.448978997363448e-06, "loss": 0.6311, "step": 5225 }, { "epoch": 0.78, "learning_rate": 2.4458126468857537e-06, "loss": 0.6967, "step": 5226 }, { "epoch": 0.78, "learning_rate": 2.4426480594391845e-06, "loss": 0.706, "step": 5227 }, { "epoch": 0.78, "learning_rate": 2.439485235762311e-06, "loss": 0.6829, "step": 5228 }, { "epoch": 0.78, "learning_rate": 2.4363241765932897e-06, "loss": 0.7395, "step": 5229 }, { "epoch": 0.78, "learning_rate": 2.4331648826698562e-06, "loss": 0.6682, "step": 5230 }, { "epoch": 0.78, "learning_rate": 2.4300073547293502e-06, "loss": 0.6898, "step": 5231 }, { "epoch": 0.78, "learning_rate": 2.4268515935086836e-06, "loss": 0.6628, "step": 5232 }, { "epoch": 0.78, "learning_rate": 2.4236975997443646e-06, "loss": 0.31, "step": 5233 }, { "epoch": 0.78, "learning_rate": 2.4205453741724894e-06, "loss": 0.6355, "step": 5234 }, { "epoch": 0.78, "learning_rate": 2.4173949175287324e-06, "loss": 0.6707, "step": 5235 }, { "epoch": 0.78, "learning_rate": 2.4142462305483703e-06, "loss": 0.6867, "step": 5236 }, { "epoch": 0.78, "learning_rate": 2.4110993139662465e-06, "loss": 0.7728, "step": 5237 }, { "epoch": 0.78, "learning_rate": 2.407954168516816e-06, "loss": 0.7335, "step": 5238 }, { "epoch": 0.78, "learning_rate": 2.4048107949340993e-06, "loss": 0.6238, "step": 5239 }, { "epoch": 0.78, "learning_rate": 2.4016691939517133e-06, "loss": 0.6584, "step": 5240 }, { "epoch": 0.78, "learning_rate": 2.398529366302862e-06, "loss": 0.6318, "step": 5241 }, { "epoch": 0.78, "learning_rate": 2.3953913127203276e-06, "loss": 0.7284, "step": 5242 }, { "epoch": 0.78, "learning_rate": 2.392255033936487e-06, "loss": 0.7595, "step": 5243 }, { "epoch": 0.78, "learning_rate": 2.389120530683299e-06, "loss": 0.6886, "step": 5244 }, { "epoch": 0.78, "learning_rate": 2.3859878036923067e-06, "loss": 0.7255, "step": 5245 }, { "epoch": 0.78, "learning_rate": 2.382856853694646e-06, "loss": 0.7414, "step": 5246 }, { "epoch": 0.78, "learning_rate": 2.3797276814210247e-06, "loss": 0.707, "step": 5247 }, { "epoch": 0.78, "learning_rate": 2.376600287601748e-06, "loss": 0.6142, "step": 5248 }, { "epoch": 0.78, "learning_rate": 2.3734746729667037e-06, "loss": 0.6754, "step": 5249 }, { "epoch": 0.78, "learning_rate": 2.3703508382453553e-06, "loss": 0.7293, "step": 5250 }, { "epoch": 0.78, "learning_rate": 2.3672287841667628e-06, "loss": 0.641, "step": 5251 }, { "epoch": 0.78, "learning_rate": 2.364108511459564e-06, "loss": 0.7992, "step": 5252 }, { "epoch": 0.78, "learning_rate": 2.360990020851982e-06, "loss": 0.6783, "step": 5253 }, { "epoch": 0.78, "learning_rate": 2.35787331307183e-06, "loss": 0.6923, "step": 5254 }, { "epoch": 0.78, "learning_rate": 2.354758388846491e-06, "loss": 0.6267, "step": 5255 }, { "epoch": 0.78, "learning_rate": 2.351645248902945e-06, "loss": 0.7266, "step": 5256 }, { "epoch": 0.78, "learning_rate": 2.348533893967754e-06, "loss": 0.7498, "step": 5257 }, { "epoch": 0.78, "learning_rate": 2.3454243247670537e-06, "loss": 0.6782, "step": 5258 }, { "epoch": 0.78, "learning_rate": 2.3423165420265724e-06, "loss": 0.7398, "step": 5259 }, { "epoch": 0.78, "learning_rate": 2.3392105464716207e-06, "loss": 0.701, "step": 5260 }, { "epoch": 0.78, "learning_rate": 2.3361063388270877e-06, "loss": 0.8259, "step": 5261 }, { "epoch": 0.78, "learning_rate": 2.3330039198174527e-06, "loss": 0.6214, "step": 5262 }, { "epoch": 0.78, "learning_rate": 2.329903290166765e-06, "loss": 0.654, "step": 5263 }, { "epoch": 0.79, "learning_rate": 2.3268044505986708e-06, "loss": 0.6958, "step": 5264 }, { "epoch": 0.79, "learning_rate": 2.323707401836385e-06, "loss": 0.796, "step": 5265 }, { "epoch": 0.79, "learning_rate": 2.320612144602715e-06, "loss": 0.6403, "step": 5266 }, { "epoch": 0.79, "learning_rate": 2.3175186796200454e-06, "loss": 0.6839, "step": 5267 }, { "epoch": 0.79, "learning_rate": 2.314427007610345e-06, "loss": 0.6592, "step": 5268 }, { "epoch": 0.79, "learning_rate": 2.3113371292951627e-06, "loss": 0.7117, "step": 5269 }, { "epoch": 0.79, "learning_rate": 2.3082490453956242e-06, "loss": 0.7923, "step": 5270 }, { "epoch": 0.79, "learning_rate": 2.3051627566324443e-06, "loss": 0.7601, "step": 5271 }, { "epoch": 0.79, "learning_rate": 2.3020782637259165e-06, "loss": 0.6125, "step": 5272 }, { "epoch": 0.79, "learning_rate": 2.298995567395911e-06, "loss": 0.7025, "step": 5273 }, { "epoch": 0.79, "learning_rate": 2.2959146683618814e-06, "loss": 0.7507, "step": 5274 }, { "epoch": 0.79, "learning_rate": 2.2928355673428635e-06, "loss": 0.6802, "step": 5275 }, { "epoch": 0.79, "learning_rate": 2.2897582650574735e-06, "loss": 0.6821, "step": 5276 }, { "epoch": 0.79, "learning_rate": 2.2866827622239072e-06, "loss": 0.7002, "step": 5277 }, { "epoch": 0.79, "learning_rate": 2.283609059559935e-06, "loss": 0.708, "step": 5278 }, { "epoch": 0.79, "learning_rate": 2.280537157782914e-06, "loss": 0.7918, "step": 5279 }, { "epoch": 0.79, "learning_rate": 2.2774670576097824e-06, "loss": 0.6553, "step": 5280 }, { "epoch": 0.79, "learning_rate": 2.274398759757046e-06, "loss": 0.6643, "step": 5281 }, { "epoch": 0.79, "learning_rate": 2.2713322649408087e-06, "loss": 0.7122, "step": 5282 }, { "epoch": 0.79, "learning_rate": 2.2682675738767345e-06, "loss": 0.749, "step": 5283 }, { "epoch": 0.79, "learning_rate": 2.2652046872800804e-06, "loss": 0.6797, "step": 5284 }, { "epoch": 0.79, "learning_rate": 2.2621436058656766e-06, "loss": 0.3311, "step": 5285 }, { "epoch": 0.79, "learning_rate": 2.259084330347929e-06, "loss": 0.6679, "step": 5286 }, { "epoch": 0.79, "learning_rate": 2.25602686144083e-06, "loss": 0.6538, "step": 5287 }, { "epoch": 0.79, "learning_rate": 2.2529711998579407e-06, "loss": 0.7373, "step": 5288 }, { "epoch": 0.79, "learning_rate": 2.249917346312406e-06, "loss": 0.6909, "step": 5289 }, { "epoch": 0.79, "learning_rate": 2.2468653015169563e-06, "loss": 0.6719, "step": 5290 }, { "epoch": 0.79, "learning_rate": 2.243815066183882e-06, "loss": 0.5987, "step": 5291 }, { "epoch": 0.79, "learning_rate": 2.2407666410250694e-06, "loss": 0.7109, "step": 5292 }, { "epoch": 0.79, "learning_rate": 2.237720026751966e-06, "loss": 0.7017, "step": 5293 }, { "epoch": 0.79, "learning_rate": 2.23467522407561e-06, "loss": 0.7502, "step": 5294 }, { "epoch": 0.79, "learning_rate": 2.231632233706611e-06, "loss": 0.7007, "step": 5295 }, { "epoch": 0.79, "learning_rate": 2.2285910563551506e-06, "loss": 0.6667, "step": 5296 }, { "epoch": 0.79, "learning_rate": 2.225551692731004e-06, "loss": 0.7506, "step": 5297 }, { "epoch": 0.79, "learning_rate": 2.2225141435435017e-06, "loss": 0.6308, "step": 5298 }, { "epoch": 0.79, "learning_rate": 2.2194784095015642e-06, "loss": 0.7143, "step": 5299 }, { "epoch": 0.79, "learning_rate": 2.2164444913136895e-06, "loss": 0.703, "step": 5300 }, { "epoch": 0.79, "learning_rate": 2.21341238968794e-06, "loss": 0.6821, "step": 5301 }, { "epoch": 0.79, "learning_rate": 2.210382105331965e-06, "loss": 0.7226, "step": 5302 }, { "epoch": 0.79, "learning_rate": 2.207353638952989e-06, "loss": 0.7193, "step": 5303 }, { "epoch": 0.79, "learning_rate": 2.2043269912578025e-06, "loss": 0.6852, "step": 5304 }, { "epoch": 0.79, "learning_rate": 2.2013021629527876e-06, "loss": 0.7131, "step": 5305 }, { "epoch": 0.79, "learning_rate": 2.1982791547438853e-06, "loss": 0.7232, "step": 5306 }, { "epoch": 0.79, "learning_rate": 2.1952579673366215e-06, "loss": 0.6842, "step": 5307 }, { "epoch": 0.79, "learning_rate": 2.1922386014360976e-06, "loss": 0.7184, "step": 5308 }, { "epoch": 0.79, "learning_rate": 2.189221057746983e-06, "loss": 0.3173, "step": 5309 }, { "epoch": 0.79, "learning_rate": 2.186205336973527e-06, "loss": 0.6721, "step": 5310 }, { "epoch": 0.79, "learning_rate": 2.183191439819554e-06, "loss": 0.6836, "step": 5311 }, { "epoch": 0.79, "learning_rate": 2.1801793669884597e-06, "loss": 0.6468, "step": 5312 }, { "epoch": 0.79, "learning_rate": 2.17716911918322e-06, "loss": 0.6643, "step": 5313 }, { "epoch": 0.79, "learning_rate": 2.1741606971063733e-06, "loss": 0.7348, "step": 5314 }, { "epoch": 0.79, "learning_rate": 2.1711541014600426e-06, "loss": 0.7003, "step": 5315 }, { "epoch": 0.79, "learning_rate": 2.1681493329459235e-06, "loss": 0.6675, "step": 5316 }, { "epoch": 0.79, "learning_rate": 2.165146392265277e-06, "loss": 0.7511, "step": 5317 }, { "epoch": 0.79, "learning_rate": 2.162145280118948e-06, "loss": 0.7501, "step": 5318 }, { "epoch": 0.79, "learning_rate": 2.159145997207347e-06, "loss": 0.6921, "step": 5319 }, { "epoch": 0.79, "learning_rate": 2.1561485442304653e-06, "loss": 0.7611, "step": 5320 }, { "epoch": 0.79, "learning_rate": 2.1531529218878555e-06, "loss": 0.7815, "step": 5321 }, { "epoch": 0.79, "learning_rate": 2.150159130878654e-06, "loss": 0.7303, "step": 5322 }, { "epoch": 0.79, "learning_rate": 2.1471671719015675e-06, "loss": 0.7261, "step": 5323 }, { "epoch": 0.79, "learning_rate": 2.1441770456548672e-06, "loss": 0.6986, "step": 5324 }, { "epoch": 0.79, "learning_rate": 2.141188752836405e-06, "loss": 0.7676, "step": 5325 }, { "epoch": 0.79, "learning_rate": 2.1382022941436045e-06, "loss": 0.6874, "step": 5326 }, { "epoch": 0.79, "learning_rate": 2.1352176702734585e-06, "loss": 0.6402, "step": 5327 }, { "epoch": 0.79, "learning_rate": 2.1322348819225335e-06, "loss": 0.6038, "step": 5328 }, { "epoch": 0.79, "learning_rate": 2.129253929786962e-06, "loss": 0.6838, "step": 5329 }, { "epoch": 0.79, "learning_rate": 2.1262748145624555e-06, "loss": 0.6384, "step": 5330 }, { "epoch": 0.8, "learning_rate": 2.1232975369442952e-06, "loss": 0.6475, "step": 5331 }, { "epoch": 0.8, "learning_rate": 2.1203220976273285e-06, "loss": 0.8071, "step": 5332 }, { "epoch": 0.8, "learning_rate": 2.117348497305979e-06, "loss": 0.757, "step": 5333 }, { "epoch": 0.8, "learning_rate": 2.1143767366742373e-06, "loss": 0.6858, "step": 5334 }, { "epoch": 0.8, "learning_rate": 2.1114068164256683e-06, "loss": 0.6564, "step": 5335 }, { "epoch": 0.8, "learning_rate": 2.10843873725341e-06, "loss": 0.7528, "step": 5336 }, { "epoch": 0.8, "learning_rate": 2.1054724998501584e-06, "loss": 0.7343, "step": 5337 }, { "epoch": 0.8, "learning_rate": 2.102508104908191e-06, "loss": 0.5735, "step": 5338 }, { "epoch": 0.8, "learning_rate": 2.0995455531193554e-06, "loss": 0.6997, "step": 5339 }, { "epoch": 0.8, "learning_rate": 2.0965848451750605e-06, "loss": 0.6465, "step": 5340 }, { "epoch": 0.8, "learning_rate": 2.0936259817662907e-06, "loss": 0.6191, "step": 5341 }, { "epoch": 0.8, "learning_rate": 2.090668963583602e-06, "loss": 0.746, "step": 5342 }, { "epoch": 0.8, "learning_rate": 2.087713791317114e-06, "loss": 0.6579, "step": 5343 }, { "epoch": 0.8, "learning_rate": 2.0847604656565234e-06, "loss": 0.7833, "step": 5344 }, { "epoch": 0.8, "learning_rate": 2.081808987291085e-06, "loss": 0.7413, "step": 5345 }, { "epoch": 0.8, "learning_rate": 2.078859356909634e-06, "loss": 0.6276, "step": 5346 }, { "epoch": 0.8, "learning_rate": 2.0759115752005622e-06, "loss": 0.7023, "step": 5347 }, { "epoch": 0.8, "learning_rate": 2.0729656428518373e-06, "loss": 0.6419, "step": 5348 }, { "epoch": 0.8, "learning_rate": 2.0700215605510033e-06, "loss": 0.7155, "step": 5349 }, { "epoch": 0.8, "learning_rate": 2.0670793289851553e-06, "loss": 0.6773, "step": 5350 }, { "epoch": 0.8, "learning_rate": 2.0641389488409704e-06, "loss": 0.6529, "step": 5351 }, { "epoch": 0.8, "learning_rate": 2.061200420804681e-06, "loss": 0.8375, "step": 5352 }, { "epoch": 0.8, "learning_rate": 2.058263745562099e-06, "loss": 0.6222, "step": 5353 }, { "epoch": 0.8, "learning_rate": 2.0553289237986018e-06, "loss": 0.6769, "step": 5354 }, { "epoch": 0.8, "learning_rate": 2.0523959561991224e-06, "loss": 0.7306, "step": 5355 }, { "epoch": 0.8, "learning_rate": 2.049464843448182e-06, "loss": 0.7063, "step": 5356 }, { "epoch": 0.8, "learning_rate": 2.0465355862298495e-06, "loss": 0.708, "step": 5357 }, { "epoch": 0.8, "learning_rate": 2.04360818522777e-06, "loss": 0.7159, "step": 5358 }, { "epoch": 0.8, "learning_rate": 2.040682641125158e-06, "loss": 0.6684, "step": 5359 }, { "epoch": 0.8, "learning_rate": 2.037758954604784e-06, "loss": 0.7718, "step": 5360 }, { "epoch": 0.8, "learning_rate": 2.0348371263489943e-06, "loss": 0.6643, "step": 5361 }, { "epoch": 0.8, "learning_rate": 2.031917157039701e-06, "loss": 0.6917, "step": 5362 }, { "epoch": 0.8, "learning_rate": 2.0289990473583733e-06, "loss": 0.7433, "step": 5363 }, { "epoch": 0.8, "learning_rate": 2.026082797986062e-06, "loss": 0.6933, "step": 5364 }, { "epoch": 0.8, "learning_rate": 2.0231684096033687e-06, "loss": 0.6605, "step": 5365 }, { "epoch": 0.8, "learning_rate": 2.020255882890468e-06, "loss": 0.7254, "step": 5366 }, { "epoch": 0.8, "learning_rate": 2.0173452185271023e-06, "loss": 0.7159, "step": 5367 }, { "epoch": 0.8, "learning_rate": 2.01443641719257e-06, "loss": 0.7709, "step": 5368 }, { "epoch": 0.8, "learning_rate": 2.0115294795657448e-06, "loss": 0.679, "step": 5369 }, { "epoch": 0.8, "learning_rate": 2.0086244063250594e-06, "loss": 0.6794, "step": 5370 }, { "epoch": 0.8, "learning_rate": 2.005721198148514e-06, "loss": 0.7039, "step": 5371 }, { "epoch": 0.8, "learning_rate": 2.0028198557136746e-06, "loss": 0.6308, "step": 5372 }, { "epoch": 0.8, "learning_rate": 1.999920379697665e-06, "loss": 0.7471, "step": 5373 }, { "epoch": 0.8, "learning_rate": 1.997022770777185e-06, "loss": 0.8023, "step": 5374 }, { "epoch": 0.8, "learning_rate": 1.9941270296284855e-06, "loss": 0.6571, "step": 5375 }, { "epoch": 0.8, "learning_rate": 1.991233156927389e-06, "loss": 0.6493, "step": 5376 }, { "epoch": 0.8, "learning_rate": 1.988341153349287e-06, "loss": 0.7839, "step": 5377 }, { "epoch": 0.8, "learning_rate": 1.985451019569118e-06, "loss": 0.7422, "step": 5378 }, { "epoch": 0.8, "learning_rate": 1.9825627562614057e-06, "loss": 0.6366, "step": 5379 }, { "epoch": 0.8, "learning_rate": 1.9796763641002193e-06, "loss": 0.6097, "step": 5380 }, { "epoch": 0.8, "learning_rate": 1.9767918437592003e-06, "loss": 0.6771, "step": 5381 }, { "epoch": 0.8, "learning_rate": 1.973909195911555e-06, "loss": 0.6514, "step": 5382 }, { "epoch": 0.8, "learning_rate": 1.9710284212300422e-06, "loss": 0.7144, "step": 5383 }, { "epoch": 0.8, "learning_rate": 1.968149520386995e-06, "loss": 0.6229, "step": 5384 }, { "epoch": 0.8, "learning_rate": 1.965272494054302e-06, "loss": 0.8241, "step": 5385 }, { "epoch": 0.8, "learning_rate": 1.9623973429034194e-06, "loss": 0.6604, "step": 5386 }, { "epoch": 0.8, "learning_rate": 1.959524067605364e-06, "loss": 0.6884, "step": 5387 }, { "epoch": 0.8, "learning_rate": 1.9566526688307107e-06, "loss": 0.7621, "step": 5388 }, { "epoch": 0.8, "learning_rate": 1.9537831472496005e-06, "loss": 0.6301, "step": 5389 }, { "epoch": 0.8, "learning_rate": 1.9509155035317396e-06, "loss": 0.6302, "step": 5390 }, { "epoch": 0.8, "learning_rate": 1.9480497383463857e-06, "loss": 0.7082, "step": 5391 }, { "epoch": 0.8, "learning_rate": 1.945185852362368e-06, "loss": 0.7457, "step": 5392 }, { "epoch": 0.8, "learning_rate": 1.942323846248072e-06, "loss": 0.6763, "step": 5393 }, { "epoch": 0.8, "learning_rate": 1.939463720671447e-06, "loss": 0.6492, "step": 5394 }, { "epoch": 0.8, "learning_rate": 1.9366054763000053e-06, "loss": 0.5926, "step": 5395 }, { "epoch": 0.8, "learning_rate": 1.9337491138008103e-06, "loss": 0.6011, "step": 5396 }, { "epoch": 0.8, "learning_rate": 1.9308946338404976e-06, "loss": 0.7267, "step": 5397 }, { "epoch": 0.81, "learning_rate": 1.9280420370852606e-06, "loss": 0.698, "step": 5398 }, { "epoch": 0.81, "learning_rate": 1.9251913242008456e-06, "loss": 0.7206, "step": 5399 }, { "epoch": 0.81, "learning_rate": 1.9223424958525695e-06, "loss": 0.6437, "step": 5400 }, { "epoch": 0.81, "learning_rate": 1.9194955527053027e-06, "loss": 0.7054, "step": 5401 }, { "epoch": 0.81, "learning_rate": 1.91665049542348e-06, "loss": 0.6529, "step": 5402 }, { "epoch": 0.81, "learning_rate": 1.9138073246710954e-06, "loss": 0.7409, "step": 5403 }, { "epoch": 0.81, "learning_rate": 1.910966041111697e-06, "loss": 0.6045, "step": 5404 }, { "epoch": 0.81, "learning_rate": 1.908126645408401e-06, "loss": 0.7273, "step": 5405 }, { "epoch": 0.81, "learning_rate": 1.9052891382238735e-06, "loss": 0.6674, "step": 5406 }, { "epoch": 0.81, "learning_rate": 1.9024535202203486e-06, "loss": 0.7002, "step": 5407 }, { "epoch": 0.81, "learning_rate": 1.8996197920596148e-06, "loss": 0.6095, "step": 5408 }, { "epoch": 0.81, "learning_rate": 1.896787954403021e-06, "loss": 0.7043, "step": 5409 }, { "epoch": 0.81, "learning_rate": 1.8939580079114771e-06, "loss": 0.7658, "step": 5410 }, { "epoch": 0.81, "learning_rate": 1.8911299532454453e-06, "loss": 0.5805, "step": 5411 }, { "epoch": 0.81, "learning_rate": 1.8883037910649504e-06, "loss": 0.6804, "step": 5412 }, { "epoch": 0.81, "learning_rate": 1.8854795220295784e-06, "loss": 0.6625, "step": 5413 }, { "epoch": 0.81, "learning_rate": 1.8826571467984666e-06, "loss": 0.7876, "step": 5414 }, { "epoch": 0.81, "learning_rate": 1.8798366660303147e-06, "loss": 0.6967, "step": 5415 }, { "epoch": 0.81, "learning_rate": 1.8770180803833804e-06, "loss": 0.6742, "step": 5416 }, { "epoch": 0.81, "learning_rate": 1.8742013905154777e-06, "loss": 0.7573, "step": 5417 }, { "epoch": 0.81, "learning_rate": 1.8713865970839818e-06, "loss": 0.7141, "step": 5418 }, { "epoch": 0.81, "learning_rate": 1.868573700745817e-06, "loss": 0.6617, "step": 5419 }, { "epoch": 0.81, "learning_rate": 1.8657627021574721e-06, "loss": 0.7057, "step": 5420 }, { "epoch": 0.81, "learning_rate": 1.862953601974995e-06, "loss": 0.5954, "step": 5421 }, { "epoch": 0.81, "learning_rate": 1.860146400853976e-06, "loss": 0.7465, "step": 5422 }, { "epoch": 0.81, "learning_rate": 1.8573410994495855e-06, "loss": 0.6849, "step": 5423 }, { "epoch": 0.81, "learning_rate": 1.8545376984165286e-06, "loss": 0.6779, "step": 5424 }, { "epoch": 0.81, "learning_rate": 1.8517361984090799e-06, "loss": 0.7, "step": 5425 }, { "epoch": 0.81, "learning_rate": 1.8489366000810672e-06, "loss": 0.7412, "step": 5426 }, { "epoch": 0.81, "learning_rate": 1.8461389040858702e-06, "loss": 0.5819, "step": 5427 }, { "epoch": 0.81, "learning_rate": 1.8433431110764322e-06, "loss": 0.712, "step": 5428 }, { "epoch": 0.81, "learning_rate": 1.8405492217052413e-06, "loss": 0.6609, "step": 5429 }, { "epoch": 0.81, "learning_rate": 1.8377572366243546e-06, "loss": 0.6929, "step": 5430 }, { "epoch": 0.81, "learning_rate": 1.83496715648538e-06, "loss": 0.7638, "step": 5431 }, { "epoch": 0.81, "learning_rate": 1.8321789819394731e-06, "loss": 0.7202, "step": 5432 }, { "epoch": 0.81, "learning_rate": 1.829392713637358e-06, "loss": 0.7498, "step": 5433 }, { "epoch": 0.81, "learning_rate": 1.8266083522292988e-06, "loss": 0.7109, "step": 5434 }, { "epoch": 0.81, "learning_rate": 1.8238258983651268e-06, "loss": 0.7416, "step": 5435 }, { "epoch": 0.81, "learning_rate": 1.8210453526942273e-06, "loss": 0.7684, "step": 5436 }, { "epoch": 0.81, "learning_rate": 1.8182667158655276e-06, "loss": 0.7423, "step": 5437 }, { "epoch": 0.81, "learning_rate": 1.8154899885275301e-06, "loss": 0.7595, "step": 5438 }, { "epoch": 0.81, "learning_rate": 1.812715171328272e-06, "loss": 0.6924, "step": 5439 }, { "epoch": 0.81, "learning_rate": 1.8099422649153563e-06, "loss": 0.7754, "step": 5440 }, { "epoch": 0.81, "learning_rate": 1.8071712699359378e-06, "loss": 0.33, "step": 5441 }, { "epoch": 0.81, "learning_rate": 1.8044021870367202e-06, "loss": 0.7401, "step": 5442 }, { "epoch": 0.81, "learning_rate": 1.8016350168639673e-06, "loss": 0.7255, "step": 5443 }, { "epoch": 0.81, "learning_rate": 1.798869760063494e-06, "loss": 0.7773, "step": 5444 }, { "epoch": 0.81, "learning_rate": 1.7961064172806685e-06, "loss": 0.7419, "step": 5445 }, { "epoch": 0.81, "learning_rate": 1.7933449891604148e-06, "loss": 0.7256, "step": 5446 }, { "epoch": 0.81, "learning_rate": 1.790585476347203e-06, "loss": 0.6948, "step": 5447 }, { "epoch": 0.81, "learning_rate": 1.7878278794850634e-06, "loss": 0.6688, "step": 5448 }, { "epoch": 0.81, "learning_rate": 1.785072199217579e-06, "loss": 0.6124, "step": 5449 }, { "epoch": 0.81, "learning_rate": 1.7823184361878787e-06, "loss": 0.6681, "step": 5450 }, { "epoch": 0.81, "learning_rate": 1.7795665910386507e-06, "loss": 0.322, "step": 5451 }, { "epoch": 0.81, "learning_rate": 1.7768166644121332e-06, "loss": 0.7342, "step": 5452 }, { "epoch": 0.81, "learning_rate": 1.7740686569501163e-06, "loss": 0.7442, "step": 5453 }, { "epoch": 0.81, "learning_rate": 1.7713225692939462e-06, "loss": 0.7369, "step": 5454 }, { "epoch": 0.81, "learning_rate": 1.768578402084512e-06, "loss": 0.7115, "step": 5455 }, { "epoch": 0.81, "learning_rate": 1.7658361559622661e-06, "loss": 0.2991, "step": 5456 }, { "epoch": 0.81, "learning_rate": 1.7630958315671999e-06, "loss": 0.6677, "step": 5457 }, { "epoch": 0.81, "learning_rate": 1.7603574295388658e-06, "loss": 0.359, "step": 5458 }, { "epoch": 0.81, "learning_rate": 1.7576209505163656e-06, "loss": 0.6578, "step": 5459 }, { "epoch": 0.81, "learning_rate": 1.75488639513835e-06, "loss": 0.7153, "step": 5460 }, { "epoch": 0.81, "learning_rate": 1.7521537640430264e-06, "loss": 0.689, "step": 5461 }, { "epoch": 0.81, "learning_rate": 1.7494230578681425e-06, "loss": 0.6621, "step": 5462 }, { "epoch": 0.81, "learning_rate": 1.7466942772510064e-06, "loss": 0.6903, "step": 5463 }, { "epoch": 0.81, "learning_rate": 1.7439674228284765e-06, "loss": 0.656, "step": 5464 }, { "epoch": 0.82, "learning_rate": 1.7412424952369522e-06, "loss": 0.7827, "step": 5465 }, { "epoch": 0.82, "learning_rate": 1.7385194951123929e-06, "loss": 0.6589, "step": 5466 }, { "epoch": 0.82, "learning_rate": 1.7357984230903046e-06, "loss": 0.3172, "step": 5467 }, { "epoch": 0.82, "learning_rate": 1.733079279805745e-06, "loss": 0.7548, "step": 5468 }, { "epoch": 0.82, "learning_rate": 1.7303620658933207e-06, "loss": 0.6459, "step": 5469 }, { "epoch": 0.82, "learning_rate": 1.7276467819871845e-06, "loss": 0.6863, "step": 5470 }, { "epoch": 0.82, "learning_rate": 1.724933428721044e-06, "loss": 0.7107, "step": 5471 }, { "epoch": 0.82, "learning_rate": 1.7222220067281559e-06, "loss": 0.7728, "step": 5472 }, { "epoch": 0.82, "learning_rate": 1.7195125166413197e-06, "loss": 0.6276, "step": 5473 }, { "epoch": 0.82, "learning_rate": 1.7168049590928915e-06, "loss": 0.6551, "step": 5474 }, { "epoch": 0.82, "learning_rate": 1.7140993347147727e-06, "loss": 0.7082, "step": 5475 }, { "epoch": 0.82, "learning_rate": 1.711395644138416e-06, "loss": 0.6451, "step": 5476 }, { "epoch": 0.82, "learning_rate": 1.7086938879948222e-06, "loss": 0.6584, "step": 5477 }, { "epoch": 0.82, "learning_rate": 1.7059940669145348e-06, "loss": 0.6733, "step": 5478 }, { "epoch": 0.82, "learning_rate": 1.703296181527654e-06, "loss": 0.7404, "step": 5479 }, { "epoch": 0.82, "learning_rate": 1.700600232463827e-06, "loss": 0.5925, "step": 5480 }, { "epoch": 0.82, "learning_rate": 1.6979062203522378e-06, "loss": 0.7181, "step": 5481 }, { "epoch": 0.82, "learning_rate": 1.6952141458216398e-06, "loss": 0.7185, "step": 5482 }, { "epoch": 0.82, "learning_rate": 1.6925240095003126e-06, "loss": 0.7886, "step": 5483 }, { "epoch": 0.82, "learning_rate": 1.6898358120160951e-06, "loss": 0.6869, "step": 5484 }, { "epoch": 0.82, "learning_rate": 1.6871495539963756e-06, "loss": 0.6449, "step": 5485 }, { "epoch": 0.82, "learning_rate": 1.684465236068078e-06, "loss": 0.7387, "step": 5486 }, { "epoch": 0.82, "learning_rate": 1.6817828588576867e-06, "loss": 0.7272, "step": 5487 }, { "epoch": 0.82, "learning_rate": 1.679102422991221e-06, "loss": 0.6982, "step": 5488 }, { "epoch": 0.82, "learning_rate": 1.6764239290942585e-06, "loss": 0.7792, "step": 5489 }, { "epoch": 0.82, "learning_rate": 1.6737473777919211e-06, "loss": 0.663, "step": 5490 }, { "epoch": 0.82, "learning_rate": 1.671072769708868e-06, "loss": 0.7026, "step": 5491 }, { "epoch": 0.82, "learning_rate": 1.6684001054693167e-06, "loss": 0.7094, "step": 5492 }, { "epoch": 0.82, "learning_rate": 1.6657293856970213e-06, "loss": 0.6938, "step": 5493 }, { "epoch": 0.82, "learning_rate": 1.6630606110152891e-06, "loss": 0.7201, "step": 5494 }, { "epoch": 0.82, "learning_rate": 1.660393782046974e-06, "loss": 0.6999, "step": 5495 }, { "epoch": 0.82, "learning_rate": 1.6577288994144646e-06, "loss": 0.539, "step": 5496 }, { "epoch": 0.82, "learning_rate": 1.6550659637397138e-06, "loss": 0.6453, "step": 5497 }, { "epoch": 0.82, "learning_rate": 1.6524049756442029e-06, "loss": 0.7659, "step": 5498 }, { "epoch": 0.82, "learning_rate": 1.6497459357489676e-06, "loss": 0.7472, "step": 5499 }, { "epoch": 0.82, "learning_rate": 1.6470888446745892e-06, "loss": 0.6759, "step": 5500 }, { "epoch": 0.82, "learning_rate": 1.6444337030411862e-06, "loss": 0.6736, "step": 5501 }, { "epoch": 0.82, "learning_rate": 1.6417805114684325e-06, "loss": 0.7203, "step": 5502 }, { "epoch": 0.82, "learning_rate": 1.6391292705755402e-06, "loss": 0.7922, "step": 5503 }, { "epoch": 0.82, "learning_rate": 1.6364799809812692e-06, "loss": 0.6269, "step": 5504 }, { "epoch": 0.82, "learning_rate": 1.6338326433039252e-06, "loss": 0.6586, "step": 5505 }, { "epoch": 0.82, "learning_rate": 1.6311872581613508e-06, "loss": 0.7318, "step": 5506 }, { "epoch": 0.82, "learning_rate": 1.6285438261709407e-06, "loss": 0.733, "step": 5507 }, { "epoch": 0.82, "learning_rate": 1.625902347949634e-06, "loss": 0.7385, "step": 5508 }, { "epoch": 0.82, "learning_rate": 1.6232628241139058e-06, "loss": 0.6956, "step": 5509 }, { "epoch": 0.82, "learning_rate": 1.6206252552797862e-06, "loss": 0.6858, "step": 5510 }, { "epoch": 0.82, "learning_rate": 1.617989642062835e-06, "loss": 0.6444, "step": 5511 }, { "epoch": 0.82, "learning_rate": 1.615355985078171e-06, "loss": 0.738, "step": 5512 }, { "epoch": 0.82, "learning_rate": 1.6127242849404501e-06, "loss": 0.6866, "step": 5513 }, { "epoch": 0.82, "learning_rate": 1.6100945422638647e-06, "loss": 0.7109, "step": 5514 }, { "epoch": 0.82, "learning_rate": 1.6074667576621617e-06, "loss": 0.692, "step": 5515 }, { "epoch": 0.82, "learning_rate": 1.6048409317486213e-06, "loss": 0.6812, "step": 5516 }, { "epoch": 0.82, "learning_rate": 1.6022170651360725e-06, "loss": 0.7883, "step": 5517 }, { "epoch": 0.82, "learning_rate": 1.599595158436885e-06, "loss": 0.3015, "step": 5518 }, { "epoch": 0.82, "learning_rate": 1.5969752122629722e-06, "loss": 0.6693, "step": 5519 }, { "epoch": 0.82, "learning_rate": 1.5943572272257924e-06, "loss": 0.6709, "step": 5520 }, { "epoch": 0.82, "learning_rate": 1.5917412039363367e-06, "loss": 0.6994, "step": 5521 }, { "epoch": 0.82, "learning_rate": 1.5891271430051481e-06, "loss": 0.3268, "step": 5522 }, { "epoch": 0.82, "learning_rate": 1.5865150450423095e-06, "loss": 0.6148, "step": 5523 }, { "epoch": 0.82, "learning_rate": 1.5839049106574412e-06, "loss": 0.6646, "step": 5524 }, { "epoch": 0.82, "learning_rate": 1.581296740459709e-06, "loss": 0.6056, "step": 5525 }, { "epoch": 0.82, "learning_rate": 1.578690535057822e-06, "loss": 0.8419, "step": 5526 }, { "epoch": 0.82, "learning_rate": 1.5760862950600265e-06, "loss": 0.7706, "step": 5527 }, { "epoch": 0.82, "learning_rate": 1.5734840210741154e-06, "loss": 0.648, "step": 5528 }, { "epoch": 0.82, "learning_rate": 1.570883713707414e-06, "loss": 0.7103, "step": 5529 }, { "epoch": 0.82, "learning_rate": 1.5682853735667968e-06, "loss": 0.6703, "step": 5530 }, { "epoch": 0.82, "learning_rate": 1.565689001258679e-06, "loss": 0.7486, "step": 5531 }, { "epoch": 0.83, "learning_rate": 1.5630945973890089e-06, "loss": 0.5505, "step": 5532 }, { "epoch": 0.83, "learning_rate": 1.5605021625632832e-06, "loss": 0.7639, "step": 5533 }, { "epoch": 0.83, "learning_rate": 1.557911697386536e-06, "loss": 0.7233, "step": 5534 }, { "epoch": 0.83, "learning_rate": 1.5553232024633413e-06, "loss": 0.677, "step": 5535 }, { "epoch": 0.83, "learning_rate": 1.5527366783978171e-06, "loss": 0.7381, "step": 5536 }, { "epoch": 0.83, "learning_rate": 1.5501521257936147e-06, "loss": 0.6891, "step": 5537 }, { "epoch": 0.83, "learning_rate": 1.5475695452539296e-06, "loss": 0.6873, "step": 5538 }, { "epoch": 0.83, "learning_rate": 1.5449889373814986e-06, "loss": 0.7371, "step": 5539 }, { "epoch": 0.83, "learning_rate": 1.542410302778591e-06, "loss": 0.6493, "step": 5540 }, { "epoch": 0.83, "learning_rate": 1.5398336420470272e-06, "loss": 0.6407, "step": 5541 }, { "epoch": 0.83, "learning_rate": 1.5372589557881546e-06, "loss": 0.7009, "step": 5542 }, { "epoch": 0.83, "learning_rate": 1.53468624460287e-06, "loss": 0.7725, "step": 5543 }, { "epoch": 0.83, "learning_rate": 1.5321155090915996e-06, "loss": 0.6861, "step": 5544 }, { "epoch": 0.83, "learning_rate": 1.529546749854316e-06, "loss": 0.732, "step": 5545 }, { "epoch": 0.83, "learning_rate": 1.5269799674905306e-06, "loss": 0.6345, "step": 5546 }, { "epoch": 0.83, "learning_rate": 1.5244151625992865e-06, "loss": 0.6694, "step": 5547 }, { "epoch": 0.83, "learning_rate": 1.5218523357791703e-06, "loss": 0.8066, "step": 5548 }, { "epoch": 0.83, "learning_rate": 1.5192914876283093e-06, "loss": 0.6969, "step": 5549 }, { "epoch": 0.83, "learning_rate": 1.5167326187443632e-06, "loss": 0.7485, "step": 5550 }, { "epoch": 0.83, "learning_rate": 1.5141757297245363e-06, "loss": 0.7763, "step": 5551 }, { "epoch": 0.83, "learning_rate": 1.5116208211655624e-06, "loss": 0.7378, "step": 5552 }, { "epoch": 0.83, "learning_rate": 1.5090678936637214e-06, "loss": 0.7153, "step": 5553 }, { "epoch": 0.83, "learning_rate": 1.5065169478148278e-06, "loss": 0.7318, "step": 5554 }, { "epoch": 0.83, "learning_rate": 1.5039679842142263e-06, "loss": 0.7255, "step": 5555 }, { "epoch": 0.83, "learning_rate": 1.5014210034568154e-06, "loss": 0.6727, "step": 5556 }, { "epoch": 0.83, "learning_rate": 1.4988760061370156e-06, "loss": 0.6104, "step": 5557 }, { "epoch": 0.83, "learning_rate": 1.4963329928487901e-06, "loss": 0.7231, "step": 5558 }, { "epoch": 0.83, "learning_rate": 1.4937919641856436e-06, "loss": 0.6776, "step": 5559 }, { "epoch": 0.83, "learning_rate": 1.4912529207406068e-06, "loss": 0.7493, "step": 5560 }, { "epoch": 0.83, "learning_rate": 1.488715863106256e-06, "loss": 0.8059, "step": 5561 }, { "epoch": 0.83, "learning_rate": 1.486180791874703e-06, "loss": 0.6513, "step": 5562 }, { "epoch": 0.83, "learning_rate": 1.4836477076375922e-06, "loss": 0.7592, "step": 5563 }, { "epoch": 0.83, "learning_rate": 1.4811166109861096e-06, "loss": 0.6844, "step": 5564 }, { "epoch": 0.83, "learning_rate": 1.4785875025109708e-06, "loss": 0.6172, "step": 5565 }, { "epoch": 0.83, "learning_rate": 1.4760603828024312e-06, "loss": 0.6457, "step": 5566 }, { "epoch": 0.83, "learning_rate": 1.4735352524502856e-06, "loss": 0.7583, "step": 5567 }, { "epoch": 0.83, "learning_rate": 1.4710121120438558e-06, "loss": 0.7492, "step": 5568 }, { "epoch": 0.83, "learning_rate": 1.4684909621720068e-06, "loss": 0.6876, "step": 5569 }, { "epoch": 0.83, "learning_rate": 1.4659718034231318e-06, "loss": 0.7317, "step": 5570 }, { "epoch": 0.83, "learning_rate": 1.4634546363851688e-06, "loss": 0.773, "step": 5571 }, { "epoch": 0.83, "learning_rate": 1.4609394616455862e-06, "loss": 0.7026, "step": 5572 }, { "epoch": 0.83, "learning_rate": 1.458426279791383e-06, "loss": 0.6802, "step": 5573 }, { "epoch": 0.83, "learning_rate": 1.4559150914091014e-06, "loss": 0.6561, "step": 5574 }, { "epoch": 0.83, "learning_rate": 1.4534058970848096e-06, "loss": 0.7726, "step": 5575 }, { "epoch": 0.83, "learning_rate": 1.4508986974041173e-06, "loss": 0.6815, "step": 5576 }, { "epoch": 0.83, "learning_rate": 1.448393492952167e-06, "loss": 0.7066, "step": 5577 }, { "epoch": 0.83, "learning_rate": 1.4458902843136335e-06, "loss": 0.6445, "step": 5578 }, { "epoch": 0.83, "learning_rate": 1.4433890720727307e-06, "loss": 0.3309, "step": 5579 }, { "epoch": 0.83, "learning_rate": 1.4408898568131979e-06, "loss": 0.7184, "step": 5580 }, { "epoch": 0.83, "learning_rate": 1.4383926391183156e-06, "loss": 0.7637, "step": 5581 }, { "epoch": 0.83, "learning_rate": 1.4358974195708987e-06, "loss": 0.7536, "step": 5582 }, { "epoch": 0.83, "learning_rate": 1.4334041987532887e-06, "loss": 0.7399, "step": 5583 }, { "epoch": 0.83, "learning_rate": 1.4309129772473661e-06, "loss": 0.7381, "step": 5584 }, { "epoch": 0.83, "learning_rate": 1.4284237556345459e-06, "loss": 0.3217, "step": 5585 }, { "epoch": 0.83, "learning_rate": 1.4259365344957721e-06, "loss": 0.6739, "step": 5586 }, { "epoch": 0.83, "learning_rate": 1.4234513144115259e-06, "loss": 0.7862, "step": 5587 }, { "epoch": 0.83, "learning_rate": 1.4209680959618165e-06, "loss": 0.6622, "step": 5588 }, { "epoch": 0.83, "learning_rate": 1.4184868797261908e-06, "loss": 0.7315, "step": 5589 }, { "epoch": 0.83, "learning_rate": 1.4160076662837284e-06, "loss": 0.7562, "step": 5590 }, { "epoch": 0.83, "learning_rate": 1.4135304562130348e-06, "loss": 0.6376, "step": 5591 }, { "epoch": 0.83, "learning_rate": 1.4110552500922548e-06, "loss": 0.727, "step": 5592 }, { "epoch": 0.83, "learning_rate": 1.4085820484990642e-06, "loss": 0.7216, "step": 5593 }, { "epoch": 0.83, "learning_rate": 1.4061108520106702e-06, "loss": 0.6642, "step": 5594 }, { "epoch": 0.83, "learning_rate": 1.403641661203814e-06, "loss": 0.6565, "step": 5595 }, { "epoch": 0.83, "learning_rate": 1.401174476654762e-06, "loss": 0.7442, "step": 5596 }, { "epoch": 0.83, "learning_rate": 1.3987092989393213e-06, "loss": 0.7309, "step": 5597 }, { "epoch": 0.83, "learning_rate": 1.3962461286328233e-06, "loss": 0.3101, "step": 5598 }, { "epoch": 0.84, "learning_rate": 1.3937849663101333e-06, "loss": 0.6853, "step": 5599 }, { "epoch": 0.84, "learning_rate": 1.3913258125456552e-06, "loss": 0.7345, "step": 5600 }, { "epoch": 0.84, "learning_rate": 1.3888686679133113e-06, "loss": 0.7195, "step": 5601 }, { "epoch": 0.84, "learning_rate": 1.3864135329865657e-06, "loss": 0.6548, "step": 5602 }, { "epoch": 0.84, "learning_rate": 1.383960408338405e-06, "loss": 0.7382, "step": 5603 }, { "epoch": 0.84, "learning_rate": 1.3815092945413522e-06, "loss": 0.6237, "step": 5604 }, { "epoch": 0.84, "learning_rate": 1.379060192167463e-06, "loss": 0.7839, "step": 5605 }, { "epoch": 0.84, "learning_rate": 1.3766131017883144e-06, "loss": 0.7096, "step": 5606 }, { "epoch": 0.84, "learning_rate": 1.3741680239750222e-06, "loss": 0.6913, "step": 5607 }, { "epoch": 0.84, "learning_rate": 1.3717249592982307e-06, "loss": 0.6714, "step": 5608 }, { "epoch": 0.84, "learning_rate": 1.369283908328114e-06, "loss": 0.7103, "step": 5609 }, { "epoch": 0.84, "learning_rate": 1.3668448716343762e-06, "loss": 0.6369, "step": 5610 }, { "epoch": 0.84, "learning_rate": 1.3644078497862478e-06, "loss": 0.6217, "step": 5611 }, { "epoch": 0.84, "learning_rate": 1.3619728433524937e-06, "loss": 0.7067, "step": 5612 }, { "epoch": 0.84, "learning_rate": 1.3595398529014103e-06, "loss": 0.6862, "step": 5613 }, { "epoch": 0.84, "learning_rate": 1.3571088790008125e-06, "loss": 0.7534, "step": 5614 }, { "epoch": 0.84, "learning_rate": 1.3546799222180618e-06, "loss": 0.7216, "step": 5615 }, { "epoch": 0.84, "learning_rate": 1.3522529831200326e-06, "loss": 0.5909, "step": 5616 }, { "epoch": 0.84, "learning_rate": 1.3498280622731363e-06, "loss": 0.6906, "step": 5617 }, { "epoch": 0.84, "learning_rate": 1.3474051602433158e-06, "loss": 0.7322, "step": 5618 }, { "epoch": 0.84, "learning_rate": 1.344984277596033e-06, "loss": 0.6819, "step": 5619 }, { "epoch": 0.84, "learning_rate": 1.3425654148962885e-06, "loss": 0.6728, "step": 5620 }, { "epoch": 0.84, "learning_rate": 1.3401485727086105e-06, "loss": 0.7463, "step": 5621 }, { "epoch": 0.84, "learning_rate": 1.3377337515970435e-06, "loss": 0.7019, "step": 5622 }, { "epoch": 0.84, "learning_rate": 1.3353209521251808e-06, "loss": 0.7247, "step": 5623 }, { "epoch": 0.84, "learning_rate": 1.3329101748561246e-06, "loss": 0.7423, "step": 5624 }, { "epoch": 0.84, "learning_rate": 1.3305014203525157e-06, "loss": 0.7689, "step": 5625 }, { "epoch": 0.84, "learning_rate": 1.3280946891765233e-06, "loss": 0.3053, "step": 5626 }, { "epoch": 0.84, "learning_rate": 1.3256899818898372e-06, "loss": 0.7289, "step": 5627 }, { "epoch": 0.84, "learning_rate": 1.3232872990536827e-06, "loss": 0.7289, "step": 5628 }, { "epoch": 0.84, "learning_rate": 1.3208866412288023e-06, "loss": 0.6578, "step": 5629 }, { "epoch": 0.84, "learning_rate": 1.3184880089754815e-06, "loss": 0.752, "step": 5630 }, { "epoch": 0.84, "learning_rate": 1.3160914028535178e-06, "loss": 0.6972, "step": 5631 }, { "epoch": 0.84, "learning_rate": 1.3136968234222447e-06, "loss": 0.6946, "step": 5632 }, { "epoch": 0.84, "learning_rate": 1.3113042712405222e-06, "loss": 0.7178, "step": 5633 }, { "epoch": 0.84, "learning_rate": 1.30891374686673e-06, "loss": 0.6596, "step": 5634 }, { "epoch": 0.84, "learning_rate": 1.3065252508587834e-06, "loss": 0.7734, "step": 5635 }, { "epoch": 0.84, "learning_rate": 1.3041387837741192e-06, "loss": 0.718, "step": 5636 }, { "epoch": 0.84, "learning_rate": 1.3017543461697034e-06, "loss": 0.6969, "step": 5637 }, { "epoch": 0.84, "learning_rate": 1.2993719386020287e-06, "loss": 0.7005, "step": 5638 }, { "epoch": 0.84, "learning_rate": 1.2969915616271068e-06, "loss": 0.7641, "step": 5639 }, { "epoch": 0.84, "learning_rate": 1.2946132158004853e-06, "loss": 0.7039, "step": 5640 }, { "epoch": 0.84, "learning_rate": 1.292236901677234e-06, "loss": 0.6863, "step": 5641 }, { "epoch": 0.84, "learning_rate": 1.2898626198119456e-06, "loss": 0.6613, "step": 5642 }, { "epoch": 0.84, "learning_rate": 1.2874903707587416e-06, "loss": 0.6669, "step": 5643 }, { "epoch": 0.84, "learning_rate": 1.2851201550712678e-06, "loss": 0.7307, "step": 5644 }, { "epoch": 0.84, "learning_rate": 1.2827519733026982e-06, "loss": 0.6746, "step": 5645 }, { "epoch": 0.84, "learning_rate": 1.2803858260057311e-06, "loss": 0.3515, "step": 5646 }, { "epoch": 0.84, "learning_rate": 1.2780217137325835e-06, "loss": 0.7331, "step": 5647 }, { "epoch": 0.84, "learning_rate": 1.2756596370350072e-06, "loss": 0.7216, "step": 5648 }, { "epoch": 0.84, "learning_rate": 1.2732995964642746e-06, "loss": 0.6152, "step": 5649 }, { "epoch": 0.84, "learning_rate": 1.270941592571181e-06, "loss": 0.6403, "step": 5650 }, { "epoch": 0.84, "learning_rate": 1.2685856259060469e-06, "loss": 0.71, "step": 5651 }, { "epoch": 0.84, "learning_rate": 1.2662316970187228e-06, "loss": 0.3299, "step": 5652 }, { "epoch": 0.84, "learning_rate": 1.2638798064585755e-06, "loss": 0.7352, "step": 5653 }, { "epoch": 0.84, "learning_rate": 1.2615299547745053e-06, "loss": 0.6951, "step": 5654 }, { "epoch": 0.84, "learning_rate": 1.2591821425149253e-06, "loss": 0.7158, "step": 5655 }, { "epoch": 0.84, "learning_rate": 1.256836370227784e-06, "loss": 0.6479, "step": 5656 }, { "epoch": 0.84, "learning_rate": 1.254492638460545e-06, "loss": 0.7309, "step": 5657 }, { "epoch": 0.84, "learning_rate": 1.252150947760199e-06, "loss": 0.6621, "step": 5658 }, { "epoch": 0.84, "learning_rate": 1.2498112986732624e-06, "loss": 0.6783, "step": 5659 }, { "epoch": 0.84, "learning_rate": 1.247473691745773e-06, "loss": 0.3061, "step": 5660 }, { "epoch": 0.84, "learning_rate": 1.2451381275232943e-06, "loss": 0.7036, "step": 5661 }, { "epoch": 0.84, "learning_rate": 1.242804606550907e-06, "loss": 0.617, "step": 5662 }, { "epoch": 0.84, "learning_rate": 1.2404731293732208e-06, "loss": 0.6338, "step": 5663 }, { "epoch": 0.84, "learning_rate": 1.2381436965343685e-06, "loss": 0.6363, "step": 5664 }, { "epoch": 0.84, "learning_rate": 1.2358163085780006e-06, "loss": 0.7168, "step": 5665 }, { "epoch": 0.85, "learning_rate": 1.233490966047295e-06, "loss": 0.8125, "step": 5666 }, { "epoch": 0.85, "learning_rate": 1.2311676694849506e-06, "loss": 0.7375, "step": 5667 }, { "epoch": 0.85, "learning_rate": 1.2288464194331906e-06, "loss": 0.7333, "step": 5668 }, { "epoch": 0.85, "learning_rate": 1.2265272164337593e-06, "loss": 0.7224, "step": 5669 }, { "epoch": 0.85, "learning_rate": 1.2242100610279195e-06, "loss": 0.7021, "step": 5670 }, { "epoch": 0.85, "learning_rate": 1.2218949537564628e-06, "loss": 0.7457, "step": 5671 }, { "epoch": 0.85, "learning_rate": 1.2195818951597004e-06, "loss": 0.6576, "step": 5672 }, { "epoch": 0.85, "learning_rate": 1.2172708857774584e-06, "loss": 0.7015, "step": 5673 }, { "epoch": 0.85, "learning_rate": 1.2149619261490998e-06, "loss": 0.7385, "step": 5674 }, { "epoch": 0.85, "learning_rate": 1.2126550168134931e-06, "loss": 0.6903, "step": 5675 }, { "epoch": 0.85, "learning_rate": 1.2103501583090382e-06, "loss": 0.3099, "step": 5676 }, { "epoch": 0.85, "learning_rate": 1.2080473511736558e-06, "loss": 0.6641, "step": 5677 }, { "epoch": 0.85, "learning_rate": 1.2057465959447823e-06, "loss": 0.7164, "step": 5678 }, { "epoch": 0.85, "learning_rate": 1.2034478931593807e-06, "loss": 0.7151, "step": 5679 }, { "epoch": 0.85, "learning_rate": 1.2011512433539296e-06, "loss": 0.6782, "step": 5680 }, { "epoch": 0.85, "learning_rate": 1.1988566470644326e-06, "loss": 0.6794, "step": 5681 }, { "epoch": 0.85, "learning_rate": 1.1965641048264186e-06, "loss": 0.6877, "step": 5682 }, { "epoch": 0.85, "learning_rate": 1.194273617174926e-06, "loss": 0.6381, "step": 5683 }, { "epoch": 0.85, "learning_rate": 1.1919851846445218e-06, "loss": 0.7679, "step": 5684 }, { "epoch": 0.85, "learning_rate": 1.1896988077692895e-06, "loss": 0.7282, "step": 5685 }, { "epoch": 0.85, "learning_rate": 1.187414487082834e-06, "loss": 0.7631, "step": 5686 }, { "epoch": 0.85, "learning_rate": 1.185132223118285e-06, "loss": 0.3323, "step": 5687 }, { "epoch": 0.85, "learning_rate": 1.1828520164082791e-06, "loss": 0.6774, "step": 5688 }, { "epoch": 0.85, "learning_rate": 1.180573867484992e-06, "loss": 0.6932, "step": 5689 }, { "epoch": 0.85, "learning_rate": 1.1782977768801007e-06, "loss": 0.6476, "step": 5690 }, { "epoch": 0.85, "learning_rate": 1.1760237451248125e-06, "loss": 0.7856, "step": 5691 }, { "epoch": 0.85, "learning_rate": 1.1737517727498527e-06, "loss": 0.3219, "step": 5692 }, { "epoch": 0.85, "learning_rate": 1.1714818602854628e-06, "loss": 0.715, "step": 5693 }, { "epoch": 0.85, "learning_rate": 1.1692140082614045e-06, "loss": 0.7237, "step": 5694 }, { "epoch": 0.85, "learning_rate": 1.1669482172069646e-06, "loss": 0.6919, "step": 5695 }, { "epoch": 0.85, "learning_rate": 1.1646844876509355e-06, "loss": 0.7745, "step": 5696 }, { "epoch": 0.85, "learning_rate": 1.1624228201216459e-06, "loss": 0.6978, "step": 5697 }, { "epoch": 0.85, "learning_rate": 1.1601632151469278e-06, "loss": 0.7428, "step": 5698 }, { "epoch": 0.85, "learning_rate": 1.1579056732541415e-06, "loss": 0.8405, "step": 5699 }, { "epoch": 0.85, "learning_rate": 1.1556501949701627e-06, "loss": 0.724, "step": 5700 }, { "epoch": 0.85, "learning_rate": 1.153396780821382e-06, "loss": 0.7001, "step": 5701 }, { "epoch": 0.85, "learning_rate": 1.1511454313337145e-06, "loss": 0.8474, "step": 5702 }, { "epoch": 0.85, "learning_rate": 1.1488961470325898e-06, "loss": 0.6798, "step": 5703 }, { "epoch": 0.85, "learning_rate": 1.146648928442956e-06, "loss": 0.7151, "step": 5704 }, { "epoch": 0.85, "learning_rate": 1.1444037760892812e-06, "loss": 0.6303, "step": 5705 }, { "epoch": 0.85, "learning_rate": 1.1421606904955473e-06, "loss": 0.6622, "step": 5706 }, { "epoch": 0.85, "learning_rate": 1.1399196721852557e-06, "loss": 0.3106, "step": 5707 }, { "epoch": 0.85, "learning_rate": 1.1376807216814278e-06, "loss": 0.6165, "step": 5708 }, { "epoch": 0.85, "learning_rate": 1.1354438395065982e-06, "loss": 0.3061, "step": 5709 }, { "epoch": 0.85, "learning_rate": 1.13320902618282e-06, "loss": 0.6952, "step": 5710 }, { "epoch": 0.85, "learning_rate": 1.130976282231666e-06, "loss": 0.7025, "step": 5711 }, { "epoch": 0.85, "learning_rate": 1.1287456081742253e-06, "loss": 0.6716, "step": 5712 }, { "epoch": 0.85, "learning_rate": 1.1265170045311026e-06, "loss": 0.7532, "step": 5713 }, { "epoch": 0.85, "learning_rate": 1.1242904718224167e-06, "loss": 0.7232, "step": 5714 }, { "epoch": 0.85, "learning_rate": 1.1220660105678104e-06, "loss": 0.6512, "step": 5715 }, { "epoch": 0.85, "learning_rate": 1.119843621286434e-06, "loss": 0.6151, "step": 5716 }, { "epoch": 0.85, "learning_rate": 1.117623304496962e-06, "loss": 0.7143, "step": 5717 }, { "epoch": 0.85, "learning_rate": 1.1154050607175814e-06, "loss": 0.7215, "step": 5718 }, { "epoch": 0.85, "learning_rate": 1.113188890465996e-06, "loss": 0.8218, "step": 5719 }, { "epoch": 0.85, "learning_rate": 1.1109747942594285e-06, "loss": 0.671, "step": 5720 }, { "epoch": 0.85, "learning_rate": 1.108762772614611e-06, "loss": 0.683, "step": 5721 }, { "epoch": 0.85, "learning_rate": 1.1065528260477965e-06, "loss": 0.6892, "step": 5722 }, { "epoch": 0.85, "learning_rate": 1.1043449550747554e-06, "loss": 0.7944, "step": 5723 }, { "epoch": 0.85, "learning_rate": 1.1021391602107657e-06, "loss": 0.7274, "step": 5724 }, { "epoch": 0.85, "learning_rate": 1.0999354419706298e-06, "loss": 0.6661, "step": 5725 }, { "epoch": 0.85, "learning_rate": 1.09773380086866e-06, "loss": 0.688, "step": 5726 }, { "epoch": 0.85, "learning_rate": 1.095534237418685e-06, "loss": 0.7329, "step": 5727 }, { "epoch": 0.85, "learning_rate": 1.0933367521340532e-06, "loss": 0.6575, "step": 5728 }, { "epoch": 0.85, "learning_rate": 1.0911413455276187e-06, "loss": 0.7148, "step": 5729 }, { "epoch": 0.85, "learning_rate": 1.0889480181117573e-06, "loss": 0.8153, "step": 5730 }, { "epoch": 0.85, "learning_rate": 1.0867567703983607e-06, "loss": 0.7068, "step": 5731 }, { "epoch": 0.85, "learning_rate": 1.0845676028988283e-06, "loss": 0.7208, "step": 5732 }, { "epoch": 0.86, "learning_rate": 1.0823805161240785e-06, "loss": 0.6983, "step": 5733 }, { "epoch": 0.86, "learning_rate": 1.0801955105845464e-06, "loss": 0.7428, "step": 5734 }, { "epoch": 0.86, "learning_rate": 1.078012586790177e-06, "loss": 0.7237, "step": 5735 }, { "epoch": 0.86, "learning_rate": 1.075831745250433e-06, "loss": 0.713, "step": 5736 }, { "epoch": 0.86, "learning_rate": 1.073652986474285e-06, "loss": 0.7357, "step": 5737 }, { "epoch": 0.86, "learning_rate": 1.0714763109702276e-06, "loss": 0.603, "step": 5738 }, { "epoch": 0.86, "learning_rate": 1.0693017192462584e-06, "loss": 0.7326, "step": 5739 }, { "epoch": 0.86, "learning_rate": 1.067129211809893e-06, "loss": 0.657, "step": 5740 }, { "epoch": 0.86, "learning_rate": 1.0649587891681679e-06, "loss": 0.7512, "step": 5741 }, { "epoch": 0.86, "learning_rate": 1.06279045182762e-06, "loss": 0.7141, "step": 5742 }, { "epoch": 0.86, "learning_rate": 1.06062420029431e-06, "loss": 0.7489, "step": 5743 }, { "epoch": 0.86, "learning_rate": 1.058460035073804e-06, "loss": 0.7466, "step": 5744 }, { "epoch": 0.86, "learning_rate": 1.056297956671186e-06, "loss": 0.6979, "step": 5745 }, { "epoch": 0.86, "learning_rate": 1.0541379655910555e-06, "loss": 0.7663, "step": 5746 }, { "epoch": 0.86, "learning_rate": 1.0519800623375142e-06, "loss": 0.6148, "step": 5747 }, { "epoch": 0.86, "learning_rate": 1.0498242474141907e-06, "loss": 0.7211, "step": 5748 }, { "epoch": 0.86, "learning_rate": 1.0476705213242144e-06, "loss": 0.7297, "step": 5749 }, { "epoch": 0.86, "learning_rate": 1.0455188845702324e-06, "loss": 0.6869, "step": 5750 }, { "epoch": 0.86, "learning_rate": 1.0433693376544074e-06, "loss": 0.7459, "step": 5751 }, { "epoch": 0.86, "learning_rate": 1.0412218810784058e-06, "loss": 0.6899, "step": 5752 }, { "epoch": 0.86, "learning_rate": 1.0390765153434123e-06, "loss": 0.6068, "step": 5753 }, { "epoch": 0.86, "learning_rate": 1.0369332409501254e-06, "loss": 0.5931, "step": 5754 }, { "epoch": 0.86, "learning_rate": 1.0347920583987448e-06, "loss": 0.7112, "step": 5755 }, { "epoch": 0.86, "learning_rate": 1.0326529681889997e-06, "loss": 0.6907, "step": 5756 }, { "epoch": 0.86, "learning_rate": 1.0305159708201128e-06, "loss": 0.766, "step": 5757 }, { "epoch": 0.86, "learning_rate": 1.028381066790829e-06, "loss": 0.6305, "step": 5758 }, { "epoch": 0.86, "learning_rate": 1.0262482565994047e-06, "loss": 0.6914, "step": 5759 }, { "epoch": 0.86, "learning_rate": 1.024117540743601e-06, "loss": 0.6919, "step": 5760 }, { "epoch": 0.86, "learning_rate": 1.0219889197206968e-06, "loss": 0.7573, "step": 5761 }, { "epoch": 0.86, "learning_rate": 1.0198623940274766e-06, "loss": 0.6651, "step": 5762 }, { "epoch": 0.86, "learning_rate": 1.0177379641602414e-06, "loss": 0.6009, "step": 5763 }, { "epoch": 0.86, "learning_rate": 1.0156156306148024e-06, "loss": 0.7375, "step": 5764 }, { "epoch": 0.86, "learning_rate": 1.0134953938864744e-06, "loss": 0.6411, "step": 5765 }, { "epoch": 0.86, "learning_rate": 1.0113772544700927e-06, "loss": 0.5515, "step": 5766 }, { "epoch": 0.86, "learning_rate": 1.0092612128599954e-06, "loss": 0.6387, "step": 5767 }, { "epoch": 0.86, "learning_rate": 1.0071472695500339e-06, "loss": 0.7037, "step": 5768 }, { "epoch": 0.86, "learning_rate": 1.0050354250335736e-06, "loss": 0.5788, "step": 5769 }, { "epoch": 0.86, "learning_rate": 1.0029256798034803e-06, "loss": 0.6149, "step": 5770 }, { "epoch": 0.86, "learning_rate": 1.0008180343521434e-06, "loss": 0.2845, "step": 5771 }, { "epoch": 0.86, "learning_rate": 9.987124891714494e-07, "loss": 0.6944, "step": 5772 }, { "epoch": 0.86, "learning_rate": 9.966090447528032e-07, "loss": 0.5961, "step": 5773 }, { "epoch": 0.86, "learning_rate": 9.945077015871163e-07, "loss": 0.6974, "step": 5774 }, { "epoch": 0.86, "learning_rate": 9.924084601648076e-07, "loss": 0.7182, "step": 5775 }, { "epoch": 0.86, "learning_rate": 9.903113209758098e-07, "loss": 0.7432, "step": 5776 }, { "epoch": 0.86, "learning_rate": 9.882162845095612e-07, "loss": 0.7394, "step": 5777 }, { "epoch": 0.86, "learning_rate": 9.861233512550116e-07, "loss": 0.7598, "step": 5778 }, { "epoch": 0.86, "learning_rate": 9.84032521700623e-07, "loss": 0.6753, "step": 5779 }, { "epoch": 0.86, "learning_rate": 9.819437963343581e-07, "loss": 0.7171, "step": 5780 }, { "epoch": 0.86, "learning_rate": 9.798571756436937e-07, "loss": 0.689, "step": 5781 }, { "epoch": 0.86, "learning_rate": 9.77772660115619e-07, "loss": 0.6849, "step": 5782 }, { "epoch": 0.86, "learning_rate": 9.756902502366227e-07, "loss": 0.6953, "step": 5783 }, { "epoch": 0.86, "learning_rate": 9.736099464927108e-07, "loss": 0.709, "step": 5784 }, { "epoch": 0.86, "learning_rate": 9.715317493693921e-07, "loss": 0.7013, "step": 5785 }, { "epoch": 0.86, "learning_rate": 9.694556593516868e-07, "loss": 0.6361, "step": 5786 }, { "epoch": 0.86, "learning_rate": 9.67381676924124e-07, "loss": 0.6669, "step": 5787 }, { "epoch": 0.86, "learning_rate": 9.653098025707364e-07, "loss": 0.7616, "step": 5788 }, { "epoch": 0.86, "learning_rate": 9.632400367750694e-07, "loss": 0.69, "step": 5789 }, { "epoch": 0.86, "learning_rate": 9.611723800201744e-07, "loss": 0.6866, "step": 5790 }, { "epoch": 0.86, "learning_rate": 9.59106832788609e-07, "loss": 0.6539, "step": 5791 }, { "epoch": 0.86, "learning_rate": 9.570433955624414e-07, "loss": 0.6653, "step": 5792 }, { "epoch": 0.86, "learning_rate": 9.54982068823247e-07, "loss": 0.6577, "step": 5793 }, { "epoch": 0.86, "learning_rate": 9.52922853052105e-07, "loss": 0.6783, "step": 5794 }, { "epoch": 0.86, "learning_rate": 9.508657487296103e-07, "loss": 0.6758, "step": 5795 }, { "epoch": 0.86, "learning_rate": 9.488107563358529e-07, "loss": 0.6943, "step": 5796 }, { "epoch": 0.86, "learning_rate": 9.467578763504404e-07, "loss": 0.7549, "step": 5797 }, { "epoch": 0.86, "learning_rate": 9.447071092524818e-07, "loss": 0.6671, "step": 5798 }, { "epoch": 0.86, "learning_rate": 9.426584555205942e-07, "loss": 0.8353, "step": 5799 }, { "epoch": 0.87, "learning_rate": 9.406119156329019e-07, "loss": 0.6359, "step": 5800 }, { "epoch": 0.87, "learning_rate": 9.385674900670372e-07, "loss": 0.7366, "step": 5801 }, { "epoch": 0.87, "learning_rate": 9.365251793001396e-07, "loss": 0.7685, "step": 5802 }, { "epoch": 0.87, "learning_rate": 9.344849838088477e-07, "loss": 0.68, "step": 5803 }, { "epoch": 0.87, "learning_rate": 9.32446904069314e-07, "loss": 0.6667, "step": 5804 }, { "epoch": 0.87, "learning_rate": 9.304109405571982e-07, "loss": 0.6911, "step": 5805 }, { "epoch": 0.87, "learning_rate": 9.283770937476578e-07, "loss": 0.7237, "step": 5806 }, { "epoch": 0.87, "learning_rate": 9.26345364115363e-07, "loss": 0.7324, "step": 5807 }, { "epoch": 0.87, "learning_rate": 9.243157521344892e-07, "loss": 0.6765, "step": 5808 }, { "epoch": 0.87, "learning_rate": 9.222882582787163e-07, "loss": 0.671, "step": 5809 }, { "epoch": 0.87, "learning_rate": 9.202628830212323e-07, "loss": 0.7438, "step": 5810 }, { "epoch": 0.87, "learning_rate": 9.182396268347249e-07, "loss": 0.7552, "step": 5811 }, { "epoch": 0.87, "learning_rate": 9.162184901913917e-07, "loss": 0.6902, "step": 5812 }, { "epoch": 0.87, "learning_rate": 9.141994735629378e-07, "loss": 0.3071, "step": 5813 }, { "epoch": 0.87, "learning_rate": 9.12182577420565e-07, "loss": 0.7671, "step": 5814 }, { "epoch": 0.87, "learning_rate": 9.101678022349936e-07, "loss": 0.635, "step": 5815 }, { "epoch": 0.87, "learning_rate": 9.08155148476435e-07, "loss": 0.7875, "step": 5816 }, { "epoch": 0.87, "learning_rate": 9.061446166146127e-07, "loss": 0.7519, "step": 5817 }, { "epoch": 0.87, "learning_rate": 9.041362071187576e-07, "loss": 0.5912, "step": 5818 }, { "epoch": 0.87, "learning_rate": 9.021299204575961e-07, "loss": 0.5711, "step": 5819 }, { "epoch": 0.87, "learning_rate": 9.001257570993694e-07, "loss": 0.7133, "step": 5820 }, { "epoch": 0.87, "learning_rate": 8.98123717511813e-07, "loss": 0.668, "step": 5821 }, { "epoch": 0.87, "learning_rate": 8.961238021621776e-07, "loss": 0.675, "step": 5822 }, { "epoch": 0.87, "learning_rate": 8.941260115172112e-07, "loss": 0.7316, "step": 5823 }, { "epoch": 0.87, "learning_rate": 8.921303460431662e-07, "loss": 0.6983, "step": 5824 }, { "epoch": 0.87, "learning_rate": 8.901368062058013e-07, "loss": 0.7034, "step": 5825 }, { "epoch": 0.87, "learning_rate": 8.881453924703765e-07, "loss": 0.7183, "step": 5826 }, { "epoch": 0.87, "learning_rate": 8.861561053016576e-07, "loss": 0.7224, "step": 5827 }, { "epoch": 0.87, "learning_rate": 8.841689451639168e-07, "loss": 0.7612, "step": 5828 }, { "epoch": 0.87, "learning_rate": 8.821839125209197e-07, "loss": 0.6341, "step": 5829 }, { "epoch": 0.87, "learning_rate": 8.802010078359513e-07, "loss": 0.7145, "step": 5830 }, { "epoch": 0.87, "learning_rate": 8.782202315717836e-07, "loss": 0.738, "step": 5831 }, { "epoch": 0.87, "learning_rate": 8.762415841907024e-07, "loss": 0.8102, "step": 5832 }, { "epoch": 0.87, "learning_rate": 8.74265066154496e-07, "loss": 0.7032, "step": 5833 }, { "epoch": 0.87, "learning_rate": 8.722906779244478e-07, "loss": 0.7108, "step": 5834 }, { "epoch": 0.87, "learning_rate": 8.703184199613534e-07, "loss": 0.7651, "step": 5835 }, { "epoch": 0.87, "learning_rate": 8.68348292725506e-07, "loss": 0.6701, "step": 5836 }, { "epoch": 0.87, "learning_rate": 8.663802966767043e-07, "loss": 0.6322, "step": 5837 }, { "epoch": 0.87, "learning_rate": 8.644144322742487e-07, "loss": 0.6196, "step": 5838 }, { "epoch": 0.87, "learning_rate": 8.624506999769399e-07, "loss": 0.7197, "step": 5839 }, { "epoch": 0.87, "learning_rate": 8.604891002430826e-07, "loss": 0.7348, "step": 5840 }, { "epoch": 0.87, "learning_rate": 8.585296335304871e-07, "loss": 0.7198, "step": 5841 }, { "epoch": 0.87, "learning_rate": 8.565723002964577e-07, "loss": 0.8009, "step": 5842 }, { "epoch": 0.87, "learning_rate": 8.546171009978099e-07, "loss": 0.6769, "step": 5843 }, { "epoch": 0.87, "learning_rate": 8.526640360908556e-07, "loss": 0.6614, "step": 5844 }, { "epoch": 0.87, "learning_rate": 8.507131060314111e-07, "loss": 0.6609, "step": 5845 }, { "epoch": 0.87, "learning_rate": 8.487643112747945e-07, "loss": 0.6706, "step": 5846 }, { "epoch": 0.87, "learning_rate": 8.468176522758209e-07, "loss": 0.7255, "step": 5847 }, { "epoch": 0.87, "learning_rate": 8.448731294888124e-07, "loss": 0.755, "step": 5848 }, { "epoch": 0.87, "learning_rate": 8.429307433675926e-07, "loss": 0.6858, "step": 5849 }, { "epoch": 0.87, "learning_rate": 8.409904943654823e-07, "loss": 0.6469, "step": 5850 }, { "epoch": 0.87, "learning_rate": 8.390523829353048e-07, "loss": 0.3063, "step": 5851 }, { "epoch": 0.87, "learning_rate": 8.371164095293882e-07, "loss": 0.7233, "step": 5852 }, { "epoch": 0.87, "learning_rate": 8.35182574599559e-07, "loss": 0.6565, "step": 5853 }, { "epoch": 0.87, "learning_rate": 8.332508785971416e-07, "loss": 0.6255, "step": 5854 }, { "epoch": 0.87, "learning_rate": 8.313213219729654e-07, "loss": 0.7481, "step": 5855 }, { "epoch": 0.87, "learning_rate": 8.293939051773614e-07, "loss": 0.6595, "step": 5856 }, { "epoch": 0.87, "learning_rate": 8.274686286601552e-07, "loss": 0.7032, "step": 5857 }, { "epoch": 0.87, "learning_rate": 8.255454928706796e-07, "loss": 0.7219, "step": 5858 }, { "epoch": 0.87, "learning_rate": 8.236244982577634e-07, "loss": 0.6742, "step": 5859 }, { "epoch": 0.87, "learning_rate": 8.217056452697381e-07, "loss": 0.7568, "step": 5860 }, { "epoch": 0.87, "learning_rate": 8.197889343544363e-07, "loss": 0.7488, "step": 5861 }, { "epoch": 0.87, "learning_rate": 8.178743659591848e-07, "loss": 0.7852, "step": 5862 }, { "epoch": 0.87, "learning_rate": 8.159619405308172e-07, "loss": 0.7051, "step": 5863 }, { "epoch": 0.87, "learning_rate": 8.140516585156644e-07, "loss": 0.7871, "step": 5864 }, { "epoch": 0.87, "learning_rate": 8.12143520359554e-07, "loss": 0.3034, "step": 5865 }, { "epoch": 0.87, "learning_rate": 8.102375265078177e-07, "loss": 0.6487, "step": 5866 }, { "epoch": 0.88, "learning_rate": 8.083336774052852e-07, "loss": 0.7392, "step": 5867 }, { "epoch": 0.88, "learning_rate": 8.064319734962866e-07, "loss": 0.6283, "step": 5868 }, { "epoch": 0.88, "learning_rate": 8.045324152246514e-07, "loss": 0.68, "step": 5869 }, { "epoch": 0.88, "learning_rate": 8.026350030337027e-07, "loss": 0.6759, "step": 5870 }, { "epoch": 0.88, "learning_rate": 8.007397373662707e-07, "loss": 0.6924, "step": 5871 }, { "epoch": 0.88, "learning_rate": 7.988466186646815e-07, "loss": 0.665, "step": 5872 }, { "epoch": 0.88, "learning_rate": 7.969556473707563e-07, "loss": 0.741, "step": 5873 }, { "epoch": 0.88, "learning_rate": 7.950668239258241e-07, "loss": 0.6508, "step": 5874 }, { "epoch": 0.88, "learning_rate": 7.931801487707036e-07, "loss": 0.7962, "step": 5875 }, { "epoch": 0.88, "learning_rate": 7.912956223457158e-07, "loss": 0.6544, "step": 5876 }, { "epoch": 0.88, "learning_rate": 7.894132450906833e-07, "loss": 0.721, "step": 5877 }, { "epoch": 0.88, "learning_rate": 7.875330174449203e-07, "loss": 0.7182, "step": 5878 }, { "epoch": 0.88, "learning_rate": 7.856549398472457e-07, "loss": 0.6564, "step": 5879 }, { "epoch": 0.88, "learning_rate": 7.837790127359713e-07, "loss": 0.6423, "step": 5880 }, { "epoch": 0.88, "learning_rate": 7.819052365489088e-07, "loss": 0.6539, "step": 5881 }, { "epoch": 0.88, "learning_rate": 7.800336117233742e-07, "loss": 0.7184, "step": 5882 }, { "epoch": 0.88, "learning_rate": 7.781641386961714e-07, "loss": 0.6611, "step": 5883 }, { "epoch": 0.88, "learning_rate": 7.762968179036101e-07, "loss": 0.6825, "step": 5884 }, { "epoch": 0.88, "learning_rate": 7.744316497814896e-07, "loss": 0.7236, "step": 5885 }, { "epoch": 0.88, "learning_rate": 7.72568634765114e-07, "loss": 0.7343, "step": 5886 }, { "epoch": 0.88, "learning_rate": 7.707077732892842e-07, "loss": 0.7151, "step": 5887 }, { "epoch": 0.88, "learning_rate": 7.688490657882907e-07, "loss": 0.659, "step": 5888 }, { "epoch": 0.88, "learning_rate": 7.669925126959355e-07, "loss": 0.6971, "step": 5889 }, { "epoch": 0.88, "learning_rate": 7.651381144455039e-07, "loss": 0.3186, "step": 5890 }, { "epoch": 0.88, "learning_rate": 7.632858714697844e-07, "loss": 0.7022, "step": 5891 }, { "epoch": 0.88, "learning_rate": 7.614357842010655e-07, "loss": 0.7678, "step": 5892 }, { "epoch": 0.88, "learning_rate": 7.595878530711243e-07, "loss": 0.7322, "step": 5893 }, { "epoch": 0.88, "learning_rate": 7.577420785112411e-07, "loss": 0.6593, "step": 5894 }, { "epoch": 0.88, "learning_rate": 7.558984609521935e-07, "loss": 0.7688, "step": 5895 }, { "epoch": 0.88, "learning_rate": 7.540570008242509e-07, "loss": 0.6756, "step": 5896 }, { "epoch": 0.88, "learning_rate": 7.522176985571839e-07, "loss": 0.6283, "step": 5897 }, { "epoch": 0.88, "learning_rate": 7.503805545802556e-07, "loss": 0.725, "step": 5898 }, { "epoch": 0.88, "learning_rate": 7.485455693222277e-07, "loss": 0.7078, "step": 5899 }, { "epoch": 0.88, "learning_rate": 7.467127432113586e-07, "loss": 0.7238, "step": 5900 }, { "epoch": 0.88, "learning_rate": 7.448820766754006e-07, "loss": 0.7306, "step": 5901 }, { "epoch": 0.88, "learning_rate": 7.430535701416031e-07, "loss": 0.7052, "step": 5902 }, { "epoch": 0.88, "learning_rate": 7.412272240367091e-07, "loss": 0.647, "step": 5903 }, { "epoch": 0.88, "learning_rate": 7.394030387869644e-07, "loss": 0.7786, "step": 5904 }, { "epoch": 0.88, "learning_rate": 7.37581014818104e-07, "loss": 0.6736, "step": 5905 }, { "epoch": 0.88, "learning_rate": 7.357611525553588e-07, "loss": 0.7589, "step": 5906 }, { "epoch": 0.88, "learning_rate": 7.33943452423459e-07, "loss": 0.6248, "step": 5907 }, { "epoch": 0.88, "learning_rate": 7.321279148466254e-07, "loss": 0.6918, "step": 5908 }, { "epoch": 0.88, "learning_rate": 7.303145402485767e-07, "loss": 0.6682, "step": 5909 }, { "epoch": 0.88, "learning_rate": 7.285033290525279e-07, "loss": 0.6903, "step": 5910 }, { "epoch": 0.88, "learning_rate": 7.266942816811884e-07, "loss": 0.296, "step": 5911 }, { "epoch": 0.88, "learning_rate": 7.248873985567617e-07, "loss": 0.6642, "step": 5912 }, { "epoch": 0.88, "learning_rate": 7.230826801009449e-07, "loss": 0.678, "step": 5913 }, { "epoch": 0.88, "learning_rate": 7.212801267349323e-07, "loss": 0.6628, "step": 5914 }, { "epoch": 0.88, "learning_rate": 7.194797388794151e-07, "loss": 0.661, "step": 5915 }, { "epoch": 0.88, "learning_rate": 7.176815169545703e-07, "loss": 0.6694, "step": 5916 }, { "epoch": 0.88, "learning_rate": 7.158854613800792e-07, "loss": 0.7434, "step": 5917 }, { "epoch": 0.88, "learning_rate": 7.140915725751129e-07, "loss": 0.785, "step": 5918 }, { "epoch": 0.88, "learning_rate": 7.122998509583367e-07, "loss": 0.7662, "step": 5919 }, { "epoch": 0.88, "learning_rate": 7.105102969479138e-07, "loss": 0.664, "step": 5920 }, { "epoch": 0.88, "learning_rate": 7.087229109614934e-07, "loss": 0.6681, "step": 5921 }, { "epoch": 0.88, "learning_rate": 7.069376934162275e-07, "loss": 0.6937, "step": 5922 }, { "epoch": 0.88, "learning_rate": 7.051546447287594e-07, "loss": 0.5977, "step": 5923 }, { "epoch": 0.88, "learning_rate": 7.033737653152218e-07, "loss": 0.6394, "step": 5924 }, { "epoch": 0.88, "learning_rate": 7.015950555912465e-07, "loss": 0.7067, "step": 5925 }, { "epoch": 0.88, "learning_rate": 6.998185159719561e-07, "loss": 0.5888, "step": 5926 }, { "epoch": 0.88, "learning_rate": 6.980441468719689e-07, "loss": 0.6461, "step": 5927 }, { "epoch": 0.88, "learning_rate": 6.962719487053959e-07, "loss": 0.7222, "step": 5928 }, { "epoch": 0.88, "learning_rate": 6.945019218858385e-07, "loss": 0.7654, "step": 5929 }, { "epoch": 0.88, "learning_rate": 6.927340668263948e-07, "loss": 0.6795, "step": 5930 }, { "epoch": 0.88, "learning_rate": 6.909683839396563e-07, "loss": 0.7398, "step": 5931 }, { "epoch": 0.88, "learning_rate": 6.892048736377022e-07, "loss": 0.6582, "step": 5932 }, { "epoch": 0.88, "learning_rate": 6.874435363321152e-07, "loss": 0.6333, "step": 5933 }, { "epoch": 0.89, "learning_rate": 6.856843724339579e-07, "loss": 0.6839, "step": 5934 }, { "epoch": 0.89, "learning_rate": 6.839273823537973e-07, "loss": 0.6057, "step": 5935 }, { "epoch": 0.89, "learning_rate": 6.821725665016831e-07, "loss": 0.6531, "step": 5936 }, { "epoch": 0.89, "learning_rate": 6.804199252871657e-07, "loss": 0.692, "step": 5937 }, { "epoch": 0.89, "learning_rate": 6.786694591192844e-07, "loss": 0.7302, "step": 5938 }, { "epoch": 0.89, "learning_rate": 6.769211684065679e-07, "loss": 0.6332, "step": 5939 }, { "epoch": 0.89, "learning_rate": 6.751750535570445e-07, "loss": 0.6833, "step": 5940 }, { "epoch": 0.89, "learning_rate": 6.734311149782269e-07, "loss": 0.7095, "step": 5941 }, { "epoch": 0.89, "learning_rate": 6.716893530771263e-07, "loss": 0.7723, "step": 5942 }, { "epoch": 0.89, "learning_rate": 6.69949768260244e-07, "loss": 0.5881, "step": 5943 }, { "epoch": 0.89, "learning_rate": 6.682123609335689e-07, "loss": 0.639, "step": 5944 }, { "epoch": 0.89, "learning_rate": 6.664771315025864e-07, "loss": 0.6463, "step": 5945 }, { "epoch": 0.89, "learning_rate": 6.647440803722738e-07, "loss": 0.6302, "step": 5946 }, { "epoch": 0.89, "learning_rate": 6.630132079470953e-07, "loss": 0.6337, "step": 5947 }, { "epoch": 0.89, "learning_rate": 6.612845146310143e-07, "loss": 0.6683, "step": 5948 }, { "epoch": 0.89, "learning_rate": 6.595580008274782e-07, "loss": 0.7072, "step": 5949 }, { "epoch": 0.89, "learning_rate": 6.578336669394303e-07, "loss": 0.6938, "step": 5950 }, { "epoch": 0.89, "learning_rate": 6.56111513369303e-07, "loss": 0.6508, "step": 5951 }, { "epoch": 0.89, "learning_rate": 6.543915405190193e-07, "loss": 0.7033, "step": 5952 }, { "epoch": 0.89, "learning_rate": 6.526737487899959e-07, "loss": 0.3126, "step": 5953 }, { "epoch": 0.89, "learning_rate": 6.509581385831398e-07, "loss": 0.7539, "step": 5954 }, { "epoch": 0.89, "learning_rate": 6.492447102988442e-07, "loss": 0.6248, "step": 5955 }, { "epoch": 0.89, "learning_rate": 6.475334643370013e-07, "loss": 0.668, "step": 5956 }, { "epoch": 0.89, "learning_rate": 6.458244010969872e-07, "loss": 0.6808, "step": 5957 }, { "epoch": 0.89, "learning_rate": 6.441175209776718e-07, "loss": 0.2641, "step": 5958 }, { "epoch": 0.89, "learning_rate": 6.424128243774152e-07, "loss": 0.7267, "step": 5959 }, { "epoch": 0.89, "learning_rate": 6.407103116940649e-07, "loss": 0.6561, "step": 5960 }, { "epoch": 0.89, "learning_rate": 6.390099833249652e-07, "loss": 0.7242, "step": 5961 }, { "epoch": 0.89, "learning_rate": 6.373118396669397e-07, "loss": 0.7488, "step": 5962 }, { "epoch": 0.89, "learning_rate": 6.35615881116316e-07, "loss": 0.6535, "step": 5963 }, { "epoch": 0.89, "learning_rate": 6.339221080689039e-07, "loss": 0.6998, "step": 5964 }, { "epoch": 0.89, "learning_rate": 6.322305209200008e-07, "loss": 0.6597, "step": 5965 }, { "epoch": 0.89, "learning_rate": 6.305411200644007e-07, "loss": 0.5868, "step": 5966 }, { "epoch": 0.89, "learning_rate": 6.288539058963794e-07, "loss": 0.7399, "step": 5967 }, { "epoch": 0.89, "learning_rate": 6.271688788097097e-07, "loss": 0.7425, "step": 5968 }, { "epoch": 0.89, "learning_rate": 6.254860391976492e-07, "loss": 0.7172, "step": 5969 }, { "epoch": 0.89, "learning_rate": 6.238053874529493e-07, "loss": 0.6745, "step": 5970 }, { "epoch": 0.89, "learning_rate": 6.221269239678474e-07, "loss": 0.7763, "step": 5971 }, { "epoch": 0.89, "learning_rate": 6.2045064913407e-07, "loss": 0.6746, "step": 5972 }, { "epoch": 0.89, "learning_rate": 6.187765633428333e-07, "loss": 0.6328, "step": 5973 }, { "epoch": 0.89, "learning_rate": 6.171046669848469e-07, "loss": 0.7405, "step": 5974 }, { "epoch": 0.89, "learning_rate": 6.154349604503008e-07, "loss": 0.7068, "step": 5975 }, { "epoch": 0.89, "learning_rate": 6.137674441288811e-07, "loss": 0.6344, "step": 5976 }, { "epoch": 0.89, "learning_rate": 6.121021184097609e-07, "loss": 0.7731, "step": 5977 }, { "epoch": 0.89, "learning_rate": 6.104389836816016e-07, "loss": 0.7145, "step": 5978 }, { "epoch": 0.89, "learning_rate": 6.087780403325539e-07, "loss": 0.7553, "step": 5979 }, { "epoch": 0.89, "learning_rate": 6.071192887502542e-07, "loss": 0.6304, "step": 5980 }, { "epoch": 0.89, "learning_rate": 6.054627293218296e-07, "loss": 0.6314, "step": 5981 }, { "epoch": 0.89, "learning_rate": 6.038083624338997e-07, "loss": 0.6515, "step": 5982 }, { "epoch": 0.89, "learning_rate": 6.021561884725635e-07, "loss": 0.7156, "step": 5983 }, { "epoch": 0.89, "learning_rate": 6.005062078234148e-07, "loss": 0.6747, "step": 5984 }, { "epoch": 0.89, "learning_rate": 5.988584208715331e-07, "loss": 0.6349, "step": 5985 }, { "epoch": 0.89, "learning_rate": 5.972128280014877e-07, "loss": 0.6289, "step": 5986 }, { "epoch": 0.89, "learning_rate": 5.955694295973358e-07, "loss": 0.7113, "step": 5987 }, { "epoch": 0.89, "learning_rate": 5.939282260426182e-07, "loss": 0.6216, "step": 5988 }, { "epoch": 0.89, "learning_rate": 5.922892177203688e-07, "loss": 0.6631, "step": 5989 }, { "epoch": 0.89, "learning_rate": 5.906524050131058e-07, "loss": 0.6453, "step": 5990 }, { "epoch": 0.89, "learning_rate": 5.890177883028348e-07, "loss": 0.6089, "step": 5991 }, { "epoch": 0.89, "learning_rate": 5.873853679710528e-07, "loss": 0.6197, "step": 5992 }, { "epoch": 0.89, "learning_rate": 5.857551443987409e-07, "loss": 0.6205, "step": 5993 }, { "epoch": 0.89, "learning_rate": 5.841271179663699e-07, "loss": 0.7196, "step": 5994 }, { "epoch": 0.89, "learning_rate": 5.825012890538917e-07, "loss": 0.8116, "step": 5995 }, { "epoch": 0.89, "learning_rate": 5.808776580407527e-07, "loss": 0.7199, "step": 5996 }, { "epoch": 0.89, "learning_rate": 5.792562253058842e-07, "loss": 0.6909, "step": 5997 }, { "epoch": 0.89, "learning_rate": 5.776369912277013e-07, "loss": 0.716, "step": 5998 }, { "epoch": 0.89, "learning_rate": 5.760199561841085e-07, "loss": 0.79, "step": 5999 }, { "epoch": 0.89, "learning_rate": 5.744051205524992e-07, "loss": 0.7636, "step": 6000 }, { "epoch": 0.9, "learning_rate": 5.727924847097488e-07, "loss": 0.3578, "step": 6001 }, { "epoch": 0.9, "learning_rate": 5.711820490322251e-07, "loss": 0.6239, "step": 6002 }, { "epoch": 0.9, "learning_rate": 5.695738138957741e-07, "loss": 0.7201, "step": 6003 }, { "epoch": 0.9, "learning_rate": 5.679677796757365e-07, "loss": 0.6608, "step": 6004 }, { "epoch": 0.9, "learning_rate": 5.663639467469362e-07, "loss": 0.6538, "step": 6005 }, { "epoch": 0.9, "learning_rate": 5.64762315483679e-07, "loss": 0.6843, "step": 6006 }, { "epoch": 0.9, "learning_rate": 5.631628862597671e-07, "loss": 0.6668, "step": 6007 }, { "epoch": 0.9, "learning_rate": 5.615656594484786e-07, "loss": 0.6967, "step": 6008 }, { "epoch": 0.9, "learning_rate": 5.599706354225831e-07, "loss": 0.7533, "step": 6009 }, { "epoch": 0.9, "learning_rate": 5.58377814554335e-07, "loss": 0.7783, "step": 6010 }, { "epoch": 0.9, "learning_rate": 5.567871972154726e-07, "loss": 0.6848, "step": 6011 }, { "epoch": 0.9, "learning_rate": 5.551987837772221e-07, "loss": 0.6876, "step": 6012 }, { "epoch": 0.9, "learning_rate": 5.536125746102972e-07, "loss": 0.6991, "step": 6013 }, { "epoch": 0.9, "learning_rate": 5.520285700848893e-07, "loss": 0.7624, "step": 6014 }, { "epoch": 0.9, "learning_rate": 5.504467705706862e-07, "loss": 0.621, "step": 6015 }, { "epoch": 0.9, "learning_rate": 5.488671764368525e-07, "loss": 0.6865, "step": 6016 }, { "epoch": 0.9, "learning_rate": 5.472897880520423e-07, "loss": 0.7684, "step": 6017 }, { "epoch": 0.9, "learning_rate": 5.457146057843953e-07, "loss": 0.6774, "step": 6018 }, { "epoch": 0.9, "learning_rate": 5.441416300015312e-07, "loss": 0.6689, "step": 6019 }, { "epoch": 0.9, "learning_rate": 5.425708610705615e-07, "loss": 0.7203, "step": 6020 }, { "epoch": 0.9, "learning_rate": 5.410022993580755e-07, "loss": 0.6998, "step": 6021 }, { "epoch": 0.9, "learning_rate": 5.394359452301568e-07, "loss": 0.7187, "step": 6022 }, { "epoch": 0.9, "learning_rate": 5.37871799052364e-07, "loss": 0.7072, "step": 6023 }, { "epoch": 0.9, "learning_rate": 5.363098611897466e-07, "loss": 0.7553, "step": 6024 }, { "epoch": 0.9, "learning_rate": 5.347501320068382e-07, "loss": 0.7551, "step": 6025 }, { "epoch": 0.9, "learning_rate": 5.331926118676533e-07, "loss": 0.6225, "step": 6026 }, { "epoch": 0.9, "learning_rate": 5.316373011356934e-07, "loss": 0.757, "step": 6027 }, { "epoch": 0.9, "learning_rate": 5.300842001739459e-07, "loss": 0.6136, "step": 6028 }, { "epoch": 0.9, "learning_rate": 5.285333093448764e-07, "loss": 0.7002, "step": 6029 }, { "epoch": 0.9, "learning_rate": 5.269846290104452e-07, "loss": 0.7168, "step": 6030 }, { "epoch": 0.9, "learning_rate": 5.254381595320857e-07, "loss": 0.7087, "step": 6031 }, { "epoch": 0.9, "learning_rate": 5.238939012707223e-07, "loss": 0.7781, "step": 6032 }, { "epoch": 0.9, "learning_rate": 5.223518545867612e-07, "loss": 0.7269, "step": 6033 }, { "epoch": 0.9, "learning_rate": 5.208120198400901e-07, "loss": 0.7344, "step": 6034 }, { "epoch": 0.9, "learning_rate": 5.192743973900838e-07, "loss": 0.6657, "step": 6035 }, { "epoch": 0.9, "learning_rate": 5.177389875956008e-07, "loss": 0.7159, "step": 6036 }, { "epoch": 0.9, "learning_rate": 5.16205790814982e-07, "loss": 0.6858, "step": 6037 }, { "epoch": 0.9, "learning_rate": 5.146748074060526e-07, "loss": 0.6564, "step": 6038 }, { "epoch": 0.9, "learning_rate": 5.131460377261178e-07, "loss": 0.7101, "step": 6039 }, { "epoch": 0.9, "learning_rate": 5.116194821319698e-07, "loss": 0.7436, "step": 6040 }, { "epoch": 0.9, "learning_rate": 5.10095140979886e-07, "loss": 0.6718, "step": 6041 }, { "epoch": 0.9, "learning_rate": 5.085730146256219e-07, "loss": 0.6764, "step": 6042 }, { "epoch": 0.9, "learning_rate": 5.070531034244175e-07, "loss": 0.7031, "step": 6043 }, { "epoch": 0.9, "learning_rate": 5.05535407730997e-07, "loss": 0.6448, "step": 6044 }, { "epoch": 0.9, "learning_rate": 5.040199278995695e-07, "loss": 0.6933, "step": 6045 }, { "epoch": 0.9, "learning_rate": 5.025066642838239e-07, "loss": 0.7102, "step": 6046 }, { "epoch": 0.9, "learning_rate": 5.009956172369312e-07, "loss": 0.6797, "step": 6047 }, { "epoch": 0.9, "learning_rate": 4.994867871115483e-07, "loss": 0.6503, "step": 6048 }, { "epoch": 0.9, "learning_rate": 4.9798017425981e-07, "loss": 0.6688, "step": 6049 }, { "epoch": 0.9, "learning_rate": 4.964757790333397e-07, "loss": 0.7057, "step": 6050 }, { "epoch": 0.9, "learning_rate": 4.949736017832385e-07, "loss": 0.6949, "step": 6051 }, { "epoch": 0.9, "learning_rate": 4.934736428600917e-07, "loss": 0.5959, "step": 6052 }, { "epoch": 0.9, "learning_rate": 4.919759026139681e-07, "loss": 0.6894, "step": 6053 }, { "epoch": 0.9, "learning_rate": 4.904803813944147e-07, "loss": 0.6702, "step": 6054 }, { "epoch": 0.9, "learning_rate": 4.889870795504647e-07, "loss": 0.6036, "step": 6055 }, { "epoch": 0.9, "learning_rate": 4.874959974306326e-07, "loss": 0.6814, "step": 6056 }, { "epoch": 0.9, "learning_rate": 4.86007135382911e-07, "loss": 0.5843, "step": 6057 }, { "epoch": 0.9, "learning_rate": 4.845204937547798e-07, "loss": 0.6404, "step": 6058 }, { "epoch": 0.9, "learning_rate": 4.830360728931982e-07, "loss": 0.7309, "step": 6059 }, { "epoch": 0.9, "learning_rate": 4.815538731446068e-07, "loss": 0.7094, "step": 6060 }, { "epoch": 0.9, "learning_rate": 4.800738948549288e-07, "loss": 0.6595, "step": 6061 }, { "epoch": 0.9, "learning_rate": 4.78596138369567e-07, "loss": 0.6692, "step": 6062 }, { "epoch": 0.9, "learning_rate": 4.771206040334086e-07, "loss": 0.7066, "step": 6063 }, { "epoch": 0.9, "learning_rate": 4.756472921908206e-07, "loss": 0.68, "step": 6064 }, { "epoch": 0.9, "learning_rate": 4.7417620318564894e-07, "loss": 0.6956, "step": 6065 }, { "epoch": 0.9, "learning_rate": 4.7270733736122454e-07, "loss": 0.7459, "step": 6066 }, { "epoch": 0.9, "learning_rate": 4.7124069506035873e-07, "loss": 0.7035, "step": 6067 }, { "epoch": 0.9, "learning_rate": 4.697762766253433e-07, "loss": 0.7215, "step": 6068 }, { "epoch": 0.91, "learning_rate": 4.683140823979515e-07, "loss": 0.7187, "step": 6069 }, { "epoch": 0.91, "learning_rate": 4.668541127194348e-07, "loss": 0.7391, "step": 6070 }, { "epoch": 0.91, "learning_rate": 4.6539636793052845e-07, "loss": 0.7249, "step": 6071 }, { "epoch": 0.91, "learning_rate": 4.639408483714503e-07, "loss": 0.7537, "step": 6072 }, { "epoch": 0.91, "learning_rate": 4.6248755438189076e-07, "loss": 0.7741, "step": 6073 }, { "epoch": 0.91, "learning_rate": 4.610364863010319e-07, "loss": 0.7726, "step": 6074 }, { "epoch": 0.91, "learning_rate": 4.5958764446752733e-07, "loss": 0.647, "step": 6075 }, { "epoch": 0.91, "learning_rate": 4.5814102921951767e-07, "loss": 0.63, "step": 6076 }, { "epoch": 0.91, "learning_rate": 4.566966408946161e-07, "loss": 0.763, "step": 6077 }, { "epoch": 0.91, "learning_rate": 4.55254479829923e-07, "loss": 0.7799, "step": 6078 }, { "epoch": 0.91, "learning_rate": 4.5381454636201917e-07, "loss": 0.7484, "step": 6079 }, { "epoch": 0.91, "learning_rate": 4.523768408269569e-07, "loss": 0.7205, "step": 6080 }, { "epoch": 0.91, "learning_rate": 4.5094136356027996e-07, "loss": 0.6652, "step": 6081 }, { "epoch": 0.91, "learning_rate": 4.4950811489700487e-07, "loss": 0.6393, "step": 6082 }, { "epoch": 0.91, "learning_rate": 4.4807709517162843e-07, "loss": 0.7663, "step": 6083 }, { "epoch": 0.91, "learning_rate": 4.4664830471813135e-07, "loss": 0.6668, "step": 6084 }, { "epoch": 0.91, "learning_rate": 4.452217438699691e-07, "loss": 0.7121, "step": 6085 }, { "epoch": 0.91, "learning_rate": 4.4379741296007974e-07, "loss": 0.6976, "step": 6086 }, { "epoch": 0.91, "learning_rate": 4.423753123208807e-07, "loss": 0.6649, "step": 6087 }, { "epoch": 0.91, "learning_rate": 4.4095544228426657e-07, "loss": 0.2925, "step": 6088 }, { "epoch": 0.91, "learning_rate": 4.3953780318161557e-07, "loss": 0.6256, "step": 6089 }, { "epoch": 0.91, "learning_rate": 4.381223953437819e-07, "loss": 0.6572, "step": 6090 }, { "epoch": 0.91, "learning_rate": 4.3670921910109907e-07, "loss": 0.6771, "step": 6091 }, { "epoch": 0.91, "learning_rate": 4.3529827478338337e-07, "loss": 0.6677, "step": 6092 }, { "epoch": 0.91, "learning_rate": 4.3388956271992467e-07, "loss": 0.6555, "step": 6093 }, { "epoch": 0.91, "learning_rate": 4.3248308323949553e-07, "loss": 0.6783, "step": 6094 }, { "epoch": 0.91, "learning_rate": 4.3107883667034667e-07, "loss": 0.6635, "step": 6095 }, { "epoch": 0.91, "learning_rate": 4.296768233402071e-07, "loss": 0.6959, "step": 6096 }, { "epoch": 0.91, "learning_rate": 4.282770435762873e-07, "loss": 0.7804, "step": 6097 }, { "epoch": 0.91, "learning_rate": 4.268794977052726e-07, "loss": 0.6732, "step": 6098 }, { "epoch": 0.91, "learning_rate": 4.254841860533276e-07, "loss": 0.6924, "step": 6099 }, { "epoch": 0.91, "learning_rate": 4.2409110894610084e-07, "loss": 0.7573, "step": 6100 }, { "epoch": 0.91, "learning_rate": 4.227002667087099e-07, "loss": 0.7044, "step": 6101 }, { "epoch": 0.91, "learning_rate": 4.2131165966575846e-07, "loss": 0.6663, "step": 6102 }, { "epoch": 0.91, "learning_rate": 4.1992528814132515e-07, "loss": 0.6684, "step": 6103 }, { "epoch": 0.91, "learning_rate": 4.1854115245896886e-07, "loss": 0.6912, "step": 6104 }, { "epoch": 0.91, "learning_rate": 4.171592529417268e-07, "loss": 0.638, "step": 6105 }, { "epoch": 0.91, "learning_rate": 4.1577958991210977e-07, "loss": 0.7564, "step": 6106 }, { "epoch": 0.91, "learning_rate": 4.144021636921125e-07, "loss": 0.7722, "step": 6107 }, { "epoch": 0.91, "learning_rate": 4.130269746032034e-07, "loss": 0.6877, "step": 6108 }, { "epoch": 0.91, "learning_rate": 4.116540229663313e-07, "loss": 0.6414, "step": 6109 }, { "epoch": 0.91, "learning_rate": 4.102833091019209e-07, "loss": 0.2806, "step": 6110 }, { "epoch": 0.91, "learning_rate": 4.089148333298776e-07, "loss": 0.6865, "step": 6111 }, { "epoch": 0.91, "learning_rate": 4.075485959695824e-07, "loss": 0.7145, "step": 6112 }, { "epoch": 0.91, "learning_rate": 4.0618459733989256e-07, "loss": 0.7246, "step": 6113 }, { "epoch": 0.91, "learning_rate": 4.0482283775914567e-07, "loss": 0.6558, "step": 6114 }, { "epoch": 0.91, "learning_rate": 4.034633175451552e-07, "loss": 0.6356, "step": 6115 }, { "epoch": 0.91, "learning_rate": 4.021060370152119e-07, "loss": 0.7556, "step": 6116 }, { "epoch": 0.91, "learning_rate": 4.007509964860834e-07, "loss": 0.6822, "step": 6117 }, { "epoch": 0.91, "learning_rate": 3.9939819627401676e-07, "loss": 0.6899, "step": 6118 }, { "epoch": 0.91, "learning_rate": 3.980476366947339e-07, "loss": 0.6767, "step": 6119 }, { "epoch": 0.91, "learning_rate": 3.96699318063436e-07, "loss": 0.5987, "step": 6120 }, { "epoch": 0.91, "learning_rate": 3.9535324069479794e-07, "loss": 0.7841, "step": 6121 }, { "epoch": 0.91, "learning_rate": 3.940094049029741e-07, "loss": 0.7089, "step": 6122 }, { "epoch": 0.91, "learning_rate": 3.926678110015969e-07, "loss": 0.6982, "step": 6123 }, { "epoch": 0.91, "learning_rate": 3.913284593037714e-07, "loss": 0.7307, "step": 6124 }, { "epoch": 0.91, "learning_rate": 3.899913501220809e-07, "loss": 0.6988, "step": 6125 }, { "epoch": 0.91, "learning_rate": 3.886564837685891e-07, "loss": 0.6381, "step": 6126 }, { "epoch": 0.91, "learning_rate": 3.873238605548313e-07, "loss": 0.6318, "step": 6127 }, { "epoch": 0.91, "learning_rate": 3.859934807918242e-07, "loss": 0.6469, "step": 6128 }, { "epoch": 0.91, "learning_rate": 3.846653447900528e-07, "loss": 0.7756, "step": 6129 }, { "epoch": 0.91, "learning_rate": 3.8333945285948805e-07, "loss": 0.713, "step": 6130 }, { "epoch": 0.91, "learning_rate": 3.8201580530957017e-07, "loss": 0.7108, "step": 6131 }, { "epoch": 0.91, "learning_rate": 3.806944024492176e-07, "loss": 0.6164, "step": 6132 }, { "epoch": 0.91, "learning_rate": 3.793752445868304e-07, "loss": 0.7473, "step": 6133 }, { "epoch": 0.91, "learning_rate": 3.780583320302733e-07, "loss": 0.7077, "step": 6134 }, { "epoch": 0.91, "learning_rate": 3.767436650868994e-07, "loss": 0.7051, "step": 6135 }, { "epoch": 0.92, "learning_rate": 3.754312440635255e-07, "loss": 0.7067, "step": 6136 }, { "epoch": 0.92, "learning_rate": 3.741210692664554e-07, "loss": 0.6372, "step": 6137 }, { "epoch": 0.92, "learning_rate": 3.728131410014624e-07, "loss": 0.8013, "step": 6138 }, { "epoch": 0.92, "learning_rate": 3.7150745957379554e-07, "loss": 0.7315, "step": 6139 }, { "epoch": 0.92, "learning_rate": 3.702040252881811e-07, "loss": 0.6517, "step": 6140 }, { "epoch": 0.92, "learning_rate": 3.6890283844882137e-07, "loss": 0.6593, "step": 6141 }, { "epoch": 0.92, "learning_rate": 3.676038993593933e-07, "loss": 0.7301, "step": 6142 }, { "epoch": 0.92, "learning_rate": 3.663072083230512e-07, "loss": 0.3238, "step": 6143 }, { "epoch": 0.92, "learning_rate": 3.650127656424185e-07, "loss": 0.643, "step": 6144 }, { "epoch": 0.92, "learning_rate": 3.637205716196013e-07, "loss": 0.7453, "step": 6145 }, { "epoch": 0.92, "learning_rate": 3.6243062655617723e-07, "loss": 0.7309, "step": 6146 }, { "epoch": 0.92, "learning_rate": 3.6114293075319774e-07, "loss": 0.7651, "step": 6147 }, { "epoch": 0.92, "learning_rate": 3.5985748451119463e-07, "loss": 0.6883, "step": 6148 }, { "epoch": 0.92, "learning_rate": 3.58574288130169e-07, "loss": 0.7764, "step": 6149 }, { "epoch": 0.92, "learning_rate": 3.572933419095992e-07, "loss": 0.2939, "step": 6150 }, { "epoch": 0.92, "learning_rate": 3.560146461484404e-07, "loss": 0.7279, "step": 6151 }, { "epoch": 0.92, "learning_rate": 3.5473820114511834e-07, "loss": 0.6186, "step": 6152 }, { "epoch": 0.92, "learning_rate": 3.5346400719753483e-07, "loss": 0.697, "step": 6153 }, { "epoch": 0.92, "learning_rate": 3.5219206460306967e-07, "loss": 0.7076, "step": 6154 }, { "epoch": 0.92, "learning_rate": 3.509223736585732e-07, "loss": 0.6664, "step": 6155 }, { "epoch": 0.92, "learning_rate": 3.496549346603728e-07, "loss": 0.7086, "step": 6156 }, { "epoch": 0.92, "learning_rate": 3.483897479042686e-07, "loss": 0.7474, "step": 6157 }, { "epoch": 0.92, "learning_rate": 3.471268136855366e-07, "loss": 0.6934, "step": 6158 }, { "epoch": 0.92, "learning_rate": 3.458661322989232e-07, "loss": 0.6226, "step": 6159 }, { "epoch": 0.92, "learning_rate": 3.446077040386553e-07, "loss": 0.6649, "step": 6160 }, { "epoch": 0.92, "learning_rate": 3.433515291984302e-07, "loss": 0.7058, "step": 6161 }, { "epoch": 0.92, "learning_rate": 3.4209760807141666e-07, "loss": 0.7408, "step": 6162 }, { "epoch": 0.92, "learning_rate": 3.4084594095026623e-07, "loss": 0.7078, "step": 6163 }, { "epoch": 0.92, "learning_rate": 3.395965281270941e-07, "loss": 0.6967, "step": 6164 }, { "epoch": 0.92, "learning_rate": 3.383493698934959e-07, "loss": 0.7222, "step": 6165 }, { "epoch": 0.92, "learning_rate": 3.3710446654053876e-07, "loss": 0.7062, "step": 6166 }, { "epoch": 0.92, "learning_rate": 3.3586181835876363e-07, "loss": 0.7324, "step": 6167 }, { "epoch": 0.92, "learning_rate": 3.3462142563818633e-07, "loss": 0.6677, "step": 6168 }, { "epoch": 0.92, "learning_rate": 3.3338328866829417e-07, "loss": 0.7674, "step": 6169 }, { "epoch": 0.92, "learning_rate": 3.3214740773805044e-07, "loss": 0.7148, "step": 6170 }, { "epoch": 0.92, "learning_rate": 3.309137831358922e-07, "loss": 0.7419, "step": 6171 }, { "epoch": 0.92, "learning_rate": 3.2968241514972466e-07, "loss": 0.6205, "step": 6172 }, { "epoch": 0.92, "learning_rate": 3.284533040669324e-07, "loss": 0.3047, "step": 6173 }, { "epoch": 0.92, "learning_rate": 3.272264501743716e-07, "loss": 0.6582, "step": 6174 }, { "epoch": 0.92, "learning_rate": 3.260018537583698e-07, "loss": 0.6861, "step": 6175 }, { "epoch": 0.92, "learning_rate": 3.247795151047295e-07, "loss": 0.7155, "step": 6176 }, { "epoch": 0.92, "learning_rate": 3.235594344987247e-07, "loss": 0.6859, "step": 6177 }, { "epoch": 0.92, "learning_rate": 3.223416122251055e-07, "loss": 0.6808, "step": 6178 }, { "epoch": 0.92, "learning_rate": 3.2112604856809227e-07, "loss": 0.7144, "step": 6179 }, { "epoch": 0.92, "learning_rate": 3.1991274381137806e-07, "loss": 0.739, "step": 6180 }, { "epoch": 0.92, "learning_rate": 3.187016982381297e-07, "loss": 0.7079, "step": 6181 }, { "epoch": 0.92, "learning_rate": 3.1749291213098665e-07, "loss": 0.6575, "step": 6182 }, { "epoch": 0.92, "learning_rate": 3.1628638577206105e-07, "loss": 0.5969, "step": 6183 }, { "epoch": 0.92, "learning_rate": 3.1508211944293767e-07, "loss": 0.7634, "step": 6184 }, { "epoch": 0.92, "learning_rate": 3.1388011342467383e-07, "loss": 0.7177, "step": 6185 }, { "epoch": 0.92, "learning_rate": 3.1268036799779965e-07, "loss": 0.5347, "step": 6186 }, { "epoch": 0.92, "learning_rate": 3.114828834423167e-07, "loss": 0.6448, "step": 6187 }, { "epoch": 0.92, "learning_rate": 3.1028766003769915e-07, "loss": 0.5685, "step": 6188 }, { "epoch": 0.92, "learning_rate": 3.090946980628951e-07, "loss": 0.6511, "step": 6189 }, { "epoch": 0.92, "learning_rate": 3.079039977963216e-07, "loss": 0.7777, "step": 6190 }, { "epoch": 0.92, "learning_rate": 3.0671555951587e-07, "loss": 0.7154, "step": 6191 }, { "epoch": 0.92, "learning_rate": 3.0552938349890595e-07, "loss": 0.7037, "step": 6192 }, { "epoch": 0.92, "learning_rate": 3.0434547002226143e-07, "loss": 0.7491, "step": 6193 }, { "epoch": 0.92, "learning_rate": 3.0316381936224773e-07, "loss": 0.6375, "step": 6194 }, { "epoch": 0.92, "learning_rate": 3.0198443179463855e-07, "loss": 0.7246, "step": 6195 }, { "epoch": 0.92, "learning_rate": 3.0080730759468937e-07, "loss": 0.7414, "step": 6196 }, { "epoch": 0.92, "learning_rate": 2.9963244703712146e-07, "loss": 0.6883, "step": 6197 }, { "epoch": 0.92, "learning_rate": 2.984598503961278e-07, "loss": 0.732, "step": 6198 }, { "epoch": 0.92, "learning_rate": 2.97289517945375e-07, "loss": 0.7397, "step": 6199 }, { "epoch": 0.92, "learning_rate": 2.961214499580023e-07, "loss": 0.7702, "step": 6200 }, { "epoch": 0.92, "learning_rate": 2.9495564670661725e-07, "loss": 0.6816, "step": 6201 }, { "epoch": 0.92, "learning_rate": 2.937921084633011e-07, "loss": 0.7226, "step": 6202 }, { "epoch": 0.93, "learning_rate": 2.9263083549960545e-07, "loss": 0.6708, "step": 6203 }, { "epoch": 0.93, "learning_rate": 2.914718280865536e-07, "loss": 0.6276, "step": 6204 }, { "epoch": 0.93, "learning_rate": 2.903150864946402e-07, "loss": 0.7129, "step": 6205 }, { "epoch": 0.93, "learning_rate": 2.8916061099382944e-07, "loss": 0.71, "step": 6206 }, { "epoch": 0.93, "learning_rate": 2.8800840185356116e-07, "loss": 0.7321, "step": 6207 }, { "epoch": 0.93, "learning_rate": 2.868584593427415e-07, "loss": 0.7116, "step": 6208 }, { "epoch": 0.93, "learning_rate": 2.85710783729749e-07, "loss": 0.6898, "step": 6209 }, { "epoch": 0.93, "learning_rate": 2.84565375282434e-07, "loss": 0.6771, "step": 6210 }, { "epoch": 0.93, "learning_rate": 2.83422234268117e-07, "loss": 0.6633, "step": 6211 }, { "epoch": 0.93, "learning_rate": 2.8228136095359125e-07, "loss": 0.6258, "step": 6212 }, { "epoch": 0.93, "learning_rate": 2.8114275560511497e-07, "loss": 0.6143, "step": 6213 }, { "epoch": 0.93, "learning_rate": 2.8000641848842324e-07, "loss": 0.6343, "step": 6214 }, { "epoch": 0.93, "learning_rate": 2.788723498687229e-07, "loss": 0.6285, "step": 6215 }, { "epoch": 0.93, "learning_rate": 2.7774055001068334e-07, "loss": 0.6829, "step": 6216 }, { "epoch": 0.93, "learning_rate": 2.7661101917845214e-07, "loss": 0.7552, "step": 6217 }, { "epoch": 0.93, "learning_rate": 2.7548375763564285e-07, "loss": 0.6542, "step": 6218 }, { "epoch": 0.93, "learning_rate": 2.743587656453417e-07, "loss": 0.6182, "step": 6219 }, { "epoch": 0.93, "learning_rate": 2.7323604347010537e-07, "loss": 0.6289, "step": 6220 }, { "epoch": 0.93, "learning_rate": 2.7211559137195644e-07, "loss": 0.7002, "step": 6221 }, { "epoch": 0.93, "learning_rate": 2.709974096123957e-07, "loss": 0.6062, "step": 6222 }, { "epoch": 0.93, "learning_rate": 2.6988149845238787e-07, "loss": 0.6486, "step": 6223 }, { "epoch": 0.93, "learning_rate": 2.6876785815236784e-07, "loss": 0.6211, "step": 6224 }, { "epoch": 0.93, "learning_rate": 2.676564889722444e-07, "loss": 0.795, "step": 6225 }, { "epoch": 0.93, "learning_rate": 2.665473911713934e-07, "loss": 0.7799, "step": 6226 }, { "epoch": 0.93, "learning_rate": 2.6544056500866e-07, "loss": 0.6775, "step": 6227 }, { "epoch": 0.93, "learning_rate": 2.6433601074236093e-07, "loss": 0.6976, "step": 6228 }, { "epoch": 0.93, "learning_rate": 2.632337286302833e-07, "loss": 0.7024, "step": 6229 }, { "epoch": 0.93, "learning_rate": 2.621337189296824e-07, "loss": 0.639, "step": 6230 }, { "epoch": 0.93, "learning_rate": 2.6103598189728185e-07, "loss": 0.6776, "step": 6231 }, { "epoch": 0.93, "learning_rate": 2.5994051778927886e-07, "loss": 0.6879, "step": 6232 }, { "epoch": 0.93, "learning_rate": 2.5884732686133786e-07, "loss": 0.7376, "step": 6233 }, { "epoch": 0.93, "learning_rate": 2.5775640936859025e-07, "loss": 0.5499, "step": 6234 }, { "epoch": 0.93, "learning_rate": 2.566677655656413e-07, "loss": 0.7572, "step": 6235 }, { "epoch": 0.93, "learning_rate": 2.555813957065645e-07, "loss": 0.6749, "step": 6236 }, { "epoch": 0.93, "learning_rate": 2.5449730004490027e-07, "loss": 0.7023, "step": 6237 }, { "epoch": 0.93, "learning_rate": 2.534154788336607e-07, "loss": 0.624, "step": 6238 }, { "epoch": 0.93, "learning_rate": 2.5233593232532493e-07, "loss": 0.748, "step": 6239 }, { "epoch": 0.93, "learning_rate": 2.512586607718448e-07, "loss": 0.6617, "step": 6240 }, { "epoch": 0.93, "learning_rate": 2.5018366442463804e-07, "loss": 0.6796, "step": 6241 }, { "epoch": 0.93, "learning_rate": 2.491109435345906e-07, "loss": 0.6816, "step": 6242 }, { "epoch": 0.93, "learning_rate": 2.4804049835206014e-07, "loss": 0.7069, "step": 6243 }, { "epoch": 0.93, "learning_rate": 2.469723291268733e-07, "loss": 0.6812, "step": 6244 }, { "epoch": 0.93, "learning_rate": 2.4590643610832297e-07, "loss": 0.6932, "step": 6245 }, { "epoch": 0.93, "learning_rate": 2.448428195451724e-07, "loss": 0.6917, "step": 6246 }, { "epoch": 0.93, "learning_rate": 2.437814796856541e-07, "loss": 0.7178, "step": 6247 }, { "epoch": 0.93, "learning_rate": 2.427224167774678e-07, "loss": 0.6867, "step": 6248 }, { "epoch": 0.93, "learning_rate": 2.4166563106778116e-07, "loss": 0.653, "step": 6249 }, { "epoch": 0.93, "learning_rate": 2.4061112280323486e-07, "loss": 0.6442, "step": 6250 }, { "epoch": 0.93, "learning_rate": 2.3955889222993187e-07, "loss": 0.754, "step": 6251 }, { "epoch": 0.93, "learning_rate": 2.385089395934481e-07, "loss": 0.7544, "step": 6252 }, { "epoch": 0.93, "learning_rate": 2.374612651388264e-07, "loss": 0.5763, "step": 6253 }, { "epoch": 0.93, "learning_rate": 2.364158691105778e-07, "loss": 0.7173, "step": 6254 }, { "epoch": 0.93, "learning_rate": 2.353727517526805e-07, "loss": 0.72, "step": 6255 }, { "epoch": 0.93, "learning_rate": 2.3433191330858417e-07, "loss": 0.6535, "step": 6256 }, { "epoch": 0.93, "learning_rate": 2.3329335402120236e-07, "loss": 0.7534, "step": 6257 }, { "epoch": 0.93, "learning_rate": 2.3225707413292109e-07, "loss": 0.6651, "step": 6258 }, { "epoch": 0.93, "learning_rate": 2.3122307388558918e-07, "loss": 0.6443, "step": 6259 }, { "epoch": 0.93, "learning_rate": 2.3019135352052913e-07, "loss": 0.7246, "step": 6260 }, { "epoch": 0.93, "learning_rate": 2.2916191327852722e-07, "loss": 0.6856, "step": 6261 }, { "epoch": 0.93, "learning_rate": 2.2813475339983905e-07, "loss": 0.6673, "step": 6262 }, { "epoch": 0.93, "learning_rate": 2.271098741241884e-07, "loss": 0.3058, "step": 6263 }, { "epoch": 0.93, "learning_rate": 2.2608727569076728e-07, "loss": 0.6618, "step": 6264 }, { "epoch": 0.93, "learning_rate": 2.2506695833823033e-07, "loss": 0.707, "step": 6265 }, { "epoch": 0.93, "learning_rate": 2.240489223047093e-07, "loss": 0.676, "step": 6266 }, { "epoch": 0.93, "learning_rate": 2.2303316782779527e-07, "loss": 0.6217, "step": 6267 }, { "epoch": 0.93, "learning_rate": 2.220196951445497e-07, "loss": 0.6928, "step": 6268 }, { "epoch": 0.93, "learning_rate": 2.2100850449150224e-07, "loss": 0.7323, "step": 6269 }, { "epoch": 0.94, "learning_rate": 2.199995961046486e-07, "loss": 0.6748, "step": 6270 }, { "epoch": 0.94, "learning_rate": 2.1899297021945375e-07, "loss": 0.6658, "step": 6271 }, { "epoch": 0.94, "learning_rate": 2.1798862707084756e-07, "loss": 0.6907, "step": 6272 }, { "epoch": 0.94, "learning_rate": 2.1698656689322583e-07, "loss": 0.6777, "step": 6273 }, { "epoch": 0.94, "learning_rate": 2.159867899204604e-07, "loss": 0.7003, "step": 6274 }, { "epoch": 0.94, "learning_rate": 2.1498929638587796e-07, "loss": 0.8052, "step": 6275 }, { "epoch": 0.94, "learning_rate": 2.1399408652228115e-07, "loss": 0.7222, "step": 6276 }, { "epoch": 0.94, "learning_rate": 2.130011605619353e-07, "loss": 0.7347, "step": 6277 }, { "epoch": 0.94, "learning_rate": 2.1201051873657398e-07, "loss": 0.612, "step": 6278 }, { "epoch": 0.94, "learning_rate": 2.1102216127739995e-07, "loss": 0.6109, "step": 6279 }, { "epoch": 0.94, "learning_rate": 2.100360884150765e-07, "loss": 0.6272, "step": 6280 }, { "epoch": 0.94, "learning_rate": 2.0905230037974177e-07, "loss": 0.765, "step": 6281 }, { "epoch": 0.94, "learning_rate": 2.0807079740099544e-07, "loss": 0.6555, "step": 6282 }, { "epoch": 0.94, "learning_rate": 2.0709157970790429e-07, "loss": 0.677, "step": 6283 }, { "epoch": 0.94, "learning_rate": 2.0611464752900546e-07, "loss": 0.6806, "step": 6284 }, { "epoch": 0.94, "learning_rate": 2.051400010922955e-07, "loss": 0.6485, "step": 6285 }, { "epoch": 0.94, "learning_rate": 2.0416764062524576e-07, "loss": 0.3185, "step": 6286 }, { "epoch": 0.94, "learning_rate": 2.0319756635478915e-07, "loss": 0.7395, "step": 6287 }, { "epoch": 0.94, "learning_rate": 2.022297785073235e-07, "loss": 0.7814, "step": 6288 }, { "epoch": 0.94, "learning_rate": 2.0126427730871923e-07, "loss": 0.6856, "step": 6289 }, { "epoch": 0.94, "learning_rate": 2.003010629843083e-07, "loss": 0.761, "step": 6290 }, { "epoch": 0.94, "learning_rate": 1.9934013575888866e-07, "loss": 0.6643, "step": 6291 }, { "epoch": 0.94, "learning_rate": 1.9838149585672873e-07, "loss": 0.6644, "step": 6292 }, { "epoch": 0.94, "learning_rate": 1.9742514350155838e-07, "loss": 0.7331, "step": 6293 }, { "epoch": 0.94, "learning_rate": 1.9647107891657467e-07, "loss": 0.7641, "step": 6294 }, { "epoch": 0.94, "learning_rate": 1.9551930232444393e-07, "loss": 0.7058, "step": 6295 }, { "epoch": 0.94, "learning_rate": 1.9456981394729402e-07, "loss": 0.6965, "step": 6296 }, { "epoch": 0.94, "learning_rate": 1.9362261400672432e-07, "loss": 0.6963, "step": 6297 }, { "epoch": 0.94, "learning_rate": 1.926777027237925e-07, "loss": 0.7259, "step": 6298 }, { "epoch": 0.94, "learning_rate": 1.9173508031902875e-07, "loss": 0.6542, "step": 6299 }, { "epoch": 0.94, "learning_rate": 1.9079474701242607e-07, "loss": 0.6733, "step": 6300 }, { "epoch": 0.94, "learning_rate": 1.898567030234433e-07, "loss": 0.7157, "step": 6301 }, { "epoch": 0.94, "learning_rate": 1.889209485710053e-07, "loss": 0.6726, "step": 6302 }, { "epoch": 0.94, "learning_rate": 1.8798748387350408e-07, "loss": 0.7107, "step": 6303 }, { "epoch": 0.94, "learning_rate": 1.8705630914879646e-07, "loss": 0.6896, "step": 6304 }, { "epoch": 0.94, "learning_rate": 1.8612742461420197e-07, "loss": 0.6414, "step": 6305 }, { "epoch": 0.94, "learning_rate": 1.852008304865094e-07, "loss": 0.7469, "step": 6306 }, { "epoch": 0.94, "learning_rate": 1.8427652698197128e-07, "loss": 0.6028, "step": 6307 }, { "epoch": 0.94, "learning_rate": 1.8335451431630624e-07, "loss": 0.7107, "step": 6308 }, { "epoch": 0.94, "learning_rate": 1.8243479270469654e-07, "loss": 0.7534, "step": 6309 }, { "epoch": 0.94, "learning_rate": 1.8151736236179275e-07, "loss": 0.7146, "step": 6310 }, { "epoch": 0.94, "learning_rate": 1.8060222350170797e-07, "loss": 0.6763, "step": 6311 }, { "epoch": 0.94, "learning_rate": 1.7968937633802253e-07, "loss": 0.6239, "step": 6312 }, { "epoch": 0.94, "learning_rate": 1.7877882108377932e-07, "loss": 0.7198, "step": 6313 }, { "epoch": 0.94, "learning_rate": 1.7787055795148834e-07, "loss": 0.7134, "step": 6314 }, { "epoch": 0.94, "learning_rate": 1.769645871531256e-07, "loss": 0.6617, "step": 6315 }, { "epoch": 0.94, "learning_rate": 1.7606090890012972e-07, "loss": 0.6576, "step": 6316 }, { "epoch": 0.94, "learning_rate": 1.7515952340340538e-07, "loss": 0.7784, "step": 6317 }, { "epoch": 0.94, "learning_rate": 1.7426043087332202e-07, "loss": 0.7463, "step": 6318 }, { "epoch": 0.94, "learning_rate": 1.7336363151971403e-07, "loss": 0.7425, "step": 6319 }, { "epoch": 0.94, "learning_rate": 1.7246912555188067e-07, "loss": 0.6581, "step": 6320 }, { "epoch": 0.94, "learning_rate": 1.7157691317858605e-07, "loss": 0.6599, "step": 6321 }, { "epoch": 0.94, "learning_rate": 1.7068699460805804e-07, "loss": 0.7473, "step": 6322 }, { "epoch": 0.94, "learning_rate": 1.6979937004799163e-07, "loss": 0.6955, "step": 6323 }, { "epoch": 0.94, "learning_rate": 1.6891403970554222e-07, "loss": 0.7156, "step": 6324 }, { "epoch": 0.94, "learning_rate": 1.6803100378733562e-07, "loss": 0.6025, "step": 6325 }, { "epoch": 0.94, "learning_rate": 1.6715026249945587e-07, "loss": 0.6612, "step": 6326 }, { "epoch": 0.94, "learning_rate": 1.6627181604745525e-07, "loss": 0.6415, "step": 6327 }, { "epoch": 0.94, "learning_rate": 1.6539566463635082e-07, "loss": 0.7195, "step": 6328 }, { "epoch": 0.94, "learning_rate": 1.6452180847062126e-07, "loss": 0.7128, "step": 6329 }, { "epoch": 0.94, "learning_rate": 1.6365024775421233e-07, "loss": 0.6231, "step": 6330 }, { "epoch": 0.94, "learning_rate": 1.6278098269053243e-07, "loss": 0.6344, "step": 6331 }, { "epoch": 0.94, "learning_rate": 1.619140134824537e-07, "loss": 0.7694, "step": 6332 }, { "epoch": 0.94, "learning_rate": 1.6104934033231434e-07, "loss": 0.7508, "step": 6333 }, { "epoch": 0.94, "learning_rate": 1.601869634419162e-07, "loss": 0.7386, "step": 6334 }, { "epoch": 0.94, "learning_rate": 1.5932688301252498e-07, "loss": 0.6435, "step": 6335 }, { "epoch": 0.94, "learning_rate": 1.5846909924486897e-07, "loss": 0.6036, "step": 6336 }, { "epoch": 0.95, "learning_rate": 1.576136123391425e-07, "loss": 0.6851, "step": 6337 }, { "epoch": 0.95, "learning_rate": 1.5676042249500256e-07, "loss": 0.6499, "step": 6338 }, { "epoch": 0.95, "learning_rate": 1.5590952991157094e-07, "loss": 0.6865, "step": 6339 }, { "epoch": 0.95, "learning_rate": 1.5506093478743323e-07, "loss": 0.6589, "step": 6340 }, { "epoch": 0.95, "learning_rate": 1.542146373206377e-07, "loss": 0.6635, "step": 6341 }, { "epoch": 0.95, "learning_rate": 1.533706377086963e-07, "loss": 0.6568, "step": 6342 }, { "epoch": 0.95, "learning_rate": 1.5252893614858933e-07, "loss": 0.7293, "step": 6343 }, { "epoch": 0.95, "learning_rate": 1.516895328367529e-07, "loss": 0.6811, "step": 6344 }, { "epoch": 0.95, "learning_rate": 1.508524279690915e-07, "loss": 0.6992, "step": 6345 }, { "epoch": 0.95, "learning_rate": 1.5001762174097544e-07, "loss": 0.3654, "step": 6346 }, { "epoch": 0.95, "learning_rate": 1.4918511434723116e-07, "loss": 0.8021, "step": 6347 }, { "epoch": 0.95, "learning_rate": 1.4835490598215763e-07, "loss": 0.7738, "step": 6348 }, { "epoch": 0.95, "learning_rate": 1.4752699683950987e-07, "loss": 0.7678, "step": 6349 }, { "epoch": 0.95, "learning_rate": 1.467013871125089e-07, "loss": 0.6599, "step": 6350 }, { "epoch": 0.95, "learning_rate": 1.4587807699384172e-07, "loss": 0.6619, "step": 6351 }, { "epoch": 0.95, "learning_rate": 1.4505706667565455e-07, "loss": 0.7233, "step": 6352 }, { "epoch": 0.95, "learning_rate": 1.4423835634955863e-07, "loss": 0.6224, "step": 6353 }, { "epoch": 0.95, "learning_rate": 1.4342194620662775e-07, "loss": 0.6987, "step": 6354 }, { "epoch": 0.95, "learning_rate": 1.4260783643740172e-07, "loss": 0.6725, "step": 6355 }, { "epoch": 0.95, "learning_rate": 1.417960272318797e-07, "loss": 0.6185, "step": 6356 }, { "epoch": 0.95, "learning_rate": 1.4098651877952452e-07, "loss": 0.6495, "step": 6357 }, { "epoch": 0.95, "learning_rate": 1.401793112692651e-07, "loss": 0.6476, "step": 6358 }, { "epoch": 0.95, "learning_rate": 1.3937440488948962e-07, "loss": 0.6564, "step": 6359 }, { "epoch": 0.95, "learning_rate": 1.3857179982805114e-07, "loss": 0.6006, "step": 6360 }, { "epoch": 0.95, "learning_rate": 1.3777149627226537e-07, "loss": 0.7567, "step": 6361 }, { "epoch": 0.95, "learning_rate": 1.3697349440891184e-07, "loss": 0.7227, "step": 6362 }, { "epoch": 0.95, "learning_rate": 1.361777944242315e-07, "loss": 0.7181, "step": 6363 }, { "epoch": 0.95, "learning_rate": 1.353843965039281e-07, "loss": 0.6488, "step": 6364 }, { "epoch": 0.95, "learning_rate": 1.3459330083316791e-07, "loss": 0.6911, "step": 6365 }, { "epoch": 0.95, "learning_rate": 1.3380450759658215e-07, "loss": 0.6494, "step": 6366 }, { "epoch": 0.95, "learning_rate": 1.3301801697826134e-07, "loss": 0.7917, "step": 6367 }, { "epoch": 0.95, "learning_rate": 1.3223382916176085e-07, "loss": 0.6729, "step": 6368 }, { "epoch": 0.95, "learning_rate": 1.3145194433009767e-07, "loss": 0.6287, "step": 6369 }, { "epoch": 0.95, "learning_rate": 1.3067236266575356e-07, "loss": 0.7108, "step": 6370 }, { "epoch": 0.95, "learning_rate": 1.2989508435066965e-07, "loss": 0.6496, "step": 6371 }, { "epoch": 0.95, "learning_rate": 1.2912010956624976e-07, "loss": 0.7567, "step": 6372 }, { "epoch": 0.95, "learning_rate": 1.2834743849336139e-07, "loss": 0.7499, "step": 6373 }, { "epoch": 0.95, "learning_rate": 1.2757707131233588e-07, "loss": 0.6868, "step": 6374 }, { "epoch": 0.95, "learning_rate": 1.2680900820296382e-07, "loss": 0.7025, "step": 6375 }, { "epoch": 0.95, "learning_rate": 1.2604324934449853e-07, "loss": 0.7624, "step": 6376 }, { "epoch": 0.95, "learning_rate": 1.2527979491565812e-07, "loss": 0.6527, "step": 6377 }, { "epoch": 0.95, "learning_rate": 1.2451864509461897e-07, "loss": 0.6212, "step": 6378 }, { "epoch": 0.95, "learning_rate": 1.2375980005902454e-07, "loss": 0.8203, "step": 6379 }, { "epoch": 0.95, "learning_rate": 1.230032599859754e-07, "loss": 0.7107, "step": 6380 }, { "epoch": 0.95, "learning_rate": 1.2224902505203695e-07, "loss": 0.6463, "step": 6381 }, { "epoch": 0.95, "learning_rate": 1.2149709543323617e-07, "loss": 0.6537, "step": 6382 }, { "epoch": 0.95, "learning_rate": 1.2074747130506048e-07, "loss": 0.6419, "step": 6383 }, { "epoch": 0.95, "learning_rate": 1.2000015284246324e-07, "loss": 0.6853, "step": 6384 }, { "epoch": 0.95, "learning_rate": 1.1925514021985497e-07, "loss": 0.724, "step": 6385 }, { "epoch": 0.95, "learning_rate": 1.1851243361111208e-07, "loss": 0.6586, "step": 6386 }, { "epoch": 0.95, "learning_rate": 1.177720331895682e-07, "loss": 0.6664, "step": 6387 }, { "epoch": 0.95, "learning_rate": 1.1703393912802285e-07, "loss": 0.6279, "step": 6388 }, { "epoch": 0.95, "learning_rate": 1.1629815159873714e-07, "loss": 0.7331, "step": 6389 }, { "epoch": 0.95, "learning_rate": 1.1556467077343147e-07, "loss": 0.6498, "step": 6390 }, { "epoch": 0.95, "learning_rate": 1.1483349682328781e-07, "loss": 0.7373, "step": 6391 }, { "epoch": 0.95, "learning_rate": 1.1410462991895189e-07, "loss": 0.6363, "step": 6392 }, { "epoch": 0.95, "learning_rate": 1.1337807023052982e-07, "loss": 0.6442, "step": 6393 }, { "epoch": 0.95, "learning_rate": 1.1265381792759045e-07, "loss": 0.6789, "step": 6394 }, { "epoch": 0.95, "learning_rate": 1.1193187317916187e-07, "loss": 0.6438, "step": 6395 }, { "epoch": 0.95, "learning_rate": 1.11212236153736e-07, "loss": 0.7616, "step": 6396 }, { "epoch": 0.95, "learning_rate": 1.1049490701926402e-07, "loss": 0.6823, "step": 6397 }, { "epoch": 0.95, "learning_rate": 1.097798859431598e-07, "loss": 0.6349, "step": 6398 }, { "epoch": 0.95, "learning_rate": 1.0906717309229874e-07, "loss": 0.7009, "step": 6399 }, { "epoch": 0.95, "learning_rate": 1.0835676863301558e-07, "loss": 0.7899, "step": 6400 }, { "epoch": 0.95, "learning_rate": 1.076486727311099e-07, "loss": 0.6796, "step": 6401 }, { "epoch": 0.95, "learning_rate": 1.0694288555184063e-07, "loss": 0.6718, "step": 6402 }, { "epoch": 0.95, "learning_rate": 1.0623940725992488e-07, "loss": 0.7063, "step": 6403 }, { "epoch": 0.96, "learning_rate": 1.0553823801954465e-07, "loss": 0.7178, "step": 6404 }, { "epoch": 0.96, "learning_rate": 1.0483937799434351e-07, "loss": 0.7011, "step": 6405 }, { "epoch": 0.96, "learning_rate": 1.041428273474232e-07, "loss": 0.7541, "step": 6406 }, { "epoch": 0.96, "learning_rate": 1.0344858624134924e-07, "loss": 0.7318, "step": 6407 }, { "epoch": 0.96, "learning_rate": 1.0275665483814535e-07, "loss": 0.7155, "step": 6408 }, { "epoch": 0.96, "learning_rate": 1.0206703329929901e-07, "loss": 0.7463, "step": 6409 }, { "epoch": 0.96, "learning_rate": 1.0137972178575705e-07, "loss": 0.6968, "step": 6410 }, { "epoch": 0.96, "learning_rate": 1.006947204579256e-07, "loss": 0.778, "step": 6411 }, { "epoch": 0.96, "learning_rate": 1.0001202947567678e-07, "loss": 0.6883, "step": 6412 }, { "epoch": 0.96, "learning_rate": 9.933164899833536e-08, "loss": 0.7175, "step": 6413 }, { "epoch": 0.96, "learning_rate": 9.865357918469654e-08, "loss": 0.6494, "step": 6414 }, { "epoch": 0.96, "learning_rate": 9.797782019300928e-08, "loss": 0.3189, "step": 6415 }, { "epoch": 0.96, "learning_rate": 9.730437218098521e-08, "loss": 0.5953, "step": 6416 }, { "epoch": 0.96, "learning_rate": 9.663323530579639e-08, "loss": 0.6869, "step": 6417 }, { "epoch": 0.96, "learning_rate": 9.59644097240775e-08, "loss": 0.7459, "step": 6418 }, { "epoch": 0.96, "learning_rate": 9.529789559192037e-08, "loss": 0.6793, "step": 6419 }, { "epoch": 0.96, "learning_rate": 9.463369306487946e-08, "loss": 0.7039, "step": 6420 }, { "epoch": 0.96, "learning_rate": 9.397180229796965e-08, "loss": 0.6978, "step": 6421 }, { "epoch": 0.96, "learning_rate": 9.331222344566737e-08, "loss": 0.6634, "step": 6422 }, { "epoch": 0.96, "learning_rate": 9.265495666190616e-08, "loss": 0.6717, "step": 6423 }, { "epoch": 0.96, "learning_rate": 9.20000021000833e-08, "loss": 0.7242, "step": 6424 }, { "epoch": 0.96, "learning_rate": 9.134735991305543e-08, "loss": 0.6855, "step": 6425 }, { "epoch": 0.96, "learning_rate": 9.069703025313626e-08, "loss": 0.6422, "step": 6426 }, { "epoch": 0.96, "learning_rate": 9.004901327210658e-08, "loss": 0.7219, "step": 6427 }, { "epoch": 0.96, "learning_rate": 8.940330912120098e-08, "loss": 0.324, "step": 6428 }, { "epoch": 0.96, "learning_rate": 8.875991795111782e-08, "loss": 0.6618, "step": 6429 }, { "epoch": 0.96, "learning_rate": 8.811883991201587e-08, "loss": 0.7697, "step": 6430 }, { "epoch": 0.96, "learning_rate": 8.748007515350987e-08, "loss": 0.7636, "step": 6431 }, { "epoch": 0.96, "learning_rate": 8.684362382468058e-08, "loss": 0.7376, "step": 6432 }, { "epoch": 0.96, "learning_rate": 8.620948607406588e-08, "loss": 0.7445, "step": 6433 }, { "epoch": 0.96, "learning_rate": 8.557766204966067e-08, "loss": 0.652, "step": 6434 }, { "epoch": 0.96, "learning_rate": 8.494815189892591e-08, "loss": 0.775, "step": 6435 }, { "epoch": 0.96, "learning_rate": 8.432095576877852e-08, "loss": 0.6909, "step": 6436 }, { "epoch": 0.96, "learning_rate": 8.369607380559697e-08, "loss": 0.7131, "step": 6437 }, { "epoch": 0.96, "learning_rate": 8.307350615521903e-08, "loss": 0.6934, "step": 6438 }, { "epoch": 0.96, "learning_rate": 8.245325296294183e-08, "loss": 0.6622, "step": 6439 }, { "epoch": 0.96, "learning_rate": 8.183531437352399e-08, "loss": 0.6927, "step": 6440 }, { "epoch": 0.96, "learning_rate": 8.121969053118017e-08, "loss": 0.6252, "step": 6441 }, { "epoch": 0.96, "learning_rate": 8.0606381579591e-08, "loss": 0.5539, "step": 6442 }, { "epoch": 0.96, "learning_rate": 7.999538766189085e-08, "loss": 0.6657, "step": 6443 }, { "epoch": 0.96, "learning_rate": 7.93867089206779e-08, "loss": 0.6597, "step": 6444 }, { "epoch": 0.96, "learning_rate": 7.87803454980085e-08, "loss": 0.6811, "step": 6445 }, { "epoch": 0.96, "learning_rate": 7.817629753539612e-08, "loss": 0.7198, "step": 6446 }, { "epoch": 0.96, "learning_rate": 7.757456517381912e-08, "loss": 0.6042, "step": 6447 }, { "epoch": 0.96, "learning_rate": 7.69751485537118e-08, "loss": 0.7596, "step": 6448 }, { "epoch": 0.96, "learning_rate": 7.637804781496671e-08, "loss": 0.6292, "step": 6449 }, { "epoch": 0.96, "learning_rate": 7.578326309694128e-08, "loss": 0.6974, "step": 6450 }, { "epoch": 0.96, "learning_rate": 7.519079453844669e-08, "loss": 0.6647, "step": 6451 }, { "epoch": 0.96, "learning_rate": 7.460064227775676e-08, "loss": 0.746, "step": 6452 }, { "epoch": 0.96, "learning_rate": 7.401280645260467e-08, "loss": 0.7128, "step": 6453 }, { "epoch": 0.96, "learning_rate": 7.342728720018067e-08, "loss": 0.7043, "step": 6454 }, { "epoch": 0.96, "learning_rate": 7.284408465713655e-08, "loss": 0.8025, "step": 6455 }, { "epoch": 0.96, "learning_rate": 7.226319895958456e-08, "loss": 0.7093, "step": 6456 }, { "epoch": 0.96, "learning_rate": 7.16846302430918e-08, "loss": 0.6687, "step": 6457 }, { "epoch": 0.96, "learning_rate": 7.110837864268916e-08, "loss": 0.6998, "step": 6458 }, { "epoch": 0.96, "learning_rate": 7.053444429286349e-08, "loss": 0.742, "step": 6459 }, { "epoch": 0.96, "learning_rate": 6.996282732756431e-08, "loss": 0.6209, "step": 6460 }, { "epoch": 0.96, "learning_rate": 6.939352788019604e-08, "loss": 0.7084, "step": 6461 }, { "epoch": 0.96, "learning_rate": 6.882654608362572e-08, "loss": 0.7549, "step": 6462 }, { "epoch": 0.96, "learning_rate": 6.826188207017748e-08, "loss": 0.6528, "step": 6463 }, { "epoch": 0.96, "learning_rate": 6.769953597163593e-08, "loss": 0.746, "step": 6464 }, { "epoch": 0.96, "learning_rate": 6.713950791924273e-08, "loss": 0.7133, "step": 6465 }, { "epoch": 0.96, "learning_rate": 6.658179804370113e-08, "loss": 0.734, "step": 6466 }, { "epoch": 0.96, "learning_rate": 6.602640647517256e-08, "loss": 0.6905, "step": 6467 }, { "epoch": 0.96, "learning_rate": 6.547333334327443e-08, "loss": 0.7019, "step": 6468 }, { "epoch": 0.96, "learning_rate": 6.492257877708797e-08, "loss": 0.7363, "step": 6469 }, { "epoch": 0.96, "learning_rate": 6.43741429051492e-08, "loss": 0.6935, "step": 6470 }, { "epoch": 0.97, "learning_rate": 6.382802585545578e-08, "loss": 0.6718, "step": 6471 }, { "epoch": 0.97, "learning_rate": 6.328422775546128e-08, "loss": 0.7498, "step": 6472 }, { "epoch": 0.97, "learning_rate": 6.274274873208197e-08, "loss": 0.7304, "step": 6473 }, { "epoch": 0.97, "learning_rate": 6.220358891169009e-08, "loss": 0.7801, "step": 6474 }, { "epoch": 0.97, "learning_rate": 6.16667484201161e-08, "loss": 0.6715, "step": 6475 }, { "epoch": 0.97, "learning_rate": 6.113222738265201e-08, "loss": 0.7543, "step": 6476 }, { "epoch": 0.97, "learning_rate": 6.060002592404579e-08, "loss": 0.6595, "step": 6477 }, { "epoch": 0.97, "learning_rate": 6.007014416850365e-08, "loss": 0.6901, "step": 6478 }, { "epoch": 0.97, "learning_rate": 5.954258223969556e-08, "loss": 0.6938, "step": 6479 }, { "epoch": 0.97, "learning_rate": 5.901734026074302e-08, "loss": 0.7136, "step": 6480 }, { "epoch": 0.97, "learning_rate": 5.849441835423131e-08, "loss": 0.5449, "step": 6481 }, { "epoch": 0.97, "learning_rate": 5.797381664220059e-08, "loss": 0.6727, "step": 6482 }, { "epoch": 0.97, "learning_rate": 5.745553524615255e-08, "loss": 0.6638, "step": 6483 }, { "epoch": 0.97, "learning_rate": 5.6939574287047107e-08, "loss": 0.7047, "step": 6484 }, { "epoch": 0.97, "learning_rate": 5.6425933885299044e-08, "loss": 0.7428, "step": 6485 }, { "epoch": 0.97, "learning_rate": 5.591461416078581e-08, "loss": 0.308, "step": 6486 }, { "epoch": 0.97, "learning_rate": 5.540561523284194e-08, "loss": 0.673, "step": 6487 }, { "epoch": 0.97, "learning_rate": 5.4898937220259077e-08, "loss": 0.6635, "step": 6488 }, { "epoch": 0.97, "learning_rate": 5.4394580241288184e-08, "loss": 0.6891, "step": 6489 }, { "epoch": 0.97, "learning_rate": 5.3892544413638447e-08, "loss": 0.6691, "step": 6490 }, { "epoch": 0.97, "learning_rate": 5.3392829854477244e-08, "loss": 0.5798, "step": 6491 }, { "epoch": 0.97, "learning_rate": 5.289543668043018e-08, "loss": 0.3297, "step": 6492 }, { "epoch": 0.97, "learning_rate": 5.240036500758216e-08, "loss": 0.7154, "step": 6493 }, { "epoch": 0.97, "learning_rate": 5.1907614951474115e-08, "loss": 0.7291, "step": 6494 }, { "epoch": 0.97, "learning_rate": 5.1417186627106265e-08, "loss": 0.3138, "step": 6495 }, { "epoch": 0.97, "learning_rate": 5.092908014893816e-08, "loss": 0.7231, "step": 6496 }, { "epoch": 0.97, "learning_rate": 5.044329563088535e-08, "loss": 0.6971, "step": 6497 }, { "epoch": 0.97, "learning_rate": 4.995983318632269e-08, "loss": 0.677, "step": 6498 }, { "epoch": 0.97, "learning_rate": 4.947869292808327e-08, "loss": 0.7012, "step": 6499 }, { "epoch": 0.97, "learning_rate": 4.899987496845726e-08, "loss": 0.6222, "step": 6500 }, { "epoch": 0.97, "learning_rate": 4.852337941919416e-08, "loss": 0.661, "step": 6501 }, { "epoch": 0.97, "learning_rate": 4.804920639149946e-08, "loss": 0.7372, "step": 6502 }, { "epoch": 0.97, "learning_rate": 4.757735599603908e-08, "loss": 0.5892, "step": 6503 }, { "epoch": 0.97, "learning_rate": 4.710782834293604e-08, "loss": 0.6725, "step": 6504 }, { "epoch": 0.97, "learning_rate": 4.664062354176824e-08, "loss": 0.7542, "step": 6505 }, { "epoch": 0.97, "learning_rate": 4.617574170157735e-08, "loss": 0.708, "step": 6506 }, { "epoch": 0.97, "learning_rate": 4.571318293085769e-08, "loss": 0.7185, "step": 6507 }, { "epoch": 0.97, "learning_rate": 4.5252947337564025e-08, "loss": 0.6806, "step": 6508 }, { "epoch": 0.97, "learning_rate": 4.47950350291082e-08, "loss": 0.7608, "step": 6509 }, { "epoch": 0.97, "learning_rate": 4.4339446112359186e-08, "loss": 0.6927, "step": 6510 }, { "epoch": 0.97, "learning_rate": 4.388618069364525e-08, "loss": 0.677, "step": 6511 }, { "epoch": 0.97, "learning_rate": 4.3435238878751785e-08, "loss": 0.6718, "step": 6512 }, { "epoch": 0.97, "learning_rate": 4.298662077292015e-08, "loss": 0.6969, "step": 6513 }, { "epoch": 0.97, "learning_rate": 4.2540326480853266e-08, "loss": 0.7312, "step": 6514 }, { "epoch": 0.97, "learning_rate": 4.209635610670782e-08, "loss": 0.6755, "step": 6515 }, { "epoch": 0.97, "learning_rate": 4.165470975409869e-08, "loss": 0.6617, "step": 6516 }, { "epoch": 0.97, "learning_rate": 4.121538752610121e-08, "loss": 0.6703, "step": 6517 }, { "epoch": 0.97, "learning_rate": 4.077838952524671e-08, "loss": 0.7497, "step": 6518 }, { "epoch": 0.97, "learning_rate": 4.034371585352248e-08, "loss": 0.6755, "step": 6519 }, { "epoch": 0.97, "learning_rate": 3.991136661237516e-08, "loss": 0.698, "step": 6520 }, { "epoch": 0.97, "learning_rate": 3.948134190270736e-08, "loss": 0.7552, "step": 6521 }, { "epoch": 0.97, "learning_rate": 3.905364182488214e-08, "loss": 0.6893, "step": 6522 }, { "epoch": 0.97, "learning_rate": 3.8628266478717424e-08, "loss": 0.6384, "step": 6523 }, { "epoch": 0.97, "learning_rate": 3.8205215963488254e-08, "loss": 0.6979, "step": 6524 }, { "epoch": 0.97, "learning_rate": 3.7784490377928975e-08, "loss": 0.7644, "step": 6525 }, { "epoch": 0.97, "learning_rate": 3.736608982023104e-08, "loss": 0.6376, "step": 6526 }, { "epoch": 0.97, "learning_rate": 3.695001438804191e-08, "loss": 0.7081, "step": 6527 }, { "epoch": 0.97, "learning_rate": 3.653626417846612e-08, "loss": 0.6614, "step": 6528 }, { "epoch": 0.97, "learning_rate": 3.6124839288068645e-08, "loss": 0.6282, "step": 6529 }, { "epoch": 0.97, "learning_rate": 3.5715739812869355e-08, "loss": 0.7056, "step": 6530 }, { "epoch": 0.97, "learning_rate": 3.53089658483452e-08, "loss": 0.7592, "step": 6531 }, { "epoch": 0.97, "learning_rate": 3.4904517489431355e-08, "loss": 0.6973, "step": 6532 }, { "epoch": 0.97, "learning_rate": 3.450239483051898e-08, "loss": 0.6881, "step": 6533 }, { "epoch": 0.97, "learning_rate": 3.410259796545745e-08, "loss": 0.6403, "step": 6534 }, { "epoch": 0.97, "learning_rate": 3.370512698755435e-08, "loss": 0.6488, "step": 6535 }, { "epoch": 0.97, "learning_rate": 3.330998198957214e-08, "loss": 0.6016, "step": 6536 }, { "epoch": 0.97, "learning_rate": 3.2917163063732604e-08, "loss": 0.3171, "step": 6537 }, { "epoch": 0.98, "learning_rate": 3.2526670301713524e-08, "loss": 0.742, "step": 6538 }, { "epoch": 0.98, "learning_rate": 3.213850379464867e-08, "loss": 0.6765, "step": 6539 }, { "epoch": 0.98, "learning_rate": 3.175266363313112e-08, "loss": 0.6611, "step": 6540 }, { "epoch": 0.98, "learning_rate": 3.136914990720996e-08, "loss": 0.6837, "step": 6541 }, { "epoch": 0.98, "learning_rate": 3.098796270639026e-08, "loss": 0.7376, "step": 6542 }, { "epoch": 0.98, "learning_rate": 3.0609102119637527e-08, "loss": 0.7413, "step": 6543 }, { "epoch": 0.98, "learning_rate": 3.023256823537102e-08, "loss": 0.6609, "step": 6544 }, { "epoch": 0.98, "learning_rate": 2.985836114146601e-08, "loss": 0.6884, "step": 6545 }, { "epoch": 0.98, "learning_rate": 2.9486480925259296e-08, "loss": 0.7051, "step": 6546 }, { "epoch": 0.98, "learning_rate": 2.911692767354146e-08, "loss": 0.7428, "step": 6547 }, { "epoch": 0.98, "learning_rate": 2.8749701472560178e-08, "loss": 0.659, "step": 6548 }, { "epoch": 0.98, "learning_rate": 2.8384802408019108e-08, "loss": 0.7396, "step": 6549 }, { "epoch": 0.98, "learning_rate": 2.8022230565082352e-08, "loss": 0.7668, "step": 6550 }, { "epoch": 0.98, "learning_rate": 2.766198602836778e-08, "loss": 0.5656, "step": 6551 }, { "epoch": 0.98, "learning_rate": 2.7304068881950364e-08, "loss": 0.7144, "step": 6552 }, { "epoch": 0.98, "learning_rate": 2.6948479209363276e-08, "loss": 0.7121, "step": 6553 }, { "epoch": 0.98, "learning_rate": 2.6595217093593472e-08, "loss": 0.7224, "step": 6554 }, { "epoch": 0.98, "learning_rate": 2.6244282617090555e-08, "loss": 0.6922, "step": 6555 }, { "epoch": 0.98, "learning_rate": 2.5895675861754567e-08, "loss": 0.7388, "step": 6556 }, { "epoch": 0.98, "learning_rate": 2.5549396908944868e-08, "loss": 0.6434, "step": 6557 }, { "epoch": 0.98, "learning_rate": 2.5205445839480146e-08, "loss": 0.6095, "step": 6558 }, { "epoch": 0.98, "learning_rate": 2.4863822733630637e-08, "loss": 0.644, "step": 6559 }, { "epoch": 0.98, "learning_rate": 2.452452767112701e-08, "loss": 0.6216, "step": 6560 }, { "epoch": 0.98, "learning_rate": 2.4187560731154803e-08, "loss": 0.7575, "step": 6561 }, { "epoch": 0.98, "learning_rate": 2.385292199235889e-08, "loss": 0.7157, "step": 6562 }, { "epoch": 0.98, "learning_rate": 2.35206115328368e-08, "loss": 0.696, "step": 6563 }, { "epoch": 0.98, "learning_rate": 2.3190629430145383e-08, "loss": 0.6629, "step": 6564 }, { "epoch": 0.98, "learning_rate": 2.2862975761296368e-08, "loss": 0.7235, "step": 6565 }, { "epoch": 0.98, "learning_rate": 2.2537650602761917e-08, "loss": 0.6658, "step": 6566 }, { "epoch": 0.98, "learning_rate": 2.221465403046463e-08, "loss": 0.6854, "step": 6567 }, { "epoch": 0.98, "learning_rate": 2.1893986119788656e-08, "loss": 0.7134, "step": 6568 }, { "epoch": 0.98, "learning_rate": 2.1575646945571904e-08, "loss": 0.3177, "step": 6569 }, { "epoch": 0.98, "learning_rate": 2.125963658211161e-08, "loss": 0.737, "step": 6570 }, { "epoch": 0.98, "learning_rate": 2.0945955103158776e-08, "loss": 0.3228, "step": 6571 }, { "epoch": 0.98, "learning_rate": 2.0634602581921516e-08, "loss": 0.7501, "step": 6572 }, { "epoch": 0.98, "learning_rate": 2.0325579091063918e-08, "loss": 0.7221, "step": 6573 }, { "epoch": 0.98, "learning_rate": 2.0018884702710515e-08, "loss": 0.7243, "step": 6574 }, { "epoch": 0.98, "learning_rate": 1.971451948843517e-08, "loss": 0.6737, "step": 6575 }, { "epoch": 0.98, "learning_rate": 1.9412483519274383e-08, "loss": 0.7286, "step": 6576 }, { "epoch": 0.98, "learning_rate": 1.911277686571733e-08, "loss": 0.7342, "step": 6577 }, { "epoch": 0.98, "learning_rate": 1.8815399597712503e-08, "loss": 0.6198, "step": 6578 }, { "epoch": 0.98, "learning_rate": 1.8520351784662162e-08, "loss": 0.7393, "step": 6579 }, { "epoch": 0.98, "learning_rate": 1.8227633495426777e-08, "loss": 0.6621, "step": 6580 }, { "epoch": 0.98, "learning_rate": 1.7937244798320597e-08, "loss": 0.6962, "step": 6581 }, { "epoch": 0.98, "learning_rate": 1.7649185761117182e-08, "loss": 0.7409, "step": 6582 }, { "epoch": 0.98, "learning_rate": 1.736345645104498e-08, "loss": 0.6703, "step": 6583 }, { "epoch": 0.98, "learning_rate": 1.7080056934789534e-08, "loss": 0.6609, "step": 6584 }, { "epoch": 0.98, "learning_rate": 1.6798987278490165e-08, "loss": 0.7034, "step": 6585 }, { "epoch": 0.98, "learning_rate": 1.6520247547745506e-08, "loss": 0.7017, "step": 6586 }, { "epoch": 0.98, "learning_rate": 1.6243837807607965e-08, "loss": 0.6301, "step": 6587 }, { "epoch": 0.98, "learning_rate": 1.5969758122588162e-08, "loss": 0.6683, "step": 6588 }, { "epoch": 0.98, "learning_rate": 1.569800855665271e-08, "loss": 0.6825, "step": 6589 }, { "epoch": 0.98, "learning_rate": 1.5428589173223095e-08, "loss": 0.7116, "step": 6590 }, { "epoch": 0.98, "learning_rate": 1.51615000351768e-08, "loss": 0.6611, "step": 6591 }, { "epoch": 0.98, "learning_rate": 1.4896741204849518e-08, "loss": 0.6632, "step": 6592 }, { "epoch": 0.98, "learning_rate": 1.463431274403182e-08, "loss": 0.3042, "step": 6593 }, { "epoch": 0.98, "learning_rate": 1.4374214713970269e-08, "loss": 0.6948, "step": 6594 }, { "epoch": 0.98, "learning_rate": 1.411644717536742e-08, "loss": 0.7229, "step": 6595 }, { "epoch": 0.98, "learning_rate": 1.3861010188382929e-08, "loss": 0.6463, "step": 6596 }, { "epoch": 0.98, "learning_rate": 1.3607903812631328e-08, "loss": 0.7344, "step": 6597 }, { "epoch": 0.98, "learning_rate": 1.3357128107183148e-08, "loss": 0.7081, "step": 6598 }, { "epoch": 0.98, "learning_rate": 1.3108683130566013e-08, "loss": 0.6667, "step": 6599 }, { "epoch": 0.98, "learning_rate": 1.2862568940764652e-08, "loss": 0.5675, "step": 6600 }, { "epoch": 0.98, "learning_rate": 1.2618785595216454e-08, "loss": 0.6699, "step": 6601 }, { "epoch": 0.98, "learning_rate": 1.2377333150817016e-08, "loss": 0.2928, "step": 6602 }, { "epoch": 0.98, "learning_rate": 1.2138211663917932e-08, "loss": 0.6573, "step": 6603 }, { "epoch": 0.98, "learning_rate": 1.1901421190327888e-08, "loss": 0.7099, "step": 6604 }, { "epoch": 0.99, "learning_rate": 1.1666961785307129e-08, "loss": 0.6766, "step": 6605 }, { "epoch": 0.99, "learning_rate": 1.1434833503577436e-08, "loss": 0.7678, "step": 6606 }, { "epoch": 0.99, "learning_rate": 1.1205036399312142e-08, "loss": 0.7365, "step": 6607 }, { "epoch": 0.99, "learning_rate": 1.09775705261439e-08, "loss": 0.6742, "step": 6608 }, { "epoch": 0.99, "learning_rate": 1.0752435937159133e-08, "loss": 0.7347, "step": 6609 }, { "epoch": 0.99, "learning_rate": 1.0529632684900259e-08, "loss": 0.6151, "step": 6610 }, { "epoch": 0.99, "learning_rate": 1.0309160821367903e-08, "loss": 0.6187, "step": 6611 }, { "epoch": 0.99, "learning_rate": 1.009102039801424e-08, "loss": 0.7058, "step": 6612 }, { "epoch": 0.99, "learning_rate": 9.875211465751877e-09, "loss": 0.6369, "step": 6613 }, { "epoch": 0.99, "learning_rate": 9.661734074946083e-09, "loss": 0.654, "step": 6614 }, { "epoch": 0.99, "learning_rate": 9.450588275420336e-09, "loss": 0.6859, "step": 6615 }, { "epoch": 0.99, "learning_rate": 9.241774116450775e-09, "loss": 0.6554, "step": 6616 }, { "epoch": 0.99, "learning_rate": 9.03529164677397e-09, "loss": 0.6539, "step": 6617 }, { "epoch": 0.99, "learning_rate": 8.831140914579151e-09, "loss": 0.7238, "step": 6618 }, { "epoch": 0.99, "learning_rate": 8.629321967510429e-09, "loss": 0.647, "step": 6619 }, { "epoch": 0.99, "learning_rate": 8.429834852671237e-09, "loss": 0.7375, "step": 6620 }, { "epoch": 0.99, "learning_rate": 8.232679616616557e-09, "loss": 0.7416, "step": 6621 }, { "epoch": 0.99, "learning_rate": 8.037856305361802e-09, "loss": 0.7173, "step": 6622 }, { "epoch": 0.99, "learning_rate": 7.845364964373936e-09, "loss": 0.7376, "step": 6623 }, { "epoch": 0.99, "learning_rate": 7.655205638578135e-09, "loss": 0.7022, "step": 6624 }, { "epoch": 0.99, "learning_rate": 7.46737837235445e-09, "loss": 0.6476, "step": 6625 }, { "epoch": 0.99, "learning_rate": 7.281883209538931e-09, "loss": 0.3203, "step": 6626 }, { "epoch": 0.99, "learning_rate": 7.0987201934236136e-09, "loss": 0.8059, "step": 6627 }, { "epoch": 0.99, "learning_rate": 6.917889366756525e-09, "loss": 0.6831, "step": 6628 }, { "epoch": 0.99, "learning_rate": 6.7393907717394625e-09, "loss": 0.3244, "step": 6629 }, { "epoch": 0.99, "learning_rate": 6.563224450031325e-09, "loss": 0.6543, "step": 6630 }, { "epoch": 0.99, "learning_rate": 6.3893904427481114e-09, "loss": 0.7045, "step": 6631 }, { "epoch": 0.99, "learning_rate": 6.2178887904584815e-09, "loss": 0.7078, "step": 6632 }, { "epoch": 0.99, "learning_rate": 6.048719533188197e-09, "loss": 0.666, "step": 6633 }, { "epoch": 0.99, "learning_rate": 5.8818827104201174e-09, "loss": 0.6996, "step": 6634 }, { "epoch": 0.99, "learning_rate": 5.717378361090875e-09, "loss": 0.702, "step": 6635 }, { "epoch": 0.99, "learning_rate": 5.555206523591983e-09, "loss": 0.3158, "step": 6636 }, { "epoch": 0.99, "learning_rate": 5.395367235773164e-09, "loss": 0.6311, "step": 6637 }, { "epoch": 0.99, "learning_rate": 5.237860534939021e-09, "loss": 0.3139, "step": 6638 }, { "epoch": 0.99, "learning_rate": 5.08268645784793e-09, "loss": 0.6785, "step": 6639 }, { "epoch": 0.99, "learning_rate": 4.9298450407153645e-09, "loss": 0.7318, "step": 6640 }, { "epoch": 0.99, "learning_rate": 4.779336319211681e-09, "loss": 0.7333, "step": 6641 }, { "epoch": 0.99, "learning_rate": 4.631160328465445e-09, "loss": 0.5929, "step": 6642 }, { "epoch": 0.99, "learning_rate": 4.485317103055664e-09, "loss": 0.6498, "step": 6643 }, { "epoch": 0.99, "learning_rate": 4.341806677022886e-09, "loss": 0.5515, "step": 6644 }, { "epoch": 0.99, "learning_rate": 4.200629083858099e-09, "loss": 0.7567, "step": 6645 }, { "epoch": 0.99, "learning_rate": 4.0617843565105005e-09, "loss": 0.6791, "step": 6646 }, { "epoch": 0.99, "learning_rate": 3.925272527384172e-09, "loss": 0.691, "step": 6647 }, { "epoch": 0.99, "learning_rate": 3.791093628339182e-09, "loss": 0.6377, "step": 6648 }, { "epoch": 0.99, "learning_rate": 3.659247690691592e-09, "loss": 0.8093, "step": 6649 }, { "epoch": 0.99, "learning_rate": 3.529734745210123e-09, "loss": 0.7121, "step": 6650 }, { "epoch": 0.99, "learning_rate": 3.402554822123927e-09, "loss": 0.6951, "step": 6651 }, { "epoch": 0.99, "learning_rate": 3.2777079511125964e-09, "loss": 0.6666, "step": 6652 }, { "epoch": 0.99, "learning_rate": 3.1551941613139347e-09, "loss": 0.7934, "step": 6653 }, { "epoch": 0.99, "learning_rate": 3.035013481321736e-09, "loss": 0.6751, "step": 6654 }, { "epoch": 0.99, "learning_rate": 2.9171659391824534e-09, "loss": 0.6941, "step": 6655 }, { "epoch": 0.99, "learning_rate": 2.8016515624018637e-09, "loss": 0.6878, "step": 6656 }, { "epoch": 0.99, "learning_rate": 2.688470377938401e-09, "loss": 0.7294, "step": 6657 }, { "epoch": 0.99, "learning_rate": 2.577622412206493e-09, "loss": 0.6853, "step": 6658 }, { "epoch": 0.99, "learning_rate": 2.4691076910776656e-09, "loss": 0.7723, "step": 6659 }, { "epoch": 0.99, "learning_rate": 2.3629262398761066e-09, "loss": 0.7366, "step": 6660 }, { "epoch": 0.99, "learning_rate": 2.259078083383104e-09, "loss": 0.6639, "step": 6661 }, { "epoch": 0.99, "learning_rate": 2.1575632458359363e-09, "loss": 0.7119, "step": 6662 }, { "epoch": 0.99, "learning_rate": 2.0583817509256534e-09, "loss": 0.6971, "step": 6663 }, { "epoch": 0.99, "learning_rate": 1.9615336218004045e-09, "loss": 0.6564, "step": 6664 }, { "epoch": 0.99, "learning_rate": 1.8670188810632207e-09, "loss": 0.6282, "step": 6665 }, { "epoch": 0.99, "learning_rate": 1.774837550773123e-09, "loss": 0.6805, "step": 6666 }, { "epoch": 0.99, "learning_rate": 1.684989652441793e-09, "loss": 0.7115, "step": 6667 }, { "epoch": 0.99, "learning_rate": 1.597475207039123e-09, "loss": 0.7496, "step": 6668 }, { "epoch": 0.99, "learning_rate": 1.5122942349898862e-09, "loss": 0.7505, "step": 6669 }, { "epoch": 0.99, "learning_rate": 1.4294467561737358e-09, "loss": 0.6789, "step": 6670 }, { "epoch": 0.99, "learning_rate": 1.3489327899274263e-09, "loss": 0.7107, "step": 6671 }, { "epoch": 1.0, "learning_rate": 1.270752355039262e-09, "loss": 0.7125, "step": 6672 }, { "epoch": 1.0, "learning_rate": 1.1949054697568685e-09, "loss": 0.6394, "step": 6673 }, { "epoch": 1.0, "learning_rate": 1.1213921517816418e-09, "loss": 0.5715, "step": 6674 }, { "epoch": 1.0, "learning_rate": 1.0502124182709684e-09, "loss": 0.7025, "step": 6675 }, { "epoch": 1.0, "learning_rate": 9.813662858348949e-10, "loss": 0.7194, "step": 6676 }, { "epoch": 1.0, "learning_rate": 9.148537705438998e-10, "loss": 0.6995, "step": 6677 }, { "epoch": 1.0, "learning_rate": 8.506748879189008e-10, "loss": 0.7543, "step": 6678 }, { "epoch": 1.0, "learning_rate": 7.888296529390272e-10, "loss": 0.7493, "step": 6679 }, { "epoch": 1.0, "learning_rate": 7.293180800382882e-10, "loss": 0.6801, "step": 6680 }, { "epoch": 1.0, "learning_rate": 6.72140183105574e-10, "loss": 0.6368, "step": 6681 }, { "epoch": 1.0, "learning_rate": 6.172959754857655e-10, "loss": 0.7086, "step": 6682 }, { "epoch": 1.0, "learning_rate": 5.647854699775135e-10, "loss": 0.7549, "step": 6683 }, { "epoch": 1.0, "learning_rate": 5.146086788365701e-10, "loss": 0.7345, "step": 6684 }, { "epoch": 1.0, "learning_rate": 4.667656137724574e-10, "loss": 0.6442, "step": 6685 }, { "epoch": 1.0, "learning_rate": 4.21256285952909e-10, "loss": 0.6312, "step": 6686 }, { "epoch": 1.0, "learning_rate": 3.7808070599831827e-10, "loss": 0.6268, "step": 6687 }, { "epoch": 1.0, "learning_rate": 3.372388839850693e-10, "loss": 0.7593, "step": 6688 }, { "epoch": 1.0, "learning_rate": 2.9873082944442687e-10, "loss": 0.6218, "step": 6689 }, { "epoch": 1.0, "learning_rate": 2.6255655136475656e-10, "loss": 0.6798, "step": 6690 }, { "epoch": 1.0, "learning_rate": 2.287160581881942e-10, "loss": 0.6975, "step": 6691 }, { "epoch": 1.0, "learning_rate": 1.972093578117562e-10, "loss": 0.68, "step": 6692 }, { "epoch": 1.0, "learning_rate": 1.680364575895599e-10, "loss": 0.6611, "step": 6693 }, { "epoch": 1.0, "learning_rate": 1.4119736432949284e-10, "loss": 0.7082, "step": 6694 }, { "epoch": 1.0, "learning_rate": 1.1669208429543332e-10, "loss": 0.7157, "step": 6695 }, { "epoch": 1.0, "learning_rate": 9.452062320725042e-11, "loss": 0.7164, "step": 6696 }, { "epoch": 1.0, "learning_rate": 7.468298623858339e-11, "loss": 0.7377, "step": 6697 }, { "epoch": 1.0, "learning_rate": 5.717917802017248e-11, "loss": 0.6964, "step": 6698 }, { "epoch": 1.0, "learning_rate": 4.2009202635417965e-11, "loss": 0.6909, "step": 6699 }, { "epoch": 1.0, "learning_rate": 2.917306362704153e-11, "loss": 0.6777, "step": 6700 }, { "epoch": 1.0, "learning_rate": 1.8670763989314665e-11, "loss": 0.7233, "step": 6701 }, { "epoch": 1.0, "learning_rate": 1.0502306173609812e-11, "loss": 0.6724, "step": 6702 }, { "epoch": 1.0, "learning_rate": 4.667692087290121e-12, "loss": 0.6868, "step": 6703 }, { "epoch": 1.0, "learning_rate": 1.166923089268579e-12, "loss": 0.3272, "step": 6704 }, { "epoch": 1.0, "learning_rate": 0.0, "loss": 0.3398, "step": 6705 }, { "epoch": 1.0, "step": 6705, "total_flos": 1.8020705052896788e+19, "train_loss": 0.7490487716919801, "train_runtime": 49633.9829, "train_samples_per_second": 17.291, "train_steps_per_second": 0.135 } ], "logging_steps": 1.0, "max_steps": 6705, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 1.8020705052896788e+19, "train_batch_size": 2, "trial_name": null, "trial_params": null }