{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9997367381861261, "eval_steps": 500, "global_step": 7595, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.385964912280702e-07, "loss": 17.4457, "step": 1 }, { "epoch": 0.0, "learning_rate": 8.771929824561404e-07, "loss": 19.6372, "step": 2 }, { "epoch": 0.0, "learning_rate": 1.3157894736842106e-06, "loss": 20.4378, "step": 3 }, { "epoch": 0.0, "learning_rate": 1.7543859649122807e-06, "loss": 17.4259, "step": 4 }, { "epoch": 0.0, "learning_rate": 2.1929824561403507e-06, "loss": 0.2024, "step": 5 }, { "epoch": 0.0, "learning_rate": 2.631578947368421e-06, "loss": 0.1783, "step": 6 }, { "epoch": 0.0, "learning_rate": 3.070175438596491e-06, "loss": 18.0804, "step": 7 }, { "epoch": 0.0, "learning_rate": 3.5087719298245615e-06, "loss": 18.5845, "step": 8 }, { "epoch": 0.0, "learning_rate": 3.9473684210526315e-06, "loss": 16.7549, "step": 9 }, { "epoch": 0.0, "learning_rate": 4.3859649122807014e-06, "loss": 0.2037, "step": 10 }, { "epoch": 0.0, "learning_rate": 4.824561403508772e-06, "loss": 20.9427, "step": 11 }, { "epoch": 0.0, "learning_rate": 5.263157894736842e-06, "loss": 17.4869, "step": 12 }, { "epoch": 0.0, "learning_rate": 5.701754385964912e-06, "loss": 18.1262, "step": 13 }, { "epoch": 0.0, "learning_rate": 6.140350877192982e-06, "loss": 16.8534, "step": 14 }, { "epoch": 0.0, "learning_rate": 6.578947368421053e-06, "loss": 17.7028, "step": 15 }, { "epoch": 0.0, "learning_rate": 7.017543859649123e-06, "loss": 18.1092, "step": 16 }, { "epoch": 0.0, "learning_rate": 7.456140350877193e-06, "loss": 18.1837, "step": 17 }, { "epoch": 0.0, "learning_rate": 7.894736842105263e-06, "loss": 20.6125, "step": 18 }, { "epoch": 0.0, "learning_rate": 8.333333333333334e-06, "loss": 0.1979, "step": 19 }, { "epoch": 0.0, "learning_rate": 8.771929824561403e-06, "loss": 17.6999, "step": 20 }, { "epoch": 0.0, "learning_rate": 9.210526315789474e-06, "loss": 17.7615, "step": 21 }, { "epoch": 0.0, "learning_rate": 9.649122807017545e-06, "loss": 18.4227, "step": 22 }, { "epoch": 0.0, "learning_rate": 1.0087719298245614e-05, "loss": 0.1976, "step": 23 }, { "epoch": 0.0, "learning_rate": 1.0526315789473684e-05, "loss": 17.7793, "step": 24 }, { "epoch": 0.0, "learning_rate": 1.0964912280701754e-05, "loss": 17.1499, "step": 25 }, { "epoch": 0.0, "learning_rate": 1.1403508771929824e-05, "loss": 18.8424, "step": 26 }, { "epoch": 0.0, "learning_rate": 1.1842105263157895e-05, "loss": 18.9542, "step": 27 }, { "epoch": 0.0, "learning_rate": 1.2280701754385964e-05, "loss": 18.1339, "step": 28 }, { "epoch": 0.0, "learning_rate": 1.2719298245614037e-05, "loss": 18.081, "step": 29 }, { "epoch": 0.0, "learning_rate": 1.3157894736842106e-05, "loss": 18.2065, "step": 30 }, { "epoch": 0.0, "learning_rate": 1.3596491228070177e-05, "loss": 17.5193, "step": 31 }, { "epoch": 0.0, "learning_rate": 1.4035087719298246e-05, "loss": 19.2107, "step": 32 }, { "epoch": 0.0, "learning_rate": 1.4473684210526317e-05, "loss": 17.8098, "step": 33 }, { "epoch": 0.0, "learning_rate": 1.4912280701754386e-05, "loss": 19.7772, "step": 34 }, { "epoch": 0.0, "learning_rate": 1.5350877192982457e-05, "loss": 18.0116, "step": 35 }, { "epoch": 0.0, "learning_rate": 1.5789473684210526e-05, "loss": 17.7046, "step": 36 }, { "epoch": 0.0, "learning_rate": 1.62280701754386e-05, "loss": 0.1783, "step": 37 }, { "epoch": 0.01, "learning_rate": 1.6666666666666667e-05, "loss": 18.3036, "step": 38 }, { "epoch": 0.01, "learning_rate": 1.7105263157894737e-05, "loss": 20.6389, "step": 39 }, { "epoch": 0.01, "learning_rate": 1.7543859649122806e-05, "loss": 18.1255, "step": 40 }, { "epoch": 0.01, "learning_rate": 1.7982456140350878e-05, "loss": 17.1216, "step": 41 }, { "epoch": 0.01, "learning_rate": 1.8421052631578947e-05, "loss": 18.1548, "step": 42 }, { "epoch": 0.01, "learning_rate": 1.885964912280702e-05, "loss": 18.1251, "step": 43 }, { "epoch": 0.01, "learning_rate": 1.929824561403509e-05, "loss": 17.4927, "step": 44 }, { "epoch": 0.01, "learning_rate": 1.9736842105263158e-05, "loss": 0.15, "step": 45 }, { "epoch": 0.01, "learning_rate": 2.0175438596491227e-05, "loss": 18.2657, "step": 46 }, { "epoch": 0.01, "learning_rate": 2.06140350877193e-05, "loss": 0.1456, "step": 47 }, { "epoch": 0.01, "learning_rate": 2.105263157894737e-05, "loss": 20.0656, "step": 48 }, { "epoch": 0.01, "learning_rate": 2.149122807017544e-05, "loss": 0.1207, "step": 49 }, { "epoch": 0.01, "learning_rate": 2.1929824561403507e-05, "loss": 0.0976, "step": 50 }, { "epoch": 0.01, "learning_rate": 2.236842105263158e-05, "loss": 16.9383, "step": 51 }, { "epoch": 0.01, "learning_rate": 2.280701754385965e-05, "loss": 0.0893, "step": 52 }, { "epoch": 0.01, "learning_rate": 2.324561403508772e-05, "loss": 19.2459, "step": 53 }, { "epoch": 0.01, "learning_rate": 2.368421052631579e-05, "loss": 18.6584, "step": 54 }, { "epoch": 0.01, "learning_rate": 2.412280701754386e-05, "loss": 18.4037, "step": 55 }, { "epoch": 0.01, "learning_rate": 2.456140350877193e-05, "loss": 20.0339, "step": 56 }, { "epoch": 0.01, "learning_rate": 2.5e-05, "loss": 16.7405, "step": 57 }, { "epoch": 0.01, "learning_rate": 2.5438596491228074e-05, "loss": 17.89, "step": 58 }, { "epoch": 0.01, "learning_rate": 2.5877192982456143e-05, "loss": 18.0934, "step": 59 }, { "epoch": 0.01, "learning_rate": 2.6315789473684212e-05, "loss": 19.5436, "step": 60 }, { "epoch": 0.01, "learning_rate": 2.675438596491228e-05, "loss": 18.5957, "step": 61 }, { "epoch": 0.01, "learning_rate": 2.7192982456140354e-05, "loss": 19.2535, "step": 62 }, { "epoch": 0.01, "learning_rate": 2.7631578947368426e-05, "loss": 16.7051, "step": 63 }, { "epoch": 0.01, "learning_rate": 2.8070175438596492e-05, "loss": 0.0198, "step": 64 }, { "epoch": 0.01, "learning_rate": 2.850877192982456e-05, "loss": 18.3876, "step": 65 }, { "epoch": 0.01, "learning_rate": 2.8947368421052634e-05, "loss": 0.0144, "step": 66 }, { "epoch": 0.01, "learning_rate": 2.9385964912280706e-05, "loss": 18.3494, "step": 67 }, { "epoch": 0.01, "learning_rate": 2.9824561403508772e-05, "loss": 16.8394, "step": 68 }, { "epoch": 0.01, "learning_rate": 3.0263157894736844e-05, "loss": 18.4876, "step": 69 }, { "epoch": 0.01, "learning_rate": 3.0701754385964913e-05, "loss": 18.228, "step": 70 }, { "epoch": 0.01, "learning_rate": 3.1140350877192986e-05, "loss": 0.0083, "step": 71 }, { "epoch": 0.01, "learning_rate": 3.157894736842105e-05, "loss": 0.0065, "step": 72 }, { "epoch": 0.01, "learning_rate": 3.2017543859649124e-05, "loss": 0.007, "step": 73 }, { "epoch": 0.01, "learning_rate": 3.24561403508772e-05, "loss": 18.4196, "step": 74 }, { "epoch": 0.01, "learning_rate": 3.289473684210527e-05, "loss": 19.3716, "step": 75 }, { "epoch": 0.01, "learning_rate": 3.3333333333333335e-05, "loss": 16.0267, "step": 76 }, { "epoch": 0.01, "learning_rate": 3.377192982456141e-05, "loss": 17.4156, "step": 77 }, { "epoch": 0.01, "learning_rate": 3.421052631578947e-05, "loss": 20.7511, "step": 78 }, { "epoch": 0.01, "learning_rate": 3.4649122807017546e-05, "loss": 0.0029, "step": 79 }, { "epoch": 0.01, "learning_rate": 3.508771929824561e-05, "loss": 16.8029, "step": 80 }, { "epoch": 0.01, "learning_rate": 3.5526315789473684e-05, "loss": 0.0033, "step": 81 }, { "epoch": 0.01, "learning_rate": 3.5964912280701756e-05, "loss": 20.6772, "step": 82 }, { "epoch": 0.01, "learning_rate": 3.640350877192983e-05, "loss": 17.6538, "step": 83 }, { "epoch": 0.01, "learning_rate": 3.6842105263157895e-05, "loss": 18.4643, "step": 84 }, { "epoch": 0.01, "learning_rate": 3.728070175438597e-05, "loss": 19.36, "step": 85 }, { "epoch": 0.01, "learning_rate": 3.771929824561404e-05, "loss": 19.6249, "step": 86 }, { "epoch": 0.01, "learning_rate": 3.815789473684211e-05, "loss": 19.0989, "step": 87 }, { "epoch": 0.01, "learning_rate": 3.859649122807018e-05, "loss": 17.8947, "step": 88 }, { "epoch": 0.01, "learning_rate": 3.9035087719298244e-05, "loss": 17.3486, "step": 89 }, { "epoch": 0.01, "learning_rate": 3.9473684210526316e-05, "loss": 17.7638, "step": 90 }, { "epoch": 0.01, "learning_rate": 3.991228070175439e-05, "loss": 17.2601, "step": 91 }, { "epoch": 0.01, "learning_rate": 4.0350877192982455e-05, "loss": 20.0061, "step": 92 }, { "epoch": 0.01, "learning_rate": 4.078947368421053e-05, "loss": 19.4714, "step": 93 }, { "epoch": 0.01, "learning_rate": 4.12280701754386e-05, "loss": 0.0019, "step": 94 }, { "epoch": 0.01, "learning_rate": 4.166666666666667e-05, "loss": 0.002, "step": 95 }, { "epoch": 0.01, "learning_rate": 4.210526315789474e-05, "loss": 17.1305, "step": 96 }, { "epoch": 0.01, "learning_rate": 4.254385964912281e-05, "loss": 17.0515, "step": 97 }, { "epoch": 0.01, "learning_rate": 4.298245614035088e-05, "loss": 17.0471, "step": 98 }, { "epoch": 0.01, "learning_rate": 4.342105263157895e-05, "loss": 0.0033, "step": 99 }, { "epoch": 0.01, "learning_rate": 4.3859649122807014e-05, "loss": 0.0027, "step": 100 }, { "epoch": 0.01, "learning_rate": 4.429824561403509e-05, "loss": 19.6215, "step": 101 }, { "epoch": 0.01, "learning_rate": 4.473684210526316e-05, "loss": 0.0014, "step": 102 }, { "epoch": 0.01, "learning_rate": 4.517543859649123e-05, "loss": 18.5407, "step": 103 }, { "epoch": 0.01, "learning_rate": 4.56140350877193e-05, "loss": 17.8796, "step": 104 }, { "epoch": 0.01, "learning_rate": 4.605263157894737e-05, "loss": 17.5804, "step": 105 }, { "epoch": 0.01, "learning_rate": 4.649122807017544e-05, "loss": 17.514, "step": 106 }, { "epoch": 0.01, "learning_rate": 4.6929824561403515e-05, "loss": 0.0018, "step": 107 }, { "epoch": 0.01, "learning_rate": 4.736842105263158e-05, "loss": 18.9136, "step": 108 }, { "epoch": 0.01, "learning_rate": 4.780701754385965e-05, "loss": 18.0742, "step": 109 }, { "epoch": 0.01, "learning_rate": 4.824561403508772e-05, "loss": 19.5746, "step": 110 }, { "epoch": 0.01, "learning_rate": 4.868421052631579e-05, "loss": 0.0013, "step": 111 }, { "epoch": 0.01, "learning_rate": 4.912280701754386e-05, "loss": 18.6992, "step": 112 }, { "epoch": 0.01, "learning_rate": 4.956140350877193e-05, "loss": 18.8273, "step": 113 }, { "epoch": 0.02, "learning_rate": 5e-05, "loss": 18.5728, "step": 114 }, { "epoch": 0.02, "learning_rate": 5.0438596491228075e-05, "loss": 17.4343, "step": 115 }, { "epoch": 0.02, "learning_rate": 5.087719298245615e-05, "loss": 18.8997, "step": 116 }, { "epoch": 0.02, "learning_rate": 5.131578947368422e-05, "loss": 19.0163, "step": 117 }, { "epoch": 0.02, "learning_rate": 5.1754385964912286e-05, "loss": 0.0007, "step": 118 }, { "epoch": 0.02, "learning_rate": 5.219298245614035e-05, "loss": 0.001, "step": 119 }, { "epoch": 0.02, "learning_rate": 5.2631578947368424e-05, "loss": 0.0009, "step": 120 }, { "epoch": 0.02, "learning_rate": 5.307017543859649e-05, "loss": 18.614, "step": 121 }, { "epoch": 0.02, "learning_rate": 5.350877192982456e-05, "loss": 17.5681, "step": 122 }, { "epoch": 0.02, "learning_rate": 5.3947368421052635e-05, "loss": 21.766, "step": 123 }, { "epoch": 0.02, "learning_rate": 5.438596491228071e-05, "loss": 16.2828, "step": 124 }, { "epoch": 0.02, "learning_rate": 5.482456140350878e-05, "loss": 20.2375, "step": 125 }, { "epoch": 0.02, "learning_rate": 5.526315789473685e-05, "loss": 19.0215, "step": 126 }, { "epoch": 0.02, "learning_rate": 5.570175438596491e-05, "loss": 0.0012, "step": 127 }, { "epoch": 0.02, "learning_rate": 5.6140350877192984e-05, "loss": 17.5922, "step": 128 }, { "epoch": 0.02, "learning_rate": 5.6578947368421056e-05, "loss": 0.0006, "step": 129 }, { "epoch": 0.02, "learning_rate": 5.701754385964912e-05, "loss": 0.001, "step": 130 }, { "epoch": 0.02, "learning_rate": 5.7456140350877194e-05, "loss": 18.8974, "step": 131 }, { "epoch": 0.02, "learning_rate": 5.789473684210527e-05, "loss": 18.5747, "step": 132 }, { "epoch": 0.02, "learning_rate": 5.833333333333334e-05, "loss": 17.6081, "step": 133 }, { "epoch": 0.02, "learning_rate": 5.877192982456141e-05, "loss": 18.4412, "step": 134 }, { "epoch": 0.02, "learning_rate": 5.921052631578947e-05, "loss": 18.772, "step": 135 }, { "epoch": 0.02, "learning_rate": 5.9649122807017544e-05, "loss": 17.6794, "step": 136 }, { "epoch": 0.02, "learning_rate": 6.0087719298245616e-05, "loss": 18.677, "step": 137 }, { "epoch": 0.02, "learning_rate": 6.052631578947369e-05, "loss": 18.7409, "step": 138 }, { "epoch": 0.02, "learning_rate": 6.096491228070176e-05, "loss": 17.1494, "step": 139 }, { "epoch": 0.02, "learning_rate": 6.140350877192983e-05, "loss": 18.3274, "step": 140 }, { "epoch": 0.02, "learning_rate": 6.18421052631579e-05, "loss": 0.0009, "step": 141 }, { "epoch": 0.02, "learning_rate": 6.228070175438597e-05, "loss": 18.8531, "step": 142 }, { "epoch": 0.02, "learning_rate": 6.271929824561403e-05, "loss": 17.1071, "step": 143 }, { "epoch": 0.02, "learning_rate": 6.31578947368421e-05, "loss": 19.6889, "step": 144 }, { "epoch": 0.02, "learning_rate": 6.359649122807018e-05, "loss": 0.0009, "step": 145 }, { "epoch": 0.02, "learning_rate": 6.403508771929825e-05, "loss": 19.6626, "step": 146 }, { "epoch": 0.02, "learning_rate": 6.447368421052632e-05, "loss": 0.0013, "step": 147 }, { "epoch": 0.02, "learning_rate": 6.49122807017544e-05, "loss": 19.7487, "step": 148 }, { "epoch": 0.02, "learning_rate": 6.535087719298247e-05, "loss": 18.2155, "step": 149 }, { "epoch": 0.02, "learning_rate": 6.578947368421054e-05, "loss": 18.7601, "step": 150 }, { "epoch": 0.02, "learning_rate": 6.62280701754386e-05, "loss": 19.824, "step": 151 }, { "epoch": 0.02, "learning_rate": 6.666666666666667e-05, "loss": 17.0909, "step": 152 }, { "epoch": 0.02, "learning_rate": 6.710526315789474e-05, "loss": 0.0011, "step": 153 }, { "epoch": 0.02, "learning_rate": 6.754385964912281e-05, "loss": 0.0006, "step": 154 }, { "epoch": 0.02, "learning_rate": 6.798245614035089e-05, "loss": 0.0005, "step": 155 }, { "epoch": 0.02, "learning_rate": 6.842105263157895e-05, "loss": 0.0011, "step": 156 }, { "epoch": 0.02, "learning_rate": 6.885964912280702e-05, "loss": 19.453, "step": 157 }, { "epoch": 0.02, "learning_rate": 6.929824561403509e-05, "loss": 19.1868, "step": 158 }, { "epoch": 0.02, "learning_rate": 6.973684210526315e-05, "loss": 18.8901, "step": 159 }, { "epoch": 0.02, "learning_rate": 7.017543859649122e-05, "loss": 17.3939, "step": 160 }, { "epoch": 0.02, "learning_rate": 7.06140350877193e-05, "loss": 0.0011, "step": 161 }, { "epoch": 0.02, "learning_rate": 7.105263157894737e-05, "loss": 18.6771, "step": 162 }, { "epoch": 0.02, "learning_rate": 7.149122807017544e-05, "loss": 20.2617, "step": 163 }, { "epoch": 0.02, "learning_rate": 7.192982456140351e-05, "loss": 20.0631, "step": 164 }, { "epoch": 0.02, "learning_rate": 7.236842105263159e-05, "loss": 0.0009, "step": 165 }, { "epoch": 0.02, "learning_rate": 7.280701754385966e-05, "loss": 17.391, "step": 166 }, { "epoch": 0.02, "learning_rate": 7.324561403508772e-05, "loss": 16.2182, "step": 167 }, { "epoch": 0.02, "learning_rate": 7.368421052631579e-05, "loss": 0.0005, "step": 168 }, { "epoch": 0.02, "learning_rate": 7.412280701754386e-05, "loss": 18.6713, "step": 169 }, { "epoch": 0.02, "learning_rate": 7.456140350877193e-05, "loss": 0.001, "step": 170 }, { "epoch": 0.02, "learning_rate": 7.500000000000001e-05, "loss": 0.0015, "step": 171 }, { "epoch": 0.02, "learning_rate": 7.543859649122808e-05, "loss": 18.683, "step": 172 }, { "epoch": 0.02, "learning_rate": 7.587719298245615e-05, "loss": 18.5308, "step": 173 }, { "epoch": 0.02, "learning_rate": 7.631578947368422e-05, "loss": 18.5156, "step": 174 }, { "epoch": 0.02, "learning_rate": 7.675438596491228e-05, "loss": 0.0008, "step": 175 }, { "epoch": 0.02, "learning_rate": 7.719298245614036e-05, "loss": 17.5356, "step": 176 }, { "epoch": 0.02, "learning_rate": 7.763157894736843e-05, "loss": 0.0004, "step": 177 }, { "epoch": 0.02, "learning_rate": 7.807017543859649e-05, "loss": 18.4018, "step": 178 }, { "epoch": 0.02, "learning_rate": 7.850877192982456e-05, "loss": 17.4749, "step": 179 }, { "epoch": 0.02, "learning_rate": 7.894736842105263e-05, "loss": 17.9338, "step": 180 }, { "epoch": 0.02, "learning_rate": 7.93859649122807e-05, "loss": 17.7575, "step": 181 }, { "epoch": 0.02, "learning_rate": 7.982456140350878e-05, "loss": 18.9836, "step": 182 }, { "epoch": 0.02, "learning_rate": 8.026315789473685e-05, "loss": 18.0773, "step": 183 }, { "epoch": 0.02, "learning_rate": 8.070175438596491e-05, "loss": 20.0952, "step": 184 }, { "epoch": 0.02, "learning_rate": 8.114035087719298e-05, "loss": 19.0409, "step": 185 }, { "epoch": 0.02, "learning_rate": 8.157894736842105e-05, "loss": 19.2275, "step": 186 }, { "epoch": 0.02, "learning_rate": 8.201754385964913e-05, "loss": 17.6435, "step": 187 }, { "epoch": 0.02, "learning_rate": 8.24561403508772e-05, "loss": 16.0377, "step": 188 }, { "epoch": 0.02, "learning_rate": 8.289473684210527e-05, "loss": 0.0009, "step": 189 }, { "epoch": 0.03, "learning_rate": 8.333333333333334e-05, "loss": 0.0009, "step": 190 }, { "epoch": 0.03, "learning_rate": 8.377192982456142e-05, "loss": 0.0012, "step": 191 }, { "epoch": 0.03, "learning_rate": 8.421052631578948e-05, "loss": 18.1483, "step": 192 }, { "epoch": 0.03, "learning_rate": 8.464912280701755e-05, "loss": 20.1452, "step": 193 }, { "epoch": 0.03, "learning_rate": 8.508771929824562e-05, "loss": 18.7991, "step": 194 }, { "epoch": 0.03, "learning_rate": 8.552631578947369e-05, "loss": 17.4872, "step": 195 }, { "epoch": 0.03, "learning_rate": 8.596491228070177e-05, "loss": 0.0004, "step": 196 }, { "epoch": 0.03, "learning_rate": 8.640350877192982e-05, "loss": 19.2762, "step": 197 }, { "epoch": 0.03, "learning_rate": 8.68421052631579e-05, "loss": 19.2677, "step": 198 }, { "epoch": 0.03, "learning_rate": 8.728070175438597e-05, "loss": 17.3723, "step": 199 }, { "epoch": 0.03, "learning_rate": 8.771929824561403e-05, "loss": 16.6873, "step": 200 }, { "epoch": 0.03, "learning_rate": 8.81578947368421e-05, "loss": 17.2452, "step": 201 }, { "epoch": 0.03, "learning_rate": 8.859649122807017e-05, "loss": 17.2913, "step": 202 }, { "epoch": 0.03, "learning_rate": 8.903508771929825e-05, "loss": 16.8744, "step": 203 }, { "epoch": 0.03, "learning_rate": 8.947368421052632e-05, "loss": 18.3259, "step": 204 }, { "epoch": 0.03, "learning_rate": 8.991228070175439e-05, "loss": 19.5572, "step": 205 }, { "epoch": 0.03, "learning_rate": 9.035087719298246e-05, "loss": 18.0914, "step": 206 }, { "epoch": 0.03, "learning_rate": 9.078947368421054e-05, "loss": 0.0002, "step": 207 }, { "epoch": 0.03, "learning_rate": 9.12280701754386e-05, "loss": 17.7028, "step": 208 }, { "epoch": 0.03, "learning_rate": 9.166666666666667e-05, "loss": 18.5841, "step": 209 }, { "epoch": 0.03, "learning_rate": 9.210526315789474e-05, "loss": 17.7342, "step": 210 }, { "epoch": 0.03, "learning_rate": 9.254385964912281e-05, "loss": 18.1894, "step": 211 }, { "epoch": 0.03, "learning_rate": 9.298245614035089e-05, "loss": 18.1085, "step": 212 }, { "epoch": 0.03, "learning_rate": 9.342105263157896e-05, "loss": 17.6417, "step": 213 }, { "epoch": 0.03, "learning_rate": 9.385964912280703e-05, "loss": 19.411, "step": 214 }, { "epoch": 0.03, "learning_rate": 9.42982456140351e-05, "loss": 17.862, "step": 215 }, { "epoch": 0.03, "learning_rate": 9.473684210526316e-05, "loss": 0.0007, "step": 216 }, { "epoch": 0.03, "learning_rate": 9.517543859649123e-05, "loss": 0.0009, "step": 217 }, { "epoch": 0.03, "learning_rate": 9.56140350877193e-05, "loss": 20.2117, "step": 218 }, { "epoch": 0.03, "learning_rate": 9.605263157894737e-05, "loss": 0.0009, "step": 219 }, { "epoch": 0.03, "learning_rate": 9.649122807017544e-05, "loss": 18.2904, "step": 220 }, { "epoch": 0.03, "learning_rate": 9.692982456140351e-05, "loss": 17.4153, "step": 221 }, { "epoch": 0.03, "learning_rate": 9.736842105263158e-05, "loss": 0.0009, "step": 222 }, { "epoch": 0.03, "learning_rate": 9.780701754385966e-05, "loss": 19.6873, "step": 223 }, { "epoch": 0.03, "learning_rate": 9.824561403508771e-05, "loss": 20.4517, "step": 224 }, { "epoch": 0.03, "learning_rate": 9.868421052631579e-05, "loss": 0.001, "step": 225 }, { "epoch": 0.03, "learning_rate": 9.912280701754386e-05, "loss": 0.0009, "step": 226 }, { "epoch": 0.03, "learning_rate": 9.956140350877193e-05, "loss": 17.9363, "step": 227 }, { "epoch": 0.03, "learning_rate": 0.0001, "loss": 0.0009, "step": 228 }, { "epoch": 0.03, "learning_rate": 9.999999545616414e-05, "loss": 17.4712, "step": 229 }, { "epoch": 0.03, "learning_rate": 9.99999818246574e-05, "loss": 19.9081, "step": 230 }, { "epoch": 0.03, "learning_rate": 9.999995910548224e-05, "loss": 17.1494, "step": 231 }, { "epoch": 0.03, "learning_rate": 9.999992729864281e-05, "loss": 0.0006, "step": 232 }, { "epoch": 0.03, "learning_rate": 9.999988640414486e-05, "loss": 19.4785, "step": 233 }, { "epoch": 0.03, "learning_rate": 9.999983642199585e-05, "loss": 17.0276, "step": 234 }, { "epoch": 0.03, "learning_rate": 9.999977735220485e-05, "loss": 0.0003, "step": 235 }, { "epoch": 0.03, "learning_rate": 9.999970919478261e-05, "loss": 0.0007, "step": 236 }, { "epoch": 0.03, "learning_rate": 9.999963194974151e-05, "loss": 20.376, "step": 237 }, { "epoch": 0.03, "learning_rate": 9.999954561709559e-05, "loss": 0.0006, "step": 238 }, { "epoch": 0.03, "learning_rate": 9.999945019686055e-05, "loss": 17.8797, "step": 239 }, { "epoch": 0.03, "learning_rate": 9.999934568905369e-05, "loss": 0.0006, "step": 240 }, { "epoch": 0.03, "learning_rate": 9.999923209369407e-05, "loss": 16.8431, "step": 241 }, { "epoch": 0.03, "learning_rate": 9.999910941080229e-05, "loss": 17.5623, "step": 242 }, { "epoch": 0.03, "learning_rate": 9.999897764040067e-05, "loss": 20.1436, "step": 243 }, { "epoch": 0.03, "learning_rate": 9.999883678251315e-05, "loss": 18.0399, "step": 244 }, { "epoch": 0.03, "learning_rate": 9.999868683716534e-05, "loss": 18.022, "step": 245 }, { "epoch": 0.03, "learning_rate": 9.999852780438448e-05, "loss": 0.0009, "step": 246 }, { "epoch": 0.03, "learning_rate": 9.999835968419949e-05, "loss": 21.4593, "step": 247 }, { "epoch": 0.03, "learning_rate": 9.99981824766409e-05, "loss": 0.0003, "step": 248 }, { "epoch": 0.03, "learning_rate": 9.999799618174095e-05, "loss": 18.4806, "step": 249 }, { "epoch": 0.03, "learning_rate": 9.999780079953347e-05, "loss": 19.076, "step": 250 }, { "epoch": 0.03, "learning_rate": 9.9997596330054e-05, "loss": 0.0006, "step": 251 }, { "epoch": 0.03, "learning_rate": 9.99973827733397e-05, "loss": 0.0003, "step": 252 }, { "epoch": 0.03, "learning_rate": 9.999716012942937e-05, "loss": 17.4444, "step": 253 }, { "epoch": 0.03, "learning_rate": 9.999692839836346e-05, "loss": 18.2045, "step": 254 }, { "epoch": 0.03, "learning_rate": 9.999668758018414e-05, "loss": 19.5156, "step": 255 }, { "epoch": 0.03, "learning_rate": 9.999643767493512e-05, "loss": 0.0015, "step": 256 }, { "epoch": 0.03, "learning_rate": 9.999617868266187e-05, "loss": 17.489, "step": 257 }, { "epoch": 0.03, "learning_rate": 9.999591060341143e-05, "loss": 0.0002, "step": 258 }, { "epoch": 0.03, "learning_rate": 9.999563343723255e-05, "loss": 16.5597, "step": 259 }, { "epoch": 0.03, "learning_rate": 9.999534718417558e-05, "loss": 19.2717, "step": 260 }, { "epoch": 0.03, "learning_rate": 9.999505184429258e-05, "loss": 17.2701, "step": 261 }, { "epoch": 0.03, "learning_rate": 9.999474741763719e-05, "loss": 16.2481, "step": 262 }, { "epoch": 0.03, "learning_rate": 9.999443390426478e-05, "loss": 17.1959, "step": 263 }, { "epoch": 0.03, "learning_rate": 9.999411130423229e-05, "loss": 17.6602, "step": 264 }, { "epoch": 0.03, "learning_rate": 9.999377961759837e-05, "loss": 18.1236, "step": 265 }, { "epoch": 0.04, "learning_rate": 9.999343884442334e-05, "loss": 0.0003, "step": 266 }, { "epoch": 0.04, "learning_rate": 9.999308898476909e-05, "loss": 18.9667, "step": 267 }, { "epoch": 0.04, "learning_rate": 9.999273003869923e-05, "loss": 0.0006, "step": 268 }, { "epoch": 0.04, "learning_rate": 9.999236200627899e-05, "loss": 18.8492, "step": 269 }, { "epoch": 0.04, "learning_rate": 9.999198488757526e-05, "loss": 0.0007, "step": 270 }, { "epoch": 0.04, "learning_rate": 9.99915986826566e-05, "loss": 0.0005, "step": 271 }, { "epoch": 0.04, "learning_rate": 9.99912033915932e-05, "loss": 0.0002, "step": 272 }, { "epoch": 0.04, "learning_rate": 9.999079901445689e-05, "loss": 0.0004, "step": 273 }, { "epoch": 0.04, "learning_rate": 9.999038555132117e-05, "loss": 17.504, "step": 274 }, { "epoch": 0.04, "learning_rate": 9.998996300226121e-05, "loss": 17.4522, "step": 275 }, { "epoch": 0.04, "learning_rate": 9.998953136735378e-05, "loss": 19.3678, "step": 276 }, { "epoch": 0.04, "learning_rate": 9.998909064667735e-05, "loss": 19.9882, "step": 277 }, { "epoch": 0.04, "learning_rate": 9.998864084031201e-05, "loss": 0.0003, "step": 278 }, { "epoch": 0.04, "learning_rate": 9.998818194833954e-05, "loss": 18.2113, "step": 279 }, { "epoch": 0.04, "learning_rate": 9.998771397084332e-05, "loss": 18.8028, "step": 280 }, { "epoch": 0.04, "learning_rate": 9.998723690790841e-05, "loss": 0.0007, "step": 281 }, { "epoch": 0.04, "learning_rate": 9.998675075962152e-05, "loss": 18.3369, "step": 282 }, { "epoch": 0.04, "learning_rate": 9.9986255526071e-05, "loss": 20.0453, "step": 283 }, { "epoch": 0.04, "learning_rate": 9.998575120734688e-05, "loss": 21.6205, "step": 284 }, { "epoch": 0.04, "learning_rate": 9.998523780354081e-05, "loss": 0.0004, "step": 285 }, { "epoch": 0.04, "learning_rate": 9.998471531474611e-05, "loss": 20.7245, "step": 286 }, { "epoch": 0.04, "learning_rate": 9.998418374105774e-05, "loss": 0.0003, "step": 287 }, { "epoch": 0.04, "learning_rate": 9.998364308257231e-05, "loss": 17.9279, "step": 288 }, { "epoch": 0.04, "learning_rate": 9.998309333938809e-05, "loss": 18.8775, "step": 289 }, { "epoch": 0.04, "learning_rate": 9.9982534511605e-05, "loss": 18.2659, "step": 290 }, { "epoch": 0.04, "learning_rate": 9.998196659932462e-05, "loss": 17.8552, "step": 291 }, { "epoch": 0.04, "learning_rate": 9.998138960265013e-05, "loss": 17.735, "step": 292 }, { "epoch": 0.04, "learning_rate": 9.998080352168645e-05, "loss": 18.2098, "step": 293 }, { "epoch": 0.04, "learning_rate": 9.998020835654007e-05, "loss": 18.1969, "step": 294 }, { "epoch": 0.04, "learning_rate": 9.99796041073192e-05, "loss": 0.0002, "step": 295 }, { "epoch": 0.04, "learning_rate": 9.997899077413359e-05, "loss": 20.199, "step": 296 }, { "epoch": 0.04, "learning_rate": 9.99783683570948e-05, "loss": 19.8173, "step": 297 }, { "epoch": 0.04, "learning_rate": 9.997773685631592e-05, "loss": 20.174, "step": 298 }, { "epoch": 0.04, "learning_rate": 9.997709627191172e-05, "loss": 18.7704, "step": 299 }, { "epoch": 0.04, "learning_rate": 9.997644660399864e-05, "loss": 18.7194, "step": 300 }, { "epoch": 0.04, "learning_rate": 9.997578785269475e-05, "loss": 18.3063, "step": 301 }, { "epoch": 0.04, "learning_rate": 9.997512001811978e-05, "loss": 18.2829, "step": 302 }, { "epoch": 0.04, "learning_rate": 9.997444310039512e-05, "loss": 0.0006, "step": 303 }, { "epoch": 0.04, "learning_rate": 9.99737570996438e-05, "loss": 17.5531, "step": 304 }, { "epoch": 0.04, "learning_rate": 9.997306201599053e-05, "loss": 17.8446, "step": 305 }, { "epoch": 0.04, "learning_rate": 9.997235784956159e-05, "loss": 0.0004, "step": 306 }, { "epoch": 0.04, "learning_rate": 9.997164460048498e-05, "loss": 18.0013, "step": 307 }, { "epoch": 0.04, "learning_rate": 9.997092226889036e-05, "loss": 19.7833, "step": 308 }, { "epoch": 0.04, "learning_rate": 9.997019085490902e-05, "loss": 18.7135, "step": 309 }, { "epoch": 0.04, "learning_rate": 9.996945035867386e-05, "loss": 19.2038, "step": 310 }, { "epoch": 0.04, "learning_rate": 9.996870078031948e-05, "loss": 0.0003, "step": 311 }, { "epoch": 0.04, "learning_rate": 9.996794211998214e-05, "loss": 19.6112, "step": 312 }, { "epoch": 0.04, "learning_rate": 9.99671743777997e-05, "loss": 20.0844, "step": 313 }, { "epoch": 0.04, "learning_rate": 9.996639755391173e-05, "loss": 0.0002, "step": 314 }, { "epoch": 0.04, "learning_rate": 9.99656116484594e-05, "loss": 19.1783, "step": 315 }, { "epoch": 0.04, "learning_rate": 9.996481666158556e-05, "loss": 17.2937, "step": 316 }, { "epoch": 0.04, "learning_rate": 9.996401259343468e-05, "loss": 18.0091, "step": 317 }, { "epoch": 0.04, "learning_rate": 9.996319944415294e-05, "loss": 17.7628, "step": 318 }, { "epoch": 0.04, "learning_rate": 9.99623772138881e-05, "loss": 19.4826, "step": 319 }, { "epoch": 0.04, "learning_rate": 9.996154590278963e-05, "loss": 19.554, "step": 320 }, { "epoch": 0.04, "learning_rate": 9.996070551100861e-05, "loss": 18.2647, "step": 321 }, { "epoch": 0.04, "learning_rate": 9.995985603869777e-05, "loss": 0.0023, "step": 322 }, { "epoch": 0.04, "learning_rate": 9.995899748601152e-05, "loss": 17.3381, "step": 323 }, { "epoch": 0.04, "learning_rate": 9.995812985310591e-05, "loss": 0.0009, "step": 324 }, { "epoch": 0.04, "learning_rate": 9.995725314013862e-05, "loss": 16.6402, "step": 325 }, { "epoch": 0.04, "learning_rate": 9.995636734726901e-05, "loss": 0.0005, "step": 326 }, { "epoch": 0.04, "learning_rate": 9.995547247465806e-05, "loss": 19.0233, "step": 327 }, { "epoch": 0.04, "learning_rate": 9.995456852246843e-05, "loss": 17.4011, "step": 328 }, { "epoch": 0.04, "learning_rate": 9.995365549086441e-05, "loss": 0.0001, "step": 329 }, { "epoch": 0.04, "learning_rate": 9.995273338001196e-05, "loss": 18.0625, "step": 330 }, { "epoch": 0.04, "learning_rate": 9.995180219007865e-05, "loss": 18.5558, "step": 331 }, { "epoch": 0.04, "learning_rate": 9.995086192123376e-05, "loss": 19.3978, "step": 332 }, { "epoch": 0.04, "learning_rate": 9.994991257364815e-05, "loss": 0.0002, "step": 333 }, { "epoch": 0.04, "learning_rate": 9.99489541474944e-05, "loss": 19.5114, "step": 334 }, { "epoch": 0.04, "learning_rate": 9.994798664294669e-05, "loss": 18.983, "step": 335 }, { "epoch": 0.04, "learning_rate": 9.994701006018088e-05, "loss": 0.0003, "step": 336 }, { "epoch": 0.04, "learning_rate": 9.994602439937444e-05, "loss": 19.0153, "step": 337 }, { "epoch": 0.04, "learning_rate": 9.994502966070655e-05, "loss": 17.21, "step": 338 }, { "epoch": 0.04, "learning_rate": 9.994402584435799e-05, "loss": 19.2939, "step": 339 }, { "epoch": 0.04, "learning_rate": 9.99430129505112e-05, "loss": 18.8974, "step": 340 }, { "epoch": 0.04, "learning_rate": 9.994199097935031e-05, "loss": 18.2854, "step": 341 }, { "epoch": 0.05, "learning_rate": 9.994095993106103e-05, "loss": 19.2429, "step": 342 }, { "epoch": 0.05, "learning_rate": 9.993991980583078e-05, "loss": 16.745, "step": 343 }, { "epoch": 0.05, "learning_rate": 9.993887060384858e-05, "loss": 18.7542, "step": 344 }, { "epoch": 0.05, "learning_rate": 9.993781232530514e-05, "loss": 19.9876, "step": 345 }, { "epoch": 0.05, "learning_rate": 9.993674497039283e-05, "loss": 19.0536, "step": 346 }, { "epoch": 0.05, "learning_rate": 9.99356685393056e-05, "loss": 17.4609, "step": 347 }, { "epoch": 0.05, "learning_rate": 9.993458303223916e-05, "loss": 18.3127, "step": 348 }, { "epoch": 0.05, "learning_rate": 9.993348844939072e-05, "loss": 21.5247, "step": 349 }, { "epoch": 0.05, "learning_rate": 9.993238479095927e-05, "loss": 20.0861, "step": 350 }, { "epoch": 0.05, "learning_rate": 9.993127205714544e-05, "loss": 19.2283, "step": 351 }, { "epoch": 0.05, "learning_rate": 9.99301502481514e-05, "loss": 19.9027, "step": 352 }, { "epoch": 0.05, "learning_rate": 9.992901936418109e-05, "loss": 16.7012, "step": 353 }, { "epoch": 0.05, "learning_rate": 9.992787940544002e-05, "loss": 19.4211, "step": 354 }, { "epoch": 0.05, "learning_rate": 9.992673037213543e-05, "loss": 0.0008, "step": 355 }, { "epoch": 0.05, "learning_rate": 9.992557226447613e-05, "loss": 17.3591, "step": 356 }, { "epoch": 0.05, "learning_rate": 9.992440508267258e-05, "loss": 18.4288, "step": 357 }, { "epoch": 0.05, "learning_rate": 9.992322882693697e-05, "loss": 19.1119, "step": 358 }, { "epoch": 0.05, "learning_rate": 9.992204349748308e-05, "loss": 17.5054, "step": 359 }, { "epoch": 0.05, "learning_rate": 9.992084909452633e-05, "loss": 18.1939, "step": 360 }, { "epoch": 0.05, "learning_rate": 9.99196456182838e-05, "loss": 17.7276, "step": 361 }, { "epoch": 0.05, "learning_rate": 9.991843306897426e-05, "loss": 19.6735, "step": 362 }, { "epoch": 0.05, "learning_rate": 9.991721144681806e-05, "loss": 17.1287, "step": 363 }, { "epoch": 0.05, "learning_rate": 9.991598075203725e-05, "loss": 18.3047, "step": 364 }, { "epoch": 0.05, "learning_rate": 9.991474098485552e-05, "loss": 17.8385, "step": 365 }, { "epoch": 0.05, "learning_rate": 9.991349214549819e-05, "loss": 18.7034, "step": 366 }, { "epoch": 0.05, "learning_rate": 9.991223423419225e-05, "loss": 17.2762, "step": 367 }, { "epoch": 0.05, "learning_rate": 9.991096725116633e-05, "loss": 17.6348, "step": 368 }, { "epoch": 0.05, "learning_rate": 9.990969119665068e-05, "loss": 16.4886, "step": 369 }, { "epoch": 0.05, "learning_rate": 9.990840607087727e-05, "loss": 18.0339, "step": 370 }, { "epoch": 0.05, "learning_rate": 9.990711187407964e-05, "loss": 0.0003, "step": 371 }, { "epoch": 0.05, "learning_rate": 9.990580860649304e-05, "loss": 19.3025, "step": 372 }, { "epoch": 0.05, "learning_rate": 9.990449626835434e-05, "loss": 16.0995, "step": 373 }, { "epoch": 0.05, "learning_rate": 9.990317485990207e-05, "loss": 0.0002, "step": 374 }, { "epoch": 0.05, "learning_rate": 9.990184438137635e-05, "loss": 18.5836, "step": 375 }, { "epoch": 0.05, "learning_rate": 9.990050483301905e-05, "loss": 0.0005, "step": 376 }, { "epoch": 0.05, "learning_rate": 9.989915621507363e-05, "loss": 19.3109, "step": 377 }, { "epoch": 0.05, "learning_rate": 9.98977985277852e-05, "loss": 18.8485, "step": 378 }, { "epoch": 0.05, "learning_rate": 9.989643177140051e-05, "loss": 21.0956, "step": 379 }, { "epoch": 0.05, "learning_rate": 9.9895055946168e-05, "loss": 20.0721, "step": 380 }, { "epoch": 0.05, "learning_rate": 9.989367105233771e-05, "loss": 18.1632, "step": 381 }, { "epoch": 0.05, "learning_rate": 9.989227709016137e-05, "loss": 0.0003, "step": 382 }, { "epoch": 0.05, "learning_rate": 9.98908740598923e-05, "loss": 17.5539, "step": 383 }, { "epoch": 0.05, "learning_rate": 9.988946196178555e-05, "loss": 16.7813, "step": 384 }, { "epoch": 0.05, "learning_rate": 9.988804079609775e-05, "loss": 18.4215, "step": 385 }, { "epoch": 0.05, "learning_rate": 9.98866105630872e-05, "loss": 17.7419, "step": 386 }, { "epoch": 0.05, "learning_rate": 9.988517126301386e-05, "loss": 19.3391, "step": 387 }, { "epoch": 0.05, "learning_rate": 9.988372289613932e-05, "loss": 16.533, "step": 388 }, { "epoch": 0.05, "learning_rate": 9.988226546272682e-05, "loss": 0.0002, "step": 389 }, { "epoch": 0.05, "learning_rate": 9.988079896304126e-05, "loss": 0.0004, "step": 390 }, { "epoch": 0.05, "learning_rate": 9.98793233973492e-05, "loss": 16.2173, "step": 391 }, { "epoch": 0.05, "learning_rate": 9.987783876591881e-05, "loss": 18.7244, "step": 392 }, { "epoch": 0.05, "learning_rate": 9.987634506901991e-05, "loss": 0.0002, "step": 393 }, { "epoch": 0.05, "learning_rate": 9.987484230692401e-05, "loss": 17.9809, "step": 394 }, { "epoch": 0.05, "learning_rate": 9.987333047990425e-05, "loss": 18.6448, "step": 395 }, { "epoch": 0.05, "learning_rate": 9.987180958823539e-05, "loss": 17.6606, "step": 396 }, { "epoch": 0.05, "learning_rate": 9.987027963219386e-05, "loss": 21.2817, "step": 397 }, { "epoch": 0.05, "learning_rate": 9.986874061205774e-05, "loss": 0.0002, "step": 398 }, { "epoch": 0.05, "learning_rate": 9.986719252810674e-05, "loss": 19.5799, "step": 399 }, { "epoch": 0.05, "learning_rate": 9.986563538062224e-05, "loss": 18.4125, "step": 400 }, { "epoch": 0.05, "learning_rate": 9.986406916988727e-05, "loss": 18.6601, "step": 401 }, { "epoch": 0.05, "learning_rate": 9.986249389618645e-05, "loss": 18.0002, "step": 402 }, { "epoch": 0.05, "learning_rate": 9.986090955980616e-05, "loss": 18.9031, "step": 403 }, { "epoch": 0.05, "learning_rate": 9.98593161610343e-05, "loss": 17.9227, "step": 404 }, { "epoch": 0.05, "learning_rate": 9.98577137001605e-05, "loss": 17.7788, "step": 405 }, { "epoch": 0.05, "learning_rate": 9.985610217747601e-05, "loss": 17.5724, "step": 406 }, { "epoch": 0.05, "learning_rate": 9.985448159327372e-05, "loss": 19.7877, "step": 407 }, { "epoch": 0.05, "learning_rate": 9.98528519478482e-05, "loss": 0.0001, "step": 408 }, { "epoch": 0.05, "learning_rate": 9.985121324149562e-05, "loss": 0.0002, "step": 409 }, { "epoch": 0.05, "learning_rate": 9.984956547451381e-05, "loss": 0.0002, "step": 410 }, { "epoch": 0.05, "learning_rate": 9.98479086472023e-05, "loss": 18.5862, "step": 411 }, { "epoch": 0.05, "learning_rate": 9.984624275986221e-05, "loss": 0.0007, "step": 412 }, { "epoch": 0.05, "learning_rate": 9.98445678127963e-05, "loss": 0.0002, "step": 413 }, { "epoch": 0.05, "learning_rate": 9.984288380630899e-05, "loss": 17.3117, "step": 414 }, { "epoch": 0.05, "learning_rate": 9.984119074070639e-05, "loss": 20.0023, "step": 415 }, { "epoch": 0.05, "learning_rate": 9.983948861629622e-05, "loss": 16.0174, "step": 416 }, { "epoch": 0.05, "learning_rate": 9.983777743338781e-05, "loss": 19.5782, "step": 417 }, { "epoch": 0.06, "learning_rate": 9.983605719229219e-05, "loss": 0.0002, "step": 418 }, { "epoch": 0.06, "learning_rate": 9.983432789332204e-05, "loss": 19.1599, "step": 419 }, { "epoch": 0.06, "learning_rate": 9.983258953679163e-05, "loss": 19.6429, "step": 420 }, { "epoch": 0.06, "learning_rate": 9.983084212301695e-05, "loss": 16.7345, "step": 421 }, { "epoch": 0.06, "learning_rate": 9.982908565231557e-05, "loss": 17.5477, "step": 422 }, { "epoch": 0.06, "learning_rate": 9.982732012500675e-05, "loss": 19.6708, "step": 423 }, { "epoch": 0.06, "learning_rate": 9.982554554141138e-05, "loss": 16.7347, "step": 424 }, { "epoch": 0.06, "learning_rate": 9.982376190185196e-05, "loss": 19.0804, "step": 425 }, { "epoch": 0.06, "learning_rate": 9.982196920665274e-05, "loss": 19.8619, "step": 426 }, { "epoch": 0.06, "learning_rate": 9.982016745613951e-05, "loss": 18.7834, "step": 427 }, { "epoch": 0.06, "learning_rate": 9.981835665063974e-05, "loss": 16.8912, "step": 428 }, { "epoch": 0.06, "learning_rate": 9.981653679048256e-05, "loss": 18.1254, "step": 429 }, { "epoch": 0.06, "learning_rate": 9.981470787599872e-05, "loss": 19.1056, "step": 430 }, { "epoch": 0.06, "learning_rate": 9.981286990752065e-05, "loss": 0.001, "step": 431 }, { "epoch": 0.06, "learning_rate": 9.981102288538242e-05, "loss": 18.8791, "step": 432 }, { "epoch": 0.06, "learning_rate": 9.98091668099197e-05, "loss": 17.8236, "step": 433 }, { "epoch": 0.06, "learning_rate": 9.980730168146986e-05, "loss": 17.8068, "step": 434 }, { "epoch": 0.06, "learning_rate": 9.980542750037188e-05, "loss": 19.2953, "step": 435 }, { "epoch": 0.06, "learning_rate": 9.98035442669664e-05, "loss": 0.0005, "step": 436 }, { "epoch": 0.06, "learning_rate": 9.980165198159571e-05, "loss": 17.6052, "step": 437 }, { "epoch": 0.06, "learning_rate": 9.979975064460374e-05, "loss": 0.0001, "step": 438 }, { "epoch": 0.06, "learning_rate": 9.979784025633607e-05, "loss": 17.8832, "step": 439 }, { "epoch": 0.06, "learning_rate": 9.979592081713991e-05, "loss": 17.9588, "step": 440 }, { "epoch": 0.06, "learning_rate": 9.979399232736413e-05, "loss": 17.9369, "step": 441 }, { "epoch": 0.06, "learning_rate": 9.979205478735923e-05, "loss": 18.9772, "step": 442 }, { "epoch": 0.06, "learning_rate": 9.979010819747738e-05, "loss": 19.6641, "step": 443 }, { "epoch": 0.06, "learning_rate": 9.978815255807236e-05, "loss": 18.4112, "step": 444 }, { "epoch": 0.06, "learning_rate": 9.978618786949963e-05, "loss": 18.6781, "step": 445 }, { "epoch": 0.06, "learning_rate": 9.978421413211629e-05, "loss": 0.0005, "step": 446 }, { "epoch": 0.06, "learning_rate": 9.978223134628104e-05, "loss": 16.7238, "step": 447 }, { "epoch": 0.06, "learning_rate": 9.978023951235428e-05, "loss": 0.0006, "step": 448 }, { "epoch": 0.06, "learning_rate": 9.977823863069801e-05, "loss": 0.0001, "step": 449 }, { "epoch": 0.06, "learning_rate": 9.977622870167595e-05, "loss": 18.1719, "step": 450 }, { "epoch": 0.06, "learning_rate": 9.977420972565337e-05, "loss": 21.0187, "step": 451 }, { "epoch": 0.06, "learning_rate": 9.977218170299722e-05, "loss": 0.0001, "step": 452 }, { "epoch": 0.06, "learning_rate": 9.977014463407611e-05, "loss": 18.0892, "step": 453 }, { "epoch": 0.06, "learning_rate": 9.976809851926029e-05, "loss": 0.0005, "step": 454 }, { "epoch": 0.06, "learning_rate": 9.976604335892166e-05, "loss": 19.0533, "step": 455 }, { "epoch": 0.06, "learning_rate": 9.976397915343372e-05, "loss": 0.0002, "step": 456 }, { "epoch": 0.06, "learning_rate": 9.976190590317167e-05, "loss": 0.0007, "step": 457 }, { "epoch": 0.06, "learning_rate": 9.975982360851233e-05, "loss": 17.7705, "step": 458 }, { "epoch": 0.06, "learning_rate": 9.975773226983415e-05, "loss": 0.0007, "step": 459 }, { "epoch": 0.06, "learning_rate": 9.975563188751726e-05, "loss": 19.5953, "step": 460 }, { "epoch": 0.06, "learning_rate": 9.975352246194338e-05, "loss": 18.2791, "step": 461 }, { "epoch": 0.06, "learning_rate": 9.975140399349594e-05, "loss": 18.55, "step": 462 }, { "epoch": 0.06, "learning_rate": 9.974927648255996e-05, "loss": 18.4204, "step": 463 }, { "epoch": 0.06, "learning_rate": 9.974713992952212e-05, "loss": 0.0001, "step": 464 }, { "epoch": 0.06, "learning_rate": 9.974499433477076e-05, "loss": 17.9254, "step": 465 }, { "epoch": 0.06, "learning_rate": 9.974283969869584e-05, "loss": 18.0661, "step": 466 }, { "epoch": 0.06, "learning_rate": 9.974067602168896e-05, "loss": 0.0002, "step": 467 }, { "epoch": 0.06, "learning_rate": 9.973850330414341e-05, "loss": 21.4592, "step": 468 }, { "epoch": 0.06, "learning_rate": 9.973632154645405e-05, "loss": 19.6223, "step": 469 }, { "epoch": 0.06, "learning_rate": 9.973413074901743e-05, "loss": 19.8064, "step": 470 }, { "epoch": 0.06, "learning_rate": 9.973193091223177e-05, "loss": 18.1841, "step": 471 }, { "epoch": 0.06, "learning_rate": 9.972972203649687e-05, "loss": 17.7497, "step": 472 }, { "epoch": 0.06, "learning_rate": 9.972750412221419e-05, "loss": 18.7547, "step": 473 }, { "epoch": 0.06, "learning_rate": 9.972527716978686e-05, "loss": 19.5114, "step": 474 }, { "epoch": 0.06, "learning_rate": 9.972304117961963e-05, "loss": 19.4285, "step": 475 }, { "epoch": 0.06, "learning_rate": 9.97207961521189e-05, "loss": 0.0002, "step": 476 }, { "epoch": 0.06, "learning_rate": 9.971854208769271e-05, "loss": 18.3548, "step": 477 }, { "epoch": 0.06, "learning_rate": 9.971627898675077e-05, "loss": 18.6118, "step": 478 }, { "epoch": 0.06, "learning_rate": 9.971400684970437e-05, "loss": 21.2451, "step": 479 }, { "epoch": 0.06, "learning_rate": 9.97117256769665e-05, "loss": 19.7615, "step": 480 }, { "epoch": 0.06, "learning_rate": 9.970943546895175e-05, "loss": 17.9575, "step": 481 }, { "epoch": 0.06, "learning_rate": 9.97071362260764e-05, "loss": 18.3987, "step": 482 }, { "epoch": 0.06, "learning_rate": 9.97048279487583e-05, "loss": 18.847, "step": 483 }, { "epoch": 0.06, "learning_rate": 9.970251063741705e-05, "loss": 17.5303, "step": 484 }, { "epoch": 0.06, "learning_rate": 9.970018429247379e-05, "loss": 18.8206, "step": 485 }, { "epoch": 0.06, "learning_rate": 9.969784891435135e-05, "loss": 0.0001, "step": 486 }, { "epoch": 0.06, "learning_rate": 9.969550450347421e-05, "loss": 19.1343, "step": 487 }, { "epoch": 0.06, "learning_rate": 9.969315106026842e-05, "loss": 17.8419, "step": 488 }, { "epoch": 0.06, "learning_rate": 9.969078858516179e-05, "loss": 15.9444, "step": 489 }, { "epoch": 0.06, "learning_rate": 9.968841707858368e-05, "loss": 0.0001, "step": 490 }, { "epoch": 0.06, "learning_rate": 9.968603654096511e-05, "loss": 0.0004, "step": 491 }, { "epoch": 0.06, "learning_rate": 9.968364697273877e-05, "loss": 18.7904, "step": 492 }, { "epoch": 0.06, "learning_rate": 9.968124837433897e-05, "loss": 0.0001, "step": 493 }, { "epoch": 0.07, "learning_rate": 9.967884074620164e-05, "loss": 18.3017, "step": 494 }, { "epoch": 0.07, "learning_rate": 9.96764240887644e-05, "loss": 18.6297, "step": 495 }, { "epoch": 0.07, "learning_rate": 9.96739984024665e-05, "loss": 19.6703, "step": 496 }, { "epoch": 0.07, "learning_rate": 9.967156368774877e-05, "loss": 0.0003, "step": 497 }, { "epoch": 0.07, "learning_rate": 9.966911994505375e-05, "loss": 19.1165, "step": 498 }, { "epoch": 0.07, "learning_rate": 9.966666717482562e-05, "loss": 18.1445, "step": 499 }, { "epoch": 0.07, "learning_rate": 9.966420537751015e-05, "loss": 19.9882, "step": 500 }, { "epoch": 0.07, "learning_rate": 9.966173455355479e-05, "loss": 19.4347, "step": 501 }, { "epoch": 0.07, "learning_rate": 9.965925470340862e-05, "loss": 17.6068, "step": 502 }, { "epoch": 0.07, "learning_rate": 9.965676582752237e-05, "loss": 0.0004, "step": 503 }, { "epoch": 0.07, "learning_rate": 9.965426792634839e-05, "loss": 19.8212, "step": 504 }, { "epoch": 0.07, "learning_rate": 9.965176100034068e-05, "loss": 18.0334, "step": 505 }, { "epoch": 0.07, "learning_rate": 9.96492450499549e-05, "loss": 18.7375, "step": 506 }, { "epoch": 0.07, "learning_rate": 9.964672007564832e-05, "loss": 17.344, "step": 507 }, { "epoch": 0.07, "learning_rate": 9.964418607787987e-05, "loss": 19.5528, "step": 508 }, { "epoch": 0.07, "learning_rate": 9.96416430571101e-05, "loss": 19.2495, "step": 509 }, { "epoch": 0.07, "learning_rate": 9.963909101380122e-05, "loss": 18.705, "step": 510 }, { "epoch": 0.07, "learning_rate": 9.963652994841708e-05, "loss": 19.4546, "step": 511 }, { "epoch": 0.07, "learning_rate": 9.963395986142315e-05, "loss": 18.2704, "step": 512 }, { "epoch": 0.07, "learning_rate": 9.963138075328655e-05, "loss": 16.8549, "step": 513 }, { "epoch": 0.07, "learning_rate": 9.962879262447605e-05, "loss": 19.1868, "step": 514 }, { "epoch": 0.07, "learning_rate": 9.962619547546205e-05, "loss": 0.0002, "step": 515 }, { "epoch": 0.07, "learning_rate": 9.96235893067166e-05, "loss": 18.784, "step": 516 }, { "epoch": 0.07, "learning_rate": 9.962097411871337e-05, "loss": 0.0005, "step": 517 }, { "epoch": 0.07, "learning_rate": 9.961834991192769e-05, "loss": 0.0002, "step": 518 }, { "epoch": 0.07, "learning_rate": 9.961571668683649e-05, "loss": 18.2854, "step": 519 }, { "epoch": 0.07, "learning_rate": 9.961307444391839e-05, "loss": 17.0493, "step": 520 }, { "epoch": 0.07, "learning_rate": 9.961042318365364e-05, "loss": 18.1278, "step": 521 }, { "epoch": 0.07, "learning_rate": 9.960776290652408e-05, "loss": 0.0002, "step": 522 }, { "epoch": 0.07, "learning_rate": 9.960509361301326e-05, "loss": 18.1657, "step": 523 }, { "epoch": 0.07, "learning_rate": 9.96024153036063e-05, "loss": 20.4523, "step": 524 }, { "epoch": 0.07, "learning_rate": 9.959972797879002e-05, "loss": 0.0003, "step": 525 }, { "epoch": 0.07, "learning_rate": 9.959703163905284e-05, "loss": 15.9685, "step": 526 }, { "epoch": 0.07, "learning_rate": 9.959432628488483e-05, "loss": 0.0003, "step": 527 }, { "epoch": 0.07, "learning_rate": 9.959161191677768e-05, "loss": 18.7708, "step": 528 }, { "epoch": 0.07, "learning_rate": 9.958888853522476e-05, "loss": 0.0007, "step": 529 }, { "epoch": 0.07, "learning_rate": 9.958615614072107e-05, "loss": 0.0001, "step": 530 }, { "epoch": 0.07, "learning_rate": 9.958341473376319e-05, "loss": 18.609, "step": 531 }, { "epoch": 0.07, "learning_rate": 9.958066431484938e-05, "loss": 0.0002, "step": 532 }, { "epoch": 0.07, "learning_rate": 9.957790488447957e-05, "loss": 18.4399, "step": 533 }, { "epoch": 0.07, "learning_rate": 9.95751364431553e-05, "loss": 18.5467, "step": 534 }, { "epoch": 0.07, "learning_rate": 9.95723589913797e-05, "loss": 0.0009, "step": 535 }, { "epoch": 0.07, "learning_rate": 9.956957252965762e-05, "loss": 17.836, "step": 536 }, { "epoch": 0.07, "learning_rate": 9.956677705849549e-05, "loss": 19.2468, "step": 537 }, { "epoch": 0.07, "learning_rate": 9.95639725784014e-05, "loss": 18.9642, "step": 538 }, { "epoch": 0.07, "learning_rate": 9.956115908988508e-05, "loss": 16.2565, "step": 539 }, { "epoch": 0.07, "learning_rate": 9.95583365934579e-05, "loss": 0.0004, "step": 540 }, { "epoch": 0.07, "learning_rate": 9.955550508963283e-05, "loss": 19.7098, "step": 541 }, { "epoch": 0.07, "learning_rate": 9.955266457892451e-05, "loss": 0.0008, "step": 542 }, { "epoch": 0.07, "learning_rate": 9.954981506184925e-05, "loss": 18.5998, "step": 543 }, { "epoch": 0.07, "learning_rate": 9.954695653892492e-05, "loss": 18.0808, "step": 544 }, { "epoch": 0.07, "learning_rate": 9.954408901067109e-05, "loss": 18.0772, "step": 545 }, { "epoch": 0.07, "learning_rate": 9.954121247760893e-05, "loss": 0.0004, "step": 546 }, { "epoch": 0.07, "learning_rate": 9.953832694026126e-05, "loss": 16.0986, "step": 547 }, { "epoch": 0.07, "learning_rate": 9.953543239915252e-05, "loss": 0.0004, "step": 548 }, { "epoch": 0.07, "learning_rate": 9.953252885480886e-05, "loss": 17.696, "step": 549 }, { "epoch": 0.07, "learning_rate": 9.952961630775795e-05, "loss": 18.102, "step": 550 }, { "epoch": 0.07, "learning_rate": 9.952669475852919e-05, "loss": 20.0815, "step": 551 }, { "epoch": 0.07, "learning_rate": 9.952376420765354e-05, "loss": 0.0001, "step": 552 }, { "epoch": 0.07, "learning_rate": 9.952082465566368e-05, "loss": 19.3483, "step": 553 }, { "epoch": 0.07, "learning_rate": 9.951787610309388e-05, "loss": 16.8207, "step": 554 }, { "epoch": 0.07, "learning_rate": 9.951491855048003e-05, "loss": 19.8163, "step": 555 }, { "epoch": 0.07, "learning_rate": 9.951195199835968e-05, "loss": 18.1319, "step": 556 }, { "epoch": 0.07, "learning_rate": 9.950897644727202e-05, "loss": 0.0012, "step": 557 }, { "epoch": 0.07, "learning_rate": 9.950599189775786e-05, "loss": 17.4633, "step": 558 }, { "epoch": 0.07, "learning_rate": 9.950299835035965e-05, "loss": 0.0001, "step": 559 }, { "epoch": 0.07, "learning_rate": 9.94999958056215e-05, "loss": 17.9726, "step": 560 }, { "epoch": 0.07, "learning_rate": 9.949698426408911e-05, "loss": 18.272, "step": 561 }, { "epoch": 0.07, "learning_rate": 9.949396372630983e-05, "loss": 19.1535, "step": 562 }, { "epoch": 0.07, "learning_rate": 9.949093419283267e-05, "loss": 18.0142, "step": 563 }, { "epoch": 0.07, "learning_rate": 9.948789566420826e-05, "loss": 18.3144, "step": 564 }, { "epoch": 0.07, "learning_rate": 9.948484814098885e-05, "loss": 17.6387, "step": 565 }, { "epoch": 0.07, "learning_rate": 9.948179162372835e-05, "loss": 0.0001, "step": 566 }, { "epoch": 0.07, "learning_rate": 9.947872611298228e-05, "loss": 17.4544, "step": 567 }, { "epoch": 0.07, "learning_rate": 9.947565160930782e-05, "loss": 0.0002, "step": 568 }, { "epoch": 0.07, "learning_rate": 9.947256811326376e-05, "loss": 17.9223, "step": 569 }, { "epoch": 0.08, "learning_rate": 9.946947562541055e-05, "loss": 17.2175, "step": 570 }, { "epoch": 0.08, "learning_rate": 9.946637414631025e-05, "loss": 0.0001, "step": 571 }, { "epoch": 0.08, "learning_rate": 9.946326367652657e-05, "loss": 0.0006, "step": 572 }, { "epoch": 0.08, "learning_rate": 9.946014421662483e-05, "loss": 17.5617, "step": 573 }, { "epoch": 0.08, "learning_rate": 9.945701576717204e-05, "loss": 19.6432, "step": 574 }, { "epoch": 0.08, "learning_rate": 9.945387832873677e-05, "loss": 0.0004, "step": 575 }, { "epoch": 0.08, "learning_rate": 9.945073190188927e-05, "loss": 18.8732, "step": 576 }, { "epoch": 0.08, "learning_rate": 9.944757648720143e-05, "loss": 19.8406, "step": 577 }, { "epoch": 0.08, "learning_rate": 9.944441208524674e-05, "loss": 18.0568, "step": 578 }, { "epoch": 0.08, "learning_rate": 9.944123869660034e-05, "loss": 18.1529, "step": 579 }, { "epoch": 0.08, "learning_rate": 9.943805632183902e-05, "loss": 0.0001, "step": 580 }, { "epoch": 0.08, "learning_rate": 9.943486496154118e-05, "loss": 18.5973, "step": 581 }, { "epoch": 0.08, "learning_rate": 9.943166461628684e-05, "loss": 19.37, "step": 582 }, { "epoch": 0.08, "learning_rate": 9.942845528665772e-05, "loss": 17.7406, "step": 583 }, { "epoch": 0.08, "learning_rate": 9.942523697323709e-05, "loss": 18.0443, "step": 584 }, { "epoch": 0.08, "learning_rate": 9.942200967660988e-05, "loss": 0.0004, "step": 585 }, { "epoch": 0.08, "learning_rate": 9.94187733973627e-05, "loss": 20.4469, "step": 586 }, { "epoch": 0.08, "learning_rate": 9.941552813608373e-05, "loss": 0.0004, "step": 587 }, { "epoch": 0.08, "learning_rate": 9.941227389336283e-05, "loss": 0.0003, "step": 588 }, { "epoch": 0.08, "learning_rate": 9.940901066979144e-05, "loss": 18.8447, "step": 589 }, { "epoch": 0.08, "learning_rate": 9.940573846596267e-05, "loss": 17.6209, "step": 590 }, { "epoch": 0.08, "learning_rate": 9.940245728247126e-05, "loss": 17.4108, "step": 591 }, { "epoch": 0.08, "learning_rate": 9.93991671199136e-05, "loss": 17.6206, "step": 592 }, { "epoch": 0.08, "learning_rate": 9.939586797888764e-05, "loss": 19.7273, "step": 593 }, { "epoch": 0.08, "learning_rate": 9.939255985999304e-05, "loss": 18.7074, "step": 594 }, { "epoch": 0.08, "learning_rate": 9.938924276383106e-05, "loss": 17.9621, "step": 595 }, { "epoch": 0.08, "learning_rate": 9.938591669100458e-05, "loss": 19.0146, "step": 596 }, { "epoch": 0.08, "learning_rate": 9.938258164211815e-05, "loss": 17.5151, "step": 597 }, { "epoch": 0.08, "learning_rate": 9.93792376177779e-05, "loss": 0.0003, "step": 598 }, { "epoch": 0.08, "learning_rate": 9.937588461859164e-05, "loss": 18.2144, "step": 599 }, { "epoch": 0.08, "learning_rate": 9.937252264516877e-05, "loss": 0.0004, "step": 600 }, { "epoch": 0.08, "learning_rate": 9.936915169812035e-05, "loss": 17.2012, "step": 601 }, { "epoch": 0.08, "learning_rate": 9.936577177805909e-05, "loss": 18.1962, "step": 602 }, { "epoch": 0.08, "learning_rate": 9.936238288559923e-05, "loss": 18.658, "step": 603 }, { "epoch": 0.08, "learning_rate": 9.935898502135676e-05, "loss": 18.737, "step": 604 }, { "epoch": 0.08, "learning_rate": 9.935557818594927e-05, "loss": 19.5866, "step": 605 }, { "epoch": 0.08, "learning_rate": 9.935216237999594e-05, "loss": 17.1425, "step": 606 }, { "epoch": 0.08, "learning_rate": 9.934873760411761e-05, "loss": 17.229, "step": 607 }, { "epoch": 0.08, "learning_rate": 9.934530385893673e-05, "loss": 18.1274, "step": 608 }, { "epoch": 0.08, "learning_rate": 9.93418611450774e-05, "loss": 18.51, "step": 609 }, { "epoch": 0.08, "learning_rate": 9.933840946316538e-05, "loss": 18.0342, "step": 610 }, { "epoch": 0.08, "learning_rate": 9.933494881382797e-05, "loss": 19.7221, "step": 611 }, { "epoch": 0.08, "learning_rate": 9.93314791976942e-05, "loss": 19.7595, "step": 612 }, { "epoch": 0.08, "learning_rate": 9.932800061539464e-05, "loss": 18.7118, "step": 613 }, { "epoch": 0.08, "learning_rate": 9.932451306756158e-05, "loss": 0.0004, "step": 614 }, { "epoch": 0.08, "learning_rate": 9.932101655482888e-05, "loss": 18.6327, "step": 615 }, { "epoch": 0.08, "learning_rate": 9.931751107783203e-05, "loss": 17.626, "step": 616 }, { "epoch": 0.08, "learning_rate": 9.931399663720818e-05, "loss": 0.0004, "step": 617 }, { "epoch": 0.08, "learning_rate": 9.931047323359606e-05, "loss": 16.2972, "step": 618 }, { "epoch": 0.08, "learning_rate": 9.93069408676361e-05, "loss": 17.7687, "step": 619 }, { "epoch": 0.08, "learning_rate": 9.930339953997032e-05, "loss": 19.9117, "step": 620 }, { "epoch": 0.08, "learning_rate": 9.929984925124231e-05, "loss": 16.3133, "step": 621 }, { "epoch": 0.08, "learning_rate": 9.929629000209742e-05, "loss": 0.0003, "step": 622 }, { "epoch": 0.08, "learning_rate": 9.929272179318253e-05, "loss": 0.0001, "step": 623 }, { "epoch": 0.08, "learning_rate": 9.928914462514613e-05, "loss": 19.3787, "step": 624 }, { "epoch": 0.08, "learning_rate": 9.928555849863846e-05, "loss": 0.0003, "step": 625 }, { "epoch": 0.08, "learning_rate": 9.928196341431125e-05, "loss": 18.963, "step": 626 }, { "epoch": 0.08, "learning_rate": 9.927835937281793e-05, "loss": 17.8407, "step": 627 }, { "epoch": 0.08, "learning_rate": 9.927474637481358e-05, "loss": 17.6679, "step": 628 }, { "epoch": 0.08, "learning_rate": 9.927112442095485e-05, "loss": 0.0001, "step": 629 }, { "epoch": 0.08, "learning_rate": 9.926749351190003e-05, "loss": 19.2772, "step": 630 }, { "epoch": 0.08, "learning_rate": 9.926385364830908e-05, "loss": 17.7003, "step": 631 }, { "epoch": 0.08, "learning_rate": 9.926020483084352e-05, "loss": 18.6373, "step": 632 }, { "epoch": 0.08, "learning_rate": 9.925654706016658e-05, "loss": 18.1087, "step": 633 }, { "epoch": 0.08, "learning_rate": 9.925288033694305e-05, "loss": 0.0001, "step": 634 }, { "epoch": 0.08, "learning_rate": 9.924920466183937e-05, "loss": 17.7334, "step": 635 }, { "epoch": 0.08, "learning_rate": 9.92455200355236e-05, "loss": 18.7427, "step": 636 }, { "epoch": 0.08, "learning_rate": 9.924182645866544e-05, "loss": 18.466, "step": 637 }, { "epoch": 0.08, "learning_rate": 9.923812393193621e-05, "loss": 18.3329, "step": 638 }, { "epoch": 0.08, "learning_rate": 9.923441245600886e-05, "loss": 20.4033, "step": 639 }, { "epoch": 0.08, "learning_rate": 9.923069203155795e-05, "loss": 18.2553, "step": 640 }, { "epoch": 0.08, "learning_rate": 9.922696265925971e-05, "loss": 18.2975, "step": 641 }, { "epoch": 0.08, "learning_rate": 9.922322433979192e-05, "loss": 18.1168, "step": 642 }, { "epoch": 0.08, "learning_rate": 9.921947707383408e-05, "loss": 18.7313, "step": 643 }, { "epoch": 0.08, "learning_rate": 9.921572086206724e-05, "loss": 18.6823, "step": 644 }, { "epoch": 0.08, "learning_rate": 9.921195570517411e-05, "loss": 20.3837, "step": 645 }, { "epoch": 0.09, "learning_rate": 9.920818160383903e-05, "loss": 18.4353, "step": 646 }, { "epoch": 0.09, "learning_rate": 9.920439855874793e-05, "loss": 17.5982, "step": 647 }, { "epoch": 0.09, "learning_rate": 9.920060657058843e-05, "loss": 17.0782, "step": 648 }, { "epoch": 0.09, "learning_rate": 9.919680564004971e-05, "loss": 19.7484, "step": 649 }, { "epoch": 0.09, "learning_rate": 9.91929957678226e-05, "loss": 18.3979, "step": 650 }, { "epoch": 0.09, "learning_rate": 9.918917695459958e-05, "loss": 16.1796, "step": 651 }, { "epoch": 0.09, "learning_rate": 9.91853492010747e-05, "loss": 18.1462, "step": 652 }, { "epoch": 0.09, "learning_rate": 9.91815125079437e-05, "loss": 0.0001, "step": 653 }, { "epoch": 0.09, "learning_rate": 9.91776668759039e-05, "loss": 16.9932, "step": 654 }, { "epoch": 0.09, "learning_rate": 9.917381230565424e-05, "loss": 18.7483, "step": 655 }, { "epoch": 0.09, "learning_rate": 9.916994879789532e-05, "loss": 18.1012, "step": 656 }, { "epoch": 0.09, "learning_rate": 9.916607635332935e-05, "loss": 19.3445, "step": 657 }, { "epoch": 0.09, "learning_rate": 9.916219497266015e-05, "loss": 17.2101, "step": 658 }, { "epoch": 0.09, "learning_rate": 9.915830465659317e-05, "loss": 0.0001, "step": 659 }, { "epoch": 0.09, "learning_rate": 9.91544054058355e-05, "loss": 17.2862, "step": 660 }, { "epoch": 0.09, "learning_rate": 9.915049722109584e-05, "loss": 18.3148, "step": 661 }, { "epoch": 0.09, "learning_rate": 9.914658010308451e-05, "loss": 0.0002, "step": 662 }, { "epoch": 0.09, "learning_rate": 9.914265405251345e-05, "loss": 18.5606, "step": 663 }, { "epoch": 0.09, "learning_rate": 9.913871907009626e-05, "loss": 18.4939, "step": 664 }, { "epoch": 0.09, "learning_rate": 9.913477515654811e-05, "loss": 18.95, "step": 665 }, { "epoch": 0.09, "learning_rate": 9.913082231258583e-05, "loss": 18.5873, "step": 666 }, { "epoch": 0.09, "learning_rate": 9.912686053892788e-05, "loss": 20.6744, "step": 667 }, { "epoch": 0.09, "learning_rate": 9.91228898362943e-05, "loss": 0.0002, "step": 668 }, { "epoch": 0.09, "learning_rate": 9.91189102054068e-05, "loss": 17.5774, "step": 669 }, { "epoch": 0.09, "learning_rate": 9.911492164698868e-05, "loss": 19.5405, "step": 670 }, { "epoch": 0.09, "learning_rate": 9.911092416176486e-05, "loss": 17.6654, "step": 671 }, { "epoch": 0.09, "learning_rate": 9.910691775046193e-05, "loss": 16.9991, "step": 672 }, { "epoch": 0.09, "learning_rate": 9.910290241380804e-05, "loss": 18.0832, "step": 673 }, { "epoch": 0.09, "learning_rate": 9.909887815253301e-05, "loss": 0.0008, "step": 674 }, { "epoch": 0.09, "learning_rate": 9.909484496736825e-05, "loss": 0.0001, "step": 675 }, { "epoch": 0.09, "learning_rate": 9.909080285904681e-05, "loss": 0.0004, "step": 676 }, { "epoch": 0.09, "learning_rate": 9.908675182830336e-05, "loss": 21.7779, "step": 677 }, { "epoch": 0.09, "learning_rate": 9.908269187587418e-05, "loss": 0.0001, "step": 678 }, { "epoch": 0.09, "learning_rate": 9.90786230024972e-05, "loss": 0.0004, "step": 679 }, { "epoch": 0.09, "learning_rate": 9.907454520891193e-05, "loss": 18.1453, "step": 680 }, { "epoch": 0.09, "learning_rate": 9.907045849585952e-05, "loss": 0.0001, "step": 681 }, { "epoch": 0.09, "learning_rate": 9.906636286408278e-05, "loss": 0.0001, "step": 682 }, { "epoch": 0.09, "learning_rate": 9.906225831432607e-05, "loss": 16.5483, "step": 683 }, { "epoch": 0.09, "learning_rate": 9.90581448473354e-05, "loss": 18.1914, "step": 684 }, { "epoch": 0.09, "learning_rate": 9.905402246385845e-05, "loss": 17.0655, "step": 685 }, { "epoch": 0.09, "learning_rate": 9.904989116464445e-05, "loss": 19.3224, "step": 686 }, { "epoch": 0.09, "learning_rate": 9.904575095044425e-05, "loss": 0.0004, "step": 687 }, { "epoch": 0.09, "learning_rate": 9.904160182201041e-05, "loss": 17.6437, "step": 688 }, { "epoch": 0.09, "learning_rate": 9.9037443780097e-05, "loss": 18.4961, "step": 689 }, { "epoch": 0.09, "learning_rate": 9.903327682545977e-05, "loss": 17.2826, "step": 690 }, { "epoch": 0.09, "learning_rate": 9.902910095885609e-05, "loss": 16.2287, "step": 691 }, { "epoch": 0.09, "learning_rate": 9.902491618104494e-05, "loss": 19.7204, "step": 692 }, { "epoch": 0.09, "learning_rate": 9.90207224927869e-05, "loss": 21.2897, "step": 693 }, { "epoch": 0.09, "learning_rate": 9.901651989484418e-05, "loss": 19.6587, "step": 694 }, { "epoch": 0.09, "learning_rate": 9.901230838798065e-05, "loss": 19.6601, "step": 695 }, { "epoch": 0.09, "learning_rate": 9.900808797296174e-05, "loss": 18.3619, "step": 696 }, { "epoch": 0.09, "learning_rate": 9.900385865055452e-05, "loss": 19.3512, "step": 697 }, { "epoch": 0.09, "learning_rate": 9.899962042152772e-05, "loss": 19.6427, "step": 698 }, { "epoch": 0.09, "learning_rate": 9.899537328665161e-05, "loss": 17.3797, "step": 699 }, { "epoch": 0.09, "learning_rate": 9.899111724669816e-05, "loss": 17.871, "step": 700 }, { "epoch": 0.09, "learning_rate": 9.89868523024409e-05, "loss": 17.6536, "step": 701 }, { "epoch": 0.09, "learning_rate": 9.898257845465497e-05, "loss": 19.5202, "step": 702 }, { "epoch": 0.09, "learning_rate": 9.897829570411721e-05, "loss": 18.9397, "step": 703 }, { "epoch": 0.09, "learning_rate": 9.897400405160599e-05, "loss": 17.7148, "step": 704 }, { "epoch": 0.09, "learning_rate": 9.896970349790134e-05, "loss": 0.0001, "step": 705 }, { "epoch": 0.09, "learning_rate": 9.89653940437849e-05, "loss": 18.1768, "step": 706 }, { "epoch": 0.09, "learning_rate": 9.896107569003995e-05, "loss": 18.5571, "step": 707 }, { "epoch": 0.09, "learning_rate": 9.895674843745133e-05, "loss": 18.4466, "step": 708 }, { "epoch": 0.09, "learning_rate": 9.895241228680556e-05, "loss": 0.0009, "step": 709 }, { "epoch": 0.09, "learning_rate": 9.894806723889073e-05, "loss": 16.4271, "step": 710 }, { "epoch": 0.09, "learning_rate": 9.894371329449659e-05, "loss": 0.0001, "step": 711 }, { "epoch": 0.09, "learning_rate": 9.893935045441445e-05, "loss": 18.9233, "step": 712 }, { "epoch": 0.09, "learning_rate": 9.893497871943731e-05, "loss": 0.0001, "step": 713 }, { "epoch": 0.09, "learning_rate": 9.893059809035971e-05, "loss": 0.0001, "step": 714 }, { "epoch": 0.09, "learning_rate": 9.892620856797789e-05, "loss": 17.6998, "step": 715 }, { "epoch": 0.09, "learning_rate": 9.892181015308963e-05, "loss": 18.4279, "step": 716 }, { "epoch": 0.09, "learning_rate": 9.891740284649435e-05, "loss": 17.5441, "step": 717 }, { "epoch": 0.09, "learning_rate": 9.891298664899312e-05, "loss": 0.0003, "step": 718 }, { "epoch": 0.09, "learning_rate": 9.890856156138857e-05, "loss": 18.6722, "step": 719 }, { "epoch": 0.09, "learning_rate": 9.8904127584485e-05, "loss": 18.6622, "step": 720 }, { "epoch": 0.09, "learning_rate": 9.889968471908829e-05, "loss": 20.3638, "step": 721 }, { "epoch": 0.1, "learning_rate": 9.889523296600595e-05, "loss": 0.0002, "step": 722 }, { "epoch": 0.1, "learning_rate": 9.88907723260471e-05, "loss": 18.9619, "step": 723 }, { "epoch": 0.1, "learning_rate": 9.888630280002244e-05, "loss": 17.1954, "step": 724 }, { "epoch": 0.1, "learning_rate": 9.88818243887444e-05, "loss": 17.9163, "step": 725 }, { "epoch": 0.1, "learning_rate": 9.887733709302688e-05, "loss": 0.0003, "step": 726 }, { "epoch": 0.1, "learning_rate": 9.88728409136855e-05, "loss": 18.5898, "step": 727 }, { "epoch": 0.1, "learning_rate": 9.88683358515374e-05, "loss": 18.9575, "step": 728 }, { "epoch": 0.1, "learning_rate": 9.886382190740146e-05, "loss": 18.2151, "step": 729 }, { "epoch": 0.1, "learning_rate": 9.885929908209809e-05, "loss": 19.2003, "step": 730 }, { "epoch": 0.1, "learning_rate": 9.88547673764493e-05, "loss": 16.1855, "step": 731 }, { "epoch": 0.1, "learning_rate": 9.885022679127875e-05, "loss": 19.1157, "step": 732 }, { "epoch": 0.1, "learning_rate": 9.884567732741172e-05, "loss": 17.7266, "step": 733 }, { "epoch": 0.1, "learning_rate": 9.884111898567508e-05, "loss": 17.9879, "step": 734 }, { "epoch": 0.1, "learning_rate": 9.883655176689735e-05, "loss": 0.0001, "step": 735 }, { "epoch": 0.1, "learning_rate": 9.88319756719086e-05, "loss": 19.1415, "step": 736 }, { "epoch": 0.1, "learning_rate": 9.882739070154057e-05, "loss": 0.0001, "step": 737 }, { "epoch": 0.1, "learning_rate": 9.882279685662659e-05, "loss": 17.8342, "step": 738 }, { "epoch": 0.1, "learning_rate": 9.88181941380016e-05, "loss": 0.0003, "step": 739 }, { "epoch": 0.1, "learning_rate": 9.88135825465022e-05, "loss": 0.0001, "step": 740 }, { "epoch": 0.1, "learning_rate": 9.880896208296651e-05, "loss": 0.0001, "step": 741 }, { "epoch": 0.1, "learning_rate": 9.880433274823433e-05, "loss": 0.0001, "step": 742 }, { "epoch": 0.1, "learning_rate": 9.879969454314708e-05, "loss": 18.2248, "step": 743 }, { "epoch": 0.1, "learning_rate": 9.879504746854775e-05, "loss": 17.5657, "step": 744 }, { "epoch": 0.1, "learning_rate": 9.879039152528095e-05, "loss": 18.0909, "step": 745 }, { "epoch": 0.1, "learning_rate": 9.878572671419296e-05, "loss": 0.0006, "step": 746 }, { "epoch": 0.1, "learning_rate": 9.878105303613156e-05, "loss": 0.0001, "step": 747 }, { "epoch": 0.1, "learning_rate": 9.877637049194626e-05, "loss": 19.063, "step": 748 }, { "epoch": 0.1, "learning_rate": 9.877167908248811e-05, "loss": 19.3227, "step": 749 }, { "epoch": 0.1, "learning_rate": 9.876697880860979e-05, "loss": 16.9044, "step": 750 }, { "epoch": 0.1, "learning_rate": 9.87622696711656e-05, "loss": 19.1241, "step": 751 }, { "epoch": 0.1, "learning_rate": 9.875755167101142e-05, "loss": 17.6942, "step": 752 }, { "epoch": 0.1, "learning_rate": 9.875282480900478e-05, "loss": 0.0001, "step": 753 }, { "epoch": 0.1, "learning_rate": 9.874808908600479e-05, "loss": 16.5329, "step": 754 }, { "epoch": 0.1, "learning_rate": 9.874334450287222e-05, "loss": 17.9176, "step": 755 }, { "epoch": 0.1, "learning_rate": 9.873859106046937e-05, "loss": 0.0001, "step": 756 }, { "epoch": 0.1, "learning_rate": 9.87338287596602e-05, "loss": 0.0001, "step": 757 }, { "epoch": 0.1, "learning_rate": 9.87290576013103e-05, "loss": 18.2208, "step": 758 }, { "epoch": 0.1, "learning_rate": 9.872427758628683e-05, "loss": 18.6835, "step": 759 }, { "epoch": 0.1, "learning_rate": 9.871948871545858e-05, "loss": 17.1986, "step": 760 }, { "epoch": 0.1, "learning_rate": 9.871469098969593e-05, "loss": 19.8745, "step": 761 }, { "epoch": 0.1, "learning_rate": 9.870988440987089e-05, "loss": 19.9879, "step": 762 }, { "epoch": 0.1, "learning_rate": 9.870506897685707e-05, "loss": 17.8626, "step": 763 }, { "epoch": 0.1, "learning_rate": 9.870024469152972e-05, "loss": 18.1049, "step": 764 }, { "epoch": 0.1, "learning_rate": 9.869541155476563e-05, "loss": 17.1957, "step": 765 }, { "epoch": 0.1, "learning_rate": 9.869056956744325e-05, "loss": 18.4922, "step": 766 }, { "epoch": 0.1, "learning_rate": 9.868571873044263e-05, "loss": 0.0001, "step": 767 }, { "epoch": 0.1, "learning_rate": 9.868085904464545e-05, "loss": 0.0001, "step": 768 }, { "epoch": 0.1, "learning_rate": 9.867599051093495e-05, "loss": 19.8541, "step": 769 }, { "epoch": 0.1, "learning_rate": 9.8671113130196e-05, "loss": 19.5636, "step": 770 }, { "epoch": 0.1, "learning_rate": 9.866622690331507e-05, "loss": 18.3229, "step": 771 }, { "epoch": 0.1, "learning_rate": 9.866133183118029e-05, "loss": 18.6332, "step": 772 }, { "epoch": 0.1, "learning_rate": 9.865642791468132e-05, "loss": 18.4553, "step": 773 }, { "epoch": 0.1, "learning_rate": 9.865151515470949e-05, "loss": 17.6813, "step": 774 }, { "epoch": 0.1, "learning_rate": 9.864659355215769e-05, "loss": 0.0005, "step": 775 }, { "epoch": 0.1, "learning_rate": 9.864166310792043e-05, "loss": 18.3691, "step": 776 }, { "epoch": 0.1, "learning_rate": 9.863672382289386e-05, "loss": 17.9422, "step": 777 }, { "epoch": 0.1, "learning_rate": 9.86317756979757e-05, "loss": 17.9661, "step": 778 }, { "epoch": 0.1, "learning_rate": 9.86268187340653e-05, "loss": 17.833, "step": 779 }, { "epoch": 0.1, "learning_rate": 9.862185293206359e-05, "loss": 19.5067, "step": 780 }, { "epoch": 0.1, "learning_rate": 9.861687829287314e-05, "loss": 0.0005, "step": 781 }, { "epoch": 0.1, "learning_rate": 9.861189481739807e-05, "loss": 19.2356, "step": 782 }, { "epoch": 0.1, "learning_rate": 9.860690250654419e-05, "loss": 18.4686, "step": 783 }, { "epoch": 0.1, "learning_rate": 9.860190136121885e-05, "loss": 17.5485, "step": 784 }, { "epoch": 0.1, "learning_rate": 9.859689138233101e-05, "loss": 19.3561, "step": 785 }, { "epoch": 0.1, "learning_rate": 9.859187257079128e-05, "loss": 20.4931, "step": 786 }, { "epoch": 0.1, "learning_rate": 9.858684492751181e-05, "loss": 19.5223, "step": 787 }, { "epoch": 0.1, "learning_rate": 9.858180845340643e-05, "loss": 20.0117, "step": 788 }, { "epoch": 0.1, "learning_rate": 9.857676314939051e-05, "loss": 19.4525, "step": 789 }, { "epoch": 0.1, "learning_rate": 9.857170901638106e-05, "loss": 0.0003, "step": 790 }, { "epoch": 0.1, "learning_rate": 9.856664605529671e-05, "loss": 19.1491, "step": 791 }, { "epoch": 0.1, "learning_rate": 9.856157426705762e-05, "loss": 0.0001, "step": 792 }, { "epoch": 0.1, "learning_rate": 9.855649365258563e-05, "loss": 16.4986, "step": 793 }, { "epoch": 0.1, "learning_rate": 9.855140421280418e-05, "loss": 17.3059, "step": 794 }, { "epoch": 0.1, "learning_rate": 9.854630594863828e-05, "loss": 18.8775, "step": 795 }, { "epoch": 0.1, "learning_rate": 9.854119886101452e-05, "loss": 19.3979, "step": 796 }, { "epoch": 0.1, "learning_rate": 9.853608295086118e-05, "loss": 18.2964, "step": 797 }, { "epoch": 0.11, "learning_rate": 9.853095821910807e-05, "loss": 18.5903, "step": 798 }, { "epoch": 0.11, "learning_rate": 9.852582466668665e-05, "loss": 19.1825, "step": 799 }, { "epoch": 0.11, "learning_rate": 9.852068229452993e-05, "loss": 19.261, "step": 800 }, { "epoch": 0.11, "learning_rate": 9.851553110357257e-05, "loss": 0.0001, "step": 801 }, { "epoch": 0.11, "learning_rate": 9.851037109475083e-05, "loss": 0.0005, "step": 802 }, { "epoch": 0.11, "learning_rate": 9.850520226900252e-05, "loss": 17.8144, "step": 803 }, { "epoch": 0.11, "learning_rate": 9.850002462726713e-05, "loss": 18.6173, "step": 804 }, { "epoch": 0.11, "learning_rate": 9.84948381704857e-05, "loss": 19.907, "step": 805 }, { "epoch": 0.11, "learning_rate": 9.848964289960089e-05, "loss": 0.0001, "step": 806 }, { "epoch": 0.11, "learning_rate": 9.848443881555694e-05, "loss": 18.0451, "step": 807 }, { "epoch": 0.11, "learning_rate": 9.847922591929975e-05, "loss": 19.0846, "step": 808 }, { "epoch": 0.11, "learning_rate": 9.847400421177673e-05, "loss": 17.7047, "step": 809 }, { "epoch": 0.11, "learning_rate": 9.8468773693937e-05, "loss": 0.0005, "step": 810 }, { "epoch": 0.11, "learning_rate": 9.846353436673117e-05, "loss": 19.2882, "step": 811 }, { "epoch": 0.11, "learning_rate": 9.845828623111152e-05, "loss": 17.6358, "step": 812 }, { "epoch": 0.11, "learning_rate": 9.845302928803196e-05, "loss": 0.0002, "step": 813 }, { "epoch": 0.11, "learning_rate": 9.844776353844791e-05, "loss": 17.1752, "step": 814 }, { "epoch": 0.11, "learning_rate": 9.844248898331645e-05, "loss": 18.6109, "step": 815 }, { "epoch": 0.11, "learning_rate": 9.843720562359627e-05, "loss": 19.3889, "step": 816 }, { "epoch": 0.11, "learning_rate": 9.84319134602476e-05, "loss": 17.5263, "step": 817 }, { "epoch": 0.11, "learning_rate": 9.842661249423233e-05, "loss": 16.7711, "step": 818 }, { "epoch": 0.11, "learning_rate": 9.842130272651393e-05, "loss": 0.0001, "step": 819 }, { "epoch": 0.11, "learning_rate": 9.841598415805746e-05, "loss": 16.908, "step": 820 }, { "epoch": 0.11, "learning_rate": 9.841065678982961e-05, "loss": 18.5799, "step": 821 }, { "epoch": 0.11, "learning_rate": 9.840532062279862e-05, "loss": 21.4529, "step": 822 }, { "epoch": 0.11, "learning_rate": 9.839997565793437e-05, "loss": 0.0001, "step": 823 }, { "epoch": 0.11, "learning_rate": 9.839462189620832e-05, "loss": 0.0004, "step": 824 }, { "epoch": 0.11, "learning_rate": 9.838925933859355e-05, "loss": 19.3577, "step": 825 }, { "epoch": 0.11, "learning_rate": 9.838388798606471e-05, "loss": 18.0159, "step": 826 }, { "epoch": 0.11, "learning_rate": 9.837850783959805e-05, "loss": 17.9598, "step": 827 }, { "epoch": 0.11, "learning_rate": 9.837311890017146e-05, "loss": 19.2974, "step": 828 }, { "epoch": 0.11, "learning_rate": 9.836772116876438e-05, "loss": 16.9755, "step": 829 }, { "epoch": 0.11, "learning_rate": 9.836231464635787e-05, "loss": 17.8578, "step": 830 }, { "epoch": 0.11, "learning_rate": 9.835689933393457e-05, "loss": 16.8877, "step": 831 }, { "epoch": 0.11, "learning_rate": 9.835147523247876e-05, "loss": 0.0001, "step": 832 }, { "epoch": 0.11, "learning_rate": 9.834604234297626e-05, "loss": 0.0001, "step": 833 }, { "epoch": 0.11, "learning_rate": 9.834060066641453e-05, "loss": 19.5469, "step": 834 }, { "epoch": 0.11, "learning_rate": 9.833515020378263e-05, "loss": 19.5707, "step": 835 }, { "epoch": 0.11, "learning_rate": 9.832969095607117e-05, "loss": 18.423, "step": 836 }, { "epoch": 0.11, "learning_rate": 9.832422292427239e-05, "loss": 17.096, "step": 837 }, { "epoch": 0.11, "learning_rate": 9.831874610938015e-05, "loss": 17.3895, "step": 838 }, { "epoch": 0.11, "learning_rate": 9.831326051238985e-05, "loss": 18.0279, "step": 839 }, { "epoch": 0.11, "learning_rate": 9.830776613429855e-05, "loss": 0.0001, "step": 840 }, { "epoch": 0.11, "learning_rate": 9.830226297610484e-05, "loss": 18.8917, "step": 841 }, { "epoch": 0.11, "learning_rate": 9.829675103880894e-05, "loss": 18.0877, "step": 842 }, { "epoch": 0.11, "learning_rate": 9.829123032341267e-05, "loss": 18.9325, "step": 843 }, { "epoch": 0.11, "learning_rate": 9.828570083091946e-05, "loss": 18.1808, "step": 844 }, { "epoch": 0.11, "learning_rate": 9.82801625623343e-05, "loss": 17.4183, "step": 845 }, { "epoch": 0.11, "learning_rate": 9.827461551866378e-05, "loss": 16.5739, "step": 846 }, { "epoch": 0.11, "learning_rate": 9.826905970091609e-05, "loss": 17.5791, "step": 847 }, { "epoch": 0.11, "learning_rate": 9.826349511010102e-05, "loss": 0.0001, "step": 848 }, { "epoch": 0.11, "learning_rate": 9.825792174722998e-05, "loss": 19.3257, "step": 849 }, { "epoch": 0.11, "learning_rate": 9.825233961331594e-05, "loss": 16.2465, "step": 850 }, { "epoch": 0.11, "learning_rate": 9.824674870937345e-05, "loss": 17.2793, "step": 851 }, { "epoch": 0.11, "learning_rate": 9.824114903641867e-05, "loss": 19.3341, "step": 852 }, { "epoch": 0.11, "learning_rate": 9.823554059546941e-05, "loss": 0.0002, "step": 853 }, { "epoch": 0.11, "learning_rate": 9.822992338754497e-05, "loss": 0.0001, "step": 854 }, { "epoch": 0.11, "learning_rate": 9.822429741366633e-05, "loss": 0.0001, "step": 855 }, { "epoch": 0.11, "learning_rate": 9.821866267485602e-05, "loss": 17.7425, "step": 856 }, { "epoch": 0.11, "learning_rate": 9.821301917213816e-05, "loss": 18.7154, "step": 857 }, { "epoch": 0.11, "learning_rate": 9.82073669065385e-05, "loss": 0.0, "step": 858 }, { "epoch": 0.11, "learning_rate": 9.820170587908434e-05, "loss": 18.7627, "step": 859 }, { "epoch": 0.11, "learning_rate": 9.81960360908046e-05, "loss": 17.5867, "step": 860 }, { "epoch": 0.11, "learning_rate": 9.819035754272978e-05, "loss": 18.7176, "step": 861 }, { "epoch": 0.11, "learning_rate": 9.818467023589197e-05, "loss": 0.0001, "step": 862 }, { "epoch": 0.11, "learning_rate": 9.817897417132486e-05, "loss": 0.0001, "step": 863 }, { "epoch": 0.11, "learning_rate": 9.817326935006375e-05, "loss": 0.0001, "step": 864 }, { "epoch": 0.11, "learning_rate": 9.816755577314549e-05, "loss": 16.9689, "step": 865 }, { "epoch": 0.11, "learning_rate": 9.816183344160854e-05, "loss": 18.771, "step": 866 }, { "epoch": 0.11, "learning_rate": 9.815610235649298e-05, "loss": 18.7155, "step": 867 }, { "epoch": 0.11, "learning_rate": 9.81503625188404e-05, "loss": 0.0002, "step": 868 }, { "epoch": 0.11, "learning_rate": 9.81446139296941e-05, "loss": 17.8329, "step": 869 }, { "epoch": 0.11, "learning_rate": 9.813885659009885e-05, "loss": 16.2999, "step": 870 }, { "epoch": 0.11, "learning_rate": 9.813309050110111e-05, "loss": 0.0002, "step": 871 }, { "epoch": 0.11, "learning_rate": 9.812731566374886e-05, "loss": 19.4016, "step": 872 }, { "epoch": 0.11, "learning_rate": 9.81215320790917e-05, "loss": 19.0271, "step": 873 }, { "epoch": 0.12, "learning_rate": 9.811573974818082e-05, "loss": 17.8921, "step": 874 }, { "epoch": 0.12, "learning_rate": 9.8109938672069e-05, "loss": 18.0429, "step": 875 }, { "epoch": 0.12, "learning_rate": 9.810412885181062e-05, "loss": 18.0519, "step": 876 }, { "epoch": 0.12, "learning_rate": 9.80983102884616e-05, "loss": 19.3862, "step": 877 }, { "epoch": 0.12, "learning_rate": 9.809248298307951e-05, "loss": 15.2105, "step": 878 }, { "epoch": 0.12, "learning_rate": 9.808664693672347e-05, "loss": 18.5772, "step": 879 }, { "epoch": 0.12, "learning_rate": 9.808080215045421e-05, "loss": 0.0009, "step": 880 }, { "epoch": 0.12, "learning_rate": 9.807494862533402e-05, "loss": 0.0, "step": 881 }, { "epoch": 0.12, "learning_rate": 9.806908636242683e-05, "loss": 18.4785, "step": 882 }, { "epoch": 0.12, "learning_rate": 9.80632153627981e-05, "loss": 19.1003, "step": 883 }, { "epoch": 0.12, "learning_rate": 9.805733562751492e-05, "loss": 16.959, "step": 884 }, { "epoch": 0.12, "learning_rate": 9.805144715764594e-05, "loss": 16.6533, "step": 885 }, { "epoch": 0.12, "learning_rate": 9.804554995426141e-05, "loss": 0.0001, "step": 886 }, { "epoch": 0.12, "learning_rate": 9.80396440184332e-05, "loss": 0.0001, "step": 887 }, { "epoch": 0.12, "learning_rate": 9.803372935123468e-05, "loss": 18.8095, "step": 888 }, { "epoch": 0.12, "learning_rate": 9.802780595374091e-05, "loss": 18.9422, "step": 889 }, { "epoch": 0.12, "learning_rate": 9.802187382702844e-05, "loss": 0.0, "step": 890 }, { "epoch": 0.12, "learning_rate": 9.80159329721755e-05, "loss": 19.0448, "step": 891 }, { "epoch": 0.12, "learning_rate": 9.800998339026184e-05, "loss": 0.0001, "step": 892 }, { "epoch": 0.12, "learning_rate": 9.80040250823688e-05, "loss": 18.1515, "step": 893 }, { "epoch": 0.12, "learning_rate": 9.799805804957935e-05, "loss": 17.1695, "step": 894 }, { "epoch": 0.12, "learning_rate": 9.799208229297802e-05, "loss": 0.0004, "step": 895 }, { "epoch": 0.12, "learning_rate": 9.798609781365092e-05, "loss": 18.3063, "step": 896 }, { "epoch": 0.12, "learning_rate": 9.798010461268572e-05, "loss": 18.1694, "step": 897 }, { "epoch": 0.12, "learning_rate": 9.797410269117175e-05, "loss": 18.5143, "step": 898 }, { "epoch": 0.12, "learning_rate": 9.796809205019985e-05, "loss": 19.9041, "step": 899 }, { "epoch": 0.12, "learning_rate": 9.796207269086248e-05, "loss": 18.5158, "step": 900 }, { "epoch": 0.12, "learning_rate": 9.795604461425369e-05, "loss": 16.8071, "step": 901 }, { "epoch": 0.12, "learning_rate": 9.795000782146908e-05, "loss": 0.0003, "step": 902 }, { "epoch": 0.12, "learning_rate": 9.79439623136059e-05, "loss": 20.3237, "step": 903 }, { "epoch": 0.12, "learning_rate": 9.793790809176289e-05, "loss": 16.5686, "step": 904 }, { "epoch": 0.12, "learning_rate": 9.793184515704046e-05, "loss": 17.201, "step": 905 }, { "epoch": 0.12, "learning_rate": 9.792577351054058e-05, "loss": 18.3924, "step": 906 }, { "epoch": 0.12, "learning_rate": 9.791969315336674e-05, "loss": 17.7231, "step": 907 }, { "epoch": 0.12, "learning_rate": 9.791360408662412e-05, "loss": 15.5251, "step": 908 }, { "epoch": 0.12, "learning_rate": 9.790750631141941e-05, "loss": 0.0006, "step": 909 }, { "epoch": 0.12, "learning_rate": 9.790139982886088e-05, "loss": 16.7649, "step": 910 }, { "epoch": 0.12, "learning_rate": 9.789528464005844e-05, "loss": 19.4163, "step": 911 }, { "epoch": 0.12, "learning_rate": 9.788916074612352e-05, "loss": 20.1217, "step": 912 }, { "epoch": 0.12, "learning_rate": 9.788302814816918e-05, "loss": 0.0002, "step": 913 }, { "epoch": 0.12, "learning_rate": 9.787688684731002e-05, "loss": 19.2863, "step": 914 }, { "epoch": 0.12, "learning_rate": 9.787073684466224e-05, "loss": 18.376, "step": 915 }, { "epoch": 0.12, "learning_rate": 9.786457814134366e-05, "loss": 0.0003, "step": 916 }, { "epoch": 0.12, "learning_rate": 9.785841073847361e-05, "loss": 17.4211, "step": 917 }, { "epoch": 0.12, "learning_rate": 9.785223463717305e-05, "loss": 19.2809, "step": 918 }, { "epoch": 0.12, "learning_rate": 9.784604983856451e-05, "loss": 19.0201, "step": 919 }, { "epoch": 0.12, "learning_rate": 9.783985634377208e-05, "loss": 18.5803, "step": 920 }, { "epoch": 0.12, "learning_rate": 9.783365415392148e-05, "loss": 16.2923, "step": 921 }, { "epoch": 0.12, "learning_rate": 9.782744327013995e-05, "loss": 17.5404, "step": 922 }, { "epoch": 0.12, "learning_rate": 9.782122369355638e-05, "loss": 0.0001, "step": 923 }, { "epoch": 0.12, "learning_rate": 9.781499542530114e-05, "loss": 18.2181, "step": 924 }, { "epoch": 0.12, "learning_rate": 9.780875846650627e-05, "loss": 17.6276, "step": 925 }, { "epoch": 0.12, "learning_rate": 9.780251281830537e-05, "loss": 17.3243, "step": 926 }, { "epoch": 0.12, "learning_rate": 9.77962584818336e-05, "loss": 17.4672, "step": 927 }, { "epoch": 0.12, "learning_rate": 9.77899954582277e-05, "loss": 17.7282, "step": 928 }, { "epoch": 0.12, "learning_rate": 9.7783723748626e-05, "loss": 17.6305, "step": 929 }, { "epoch": 0.12, "learning_rate": 9.777744335416841e-05, "loss": 19.056, "step": 930 }, { "epoch": 0.12, "learning_rate": 9.77711542759964e-05, "loss": 18.6091, "step": 931 }, { "epoch": 0.12, "learning_rate": 9.776485651525304e-05, "loss": 17.5446, "step": 932 }, { "epoch": 0.12, "learning_rate": 9.775855007308297e-05, "loss": 19.8427, "step": 933 }, { "epoch": 0.12, "learning_rate": 9.775223495063242e-05, "loss": 19.3728, "step": 934 }, { "epoch": 0.12, "learning_rate": 9.774591114904916e-05, "loss": 0.0001, "step": 935 }, { "epoch": 0.12, "learning_rate": 9.773957866948258e-05, "loss": 16.6783, "step": 936 }, { "epoch": 0.12, "learning_rate": 9.773323751308362e-05, "loss": 0.0001, "step": 937 }, { "epoch": 0.12, "learning_rate": 9.772688768100483e-05, "loss": 0.0001, "step": 938 }, { "epoch": 0.12, "learning_rate": 9.772052917440027e-05, "loss": 18.7507, "step": 939 }, { "epoch": 0.12, "learning_rate": 9.771416199442565e-05, "loss": 16.4543, "step": 940 }, { "epoch": 0.12, "learning_rate": 9.770778614223824e-05, "loss": 18.3709, "step": 941 }, { "epoch": 0.12, "learning_rate": 9.770140161899684e-05, "loss": 0.0006, "step": 942 }, { "epoch": 0.12, "learning_rate": 9.769500842586187e-05, "loss": 17.1865, "step": 943 }, { "epoch": 0.12, "learning_rate": 9.768860656399534e-05, "loss": 18.838, "step": 944 }, { "epoch": 0.12, "learning_rate": 9.768219603456077e-05, "loss": 17.0568, "step": 945 }, { "epoch": 0.12, "learning_rate": 9.767577683872333e-05, "loss": 0.0002, "step": 946 }, { "epoch": 0.12, "learning_rate": 9.76693489776497e-05, "loss": 18.151, "step": 947 }, { "epoch": 0.12, "learning_rate": 9.76629124525082e-05, "loss": 17.9107, "step": 948 }, { "epoch": 0.12, "learning_rate": 9.765646726446867e-05, "loss": 19.6403, "step": 949 }, { "epoch": 0.13, "learning_rate": 9.765001341470255e-05, "loss": 16.9871, "step": 950 }, { "epoch": 0.13, "learning_rate": 9.764355090438284e-05, "loss": 18.6395, "step": 951 }, { "epoch": 0.13, "learning_rate": 9.763707973468413e-05, "loss": 15.7441, "step": 952 }, { "epoch": 0.13, "learning_rate": 9.763059990678259e-05, "loss": 19.7085, "step": 953 }, { "epoch": 0.13, "learning_rate": 9.762411142185593e-05, "loss": 0.0001, "step": 954 }, { "epoch": 0.13, "learning_rate": 9.761761428108346e-05, "loss": 17.959, "step": 955 }, { "epoch": 0.13, "learning_rate": 9.761110848564607e-05, "loss": 19.2207, "step": 956 }, { "epoch": 0.13, "learning_rate": 9.760459403672619e-05, "loss": 0.0001, "step": 957 }, { "epoch": 0.13, "learning_rate": 9.759807093550787e-05, "loss": 17.7916, "step": 958 }, { "epoch": 0.13, "learning_rate": 9.759153918317668e-05, "loss": 17.0816, "step": 959 }, { "epoch": 0.13, "learning_rate": 9.75849987809198e-05, "loss": 0.0001, "step": 960 }, { "epoch": 0.13, "learning_rate": 9.757844972992598e-05, "loss": 19.3687, "step": 961 }, { "epoch": 0.13, "learning_rate": 9.757189203138553e-05, "loss": 18.1917, "step": 962 }, { "epoch": 0.13, "learning_rate": 9.75653256864903e-05, "loss": 18.6077, "step": 963 }, { "epoch": 0.13, "learning_rate": 9.755875069643378e-05, "loss": 0.0001, "step": 964 }, { "epoch": 0.13, "learning_rate": 9.755216706241101e-05, "loss": 19.3299, "step": 965 }, { "epoch": 0.13, "learning_rate": 9.754557478561857e-05, "loss": 19.504, "step": 966 }, { "epoch": 0.13, "learning_rate": 9.753897386725461e-05, "loss": 18.9911, "step": 967 }, { "epoch": 0.13, "learning_rate": 9.753236430851889e-05, "loss": 0.0001, "step": 968 }, { "epoch": 0.13, "learning_rate": 9.752574611061272e-05, "loss": 18.4915, "step": 969 }, { "epoch": 0.13, "learning_rate": 9.751911927473896e-05, "loss": 17.6132, "step": 970 }, { "epoch": 0.13, "learning_rate": 9.75124838021021e-05, "loss": 0.0004, "step": 971 }, { "epoch": 0.13, "learning_rate": 9.750583969390812e-05, "loss": 18.4662, "step": 972 }, { "epoch": 0.13, "learning_rate": 9.749918695136464e-05, "loss": 17.1979, "step": 973 }, { "epoch": 0.13, "learning_rate": 9.749252557568079e-05, "loss": 0.0004, "step": 974 }, { "epoch": 0.13, "learning_rate": 9.748585556806733e-05, "loss": 0.0002, "step": 975 }, { "epoch": 0.13, "learning_rate": 9.747917692973652e-05, "loss": 17.5652, "step": 976 }, { "epoch": 0.13, "learning_rate": 9.747248966190227e-05, "loss": 19.443, "step": 977 }, { "epoch": 0.13, "learning_rate": 9.746579376577998e-05, "loss": 0.0001, "step": 978 }, { "epoch": 0.13, "learning_rate": 9.745908924258664e-05, "loss": 16.122, "step": 979 }, { "epoch": 0.13, "learning_rate": 9.745237609354087e-05, "loss": 17.9762, "step": 980 }, { "epoch": 0.13, "learning_rate": 9.744565431986276e-05, "loss": 17.464, "step": 981 }, { "epoch": 0.13, "learning_rate": 9.743892392277405e-05, "loss": 20.5289, "step": 982 }, { "epoch": 0.13, "learning_rate": 9.743218490349799e-05, "loss": 19.7057, "step": 983 }, { "epoch": 0.13, "learning_rate": 9.742543726325944e-05, "loss": 17.4737, "step": 984 }, { "epoch": 0.13, "learning_rate": 9.741868100328478e-05, "loss": 16.8787, "step": 985 }, { "epoch": 0.13, "learning_rate": 9.741191612480199e-05, "loss": 0.0001, "step": 986 }, { "epoch": 0.13, "learning_rate": 9.740514262904064e-05, "loss": 0.0001, "step": 987 }, { "epoch": 0.13, "learning_rate": 9.73983605172318e-05, "loss": 18.5079, "step": 988 }, { "epoch": 0.13, "learning_rate": 9.739156979060815e-05, "loss": 18.9807, "step": 989 }, { "epoch": 0.13, "learning_rate": 9.738477045040396e-05, "loss": 17.398, "step": 990 }, { "epoch": 0.13, "learning_rate": 9.737796249785496e-05, "loss": 17.7491, "step": 991 }, { "epoch": 0.13, "learning_rate": 9.73711459341986e-05, "loss": 18.821, "step": 992 }, { "epoch": 0.13, "learning_rate": 9.736432076067377e-05, "loss": 18.1224, "step": 993 }, { "epoch": 0.13, "learning_rate": 9.735748697852098e-05, "loss": 16.8544, "step": 994 }, { "epoch": 0.13, "learning_rate": 9.735064458898231e-05, "loss": 17.9192, "step": 995 }, { "epoch": 0.13, "learning_rate": 9.734379359330135e-05, "loss": 16.6331, "step": 996 }, { "epoch": 0.13, "learning_rate": 9.733693399272332e-05, "loss": 18.8628, "step": 997 }, { "epoch": 0.13, "learning_rate": 9.733006578849496e-05, "loss": 17.3256, "step": 998 }, { "epoch": 0.13, "learning_rate": 9.73231889818646e-05, "loss": 20.0128, "step": 999 }, { "epoch": 0.13, "learning_rate": 9.731630357408212e-05, "loss": 0.0001, "step": 1000 }, { "epoch": 0.13, "learning_rate": 9.730940956639898e-05, "loss": 19.0719, "step": 1001 }, { "epoch": 0.13, "learning_rate": 9.730250696006815e-05, "loss": 17.8072, "step": 1002 }, { "epoch": 0.13, "learning_rate": 9.729559575634425e-05, "loss": 0.0002, "step": 1003 }, { "epoch": 0.13, "learning_rate": 9.72886759564834e-05, "loss": 18.7953, "step": 1004 }, { "epoch": 0.13, "learning_rate": 9.728174756174327e-05, "loss": 16.9212, "step": 1005 }, { "epoch": 0.13, "learning_rate": 9.727481057338314e-05, "loss": 17.5308, "step": 1006 }, { "epoch": 0.13, "learning_rate": 9.726786499266384e-05, "loss": 0.0004, "step": 1007 }, { "epoch": 0.13, "learning_rate": 9.726091082084777e-05, "loss": 17.4959, "step": 1008 }, { "epoch": 0.13, "learning_rate": 9.725394805919883e-05, "loss": 18.9897, "step": 1009 }, { "epoch": 0.13, "learning_rate": 9.724697670898254e-05, "loss": 18.6738, "step": 1010 }, { "epoch": 0.13, "learning_rate": 9.723999677146598e-05, "loss": 0.0001, "step": 1011 }, { "epoch": 0.13, "learning_rate": 9.723300824791777e-05, "loss": 0.0002, "step": 1012 }, { "epoch": 0.13, "learning_rate": 9.72260111396081e-05, "loss": 17.7651, "step": 1013 }, { "epoch": 0.13, "learning_rate": 9.721900544780871e-05, "loss": 16.9085, "step": 1014 }, { "epoch": 0.13, "learning_rate": 9.721199117379292e-05, "loss": 0.0001, "step": 1015 }, { "epoch": 0.13, "learning_rate": 9.720496831883558e-05, "loss": 18.0157, "step": 1016 }, { "epoch": 0.13, "learning_rate": 9.719793688421317e-05, "loss": 0.0002, "step": 1017 }, { "epoch": 0.13, "learning_rate": 9.71908968712036e-05, "loss": 17.4333, "step": 1018 }, { "epoch": 0.13, "learning_rate": 9.718384828108648e-05, "loss": 18.3442, "step": 1019 }, { "epoch": 0.13, "learning_rate": 9.717679111514288e-05, "loss": 18.2581, "step": 1020 }, { "epoch": 0.13, "learning_rate": 9.716972537465547e-05, "loss": 17.8453, "step": 1021 }, { "epoch": 0.13, "learning_rate": 9.716265106090849e-05, "loss": 17.35, "step": 1022 }, { "epoch": 0.13, "learning_rate": 9.715556817518771e-05, "loss": 18.6129, "step": 1023 }, { "epoch": 0.13, "learning_rate": 9.714847671878047e-05, "loss": 0.0001, "step": 1024 }, { "epoch": 0.13, "learning_rate": 9.714137669297565e-05, "loss": 18.2435, "step": 1025 }, { "epoch": 0.14, "learning_rate": 9.713426809906372e-05, "loss": 0.0001, "step": 1026 }, { "epoch": 0.14, "learning_rate": 9.71271509383367e-05, "loss": 18.9252, "step": 1027 }, { "epoch": 0.14, "learning_rate": 9.712002521208815e-05, "loss": 19.5129, "step": 1028 }, { "epoch": 0.14, "learning_rate": 9.711289092161319e-05, "loss": 17.6288, "step": 1029 }, { "epoch": 0.14, "learning_rate": 9.71057480682085e-05, "loss": 19.3403, "step": 1030 }, { "epoch": 0.14, "learning_rate": 9.709859665317234e-05, "loss": 17.9135, "step": 1031 }, { "epoch": 0.14, "learning_rate": 9.709143667780447e-05, "loss": 19.8565, "step": 1032 }, { "epoch": 0.14, "learning_rate": 9.708426814340625e-05, "loss": 18.2768, "step": 1033 }, { "epoch": 0.14, "learning_rate": 9.707709105128062e-05, "loss": 18.4665, "step": 1034 }, { "epoch": 0.14, "learning_rate": 9.7069905402732e-05, "loss": 0.0, "step": 1035 }, { "epoch": 0.14, "learning_rate": 9.706271119906641e-05, "loss": 0.0, "step": 1036 }, { "epoch": 0.14, "learning_rate": 9.705550844159145e-05, "loss": 0.0006, "step": 1037 }, { "epoch": 0.14, "learning_rate": 9.704829713161622e-05, "loss": 17.2422, "step": 1038 }, { "epoch": 0.14, "learning_rate": 9.704107727045141e-05, "loss": 17.6253, "step": 1039 }, { "epoch": 0.14, "learning_rate": 9.703384885940925e-05, "loss": 0.0002, "step": 1040 }, { "epoch": 0.14, "learning_rate": 9.702661189980353e-05, "loss": 19.6007, "step": 1041 }, { "epoch": 0.14, "learning_rate": 9.70193663929496e-05, "loss": 18.4738, "step": 1042 }, { "epoch": 0.14, "learning_rate": 9.701211234016435e-05, "loss": 0.0, "step": 1043 }, { "epoch": 0.14, "learning_rate": 9.700484974276622e-05, "loss": 19.3863, "step": 1044 }, { "epoch": 0.14, "learning_rate": 9.699757860207523e-05, "loss": 16.835, "step": 1045 }, { "epoch": 0.14, "learning_rate": 9.699029891941294e-05, "loss": 18.1365, "step": 1046 }, { "epoch": 0.14, "learning_rate": 9.698301069610241e-05, "loss": 16.8595, "step": 1047 }, { "epoch": 0.14, "learning_rate": 9.697571393346835e-05, "loss": 17.4873, "step": 1048 }, { "epoch": 0.14, "learning_rate": 9.696840863283696e-05, "loss": 18.8222, "step": 1049 }, { "epoch": 0.14, "learning_rate": 9.696109479553597e-05, "loss": 17.0702, "step": 1050 }, { "epoch": 0.14, "learning_rate": 9.695377242289475e-05, "loss": 19.692, "step": 1051 }, { "epoch": 0.14, "learning_rate": 9.694644151624413e-05, "loss": 18.9287, "step": 1052 }, { "epoch": 0.14, "learning_rate": 9.693910207691654e-05, "loss": 0.0012, "step": 1053 }, { "epoch": 0.14, "learning_rate": 9.693175410624592e-05, "loss": 19.3714, "step": 1054 }, { "epoch": 0.14, "learning_rate": 9.692439760556785e-05, "loss": 0.0001, "step": 1055 }, { "epoch": 0.14, "learning_rate": 9.691703257621936e-05, "loss": 19.102, "step": 1056 }, { "epoch": 0.14, "learning_rate": 9.690965901953906e-05, "loss": 17.6164, "step": 1057 }, { "epoch": 0.14, "learning_rate": 9.690227693686712e-05, "loss": 0.0002, "step": 1058 }, { "epoch": 0.14, "learning_rate": 9.689488632954529e-05, "loss": 17.6344, "step": 1059 }, { "epoch": 0.14, "learning_rate": 9.68874871989168e-05, "loss": 0.0001, "step": 1060 }, { "epoch": 0.14, "learning_rate": 9.68800795463265e-05, "loss": 18.4085, "step": 1061 }, { "epoch": 0.14, "learning_rate": 9.687266337312073e-05, "loss": 19.3864, "step": 1062 }, { "epoch": 0.14, "learning_rate": 9.686523868064743e-05, "loss": 18.3452, "step": 1063 }, { "epoch": 0.14, "learning_rate": 9.685780547025604e-05, "loss": 0.0002, "step": 1064 }, { "epoch": 0.14, "learning_rate": 9.685036374329759e-05, "loss": 0.0008, "step": 1065 }, { "epoch": 0.14, "learning_rate": 9.684291350112463e-05, "loss": 18.1804, "step": 1066 }, { "epoch": 0.14, "learning_rate": 9.683545474509126e-05, "loss": 18.8852, "step": 1067 }, { "epoch": 0.14, "learning_rate": 9.682798747655315e-05, "loss": 18.6141, "step": 1068 }, { "epoch": 0.14, "learning_rate": 9.682051169686749e-05, "loss": 0.0003, "step": 1069 }, { "epoch": 0.14, "learning_rate": 9.681302740739306e-05, "loss": 17.1991, "step": 1070 }, { "epoch": 0.14, "learning_rate": 9.68055346094901e-05, "loss": 20.1837, "step": 1071 }, { "epoch": 0.14, "learning_rate": 9.679803330452048e-05, "loss": 18.3265, "step": 1072 }, { "epoch": 0.14, "learning_rate": 9.679052349384761e-05, "loss": 17.6297, "step": 1073 }, { "epoch": 0.14, "learning_rate": 9.678300517883639e-05, "loss": 0.0001, "step": 1074 }, { "epoch": 0.14, "learning_rate": 9.67754783608533e-05, "loss": 19.9283, "step": 1075 }, { "epoch": 0.14, "learning_rate": 9.676794304126639e-05, "loss": 0.001, "step": 1076 }, { "epoch": 0.14, "learning_rate": 9.676039922144522e-05, "loss": 18.5317, "step": 1077 }, { "epoch": 0.14, "learning_rate": 9.675284690276089e-05, "loss": 20.06, "step": 1078 }, { "epoch": 0.14, "learning_rate": 9.674528608658608e-05, "loss": 18.4959, "step": 1079 }, { "epoch": 0.14, "learning_rate": 9.6737716774295e-05, "loss": 0.0001, "step": 1080 }, { "epoch": 0.14, "learning_rate": 9.673013896726336e-05, "loss": 20.114, "step": 1081 }, { "epoch": 0.14, "learning_rate": 9.672255266686849e-05, "loss": 19.5213, "step": 1082 }, { "epoch": 0.14, "learning_rate": 9.671495787448921e-05, "loss": 18.2031, "step": 1083 }, { "epoch": 0.14, "learning_rate": 9.67073545915059e-05, "loss": 16.7616, "step": 1084 }, { "epoch": 0.14, "learning_rate": 9.66997428193005e-05, "loss": 18.7067, "step": 1085 }, { "epoch": 0.14, "learning_rate": 9.669212255925646e-05, "loss": 17.1163, "step": 1086 }, { "epoch": 0.14, "learning_rate": 9.668449381275878e-05, "loss": 0.0001, "step": 1087 }, { "epoch": 0.14, "learning_rate": 9.667685658119402e-05, "loss": 20.2845, "step": 1088 }, { "epoch": 0.14, "learning_rate": 9.666921086595028e-05, "loss": 18.454, "step": 1089 }, { "epoch": 0.14, "learning_rate": 9.666155666841719e-05, "loss": 17.6968, "step": 1090 }, { "epoch": 0.14, "learning_rate": 9.665389398998592e-05, "loss": 16.353, "step": 1091 }, { "epoch": 0.14, "learning_rate": 9.664622283204921e-05, "loss": 17.1227, "step": 1092 }, { "epoch": 0.14, "learning_rate": 9.663854319600127e-05, "loss": 20.0163, "step": 1093 }, { "epoch": 0.14, "learning_rate": 9.663085508323796e-05, "loss": 17.9459, "step": 1094 }, { "epoch": 0.14, "learning_rate": 9.66231584951566e-05, "loss": 18.9425, "step": 1095 }, { "epoch": 0.14, "learning_rate": 9.661545343315605e-05, "loss": 18.4801, "step": 1096 }, { "epoch": 0.14, "learning_rate": 9.660773989863674e-05, "loss": 16.1894, "step": 1097 }, { "epoch": 0.14, "learning_rate": 9.660001789300066e-05, "loss": 0.0001, "step": 1098 }, { "epoch": 0.14, "learning_rate": 9.659228741765127e-05, "loss": 17.733, "step": 1099 }, { "epoch": 0.14, "learning_rate": 9.658454847399363e-05, "loss": 17.9384, "step": 1100 }, { "epoch": 0.14, "learning_rate": 9.657680106343434e-05, "loss": 16.8929, "step": 1101 }, { "epoch": 0.15, "learning_rate": 9.656904518738148e-05, "loss": 0.0007, "step": 1102 }, { "epoch": 0.15, "learning_rate": 9.656128084724474e-05, "loss": 0.0001, "step": 1103 }, { "epoch": 0.15, "learning_rate": 9.655350804443528e-05, "loss": 19.8777, "step": 1104 }, { "epoch": 0.15, "learning_rate": 9.654572678036587e-05, "loss": 18.2174, "step": 1105 }, { "epoch": 0.15, "learning_rate": 9.653793705645077e-05, "loss": 0.0002, "step": 1106 }, { "epoch": 0.15, "learning_rate": 9.653013887410578e-05, "loss": 18.8935, "step": 1107 }, { "epoch": 0.15, "learning_rate": 9.652233223474825e-05, "loss": 17.5531, "step": 1108 }, { "epoch": 0.15, "learning_rate": 9.651451713979705e-05, "loss": 18.0941, "step": 1109 }, { "epoch": 0.15, "learning_rate": 9.650669359067264e-05, "loss": 19.306, "step": 1110 }, { "epoch": 0.15, "learning_rate": 9.649886158879693e-05, "loss": 17.991, "step": 1111 }, { "epoch": 0.15, "learning_rate": 9.649102113559345e-05, "loss": 19.769, "step": 1112 }, { "epoch": 0.15, "learning_rate": 9.648317223248721e-05, "loss": 18.7757, "step": 1113 }, { "epoch": 0.15, "learning_rate": 9.647531488090477e-05, "loss": 18.4509, "step": 1114 }, { "epoch": 0.15, "learning_rate": 9.646744908227425e-05, "loss": 0.0016, "step": 1115 }, { "epoch": 0.15, "learning_rate": 9.645957483802528e-05, "loss": 19.2347, "step": 1116 }, { "epoch": 0.15, "learning_rate": 9.645169214958903e-05, "loss": 17.9371, "step": 1117 }, { "epoch": 0.15, "learning_rate": 9.644380101839819e-05, "loss": 18.3071, "step": 1118 }, { "epoch": 0.15, "learning_rate": 9.643590144588702e-05, "loss": 16.9746, "step": 1119 }, { "epoch": 0.15, "learning_rate": 9.642799343349129e-05, "loss": 18.7255, "step": 1120 }, { "epoch": 0.15, "learning_rate": 9.64200769826483e-05, "loss": 0.0001, "step": 1121 }, { "epoch": 0.15, "learning_rate": 9.64121520947969e-05, "loss": 17.9268, "step": 1122 }, { "epoch": 0.15, "learning_rate": 9.640421877137746e-05, "loss": 16.1008, "step": 1123 }, { "epoch": 0.15, "learning_rate": 9.639627701383191e-05, "loss": 0.0001, "step": 1124 }, { "epoch": 0.15, "learning_rate": 9.638832682360366e-05, "loss": 17.3475, "step": 1125 }, { "epoch": 0.15, "learning_rate": 9.638036820213769e-05, "loss": 17.8608, "step": 1126 }, { "epoch": 0.15, "learning_rate": 9.637240115088053e-05, "loss": 19.0726, "step": 1127 }, { "epoch": 0.15, "learning_rate": 9.63644256712802e-05, "loss": 0.0001, "step": 1128 }, { "epoch": 0.15, "learning_rate": 9.635644176478626e-05, "loss": 18.0653, "step": 1129 }, { "epoch": 0.15, "learning_rate": 9.634844943284984e-05, "loss": 16.4902, "step": 1130 }, { "epoch": 0.15, "learning_rate": 9.634044867692357e-05, "loss": 19.1557, "step": 1131 }, { "epoch": 0.15, "learning_rate": 9.63324394984616e-05, "loss": 0.001, "step": 1132 }, { "epoch": 0.15, "learning_rate": 9.632442189891963e-05, "loss": 0.0001, "step": 1133 }, { "epoch": 0.15, "learning_rate": 9.631639587975489e-05, "loss": 19.0425, "step": 1134 }, { "epoch": 0.15, "learning_rate": 9.630836144242612e-05, "loss": 19.3756, "step": 1135 }, { "epoch": 0.15, "learning_rate": 9.630031858839363e-05, "loss": 19.0999, "step": 1136 }, { "epoch": 0.15, "learning_rate": 9.629226731911924e-05, "loss": 17.8478, "step": 1137 }, { "epoch": 0.15, "learning_rate": 9.628420763606627e-05, "loss": 19.0271, "step": 1138 }, { "epoch": 0.15, "learning_rate": 9.62761395406996e-05, "loss": 18.7186, "step": 1139 }, { "epoch": 0.15, "learning_rate": 9.626806303448567e-05, "loss": 17.7258, "step": 1140 }, { "epoch": 0.15, "learning_rate": 9.625997811889236e-05, "loss": 17.5231, "step": 1141 }, { "epoch": 0.15, "learning_rate": 9.625188479538917e-05, "loss": 16.7042, "step": 1142 }, { "epoch": 0.15, "learning_rate": 9.624378306544707e-05, "loss": 17.7949, "step": 1143 }, { "epoch": 0.15, "learning_rate": 9.623567293053859e-05, "loss": 0.0003, "step": 1144 }, { "epoch": 0.15, "learning_rate": 9.622755439213776e-05, "loss": 18.1939, "step": 1145 }, { "epoch": 0.15, "learning_rate": 9.621942745172017e-05, "loss": 0.0008, "step": 1146 }, { "epoch": 0.15, "learning_rate": 9.62112921107629e-05, "loss": 0.0005, "step": 1147 }, { "epoch": 0.15, "learning_rate": 9.620314837074458e-05, "loss": 0.0004, "step": 1148 }, { "epoch": 0.15, "learning_rate": 9.619499623314537e-05, "loss": 0.0003, "step": 1149 }, { "epoch": 0.15, "learning_rate": 9.618683569944696e-05, "loss": 16.5704, "step": 1150 }, { "epoch": 0.15, "learning_rate": 9.617866677113252e-05, "loss": 0.0001, "step": 1151 }, { "epoch": 0.15, "learning_rate": 9.617048944968683e-05, "loss": 18.0337, "step": 1152 }, { "epoch": 0.15, "learning_rate": 9.616230373659609e-05, "loss": 17.2519, "step": 1153 }, { "epoch": 0.15, "learning_rate": 9.615410963334815e-05, "loss": 18.1214, "step": 1154 }, { "epoch": 0.15, "learning_rate": 9.614590714143225e-05, "loss": 16.3649, "step": 1155 }, { "epoch": 0.15, "learning_rate": 9.613769626233924e-05, "loss": 19.6858, "step": 1156 }, { "epoch": 0.15, "learning_rate": 9.61294769975615e-05, "loss": 19.3328, "step": 1157 }, { "epoch": 0.15, "learning_rate": 9.612124934859289e-05, "loss": 18.0931, "step": 1158 }, { "epoch": 0.15, "learning_rate": 9.611301331692881e-05, "loss": 17.8768, "step": 1159 }, { "epoch": 0.15, "learning_rate": 9.610476890406622e-05, "loss": 0.0001, "step": 1160 }, { "epoch": 0.15, "learning_rate": 9.60965161115035e-05, "loss": 17.8439, "step": 1161 }, { "epoch": 0.15, "learning_rate": 9.60882549407407e-05, "loss": 0.0006, "step": 1162 }, { "epoch": 0.15, "learning_rate": 9.607998539327928e-05, "loss": 17.7195, "step": 1163 }, { "epoch": 0.15, "learning_rate": 9.607170747062227e-05, "loss": 0.0001, "step": 1164 }, { "epoch": 0.15, "learning_rate": 9.606342117427419e-05, "loss": 17.9397, "step": 1165 }, { "epoch": 0.15, "learning_rate": 9.60551265057411e-05, "loss": 0.0001, "step": 1166 }, { "epoch": 0.15, "learning_rate": 9.604682346653062e-05, "loss": 17.927, "step": 1167 }, { "epoch": 0.15, "learning_rate": 9.603851205815184e-05, "loss": 18.9742, "step": 1168 }, { "epoch": 0.15, "learning_rate": 9.603019228211537e-05, "loss": 16.3926, "step": 1169 }, { "epoch": 0.15, "learning_rate": 9.602186413993338e-05, "loss": 0.0, "step": 1170 }, { "epoch": 0.15, "learning_rate": 9.601352763311953e-05, "loss": 17.6908, "step": 1171 }, { "epoch": 0.15, "learning_rate": 9.600518276318901e-05, "loss": 19.0759, "step": 1172 }, { "epoch": 0.15, "learning_rate": 9.599682953165851e-05, "loss": 18.0442, "step": 1173 }, { "epoch": 0.15, "learning_rate": 9.59884679400463e-05, "loss": 0.0001, "step": 1174 }, { "epoch": 0.15, "learning_rate": 9.598009798987209e-05, "loss": 18.7119, "step": 1175 }, { "epoch": 0.15, "learning_rate": 9.597171968265717e-05, "loss": 18.4601, "step": 1176 }, { "epoch": 0.15, "learning_rate": 9.59633330199243e-05, "loss": 17.6631, "step": 1177 }, { "epoch": 0.16, "learning_rate": 9.595493800319783e-05, "loss": 17.4828, "step": 1178 }, { "epoch": 0.16, "learning_rate": 9.594653463400354e-05, "loss": 17.8244, "step": 1179 }, { "epoch": 0.16, "learning_rate": 9.593812291386878e-05, "loss": 18.1114, "step": 1180 }, { "epoch": 0.16, "learning_rate": 9.592970284432241e-05, "loss": 0.0004, "step": 1181 }, { "epoch": 0.16, "learning_rate": 9.592127442689483e-05, "loss": 0.0001, "step": 1182 }, { "epoch": 0.16, "learning_rate": 9.59128376631179e-05, "loss": 16.5343, "step": 1183 }, { "epoch": 0.16, "learning_rate": 9.590439255452505e-05, "loss": 19.6576, "step": 1184 }, { "epoch": 0.16, "learning_rate": 9.589593910265121e-05, "loss": 18.4578, "step": 1185 }, { "epoch": 0.16, "learning_rate": 9.588747730903282e-05, "loss": 17.1493, "step": 1186 }, { "epoch": 0.16, "learning_rate": 9.587900717520784e-05, "loss": 18.0312, "step": 1187 }, { "epoch": 0.16, "learning_rate": 9.587052870271572e-05, "loss": 17.6495, "step": 1188 }, { "epoch": 0.16, "learning_rate": 9.58620418930975e-05, "loss": 15.9692, "step": 1189 }, { "epoch": 0.16, "learning_rate": 9.585354674789565e-05, "loss": 19.8346, "step": 1190 }, { "epoch": 0.16, "learning_rate": 9.58450432686542e-05, "loss": 0.0017, "step": 1191 }, { "epoch": 0.16, "learning_rate": 9.583653145691871e-05, "loss": 18.4293, "step": 1192 }, { "epoch": 0.16, "learning_rate": 9.58280113142362e-05, "loss": 18.3179, "step": 1193 }, { "epoch": 0.16, "learning_rate": 9.581948284215524e-05, "loss": 17.5051, "step": 1194 }, { "epoch": 0.16, "learning_rate": 9.581094604222591e-05, "loss": 0.0002, "step": 1195 }, { "epoch": 0.16, "learning_rate": 9.580240091599984e-05, "loss": 0.0003, "step": 1196 }, { "epoch": 0.16, "learning_rate": 9.579384746503007e-05, "loss": 17.1935, "step": 1197 }, { "epoch": 0.16, "learning_rate": 9.578528569087126e-05, "loss": 17.9124, "step": 1198 }, { "epoch": 0.16, "learning_rate": 9.577671559507954e-05, "loss": 0.0001, "step": 1199 }, { "epoch": 0.16, "learning_rate": 9.576813717921256e-05, "loss": 0.0002, "step": 1200 }, { "epoch": 0.16, "learning_rate": 9.575955044482946e-05, "loss": 0.0002, "step": 1201 }, { "epoch": 0.16, "learning_rate": 9.575095539349092e-05, "loss": 18.4694, "step": 1202 }, { "epoch": 0.16, "learning_rate": 9.57423520267591e-05, "loss": 0.0001, "step": 1203 }, { "epoch": 0.16, "learning_rate": 9.57337403461977e-05, "loss": 18.9796, "step": 1204 }, { "epoch": 0.16, "learning_rate": 9.572512035337196e-05, "loss": 16.1876, "step": 1205 }, { "epoch": 0.16, "learning_rate": 9.571649204984854e-05, "loss": 18.5524, "step": 1206 }, { "epoch": 0.16, "learning_rate": 9.57078554371957e-05, "loss": 18.2232, "step": 1207 }, { "epoch": 0.16, "learning_rate": 9.569921051698314e-05, "loss": 19.8588, "step": 1208 }, { "epoch": 0.16, "learning_rate": 9.569055729078214e-05, "loss": 18.237, "step": 1209 }, { "epoch": 0.16, "learning_rate": 9.568189576016543e-05, "loss": 17.9983, "step": 1210 }, { "epoch": 0.16, "learning_rate": 9.567322592670729e-05, "loss": 18.296, "step": 1211 }, { "epoch": 0.16, "learning_rate": 9.566454779198346e-05, "loss": 17.782, "step": 1212 }, { "epoch": 0.16, "learning_rate": 9.565586135757127e-05, "loss": 0.0001, "step": 1213 }, { "epoch": 0.16, "learning_rate": 9.564716662504947e-05, "loss": 0.0001, "step": 1214 }, { "epoch": 0.16, "learning_rate": 9.563846359599836e-05, "loss": 16.723, "step": 1215 }, { "epoch": 0.16, "learning_rate": 9.562975227199977e-05, "loss": 18.6723, "step": 1216 }, { "epoch": 0.16, "learning_rate": 9.5621032654637e-05, "loss": 17.9816, "step": 1217 }, { "epoch": 0.16, "learning_rate": 9.561230474549485e-05, "loss": 17.1202, "step": 1218 }, { "epoch": 0.16, "learning_rate": 9.560356854615968e-05, "loss": 0.0002, "step": 1219 }, { "epoch": 0.16, "learning_rate": 9.55948240582193e-05, "loss": 18.6395, "step": 1220 }, { "epoch": 0.16, "learning_rate": 9.558607128326308e-05, "loss": 18.7375, "step": 1221 }, { "epoch": 0.16, "learning_rate": 9.557731022288183e-05, "loss": 17.9898, "step": 1222 }, { "epoch": 0.16, "learning_rate": 9.556854087866793e-05, "loss": 16.8957, "step": 1223 }, { "epoch": 0.16, "learning_rate": 9.555976325221523e-05, "loss": 0.001, "step": 1224 }, { "epoch": 0.16, "learning_rate": 9.555097734511908e-05, "loss": 19.1277, "step": 1225 }, { "epoch": 0.16, "learning_rate": 9.554218315897639e-05, "loss": 16.5197, "step": 1226 }, { "epoch": 0.16, "learning_rate": 9.553338069538549e-05, "loss": 17.943, "step": 1227 }, { "epoch": 0.16, "learning_rate": 9.552456995594628e-05, "loss": 19.2016, "step": 1228 }, { "epoch": 0.16, "learning_rate": 9.551575094226011e-05, "loss": 16.8308, "step": 1229 }, { "epoch": 0.16, "learning_rate": 9.550692365592992e-05, "loss": 0.0001, "step": 1230 }, { "epoch": 0.16, "learning_rate": 9.549808809856005e-05, "loss": 16.7218, "step": 1231 }, { "epoch": 0.16, "learning_rate": 9.548924427175643e-05, "loss": 0.0004, "step": 1232 }, { "epoch": 0.16, "learning_rate": 9.548039217712643e-05, "loss": 18.0224, "step": 1233 }, { "epoch": 0.16, "learning_rate": 9.547153181627897e-05, "loss": 0.0, "step": 1234 }, { "epoch": 0.16, "learning_rate": 9.546266319082442e-05, "loss": 17.721, "step": 1235 }, { "epoch": 0.16, "learning_rate": 9.545378630237472e-05, "loss": 18.9072, "step": 1236 }, { "epoch": 0.16, "learning_rate": 9.544490115254326e-05, "loss": 0.0006, "step": 1237 }, { "epoch": 0.16, "learning_rate": 9.543600774294493e-05, "loss": 18.0128, "step": 1238 }, { "epoch": 0.16, "learning_rate": 9.542710607519616e-05, "loss": 18.6067, "step": 1239 }, { "epoch": 0.16, "learning_rate": 9.541819615091485e-05, "loss": 19.171, "step": 1240 }, { "epoch": 0.16, "learning_rate": 9.540927797172041e-05, "loss": 19.0954, "step": 1241 }, { "epoch": 0.16, "learning_rate": 9.540035153923375e-05, "loss": 0.0001, "step": 1242 }, { "epoch": 0.16, "learning_rate": 9.539141685507728e-05, "loss": 18.6178, "step": 1243 }, { "epoch": 0.16, "learning_rate": 9.538247392087489e-05, "loss": 16.9025, "step": 1244 }, { "epoch": 0.16, "learning_rate": 9.537352273825204e-05, "loss": 0.0001, "step": 1245 }, { "epoch": 0.16, "learning_rate": 9.536456330883559e-05, "loss": 17.8304, "step": 1246 }, { "epoch": 0.16, "learning_rate": 9.535559563425395e-05, "loss": 0.0007, "step": 1247 }, { "epoch": 0.16, "learning_rate": 9.534661971613707e-05, "loss": 17.9739, "step": 1248 }, { "epoch": 0.16, "learning_rate": 9.53376355561163e-05, "loss": 0.0, "step": 1249 }, { "epoch": 0.16, "learning_rate": 9.532864315582457e-05, "loss": 0.0001, "step": 1250 }, { "epoch": 0.16, "learning_rate": 9.531964251689627e-05, "loss": 17.6846, "step": 1251 }, { "epoch": 0.16, "learning_rate": 9.531063364096731e-05, "loss": 20.6663, "step": 1252 }, { "epoch": 0.16, "learning_rate": 9.530161652967507e-05, "loss": 17.7816, "step": 1253 }, { "epoch": 0.17, "learning_rate": 9.529259118465846e-05, "loss": 0.0002, "step": 1254 }, { "epoch": 0.17, "learning_rate": 9.528355760755783e-05, "loss": 18.3756, "step": 1255 }, { "epoch": 0.17, "learning_rate": 9.52745158000151e-05, "loss": 18.2046, "step": 1256 }, { "epoch": 0.17, "learning_rate": 9.526546576367363e-05, "loss": 17.8575, "step": 1257 }, { "epoch": 0.17, "learning_rate": 9.525640750017832e-05, "loss": 18.713, "step": 1258 }, { "epoch": 0.17, "learning_rate": 9.524734101117549e-05, "loss": 0.0, "step": 1259 }, { "epoch": 0.17, "learning_rate": 9.523826629831307e-05, "loss": 17.9846, "step": 1260 }, { "epoch": 0.17, "learning_rate": 9.522918336324038e-05, "loss": 17.9053, "step": 1261 }, { "epoch": 0.17, "learning_rate": 9.522009220760827e-05, "loss": 16.6364, "step": 1262 }, { "epoch": 0.17, "learning_rate": 9.521099283306913e-05, "loss": 18.46, "step": 1263 }, { "epoch": 0.17, "learning_rate": 9.520188524127675e-05, "loss": 18.9121, "step": 1264 }, { "epoch": 0.17, "learning_rate": 9.519276943388651e-05, "loss": 17.8609, "step": 1265 }, { "epoch": 0.17, "learning_rate": 9.51836454125552e-05, "loss": 18.6569, "step": 1266 }, { "epoch": 0.17, "learning_rate": 9.517451317894118e-05, "loss": 17.4947, "step": 1267 }, { "epoch": 0.17, "learning_rate": 9.516537273470423e-05, "loss": 17.8518, "step": 1268 }, { "epoch": 0.17, "learning_rate": 9.515622408150568e-05, "loss": 19.2065, "step": 1269 }, { "epoch": 0.17, "learning_rate": 9.514706722100833e-05, "loss": 0.0009, "step": 1270 }, { "epoch": 0.17, "learning_rate": 9.513790215487646e-05, "loss": 0.0001, "step": 1271 }, { "epoch": 0.17, "learning_rate": 9.512872888477585e-05, "loss": 0.0008, "step": 1272 }, { "epoch": 0.17, "learning_rate": 9.51195474123738e-05, "loss": 18.3347, "step": 1273 }, { "epoch": 0.17, "learning_rate": 9.511035773933902e-05, "loss": 0.0004, "step": 1274 }, { "epoch": 0.17, "learning_rate": 9.510115986734183e-05, "loss": 15.4555, "step": 1275 }, { "epoch": 0.17, "learning_rate": 9.509195379805393e-05, "loss": 17.3521, "step": 1276 }, { "epoch": 0.17, "learning_rate": 9.508273953314857e-05, "loss": 0.0001, "step": 1277 }, { "epoch": 0.17, "learning_rate": 9.507351707430045e-05, "loss": 18.6903, "step": 1278 }, { "epoch": 0.17, "learning_rate": 9.506428642318584e-05, "loss": 16.6494, "step": 1279 }, { "epoch": 0.17, "learning_rate": 9.505504758148239e-05, "loss": 0.0006, "step": 1280 }, { "epoch": 0.17, "learning_rate": 9.504580055086931e-05, "loss": 16.9195, "step": 1281 }, { "epoch": 0.17, "learning_rate": 9.503654533302729e-05, "loss": 0.0001, "step": 1282 }, { "epoch": 0.17, "learning_rate": 9.502728192963847e-05, "loss": 20.6213, "step": 1283 }, { "epoch": 0.17, "learning_rate": 9.501801034238654e-05, "loss": 14.9693, "step": 1284 }, { "epoch": 0.17, "learning_rate": 9.500873057295662e-05, "loss": 0.0001, "step": 1285 }, { "epoch": 0.17, "learning_rate": 9.499944262303534e-05, "loss": 0.0003, "step": 1286 }, { "epoch": 0.17, "learning_rate": 9.499014649431082e-05, "loss": 18.8057, "step": 1287 }, { "epoch": 0.17, "learning_rate": 9.498084218847268e-05, "loss": 0.0001, "step": 1288 }, { "epoch": 0.17, "learning_rate": 9.497152970721197e-05, "loss": 0.0, "step": 1289 }, { "epoch": 0.17, "learning_rate": 9.496220905222131e-05, "loss": 0.0003, "step": 1290 }, { "epoch": 0.17, "learning_rate": 9.495288022519473e-05, "loss": 18.2107, "step": 1291 }, { "epoch": 0.17, "learning_rate": 9.49435432278278e-05, "loss": 18.1818, "step": 1292 }, { "epoch": 0.17, "learning_rate": 9.493419806181754e-05, "loss": 0.0006, "step": 1293 }, { "epoch": 0.17, "learning_rate": 9.492484472886245e-05, "loss": 0.0002, "step": 1294 }, { "epoch": 0.17, "learning_rate": 9.491548323066255e-05, "loss": 0.0006, "step": 1295 }, { "epoch": 0.17, "learning_rate": 9.490611356891931e-05, "loss": 18.6274, "step": 1296 }, { "epoch": 0.17, "learning_rate": 9.489673574533573e-05, "loss": 20.0226, "step": 1297 }, { "epoch": 0.17, "learning_rate": 9.488734976161622e-05, "loss": 0.0003, "step": 1298 }, { "epoch": 0.17, "learning_rate": 9.487795561946674e-05, "loss": 18.6687, "step": 1299 }, { "epoch": 0.17, "learning_rate": 9.48685533205947e-05, "loss": 18.216, "step": 1300 }, { "epoch": 0.17, "learning_rate": 9.4859142866709e-05, "loss": 20.2366, "step": 1301 }, { "epoch": 0.17, "learning_rate": 9.484972425952005e-05, "loss": 0.0004, "step": 1302 }, { "epoch": 0.17, "learning_rate": 9.484029750073967e-05, "loss": 0.0002, "step": 1303 }, { "epoch": 0.17, "learning_rate": 9.483086259208122e-05, "loss": 18.1336, "step": 1304 }, { "epoch": 0.17, "learning_rate": 9.482141953525954e-05, "loss": 18.6159, "step": 1305 }, { "epoch": 0.17, "learning_rate": 9.481196833199091e-05, "loss": 0.0003, "step": 1306 }, { "epoch": 0.17, "learning_rate": 9.480250898399315e-05, "loss": 18.5377, "step": 1307 }, { "epoch": 0.17, "learning_rate": 9.479304149298552e-05, "loss": 16.4786, "step": 1308 }, { "epoch": 0.17, "learning_rate": 9.478356586068877e-05, "loss": 18.669, "step": 1309 }, { "epoch": 0.17, "learning_rate": 9.477408208882513e-05, "loss": 17.9468, "step": 1310 }, { "epoch": 0.17, "learning_rate": 9.476459017911828e-05, "loss": 19.8296, "step": 1311 }, { "epoch": 0.17, "learning_rate": 9.475509013329344e-05, "loss": 17.2701, "step": 1312 }, { "epoch": 0.17, "learning_rate": 9.474558195307727e-05, "loss": 18.9018, "step": 1313 }, { "epoch": 0.17, "learning_rate": 9.47360656401979e-05, "loss": 16.8615, "step": 1314 }, { "epoch": 0.17, "learning_rate": 9.472654119638498e-05, "loss": 18.5164, "step": 1315 }, { "epoch": 0.17, "learning_rate": 9.471700862336959e-05, "loss": 18.6652, "step": 1316 }, { "epoch": 0.17, "learning_rate": 9.470746792288429e-05, "loss": 0.0002, "step": 1317 }, { "epoch": 0.17, "learning_rate": 9.469791909666318e-05, "loss": 16.2375, "step": 1318 }, { "epoch": 0.17, "learning_rate": 9.468836214644175e-05, "loss": 17.9508, "step": 1319 }, { "epoch": 0.17, "learning_rate": 9.467879707395704e-05, "loss": 0.0001, "step": 1320 }, { "epoch": 0.17, "learning_rate": 9.466922388094752e-05, "loss": 0.0002, "step": 1321 }, { "epoch": 0.17, "learning_rate": 9.465964256915315e-05, "loss": 0.0002, "step": 1322 }, { "epoch": 0.17, "learning_rate": 9.465005314031536e-05, "loss": 17.1209, "step": 1323 }, { "epoch": 0.17, "learning_rate": 9.464045559617709e-05, "loss": 17.6179, "step": 1324 }, { "epoch": 0.17, "learning_rate": 9.463084993848269e-05, "loss": 18.9654, "step": 1325 }, { "epoch": 0.17, "learning_rate": 9.462123616897804e-05, "loss": 17.7475, "step": 1326 }, { "epoch": 0.17, "learning_rate": 9.461161428941048e-05, "loss": 18.2274, "step": 1327 }, { "epoch": 0.17, "learning_rate": 9.460198430152882e-05, "loss": 18.6027, "step": 1328 }, { "epoch": 0.17, "learning_rate": 9.459234620708332e-05, "loss": 18.8786, "step": 1329 }, { "epoch": 0.18, "learning_rate": 9.458270000782576e-05, "loss": 18.5368, "step": 1330 }, { "epoch": 0.18, "learning_rate": 9.457304570550937e-05, "loss": 0.0001, "step": 1331 }, { "epoch": 0.18, "learning_rate": 9.456338330188883e-05, "loss": 0.0001, "step": 1332 }, { "epoch": 0.18, "learning_rate": 9.455371279872034e-05, "loss": 18.6277, "step": 1333 }, { "epoch": 0.18, "learning_rate": 9.454403419776155e-05, "loss": 0.0011, "step": 1334 }, { "epoch": 0.18, "learning_rate": 9.453434750077155e-05, "loss": 20.2958, "step": 1335 }, { "epoch": 0.18, "learning_rate": 9.452465270951094e-05, "loss": 0.0, "step": 1336 }, { "epoch": 0.18, "learning_rate": 9.451494982574182e-05, "loss": 18.547, "step": 1337 }, { "epoch": 0.18, "learning_rate": 9.450523885122766e-05, "loss": 0.0001, "step": 1338 }, { "epoch": 0.18, "learning_rate": 9.449551978773352e-05, "loss": 19.9187, "step": 1339 }, { "epoch": 0.18, "learning_rate": 9.448579263702584e-05, "loss": 17.2138, "step": 1340 }, { "epoch": 0.18, "learning_rate": 9.447605740087256e-05, "loss": 20.0994, "step": 1341 }, { "epoch": 0.18, "learning_rate": 9.44663140810431e-05, "loss": 17.3677, "step": 1342 }, { "epoch": 0.18, "learning_rate": 9.445656267930836e-05, "loss": 17.6053, "step": 1343 }, { "epoch": 0.18, "learning_rate": 9.444680319744067e-05, "loss": 18.1823, "step": 1344 }, { "epoch": 0.18, "learning_rate": 9.443703563721386e-05, "loss": 0.0012, "step": 1345 }, { "epoch": 0.18, "learning_rate": 9.44272600004032e-05, "loss": 17.7141, "step": 1346 }, { "epoch": 0.18, "learning_rate": 9.441747628878548e-05, "loss": 19.8582, "step": 1347 }, { "epoch": 0.18, "learning_rate": 9.440768450413888e-05, "loss": 0.0006, "step": 1348 }, { "epoch": 0.18, "learning_rate": 9.439788464824313e-05, "loss": 0.0001, "step": 1349 }, { "epoch": 0.18, "learning_rate": 9.438807672287934e-05, "loss": 16.7896, "step": 1350 }, { "epoch": 0.18, "learning_rate": 9.43782607298302e-05, "loss": 18.6513, "step": 1351 }, { "epoch": 0.18, "learning_rate": 9.436843667087974e-05, "loss": 17.9471, "step": 1352 }, { "epoch": 0.18, "learning_rate": 9.435860454781355e-05, "loss": 18.288, "step": 1353 }, { "epoch": 0.18, "learning_rate": 9.434876436241864e-05, "loss": 18.2612, "step": 1354 }, { "epoch": 0.18, "learning_rate": 9.433891611648351e-05, "loss": 18.4937, "step": 1355 }, { "epoch": 0.18, "learning_rate": 9.432905981179809e-05, "loss": 0.0, "step": 1356 }, { "epoch": 0.18, "learning_rate": 9.431919545015381e-05, "loss": 18.2222, "step": 1357 }, { "epoch": 0.18, "learning_rate": 9.430932303334357e-05, "loss": 0.0001, "step": 1358 }, { "epoch": 0.18, "learning_rate": 9.429944256316168e-05, "loss": 18.0823, "step": 1359 }, { "epoch": 0.18, "learning_rate": 9.428955404140396e-05, "loss": 0.0024, "step": 1360 }, { "epoch": 0.18, "learning_rate": 9.427965746986772e-05, "loss": 0.0, "step": 1361 }, { "epoch": 0.18, "learning_rate": 9.426975285035163e-05, "loss": 0.0, "step": 1362 }, { "epoch": 0.18, "learning_rate": 9.425984018465594e-05, "loss": 18.3307, "step": 1363 }, { "epoch": 0.18, "learning_rate": 9.42499194745823e-05, "loss": 16.6283, "step": 1364 }, { "epoch": 0.18, "learning_rate": 9.423999072193382e-05, "loss": 19.1474, "step": 1365 }, { "epoch": 0.18, "learning_rate": 9.42300539285151e-05, "loss": 18.8688, "step": 1366 }, { "epoch": 0.18, "learning_rate": 9.422010909613217e-05, "loss": 0.0001, "step": 1367 }, { "epoch": 0.18, "learning_rate": 9.421015622659256e-05, "loss": 0.0, "step": 1368 }, { "epoch": 0.18, "learning_rate": 9.420019532170519e-05, "loss": 0.0004, "step": 1369 }, { "epoch": 0.18, "learning_rate": 9.419022638328056e-05, "loss": 17.5156, "step": 1370 }, { "epoch": 0.18, "learning_rate": 9.418024941313051e-05, "loss": 18.744, "step": 1371 }, { "epoch": 0.18, "learning_rate": 9.41702644130684e-05, "loss": 17.0778, "step": 1372 }, { "epoch": 0.18, "learning_rate": 9.416027138490903e-05, "loss": 0.0011, "step": 1373 }, { "epoch": 0.18, "learning_rate": 9.41502703304687e-05, "loss": 16.6005, "step": 1374 }, { "epoch": 0.18, "learning_rate": 9.41402612515651e-05, "loss": 0.0, "step": 1375 }, { "epoch": 0.18, "learning_rate": 9.413024415001743e-05, "loss": 18.109, "step": 1376 }, { "epoch": 0.18, "learning_rate": 9.412021902764633e-05, "loss": 16.7788, "step": 1377 }, { "epoch": 0.18, "learning_rate": 9.41101858862739e-05, "loss": 0.0, "step": 1378 }, { "epoch": 0.18, "learning_rate": 9.410014472772369e-05, "loss": 16.8302, "step": 1379 }, { "epoch": 0.18, "learning_rate": 9.409009555382072e-05, "loss": 0.0012, "step": 1380 }, { "epoch": 0.18, "learning_rate": 9.408003836639148e-05, "loss": 18.7711, "step": 1381 }, { "epoch": 0.18, "learning_rate": 9.406997316726387e-05, "loss": 19.4448, "step": 1382 }, { "epoch": 0.18, "learning_rate": 9.405989995826728e-05, "loss": 0.0, "step": 1383 }, { "epoch": 0.18, "learning_rate": 9.404981874123257e-05, "loss": 18.5264, "step": 1384 }, { "epoch": 0.18, "learning_rate": 9.403972951799202e-05, "loss": 16.7337, "step": 1385 }, { "epoch": 0.18, "learning_rate": 9.40296322903794e-05, "loss": 17.1977, "step": 1386 }, { "epoch": 0.18, "learning_rate": 9.401952706022987e-05, "loss": 17.8357, "step": 1387 }, { "epoch": 0.18, "learning_rate": 9.400941382938015e-05, "loss": 0.0001, "step": 1388 }, { "epoch": 0.18, "learning_rate": 9.399929259966831e-05, "loss": 15.7456, "step": 1389 }, { "epoch": 0.18, "learning_rate": 9.398916337293393e-05, "loss": 17.993, "step": 1390 }, { "epoch": 0.18, "learning_rate": 9.397902615101804e-05, "loss": 17.7948, "step": 1391 }, { "epoch": 0.18, "learning_rate": 9.396888093576312e-05, "loss": 0.0, "step": 1392 }, { "epoch": 0.18, "learning_rate": 9.395872772901306e-05, "loss": 17.8969, "step": 1393 }, { "epoch": 0.18, "learning_rate": 9.39485665326133e-05, "loss": 17.873, "step": 1394 }, { "epoch": 0.18, "learning_rate": 9.393839734841062e-05, "loss": 17.8493, "step": 1395 }, { "epoch": 0.18, "learning_rate": 9.392822017825333e-05, "loss": 17.9586, "step": 1396 }, { "epoch": 0.18, "learning_rate": 9.391803502399117e-05, "loss": 17.5281, "step": 1397 }, { "epoch": 0.18, "learning_rate": 9.39078418874753e-05, "loss": 0.0004, "step": 1398 }, { "epoch": 0.18, "learning_rate": 9.389764077055838e-05, "loss": 19.3253, "step": 1399 }, { "epoch": 0.18, "learning_rate": 9.388743167509449e-05, "loss": 18.1715, "step": 1400 }, { "epoch": 0.18, "learning_rate": 9.387721460293919e-05, "loss": 16.2706, "step": 1401 }, { "epoch": 0.18, "learning_rate": 9.386698955594943e-05, "loss": 18.4039, "step": 1402 }, { "epoch": 0.18, "learning_rate": 9.385675653598367e-05, "loss": 0.0006, "step": 1403 }, { "epoch": 0.18, "learning_rate": 9.384651554490178e-05, "loss": 0.0001, "step": 1404 }, { "epoch": 0.18, "learning_rate": 9.383626658456512e-05, "loss": 0.0008, "step": 1405 }, { "epoch": 0.19, "learning_rate": 9.382600965683645e-05, "loss": 18.4964, "step": 1406 }, { "epoch": 0.19, "learning_rate": 9.381574476358004e-05, "loss": 16.1106, "step": 1407 }, { "epoch": 0.19, "learning_rate": 9.380547190666152e-05, "loss": 0.0001, "step": 1408 }, { "epoch": 0.19, "learning_rate": 9.379519108794806e-05, "loss": 0.0004, "step": 1409 }, { "epoch": 0.19, "learning_rate": 9.378490230930818e-05, "loss": 18.3835, "step": 1410 }, { "epoch": 0.19, "learning_rate": 9.377460557261197e-05, "loss": 19.6647, "step": 1411 }, { "epoch": 0.19, "learning_rate": 9.376430087973083e-05, "loss": 0.0, "step": 1412 }, { "epoch": 0.19, "learning_rate": 9.375398823253773e-05, "loss": 17.5342, "step": 1413 }, { "epoch": 0.19, "learning_rate": 9.374366763290701e-05, "loss": 18.2502, "step": 1414 }, { "epoch": 0.19, "learning_rate": 9.373333908271444e-05, "loss": 0.0001, "step": 1415 }, { "epoch": 0.19, "learning_rate": 9.372300258383733e-05, "loss": 0.0002, "step": 1416 }, { "epoch": 0.19, "learning_rate": 9.371265813815431e-05, "loss": 18.7302, "step": 1417 }, { "epoch": 0.19, "learning_rate": 9.370230574754557e-05, "loss": 0.0001, "step": 1418 }, { "epoch": 0.19, "learning_rate": 9.369194541389267e-05, "loss": 17.9093, "step": 1419 }, { "epoch": 0.19, "learning_rate": 9.368157713907865e-05, "loss": 17.0579, "step": 1420 }, { "epoch": 0.19, "learning_rate": 9.367120092498796e-05, "loss": 19.0384, "step": 1421 }, { "epoch": 0.19, "learning_rate": 9.366081677350652e-05, "loss": 0.0003, "step": 1422 }, { "epoch": 0.19, "learning_rate": 9.36504246865217e-05, "loss": 16.51, "step": 1423 }, { "epoch": 0.19, "learning_rate": 9.364002466592225e-05, "loss": 18.6869, "step": 1424 }, { "epoch": 0.19, "learning_rate": 9.362961671359849e-05, "loss": 18.3903, "step": 1425 }, { "epoch": 0.19, "learning_rate": 9.361920083144202e-05, "loss": 20.0569, "step": 1426 }, { "epoch": 0.19, "learning_rate": 9.360877702134602e-05, "loss": 0.0, "step": 1427 }, { "epoch": 0.19, "learning_rate": 9.359834528520502e-05, "loss": 19.1842, "step": 1428 }, { "epoch": 0.19, "learning_rate": 9.358790562491504e-05, "loss": 0.0, "step": 1429 }, { "epoch": 0.19, "learning_rate": 9.35774580423735e-05, "loss": 0.0, "step": 1430 }, { "epoch": 0.19, "learning_rate": 9.356700253947932e-05, "loss": 17.5048, "step": 1431 }, { "epoch": 0.19, "learning_rate": 9.35565391181328e-05, "loss": 19.753, "step": 1432 }, { "epoch": 0.19, "learning_rate": 9.354606778023573e-05, "loss": 18.5181, "step": 1433 }, { "epoch": 0.19, "learning_rate": 9.353558852769127e-05, "loss": 16.508, "step": 1434 }, { "epoch": 0.19, "learning_rate": 9.352510136240409e-05, "loss": 0.0001, "step": 1435 }, { "epoch": 0.19, "learning_rate": 9.351460628628025e-05, "loss": 19.8384, "step": 1436 }, { "epoch": 0.19, "learning_rate": 9.350410330122728e-05, "loss": 17.8717, "step": 1437 }, { "epoch": 0.19, "learning_rate": 9.349359240915412e-05, "loss": 16.2785, "step": 1438 }, { "epoch": 0.19, "learning_rate": 9.348307361197119e-05, "loss": 15.6497, "step": 1439 }, { "epoch": 0.19, "learning_rate": 9.347254691159028e-05, "loss": 16.3784, "step": 1440 }, { "epoch": 0.19, "learning_rate": 9.346201230992468e-05, "loss": 16.6524, "step": 1441 }, { "epoch": 0.19, "learning_rate": 9.345146980888907e-05, "loss": 18.9319, "step": 1442 }, { "epoch": 0.19, "learning_rate": 9.34409194103996e-05, "loss": 0.0001, "step": 1443 }, { "epoch": 0.19, "learning_rate": 9.343036111637384e-05, "loss": 17.7131, "step": 1444 }, { "epoch": 0.19, "learning_rate": 9.341979492873081e-05, "loss": 19.0654, "step": 1445 }, { "epoch": 0.19, "learning_rate": 9.340922084939093e-05, "loss": 0.0001, "step": 1446 }, { "epoch": 0.19, "learning_rate": 9.339863888027605e-05, "loss": 17.6787, "step": 1447 }, { "epoch": 0.19, "learning_rate": 9.338804902330955e-05, "loss": 17.6689, "step": 1448 }, { "epoch": 0.19, "learning_rate": 9.337745128041611e-05, "loss": 17.4089, "step": 1449 }, { "epoch": 0.19, "learning_rate": 9.336684565352193e-05, "loss": 17.0688, "step": 1450 }, { "epoch": 0.19, "learning_rate": 9.335623214455463e-05, "loss": 19.1698, "step": 1451 }, { "epoch": 0.19, "learning_rate": 9.334561075544322e-05, "loss": 0.0, "step": 1452 }, { "epoch": 0.19, "learning_rate": 9.33349814881182e-05, "loss": 19.0359, "step": 1453 }, { "epoch": 0.19, "learning_rate": 9.332434434451147e-05, "loss": 16.2529, "step": 1454 }, { "epoch": 0.19, "learning_rate": 9.331369932655639e-05, "loss": 17.039, "step": 1455 }, { "epoch": 0.19, "learning_rate": 9.330304643618767e-05, "loss": 18.8832, "step": 1456 }, { "epoch": 0.19, "learning_rate": 9.329238567534156e-05, "loss": 19.8878, "step": 1457 }, { "epoch": 0.19, "learning_rate": 9.328171704595567e-05, "loss": 0.0001, "step": 1458 }, { "epoch": 0.19, "learning_rate": 9.327104054996907e-05, "loss": 0.0001, "step": 1459 }, { "epoch": 0.19, "learning_rate": 9.326035618932224e-05, "loss": 18.7869, "step": 1460 }, { "epoch": 0.19, "learning_rate": 9.324966396595711e-05, "loss": 0.0001, "step": 1461 }, { "epoch": 0.19, "learning_rate": 9.3238963881817e-05, "loss": 0.0, "step": 1462 }, { "epoch": 0.19, "learning_rate": 9.322825593884674e-05, "loss": 17.882, "step": 1463 }, { "epoch": 0.19, "learning_rate": 9.321754013899249e-05, "loss": 17.435, "step": 1464 }, { "epoch": 0.19, "learning_rate": 9.32068164842019e-05, "loss": 19.0902, "step": 1465 }, { "epoch": 0.19, "learning_rate": 9.319608497642403e-05, "loss": 0.0001, "step": 1466 }, { "epoch": 0.19, "learning_rate": 9.318534561760937e-05, "loss": 0.0, "step": 1467 }, { "epoch": 0.19, "learning_rate": 9.317459840970982e-05, "loss": 17.8061, "step": 1468 }, { "epoch": 0.19, "learning_rate": 9.316384335467875e-05, "loss": 18.2776, "step": 1469 }, { "epoch": 0.19, "learning_rate": 9.31530804544709e-05, "loss": 16.7728, "step": 1470 }, { "epoch": 0.19, "learning_rate": 9.314230971104248e-05, "loss": 0.0001, "step": 1471 }, { "epoch": 0.19, "learning_rate": 9.313153112635111e-05, "loss": 20.287, "step": 1472 }, { "epoch": 0.19, "learning_rate": 9.312074470235583e-05, "loss": 15.9979, "step": 1473 }, { "epoch": 0.19, "learning_rate": 9.310995044101711e-05, "loss": 0.0003, "step": 1474 }, { "epoch": 0.19, "learning_rate": 9.309914834429685e-05, "loss": 18.2681, "step": 1475 }, { "epoch": 0.19, "learning_rate": 9.308833841415836e-05, "loss": 18.7454, "step": 1476 }, { "epoch": 0.19, "learning_rate": 9.307752065256638e-05, "loss": 0.0001, "step": 1477 }, { "epoch": 0.19, "learning_rate": 9.306669506148709e-05, "loss": 15.5948, "step": 1478 }, { "epoch": 0.19, "learning_rate": 9.305586164288807e-05, "loss": 17.2396, "step": 1479 }, { "epoch": 0.19, "learning_rate": 9.304502039873831e-05, "loss": 17.7976, "step": 1480 }, { "epoch": 0.19, "learning_rate": 9.303417133100828e-05, "loss": 0.0001, "step": 1481 }, { "epoch": 0.2, "learning_rate": 9.302331444166981e-05, "loss": 0.0001, "step": 1482 }, { "epoch": 0.2, "learning_rate": 9.301244973269619e-05, "loss": 19.4238, "step": 1483 }, { "epoch": 0.2, "learning_rate": 9.300157720606211e-05, "loss": 17.2745, "step": 1484 }, { "epoch": 0.2, "learning_rate": 9.29906968637437e-05, "loss": 19.1673, "step": 1485 }, { "epoch": 0.2, "learning_rate": 9.297980870771849e-05, "loss": 0.0001, "step": 1486 }, { "epoch": 0.2, "learning_rate": 9.296891273996544e-05, "loss": 17.0683, "step": 1487 }, { "epoch": 0.2, "learning_rate": 9.295800896246491e-05, "loss": 0.0, "step": 1488 }, { "epoch": 0.2, "learning_rate": 9.294709737719876e-05, "loss": 17.8268, "step": 1489 }, { "epoch": 0.2, "learning_rate": 9.293617798615014e-05, "loss": 19.0256, "step": 1490 }, { "epoch": 0.2, "learning_rate": 9.292525079130372e-05, "loss": 0.0, "step": 1491 }, { "epoch": 0.2, "learning_rate": 9.291431579464556e-05, "loss": 17.9065, "step": 1492 }, { "epoch": 0.2, "learning_rate": 9.290337299816312e-05, "loss": 17.8965, "step": 1493 }, { "epoch": 0.2, "learning_rate": 9.289242240384529e-05, "loss": 16.8837, "step": 1494 }, { "epoch": 0.2, "learning_rate": 9.288146401368238e-05, "loss": 0.0001, "step": 1495 }, { "epoch": 0.2, "learning_rate": 9.287049782966612e-05, "loss": 15.9969, "step": 1496 }, { "epoch": 0.2, "learning_rate": 9.285952385378967e-05, "loss": 17.1664, "step": 1497 }, { "epoch": 0.2, "learning_rate": 9.284854208804754e-05, "loss": 0.0001, "step": 1498 }, { "epoch": 0.2, "learning_rate": 9.283755253443576e-05, "loss": 17.8853, "step": 1499 }, { "epoch": 0.2, "learning_rate": 9.282655519495165e-05, "loss": 16.9327, "step": 1500 }, { "epoch": 0.2, "learning_rate": 9.281555007159408e-05, "loss": 18.369, "step": 1501 }, { "epoch": 0.2, "learning_rate": 9.280453716636323e-05, "loss": 18.0621, "step": 1502 }, { "epoch": 0.2, "learning_rate": 9.279351648126077e-05, "loss": 17.2481, "step": 1503 }, { "epoch": 0.2, "learning_rate": 9.27824880182897e-05, "loss": 17.8206, "step": 1504 }, { "epoch": 0.2, "learning_rate": 9.277145177945453e-05, "loss": 18.0902, "step": 1505 }, { "epoch": 0.2, "learning_rate": 9.27604077667611e-05, "loss": 15.5546, "step": 1506 }, { "epoch": 0.2, "learning_rate": 9.274935598221668e-05, "loss": 21.0478, "step": 1507 }, { "epoch": 0.2, "learning_rate": 9.273829642783001e-05, "loss": 0.0, "step": 1508 }, { "epoch": 0.2, "learning_rate": 9.27272291056112e-05, "loss": 18.8824, "step": 1509 }, { "epoch": 0.2, "learning_rate": 9.271615401757175e-05, "loss": 18.6477, "step": 1510 }, { "epoch": 0.2, "learning_rate": 9.270507116572463e-05, "loss": 17.2127, "step": 1511 }, { "epoch": 0.2, "learning_rate": 9.269398055208414e-05, "loss": 18.7982, "step": 1512 }, { "epoch": 0.2, "learning_rate": 9.268288217866606e-05, "loss": 16.7243, "step": 1513 }, { "epoch": 0.2, "learning_rate": 9.267177604748756e-05, "loss": 17.4944, "step": 1514 }, { "epoch": 0.2, "learning_rate": 9.266066216056723e-05, "loss": 0.0001, "step": 1515 }, { "epoch": 0.2, "learning_rate": 9.264954051992503e-05, "loss": 0.0001, "step": 1516 }, { "epoch": 0.2, "learning_rate": 9.263841112758238e-05, "loss": 0.0, "step": 1517 }, { "epoch": 0.2, "learning_rate": 9.262727398556205e-05, "loss": 16.6487, "step": 1518 }, { "epoch": 0.2, "learning_rate": 9.26161290958883e-05, "loss": 17.6701, "step": 1519 }, { "epoch": 0.2, "learning_rate": 9.260497646058673e-05, "loss": 22.0732, "step": 1520 }, { "epoch": 0.2, "learning_rate": 9.259381608168437e-05, "loss": 18.1908, "step": 1521 }, { "epoch": 0.2, "learning_rate": 9.258264796120966e-05, "loss": 19.7992, "step": 1522 }, { "epoch": 0.2, "learning_rate": 9.257147210119244e-05, "loss": 17.3096, "step": 1523 }, { "epoch": 0.2, "learning_rate": 9.256028850366395e-05, "loss": 18.4577, "step": 1524 }, { "epoch": 0.2, "learning_rate": 9.254909717065687e-05, "loss": 16.6376, "step": 1525 }, { "epoch": 0.2, "learning_rate": 9.253789810420525e-05, "loss": 19.2244, "step": 1526 }, { "epoch": 0.2, "learning_rate": 9.252669130634455e-05, "loss": 17.6801, "step": 1527 }, { "epoch": 0.2, "learning_rate": 9.251547677911168e-05, "loss": 0.0, "step": 1528 }, { "epoch": 0.2, "learning_rate": 9.250425452454488e-05, "loss": 19.2121, "step": 1529 }, { "epoch": 0.2, "learning_rate": 9.249302454468385e-05, "loss": 0.0001, "step": 1530 }, { "epoch": 0.2, "learning_rate": 9.248178684156967e-05, "loss": 18.9997, "step": 1531 }, { "epoch": 0.2, "learning_rate": 9.247054141724485e-05, "loss": 0.0001, "step": 1532 }, { "epoch": 0.2, "learning_rate": 9.245928827375325e-05, "loss": 0.0, "step": 1533 }, { "epoch": 0.2, "learning_rate": 9.24480274131402e-05, "loss": 16.0853, "step": 1534 }, { "epoch": 0.2, "learning_rate": 9.24367588374524e-05, "loss": 19.5039, "step": 1535 }, { "epoch": 0.2, "learning_rate": 9.242548254873793e-05, "loss": 0.0001, "step": 1536 }, { "epoch": 0.2, "learning_rate": 9.241419854904629e-05, "loss": 18.5364, "step": 1537 }, { "epoch": 0.2, "learning_rate": 9.240290684042843e-05, "loss": 17.2441, "step": 1538 }, { "epoch": 0.2, "learning_rate": 9.239160742493659e-05, "loss": 18.8981, "step": 1539 }, { "epoch": 0.2, "learning_rate": 9.238030030462456e-05, "loss": 17.603, "step": 1540 }, { "epoch": 0.2, "learning_rate": 9.236898548154738e-05, "loss": 0.0, "step": 1541 }, { "epoch": 0.2, "learning_rate": 9.235766295776159e-05, "loss": 0.0002, "step": 1542 }, { "epoch": 0.2, "learning_rate": 9.234633273532506e-05, "loss": 17.5672, "step": 1543 }, { "epoch": 0.2, "learning_rate": 9.233499481629714e-05, "loss": 21.3878, "step": 1544 }, { "epoch": 0.2, "learning_rate": 9.232364920273855e-05, "loss": 17.4643, "step": 1545 }, { "epoch": 0.2, "learning_rate": 9.231229589671133e-05, "loss": 0.0001, "step": 1546 }, { "epoch": 0.2, "learning_rate": 9.230093490027904e-05, "loss": 0.0003, "step": 1547 }, { "epoch": 0.2, "learning_rate": 9.228956621550655e-05, "loss": 18.0659, "step": 1548 }, { "epoch": 0.2, "learning_rate": 9.227818984446015e-05, "loss": 0.0, "step": 1549 }, { "epoch": 0.2, "learning_rate": 9.226680578920756e-05, "loss": 0.0, "step": 1550 }, { "epoch": 0.2, "learning_rate": 9.225541405181787e-05, "loss": 18.53, "step": 1551 }, { "epoch": 0.2, "learning_rate": 9.224401463436156e-05, "loss": 18.2521, "step": 1552 }, { "epoch": 0.2, "learning_rate": 9.22326075389105e-05, "loss": 17.0732, "step": 1553 }, { "epoch": 0.2, "learning_rate": 9.222119276753797e-05, "loss": 0.0001, "step": 1554 }, { "epoch": 0.2, "learning_rate": 9.220977032231868e-05, "loss": 19.8904, "step": 1555 }, { "epoch": 0.2, "learning_rate": 9.219834020532867e-05, "loss": 20.1023, "step": 1556 }, { "epoch": 0.2, "learning_rate": 9.21869024186454e-05, "loss": 0.0003, "step": 1557 }, { "epoch": 0.21, "learning_rate": 9.217545696434773e-05, "loss": 0.0001, "step": 1558 }, { "epoch": 0.21, "learning_rate": 9.216400384451591e-05, "loss": 0.0001, "step": 1559 }, { "epoch": 0.21, "learning_rate": 9.215254306123158e-05, "loss": 17.083, "step": 1560 }, { "epoch": 0.21, "learning_rate": 9.21410746165778e-05, "loss": 19.0015, "step": 1561 }, { "epoch": 0.21, "learning_rate": 9.212959851263898e-05, "loss": 18.7561, "step": 1562 }, { "epoch": 0.21, "learning_rate": 9.211811475150095e-05, "loss": 16.1032, "step": 1563 }, { "epoch": 0.21, "learning_rate": 9.21066233352509e-05, "loss": 18.7584, "step": 1564 }, { "epoch": 0.21, "learning_rate": 9.209512426597748e-05, "loss": 18.5425, "step": 1565 }, { "epoch": 0.21, "learning_rate": 9.208361754577064e-05, "loss": 17.7798, "step": 1566 }, { "epoch": 0.21, "learning_rate": 9.207210317672179e-05, "loss": 0.0001, "step": 1567 }, { "epoch": 0.21, "learning_rate": 9.206058116092369e-05, "loss": 0.0001, "step": 1568 }, { "epoch": 0.21, "learning_rate": 9.204905150047051e-05, "loss": 0.0001, "step": 1569 }, { "epoch": 0.21, "learning_rate": 9.203751419745783e-05, "loss": 0.0001, "step": 1570 }, { "epoch": 0.21, "learning_rate": 9.202596925398256e-05, "loss": 17.3944, "step": 1571 }, { "epoch": 0.21, "learning_rate": 9.201441667214305e-05, "loss": 19.4032, "step": 1572 }, { "epoch": 0.21, "learning_rate": 9.200285645403903e-05, "loss": 17.9201, "step": 1573 }, { "epoch": 0.21, "learning_rate": 9.199128860177158e-05, "loss": 19.8992, "step": 1574 }, { "epoch": 0.21, "learning_rate": 9.197971311744325e-05, "loss": 0.0003, "step": 1575 }, { "epoch": 0.21, "learning_rate": 9.196813000315785e-05, "loss": 18.7457, "step": 1576 }, { "epoch": 0.21, "learning_rate": 9.195653926102072e-05, "loss": 19.2703, "step": 1577 }, { "epoch": 0.21, "learning_rate": 9.194494089313848e-05, "loss": 0.0013, "step": 1578 }, { "epoch": 0.21, "learning_rate": 9.193333490161917e-05, "loss": 17.4737, "step": 1579 }, { "epoch": 0.21, "learning_rate": 9.192172128857223e-05, "loss": 17.543, "step": 1580 }, { "epoch": 0.21, "learning_rate": 9.191010005610849e-05, "loss": 0.0, "step": 1581 }, { "epoch": 0.21, "learning_rate": 9.189847120634013e-05, "loss": 16.4061, "step": 1582 }, { "epoch": 0.21, "learning_rate": 9.188683474138073e-05, "loss": 17.3997, "step": 1583 }, { "epoch": 0.21, "learning_rate": 9.187519066334527e-05, "loss": 17.8412, "step": 1584 }, { "epoch": 0.21, "learning_rate": 9.186353897435009e-05, "loss": 17.7186, "step": 1585 }, { "epoch": 0.21, "learning_rate": 9.185187967651293e-05, "loss": 0.0001, "step": 1586 }, { "epoch": 0.21, "learning_rate": 9.184021277195292e-05, "loss": 17.9054, "step": 1587 }, { "epoch": 0.21, "learning_rate": 9.182853826279052e-05, "loss": 17.7051, "step": 1588 }, { "epoch": 0.21, "learning_rate": 9.181685615114766e-05, "loss": 16.8611, "step": 1589 }, { "epoch": 0.21, "learning_rate": 9.180516643914757e-05, "loss": 19.1151, "step": 1590 }, { "epoch": 0.21, "learning_rate": 9.179346912891491e-05, "loss": 17.131, "step": 1591 }, { "epoch": 0.21, "learning_rate": 9.178176422257572e-05, "loss": 17.5836, "step": 1592 }, { "epoch": 0.21, "learning_rate": 9.177005172225737e-05, "loss": 18.9118, "step": 1593 }, { "epoch": 0.21, "learning_rate": 9.175833163008869e-05, "loss": 0.0001, "step": 1594 }, { "epoch": 0.21, "learning_rate": 9.17466039481998e-05, "loss": 0.0, "step": 1595 }, { "epoch": 0.21, "learning_rate": 9.17348686787223e-05, "loss": 18.667, "step": 1596 }, { "epoch": 0.21, "learning_rate": 9.172312582378908e-05, "loss": 17.1228, "step": 1597 }, { "epoch": 0.21, "learning_rate": 9.171137538553445e-05, "loss": 17.0041, "step": 1598 }, { "epoch": 0.21, "learning_rate": 9.169961736609408e-05, "loss": 19.319, "step": 1599 }, { "epoch": 0.21, "learning_rate": 9.168785176760506e-05, "loss": 15.4833, "step": 1600 }, { "epoch": 0.21, "learning_rate": 9.16760785922058e-05, "loss": 0.0, "step": 1601 }, { "epoch": 0.21, "learning_rate": 9.166429784203615e-05, "loss": 17.5907, "step": 1602 }, { "epoch": 0.21, "learning_rate": 9.165250951923725e-05, "loss": 17.8143, "step": 1603 }, { "epoch": 0.21, "learning_rate": 9.164071362595172e-05, "loss": 0.0, "step": 1604 }, { "epoch": 0.21, "learning_rate": 9.162891016432346e-05, "loss": 16.9985, "step": 1605 }, { "epoch": 0.21, "learning_rate": 9.161709913649784e-05, "loss": 0.0003, "step": 1606 }, { "epoch": 0.21, "learning_rate": 9.160528054462152e-05, "loss": 0.0005, "step": 1607 }, { "epoch": 0.21, "learning_rate": 9.159345439084257e-05, "loss": 0.0, "step": 1608 }, { "epoch": 0.21, "learning_rate": 9.158162067731043e-05, "loss": 17.1773, "step": 1609 }, { "epoch": 0.21, "learning_rate": 9.156977940617594e-05, "loss": 18.4224, "step": 1610 }, { "epoch": 0.21, "learning_rate": 9.155793057959128e-05, "loss": 17.7451, "step": 1611 }, { "epoch": 0.21, "learning_rate": 9.154607419971002e-05, "loss": 18.8829, "step": 1612 }, { "epoch": 0.21, "learning_rate": 9.153421026868707e-05, "loss": 18.0139, "step": 1613 }, { "epoch": 0.21, "learning_rate": 9.152233878867879e-05, "loss": 16.4366, "step": 1614 }, { "epoch": 0.21, "learning_rate": 9.151045976184281e-05, "loss": 19.1816, "step": 1615 }, { "epoch": 0.21, "learning_rate": 9.149857319033824e-05, "loss": 18.116, "step": 1616 }, { "epoch": 0.21, "learning_rate": 9.148667907632544e-05, "loss": 18.0518, "step": 1617 }, { "epoch": 0.21, "learning_rate": 9.147477742196627e-05, "loss": 0.0, "step": 1618 }, { "epoch": 0.21, "learning_rate": 9.146286822942383e-05, "loss": 17.411, "step": 1619 }, { "epoch": 0.21, "learning_rate": 9.145095150086272e-05, "loss": 18.9734, "step": 1620 }, { "epoch": 0.21, "learning_rate": 9.143902723844882e-05, "loss": 19.2101, "step": 1621 }, { "epoch": 0.21, "learning_rate": 9.14270954443494e-05, "loss": 17.6784, "step": 1622 }, { "epoch": 0.21, "learning_rate": 9.141515612073311e-05, "loss": 17.4305, "step": 1623 }, { "epoch": 0.21, "learning_rate": 9.140320926976998e-05, "loss": 17.5368, "step": 1624 }, { "epoch": 0.21, "learning_rate": 9.139125489363134e-05, "loss": 17.6697, "step": 1625 }, { "epoch": 0.21, "learning_rate": 9.137929299448999e-05, "loss": 0.0002, "step": 1626 }, { "epoch": 0.21, "learning_rate": 9.136732357452005e-05, "loss": 0.0001, "step": 1627 }, { "epoch": 0.21, "learning_rate": 9.135534663589696e-05, "loss": 0.0001, "step": 1628 }, { "epoch": 0.21, "learning_rate": 9.134336218079758e-05, "loss": 16.6113, "step": 1629 }, { "epoch": 0.21, "learning_rate": 9.133137021140016e-05, "loss": 18.6507, "step": 1630 }, { "epoch": 0.21, "learning_rate": 9.131937072988426e-05, "loss": 0.0002, "step": 1631 }, { "epoch": 0.21, "learning_rate": 9.130736373843081e-05, "loss": 0.0001, "step": 1632 }, { "epoch": 0.21, "learning_rate": 9.129534923922216e-05, "loss": 16.9047, "step": 1633 }, { "epoch": 0.22, "learning_rate": 9.128332723444195e-05, "loss": 17.4199, "step": 1634 }, { "epoch": 0.22, "learning_rate": 9.127129772627523e-05, "loss": 0.0001, "step": 1635 }, { "epoch": 0.22, "learning_rate": 9.125926071690842e-05, "loss": 18.4601, "step": 1636 }, { "epoch": 0.22, "learning_rate": 9.12472162085293e-05, "loss": 0.0, "step": 1637 }, { "epoch": 0.22, "learning_rate": 9.123516420332694e-05, "loss": 16.3975, "step": 1638 }, { "epoch": 0.22, "learning_rate": 9.122310470349189e-05, "loss": 17.6069, "step": 1639 }, { "epoch": 0.22, "learning_rate": 9.121103771121599e-05, "loss": 18.3754, "step": 1640 }, { "epoch": 0.22, "learning_rate": 9.119896322869246e-05, "loss": 0.0, "step": 1641 }, { "epoch": 0.22, "learning_rate": 9.118688125811586e-05, "loss": 17.5732, "step": 1642 }, { "epoch": 0.22, "learning_rate": 9.117479180168214e-05, "loss": 0.0, "step": 1643 }, { "epoch": 0.22, "learning_rate": 9.116269486158862e-05, "loss": 16.3767, "step": 1644 }, { "epoch": 0.22, "learning_rate": 9.115059044003393e-05, "loss": 18.0831, "step": 1645 }, { "epoch": 0.22, "learning_rate": 9.113847853921812e-05, "loss": 17.0945, "step": 1646 }, { "epoch": 0.22, "learning_rate": 9.112635916134255e-05, "loss": 18.7641, "step": 1647 }, { "epoch": 0.22, "learning_rate": 9.111423230860995e-05, "loss": 17.8874, "step": 1648 }, { "epoch": 0.22, "learning_rate": 9.110209798322445e-05, "loss": 17.0032, "step": 1649 }, { "epoch": 0.22, "learning_rate": 9.108995618739145e-05, "loss": 0.0002, "step": 1650 }, { "epoch": 0.22, "learning_rate": 9.107780692331782e-05, "loss": 17.4266, "step": 1651 }, { "epoch": 0.22, "learning_rate": 9.106565019321171e-05, "loss": 0.0001, "step": 1652 }, { "epoch": 0.22, "learning_rate": 9.105348599928264e-05, "loss": 18.5778, "step": 1653 }, { "epoch": 0.22, "learning_rate": 9.10413143437415e-05, "loss": 17.7914, "step": 1654 }, { "epoch": 0.22, "learning_rate": 9.102913522880054e-05, "loss": 17.6472, "step": 1655 }, { "epoch": 0.22, "learning_rate": 9.101694865667334e-05, "loss": 0.0004, "step": 1656 }, { "epoch": 0.22, "learning_rate": 9.100475462957485e-05, "loss": 18.1771, "step": 1657 }, { "epoch": 0.22, "learning_rate": 9.099255314972139e-05, "loss": 0.0001, "step": 1658 }, { "epoch": 0.22, "learning_rate": 9.098034421933061e-05, "loss": 0.0006, "step": 1659 }, { "epoch": 0.22, "learning_rate": 9.096812784062154e-05, "loss": 21.9244, "step": 1660 }, { "epoch": 0.22, "learning_rate": 9.095590401581453e-05, "loss": 18.9501, "step": 1661 }, { "epoch": 0.22, "learning_rate": 9.09436727471313e-05, "loss": 16.8561, "step": 1662 }, { "epoch": 0.22, "learning_rate": 9.093143403679495e-05, "loss": 16.3471, "step": 1663 }, { "epoch": 0.22, "learning_rate": 9.091918788702988e-05, "loss": 17.1041, "step": 1664 }, { "epoch": 0.22, "learning_rate": 9.090693430006189e-05, "loss": 20.2359, "step": 1665 }, { "epoch": 0.22, "learning_rate": 9.089467327811811e-05, "loss": 18.366, "step": 1666 }, { "epoch": 0.22, "learning_rate": 9.088240482342702e-05, "loss": 16.7492, "step": 1667 }, { "epoch": 0.22, "learning_rate": 9.087012893821845e-05, "loss": 0.0001, "step": 1668 }, { "epoch": 0.22, "learning_rate": 9.085784562472357e-05, "loss": 17.3709, "step": 1669 }, { "epoch": 0.22, "learning_rate": 9.084555488517495e-05, "loss": 16.5848, "step": 1670 }, { "epoch": 0.22, "learning_rate": 9.083325672180643e-05, "loss": 17.4178, "step": 1671 }, { "epoch": 0.22, "learning_rate": 9.082095113685328e-05, "loss": 18.852, "step": 1672 }, { "epoch": 0.22, "learning_rate": 9.080863813255206e-05, "loss": 16.4687, "step": 1673 }, { "epoch": 0.22, "learning_rate": 9.079631771114074e-05, "loss": 16.7865, "step": 1674 }, { "epoch": 0.22, "learning_rate": 9.078398987485854e-05, "loss": 17.4126, "step": 1675 }, { "epoch": 0.22, "learning_rate": 9.077165462594612e-05, "loss": 18.7561, "step": 1676 }, { "epoch": 0.22, "learning_rate": 9.075931196664547e-05, "loss": 18.5178, "step": 1677 }, { "epoch": 0.22, "learning_rate": 9.074696189919987e-05, "loss": 18.6468, "step": 1678 }, { "epoch": 0.22, "learning_rate": 9.073460442585401e-05, "loss": 16.5071, "step": 1679 }, { "epoch": 0.22, "learning_rate": 9.07222395488539e-05, "loss": 0.0002, "step": 1680 }, { "epoch": 0.22, "learning_rate": 9.070986727044691e-05, "loss": 19.0173, "step": 1681 }, { "epoch": 0.22, "learning_rate": 9.069748759288172e-05, "loss": 0.0005, "step": 1682 }, { "epoch": 0.22, "learning_rate": 9.06851005184084e-05, "loss": 19.0635, "step": 1683 }, { "epoch": 0.22, "learning_rate": 9.067270604927833e-05, "loss": 17.2081, "step": 1684 }, { "epoch": 0.22, "learning_rate": 9.066030418774428e-05, "loss": 16.6672, "step": 1685 }, { "epoch": 0.22, "learning_rate": 9.064789493606028e-05, "loss": 18.0867, "step": 1686 }, { "epoch": 0.22, "learning_rate": 9.063547829648178e-05, "loss": 0.0001, "step": 1687 }, { "epoch": 0.22, "learning_rate": 9.062305427126554e-05, "loss": 17.1916, "step": 1688 }, { "epoch": 0.22, "learning_rate": 9.061062286266969e-05, "loss": 17.1187, "step": 1689 }, { "epoch": 0.22, "learning_rate": 9.059818407295365e-05, "loss": 18.0642, "step": 1690 }, { "epoch": 0.22, "learning_rate": 9.058573790437823e-05, "loss": 16.7529, "step": 1691 }, { "epoch": 0.22, "learning_rate": 9.057328435920556e-05, "loss": 0.0, "step": 1692 }, { "epoch": 0.22, "learning_rate": 9.056082343969914e-05, "loss": 16.6466, "step": 1693 }, { "epoch": 0.22, "learning_rate": 9.054835514812375e-05, "loss": 17.3474, "step": 1694 }, { "epoch": 0.22, "learning_rate": 9.053587948674554e-05, "loss": 0.0002, "step": 1695 }, { "epoch": 0.22, "learning_rate": 9.052339645783203e-05, "loss": 17.0151, "step": 1696 }, { "epoch": 0.22, "learning_rate": 9.051090606365206e-05, "loss": 0.0001, "step": 1697 }, { "epoch": 0.22, "learning_rate": 9.049840830647577e-05, "loss": 0.0001, "step": 1698 }, { "epoch": 0.22, "learning_rate": 9.04859031885747e-05, "loss": 0.0001, "step": 1699 }, { "epoch": 0.22, "learning_rate": 9.047339071222168e-05, "loss": 0.0, "step": 1700 }, { "epoch": 0.22, "learning_rate": 9.046087087969088e-05, "loss": 20.6662, "step": 1701 }, { "epoch": 0.22, "learning_rate": 9.044834369325787e-05, "loss": 0.0, "step": 1702 }, { "epoch": 0.22, "learning_rate": 9.043580915519947e-05, "loss": 19.9682, "step": 1703 }, { "epoch": 0.22, "learning_rate": 9.042326726779389e-05, "loss": 18.0866, "step": 1704 }, { "epoch": 0.22, "learning_rate": 9.041071803332065e-05, "loss": 0.0004, "step": 1705 }, { "epoch": 0.22, "learning_rate": 9.039816145406064e-05, "loss": 18.4235, "step": 1706 }, { "epoch": 0.22, "learning_rate": 9.038559753229603e-05, "loss": 17.8929, "step": 1707 }, { "epoch": 0.22, "learning_rate": 9.037302627031037e-05, "loss": 0.0001, "step": 1708 }, { "epoch": 0.22, "learning_rate": 9.036044767038856e-05, "loss": 0.0001, "step": 1709 }, { "epoch": 0.23, "learning_rate": 9.034786173481676e-05, "loss": 18.3564, "step": 1710 }, { "epoch": 0.23, "learning_rate": 9.033526846588252e-05, "loss": 20.0294, "step": 1711 }, { "epoch": 0.23, "learning_rate": 9.032266786587471e-05, "loss": 16.8303, "step": 1712 }, { "epoch": 0.23, "learning_rate": 9.031005993708354e-05, "loss": 17.9872, "step": 1713 }, { "epoch": 0.23, "learning_rate": 9.029744468180054e-05, "loss": 0.0001, "step": 1714 }, { "epoch": 0.23, "learning_rate": 9.028482210231857e-05, "loss": 19.6394, "step": 1715 }, { "epoch": 0.23, "learning_rate": 9.027219220093185e-05, "loss": 0.0001, "step": 1716 }, { "epoch": 0.23, "learning_rate": 9.025955497993587e-05, "loss": 18.6783, "step": 1717 }, { "epoch": 0.23, "learning_rate": 9.024691044162751e-05, "loss": 20.5726, "step": 1718 }, { "epoch": 0.23, "learning_rate": 9.023425858830499e-05, "loss": 17.9681, "step": 1719 }, { "epoch": 0.23, "learning_rate": 9.022159942226775e-05, "loss": 16.9289, "step": 1720 }, { "epoch": 0.23, "learning_rate": 9.020893294581671e-05, "loss": 0.0004, "step": 1721 }, { "epoch": 0.23, "learning_rate": 9.019625916125401e-05, "loss": 0.0001, "step": 1722 }, { "epoch": 0.23, "learning_rate": 9.018357807088317e-05, "loss": 17.2908, "step": 1723 }, { "epoch": 0.23, "learning_rate": 9.0170889677009e-05, "loss": 19.4708, "step": 1724 }, { "epoch": 0.23, "learning_rate": 9.015819398193768e-05, "loss": 0.0, "step": 1725 }, { "epoch": 0.23, "learning_rate": 9.014549098797669e-05, "loss": 17.601, "step": 1726 }, { "epoch": 0.23, "learning_rate": 9.013278069743482e-05, "loss": 18.0749, "step": 1727 }, { "epoch": 0.23, "learning_rate": 9.012006311262226e-05, "loss": 16.2157, "step": 1728 }, { "epoch": 0.23, "learning_rate": 9.010733823585043e-05, "loss": 19.424, "step": 1729 }, { "epoch": 0.23, "learning_rate": 9.009460606943214e-05, "loss": 16.9936, "step": 1730 }, { "epoch": 0.23, "learning_rate": 9.00818666156815e-05, "loss": 0.0001, "step": 1731 }, { "epoch": 0.23, "learning_rate": 9.006911987691395e-05, "loss": 17.9043, "step": 1732 }, { "epoch": 0.23, "learning_rate": 9.005636585544625e-05, "loss": 17.3956, "step": 1733 }, { "epoch": 0.23, "learning_rate": 9.004360455359648e-05, "loss": 0.0, "step": 1734 }, { "epoch": 0.23, "learning_rate": 9.003083597368407e-05, "loss": 19.4725, "step": 1735 }, { "epoch": 0.23, "learning_rate": 9.001806011802975e-05, "loss": 17.3047, "step": 1736 }, { "epoch": 0.23, "learning_rate": 9.000527698895557e-05, "loss": 17.4344, "step": 1737 }, { "epoch": 0.23, "learning_rate": 8.999248658878489e-05, "loss": 19.5767, "step": 1738 }, { "epoch": 0.23, "learning_rate": 8.997968891984244e-05, "loss": 18.3181, "step": 1739 }, { "epoch": 0.23, "learning_rate": 8.996688398445423e-05, "loss": 0.0, "step": 1740 }, { "epoch": 0.23, "learning_rate": 8.995407178494759e-05, "loss": 0.0007, "step": 1741 }, { "epoch": 0.23, "learning_rate": 8.994125232365119e-05, "loss": 18.4153, "step": 1742 }, { "epoch": 0.23, "learning_rate": 8.9928425602895e-05, "loss": 19.0613, "step": 1743 }, { "epoch": 0.23, "learning_rate": 8.991559162501036e-05, "loss": 18.4942, "step": 1744 }, { "epoch": 0.23, "learning_rate": 8.990275039232984e-05, "loss": 19.2202, "step": 1745 }, { "epoch": 0.23, "learning_rate": 8.98899019071874e-05, "loss": 0.0001, "step": 1746 }, { "epoch": 0.23, "learning_rate": 8.98770461719183e-05, "loss": 17.6277, "step": 1747 }, { "epoch": 0.23, "learning_rate": 8.98641831888591e-05, "loss": 18.0723, "step": 1748 }, { "epoch": 0.23, "learning_rate": 8.98513129603477e-05, "loss": 18.8881, "step": 1749 }, { "epoch": 0.23, "learning_rate": 8.98384354887233e-05, "loss": 19.2257, "step": 1750 }, { "epoch": 0.23, "learning_rate": 8.982555077632646e-05, "loss": 0.0001, "step": 1751 }, { "epoch": 0.23, "learning_rate": 8.981265882549897e-05, "loss": 0.0002, "step": 1752 }, { "epoch": 0.23, "learning_rate": 8.979975963858402e-05, "loss": 0.0002, "step": 1753 }, { "epoch": 0.23, "learning_rate": 8.978685321792606e-05, "loss": 0.0003, "step": 1754 }, { "epoch": 0.23, "learning_rate": 8.97739395658709e-05, "loss": 18.8353, "step": 1755 }, { "epoch": 0.23, "learning_rate": 8.976101868476563e-05, "loss": 0.0001, "step": 1756 }, { "epoch": 0.23, "learning_rate": 8.974809057695865e-05, "loss": 19.5237, "step": 1757 }, { "epoch": 0.23, "learning_rate": 8.973515524479972e-05, "loss": 19.8251, "step": 1758 }, { "epoch": 0.23, "learning_rate": 8.972221269063985e-05, "loss": 18.5141, "step": 1759 }, { "epoch": 0.23, "learning_rate": 8.970926291683139e-05, "loss": 0.0, "step": 1760 }, { "epoch": 0.23, "learning_rate": 8.969630592572804e-05, "loss": 0.0001, "step": 1761 }, { "epoch": 0.23, "learning_rate": 8.968334171968476e-05, "loss": 17.5202, "step": 1762 }, { "epoch": 0.23, "learning_rate": 8.967037030105784e-05, "loss": 0.0, "step": 1763 }, { "epoch": 0.23, "learning_rate": 8.965739167220487e-05, "loss": 18.5651, "step": 1764 }, { "epoch": 0.23, "learning_rate": 8.964440583548476e-05, "loss": 0.0002, "step": 1765 }, { "epoch": 0.23, "learning_rate": 8.963141279325776e-05, "loss": 17.5882, "step": 1766 }, { "epoch": 0.23, "learning_rate": 8.961841254788536e-05, "loss": 0.0001, "step": 1767 }, { "epoch": 0.23, "learning_rate": 8.960540510173043e-05, "loss": 19.3017, "step": 1768 }, { "epoch": 0.23, "learning_rate": 8.95923904571571e-05, "loss": 17.513, "step": 1769 }, { "epoch": 0.23, "learning_rate": 8.957936861653083e-05, "loss": 0.0001, "step": 1770 }, { "epoch": 0.23, "learning_rate": 8.95663395822184e-05, "loss": 16.6871, "step": 1771 }, { "epoch": 0.23, "learning_rate": 8.955330335658784e-05, "loss": 17.4546, "step": 1772 }, { "epoch": 0.23, "learning_rate": 8.954025994200859e-05, "loss": 17.181, "step": 1773 }, { "epoch": 0.23, "learning_rate": 8.952720934085128e-05, "loss": 18.5641, "step": 1774 }, { "epoch": 0.23, "learning_rate": 8.951415155548795e-05, "loss": 18.4083, "step": 1775 }, { "epoch": 0.23, "learning_rate": 8.950108658829184e-05, "loss": 16.8252, "step": 1776 }, { "epoch": 0.23, "learning_rate": 8.94880144416376e-05, "loss": 0.0, "step": 1777 }, { "epoch": 0.23, "learning_rate": 8.947493511790113e-05, "loss": 17.854, "step": 1778 }, { "epoch": 0.23, "learning_rate": 8.94618486194596e-05, "loss": 0.0001, "step": 1779 }, { "epoch": 0.23, "learning_rate": 8.94487549486916e-05, "loss": 17.0569, "step": 1780 }, { "epoch": 0.23, "learning_rate": 8.943565410797689e-05, "loss": 19.4192, "step": 1781 }, { "epoch": 0.23, "learning_rate": 8.942254609969662e-05, "loss": 0.0001, "step": 1782 }, { "epoch": 0.23, "learning_rate": 8.94094309262332e-05, "loss": 18.1179, "step": 1783 }, { "epoch": 0.23, "learning_rate": 8.939630858997037e-05, "loss": 18.5155, "step": 1784 }, { "epoch": 0.23, "learning_rate": 8.938317909329314e-05, "loss": 18.4716, "step": 1785 }, { "epoch": 0.24, "learning_rate": 8.937004243858786e-05, "loss": 0.0001, "step": 1786 }, { "epoch": 0.24, "learning_rate": 8.935689862824217e-05, "loss": 0.0, "step": 1787 }, { "epoch": 0.24, "learning_rate": 8.9343747664645e-05, "loss": 17.0321, "step": 1788 }, { "epoch": 0.24, "learning_rate": 8.933058955018656e-05, "loss": 20.0944, "step": 1789 }, { "epoch": 0.24, "learning_rate": 8.93174242872584e-05, "loss": 20.0753, "step": 1790 }, { "epoch": 0.24, "learning_rate": 8.930425187825335e-05, "loss": 19.7287, "step": 1791 }, { "epoch": 0.24, "learning_rate": 8.929107232556552e-05, "loss": 18.9771, "step": 1792 }, { "epoch": 0.24, "learning_rate": 8.927788563159037e-05, "loss": 17.9095, "step": 1793 }, { "epoch": 0.24, "learning_rate": 8.926469179872462e-05, "loss": 18.4858, "step": 1794 }, { "epoch": 0.24, "learning_rate": 8.925149082936628e-05, "loss": 18.6113, "step": 1795 }, { "epoch": 0.24, "learning_rate": 8.923828272591469e-05, "loss": 20.1953, "step": 1796 }, { "epoch": 0.24, "learning_rate": 8.922506749077046e-05, "loss": 18.5632, "step": 1797 }, { "epoch": 0.24, "learning_rate": 8.921184512633549e-05, "loss": 18.9534, "step": 1798 }, { "epoch": 0.24, "learning_rate": 8.919861563501302e-05, "loss": 0.0007, "step": 1799 }, { "epoch": 0.24, "learning_rate": 8.918537901920752e-05, "loss": 0.0003, "step": 1800 }, { "epoch": 0.24, "learning_rate": 8.917213528132482e-05, "loss": 15.9992, "step": 1801 }, { "epoch": 0.24, "learning_rate": 8.915888442377199e-05, "loss": 19.0949, "step": 1802 }, { "epoch": 0.24, "learning_rate": 8.914562644895745e-05, "loss": 16.6696, "step": 1803 }, { "epoch": 0.24, "learning_rate": 8.913236135929087e-05, "loss": 19.1952, "step": 1804 }, { "epoch": 0.24, "learning_rate": 8.91190891571832e-05, "loss": 17.1203, "step": 1805 }, { "epoch": 0.24, "learning_rate": 8.910580984504674e-05, "loss": 19.2454, "step": 1806 }, { "epoch": 0.24, "learning_rate": 8.909252342529504e-05, "loss": 17.8336, "step": 1807 }, { "epoch": 0.24, "learning_rate": 8.907922990034294e-05, "loss": 18.0104, "step": 1808 }, { "epoch": 0.24, "learning_rate": 8.906592927260662e-05, "loss": 0.0012, "step": 1809 }, { "epoch": 0.24, "learning_rate": 8.905262154450346e-05, "loss": 0.0002, "step": 1810 }, { "epoch": 0.24, "learning_rate": 8.903930671845224e-05, "loss": 0.0, "step": 1811 }, { "epoch": 0.24, "learning_rate": 8.902598479687294e-05, "loss": 18.4285, "step": 1812 }, { "epoch": 0.24, "learning_rate": 8.901265578218689e-05, "loss": 18.9575, "step": 1813 }, { "epoch": 0.24, "learning_rate": 8.899931967681665e-05, "loss": 17.627, "step": 1814 }, { "epoch": 0.24, "learning_rate": 8.898597648318614e-05, "loss": 18.2882, "step": 1815 }, { "epoch": 0.24, "learning_rate": 8.897262620372051e-05, "loss": 17.5898, "step": 1816 }, { "epoch": 0.24, "learning_rate": 8.895926884084622e-05, "loss": 18.276, "step": 1817 }, { "epoch": 0.24, "learning_rate": 8.894590439699104e-05, "loss": 16.5582, "step": 1818 }, { "epoch": 0.24, "learning_rate": 8.893253287458396e-05, "loss": 16.5387, "step": 1819 }, { "epoch": 0.24, "learning_rate": 8.891915427605533e-05, "loss": 19.0792, "step": 1820 }, { "epoch": 0.24, "learning_rate": 8.890576860383675e-05, "loss": 0.0, "step": 1821 }, { "epoch": 0.24, "learning_rate": 8.88923758603611e-05, "loss": 17.4448, "step": 1822 }, { "epoch": 0.24, "learning_rate": 8.887897604806259e-05, "loss": 18.033, "step": 1823 }, { "epoch": 0.24, "learning_rate": 8.886556916937667e-05, "loss": 17.7846, "step": 1824 }, { "epoch": 0.24, "learning_rate": 8.885215522674004e-05, "loss": 15.1908, "step": 1825 }, { "epoch": 0.24, "learning_rate": 8.88387342225908e-05, "loss": 17.7684, "step": 1826 }, { "epoch": 0.24, "learning_rate": 8.882530615936822e-05, "loss": 0.0001, "step": 1827 }, { "epoch": 0.24, "learning_rate": 8.881187103951291e-05, "loss": 18.5135, "step": 1828 }, { "epoch": 0.24, "learning_rate": 8.879842886546674e-05, "loss": 20.2604, "step": 1829 }, { "epoch": 0.24, "learning_rate": 8.87849796396729e-05, "loss": 19.2566, "step": 1830 }, { "epoch": 0.24, "learning_rate": 8.877152336457578e-05, "loss": 16.7456, "step": 1831 }, { "epoch": 0.24, "learning_rate": 8.875806004262115e-05, "loss": 16.991, "step": 1832 }, { "epoch": 0.24, "learning_rate": 8.8744589676256e-05, "loss": 15.5763, "step": 1833 }, { "epoch": 0.24, "learning_rate": 8.873111226792861e-05, "loss": 19.521, "step": 1834 }, { "epoch": 0.24, "learning_rate": 8.871762782008857e-05, "loss": 18.9966, "step": 1835 }, { "epoch": 0.24, "learning_rate": 8.87041363351867e-05, "loss": 19.2609, "step": 1836 }, { "epoch": 0.24, "learning_rate": 8.869063781567512e-05, "loss": 0.0001, "step": 1837 }, { "epoch": 0.24, "learning_rate": 8.867713226400725e-05, "loss": 18.1566, "step": 1838 }, { "epoch": 0.24, "learning_rate": 8.866361968263774e-05, "loss": 18.6255, "step": 1839 }, { "epoch": 0.24, "learning_rate": 8.86501000740226e-05, "loss": 17.6588, "step": 1840 }, { "epoch": 0.24, "learning_rate": 8.863657344061901e-05, "loss": 16.6631, "step": 1841 }, { "epoch": 0.24, "learning_rate": 8.862303978488553e-05, "loss": 17.3798, "step": 1842 }, { "epoch": 0.24, "learning_rate": 8.860949910928192e-05, "loss": 18.1933, "step": 1843 }, { "epoch": 0.24, "learning_rate": 8.859595141626924e-05, "loss": 0.0003, "step": 1844 }, { "epoch": 0.24, "learning_rate": 8.858239670830985e-05, "loss": 17.0222, "step": 1845 }, { "epoch": 0.24, "learning_rate": 8.856883498786734e-05, "loss": 16.177, "step": 1846 }, { "epoch": 0.24, "learning_rate": 8.855526625740663e-05, "loss": 0.0001, "step": 1847 }, { "epoch": 0.24, "learning_rate": 8.854169051939387e-05, "loss": 0.0005, "step": 1848 }, { "epoch": 0.24, "learning_rate": 8.852810777629649e-05, "loss": 18.6904, "step": 1849 }, { "epoch": 0.24, "learning_rate": 8.851451803058319e-05, "loss": 18.9462, "step": 1850 }, { "epoch": 0.24, "learning_rate": 8.850092128472399e-05, "loss": 18.4975, "step": 1851 }, { "epoch": 0.24, "learning_rate": 8.84873175411901e-05, "loss": 17.0295, "step": 1852 }, { "epoch": 0.24, "learning_rate": 8.847370680245411e-05, "loss": 0.0, "step": 1853 }, { "epoch": 0.24, "learning_rate": 8.846008907098974e-05, "loss": 0.0, "step": 1854 }, { "epoch": 0.24, "learning_rate": 8.84464643492721e-05, "loss": 0.0001, "step": 1855 }, { "epoch": 0.24, "learning_rate": 8.843283263977754e-05, "loss": 15.9185, "step": 1856 }, { "epoch": 0.24, "learning_rate": 8.841919394498366e-05, "loss": 17.2192, "step": 1857 }, { "epoch": 0.24, "learning_rate": 8.840554826736934e-05, "loss": 18.3809, "step": 1858 }, { "epoch": 0.24, "learning_rate": 8.839189560941472e-05, "loss": 17.014, "step": 1859 }, { "epoch": 0.24, "learning_rate": 8.837823597360121e-05, "loss": 0.0001, "step": 1860 }, { "epoch": 0.24, "learning_rate": 8.836456936241152e-05, "loss": 17.3006, "step": 1861 }, { "epoch": 0.25, "learning_rate": 8.83508957783296e-05, "loss": 17.4898, "step": 1862 }, { "epoch": 0.25, "learning_rate": 8.833721522384065e-05, "loss": 19.4364, "step": 1863 }, { "epoch": 0.25, "learning_rate": 8.832352770143118e-05, "loss": 17.1769, "step": 1864 }, { "epoch": 0.25, "learning_rate": 8.830983321358894e-05, "loss": 19.3046, "step": 1865 }, { "epoch": 0.25, "learning_rate": 8.829613176280294e-05, "loss": 0.0004, "step": 1866 }, { "epoch": 0.25, "learning_rate": 8.828242335156347e-05, "loss": 18.3363, "step": 1867 }, { "epoch": 0.25, "learning_rate": 8.826870798236208e-05, "loss": 17.8596, "step": 1868 }, { "epoch": 0.25, "learning_rate": 8.825498565769161e-05, "loss": 0.0003, "step": 1869 }, { "epoch": 0.25, "learning_rate": 8.82412563800461e-05, "loss": 17.5708, "step": 1870 }, { "epoch": 0.25, "learning_rate": 8.82275201519209e-05, "loss": 20.9665, "step": 1871 }, { "epoch": 0.25, "learning_rate": 8.821377697581266e-05, "loss": 17.5032, "step": 1872 }, { "epoch": 0.25, "learning_rate": 8.820002685421918e-05, "loss": 17.8929, "step": 1873 }, { "epoch": 0.25, "learning_rate": 8.818626978963967e-05, "loss": 18.7668, "step": 1874 }, { "epoch": 0.25, "learning_rate": 8.817250578457444e-05, "loss": 17.0091, "step": 1875 }, { "epoch": 0.25, "learning_rate": 8.815873484152521e-05, "loss": 17.1736, "step": 1876 }, { "epoch": 0.25, "learning_rate": 8.814495696299488e-05, "loss": 18.1159, "step": 1877 }, { "epoch": 0.25, "learning_rate": 8.81311721514876e-05, "loss": 17.1664, "step": 1878 }, { "epoch": 0.25, "learning_rate": 8.811738040950885e-05, "loss": 18.9847, "step": 1879 }, { "epoch": 0.25, "learning_rate": 8.810358173956529e-05, "loss": 17.5149, "step": 1880 }, { "epoch": 0.25, "learning_rate": 8.808977614416489e-05, "loss": 0.0, "step": 1881 }, { "epoch": 0.25, "learning_rate": 8.807596362581688e-05, "loss": 16.309, "step": 1882 }, { "epoch": 0.25, "learning_rate": 8.80621441870317e-05, "loss": 18.9476, "step": 1883 }, { "epoch": 0.25, "learning_rate": 8.80483178303211e-05, "loss": 16.7919, "step": 1884 }, { "epoch": 0.25, "learning_rate": 8.803448455819805e-05, "loss": 16.9303, "step": 1885 }, { "epoch": 0.25, "learning_rate": 8.802064437317685e-05, "loss": 17.8521, "step": 1886 }, { "epoch": 0.25, "learning_rate": 8.800679727777292e-05, "loss": 0.0002, "step": 1887 }, { "epoch": 0.25, "learning_rate": 8.799294327450308e-05, "loss": 0.0001, "step": 1888 }, { "epoch": 0.25, "learning_rate": 8.797908236588532e-05, "loss": 17.6047, "step": 1889 }, { "epoch": 0.25, "learning_rate": 8.79652145544389e-05, "loss": 17.9607, "step": 1890 }, { "epoch": 0.25, "learning_rate": 8.795133984268436e-05, "loss": 17.8229, "step": 1891 }, { "epoch": 0.25, "learning_rate": 8.793745823314346e-05, "loss": 0.0002, "step": 1892 }, { "epoch": 0.25, "learning_rate": 8.792356972833925e-05, "loss": 18.619, "step": 1893 }, { "epoch": 0.25, "learning_rate": 8.790967433079598e-05, "loss": 0.0001, "step": 1894 }, { "epoch": 0.25, "learning_rate": 8.789577204303922e-05, "loss": 18.4157, "step": 1895 }, { "epoch": 0.25, "learning_rate": 8.788186286759574e-05, "loss": 0.0, "step": 1896 }, { "epoch": 0.25, "learning_rate": 8.78679468069936e-05, "loss": 17.6249, "step": 1897 }, { "epoch": 0.25, "learning_rate": 8.785402386376207e-05, "loss": 17.8829, "step": 1898 }, { "epoch": 0.25, "learning_rate": 8.784009404043169e-05, "loss": 0.0, "step": 1899 }, { "epoch": 0.25, "learning_rate": 8.782615733953427e-05, "loss": 0.0001, "step": 1900 }, { "epoch": 0.25, "learning_rate": 8.781221376360285e-05, "loss": 17.9159, "step": 1901 }, { "epoch": 0.25, "learning_rate": 8.779826331517173e-05, "loss": 17.4471, "step": 1902 }, { "epoch": 0.25, "learning_rate": 8.778430599677643e-05, "loss": 0.0, "step": 1903 }, { "epoch": 0.25, "learning_rate": 8.777034181095376e-05, "loss": 0.0001, "step": 1904 }, { "epoch": 0.25, "learning_rate": 8.775637076024174e-05, "loss": 0.0, "step": 1905 }, { "epoch": 0.25, "learning_rate": 8.774239284717966e-05, "loss": 0.0001, "step": 1906 }, { "epoch": 0.25, "learning_rate": 8.77284080743081e-05, "loss": 0.0002, "step": 1907 }, { "epoch": 0.25, "learning_rate": 8.771441644416876e-05, "loss": 0.0001, "step": 1908 }, { "epoch": 0.25, "learning_rate": 8.770041795930473e-05, "loss": 19.6484, "step": 1909 }, { "epoch": 0.25, "learning_rate": 8.768641262226025e-05, "loss": 18.5183, "step": 1910 }, { "epoch": 0.25, "learning_rate": 8.767240043558088e-05, "loss": 17.3492, "step": 1911 }, { "epoch": 0.25, "learning_rate": 8.765838140181331e-05, "loss": 17.345, "step": 1912 }, { "epoch": 0.25, "learning_rate": 8.76443555235056e-05, "loss": 18.8431, "step": 1913 }, { "epoch": 0.25, "learning_rate": 8.763032280320702e-05, "loss": 17.9777, "step": 1914 }, { "epoch": 0.25, "learning_rate": 8.761628324346801e-05, "loss": 15.4429, "step": 1915 }, { "epoch": 0.25, "learning_rate": 8.760223684684035e-05, "loss": 16.568, "step": 1916 }, { "epoch": 0.25, "learning_rate": 8.7588183615877e-05, "loss": 18.3227, "step": 1917 }, { "epoch": 0.25, "learning_rate": 8.757412355313219e-05, "loss": 18.6461, "step": 1918 }, { "epoch": 0.25, "learning_rate": 8.756005666116135e-05, "loss": 17.0133, "step": 1919 }, { "epoch": 0.25, "learning_rate": 8.754598294252127e-05, "loss": 18.9245, "step": 1920 }, { "epoch": 0.25, "learning_rate": 8.753190239976983e-05, "loss": 17.0919, "step": 1921 }, { "epoch": 0.25, "learning_rate": 8.751781503546622e-05, "loss": 18.2372, "step": 1922 }, { "epoch": 0.25, "learning_rate": 8.750372085217089e-05, "loss": 0.0001, "step": 1923 }, { "epoch": 0.25, "learning_rate": 8.748961985244551e-05, "loss": 16.4266, "step": 1924 }, { "epoch": 0.25, "learning_rate": 8.747551203885295e-05, "loss": 0.0001, "step": 1925 }, { "epoch": 0.25, "learning_rate": 8.746139741395738e-05, "loss": 18.0865, "step": 1926 }, { "epoch": 0.25, "learning_rate": 8.744727598032417e-05, "loss": 17.4401, "step": 1927 }, { "epoch": 0.25, "learning_rate": 8.743314774051996e-05, "loss": 17.3302, "step": 1928 }, { "epoch": 0.25, "learning_rate": 8.741901269711258e-05, "loss": 18.2264, "step": 1929 }, { "epoch": 0.25, "learning_rate": 8.740487085267114e-05, "loss": 18.8639, "step": 1930 }, { "epoch": 0.25, "learning_rate": 8.739072220976596e-05, "loss": 17.2574, "step": 1931 }, { "epoch": 0.25, "learning_rate": 8.737656677096862e-05, "loss": 0.0001, "step": 1932 }, { "epoch": 0.25, "learning_rate": 8.736240453885189e-05, "loss": 0.0, "step": 1933 }, { "epoch": 0.25, "learning_rate": 8.734823551598983e-05, "loss": 18.7823, "step": 1934 }, { "epoch": 0.25, "learning_rate": 8.73340597049577e-05, "loss": 0.0002, "step": 1935 }, { "epoch": 0.25, "learning_rate": 8.731987710833198e-05, "loss": 17.1732, "step": 1936 }, { "epoch": 0.25, "learning_rate": 8.730568772869046e-05, "loss": 18.3154, "step": 1937 }, { "epoch": 0.26, "learning_rate": 8.729149156861206e-05, "loss": 0.0001, "step": 1938 }, { "epoch": 0.26, "learning_rate": 8.7277288630677e-05, "loss": 19.5667, "step": 1939 }, { "epoch": 0.26, "learning_rate": 8.726307891746671e-05, "loss": 0.0001, "step": 1940 }, { "epoch": 0.26, "learning_rate": 8.724886243156386e-05, "loss": 0.0003, "step": 1941 }, { "epoch": 0.26, "learning_rate": 8.723463917555233e-05, "loss": 17.5542, "step": 1942 }, { "epoch": 0.26, "learning_rate": 8.722040915201726e-05, "loss": 17.2007, "step": 1943 }, { "epoch": 0.26, "learning_rate": 8.7206172363545e-05, "loss": 0.0002, "step": 1944 }, { "epoch": 0.26, "learning_rate": 8.719192881272313e-05, "loss": 18.3915, "step": 1945 }, { "epoch": 0.26, "learning_rate": 8.71776785021405e-05, "loss": 18.1417, "step": 1946 }, { "epoch": 0.26, "learning_rate": 8.71634214343871e-05, "loss": 0.0001, "step": 1947 }, { "epoch": 0.26, "learning_rate": 8.714915761205422e-05, "loss": 16.7062, "step": 1948 }, { "epoch": 0.26, "learning_rate": 8.713488703773436e-05, "loss": 0.0, "step": 1949 }, { "epoch": 0.26, "learning_rate": 8.712060971402125e-05, "loss": 17.1954, "step": 1950 }, { "epoch": 0.26, "learning_rate": 8.710632564350984e-05, "loss": 16.4335, "step": 1951 }, { "epoch": 0.26, "learning_rate": 8.709203482879633e-05, "loss": 0.0001, "step": 1952 }, { "epoch": 0.26, "learning_rate": 8.707773727247809e-05, "loss": 18.0005, "step": 1953 }, { "epoch": 0.26, "learning_rate": 8.706343297715375e-05, "loss": 18.1037, "step": 1954 }, { "epoch": 0.26, "learning_rate": 8.70491219454232e-05, "loss": 18.6181, "step": 1955 }, { "epoch": 0.26, "learning_rate": 8.70348041798875e-05, "loss": 19.6279, "step": 1956 }, { "epoch": 0.26, "learning_rate": 8.702047968314895e-05, "loss": 16.461, "step": 1957 }, { "epoch": 0.26, "learning_rate": 8.700614845781108e-05, "loss": 17.8476, "step": 1958 }, { "epoch": 0.26, "learning_rate": 8.699181050647862e-05, "loss": 17.2161, "step": 1959 }, { "epoch": 0.26, "learning_rate": 8.697746583175757e-05, "loss": 16.7441, "step": 1960 }, { "epoch": 0.26, "learning_rate": 8.696311443625511e-05, "loss": 17.0123, "step": 1961 }, { "epoch": 0.26, "learning_rate": 8.694875632257966e-05, "loss": 0.0, "step": 1962 }, { "epoch": 0.26, "learning_rate": 8.693439149334086e-05, "loss": 18.8524, "step": 1963 }, { "epoch": 0.26, "learning_rate": 8.692001995114957e-05, "loss": 18.8965, "step": 1964 }, { "epoch": 0.26, "learning_rate": 8.690564169861785e-05, "loss": 17.6013, "step": 1965 }, { "epoch": 0.26, "learning_rate": 8.689125673835901e-05, "loss": 17.1105, "step": 1966 }, { "epoch": 0.26, "learning_rate": 8.687686507298755e-05, "loss": 0.0, "step": 1967 }, { "epoch": 0.26, "learning_rate": 8.686246670511922e-05, "loss": 17.6273, "step": 1968 }, { "epoch": 0.26, "learning_rate": 8.684806163737098e-05, "loss": 18.9228, "step": 1969 }, { "epoch": 0.26, "learning_rate": 8.683364987236098e-05, "loss": 0.0, "step": 1970 }, { "epoch": 0.26, "learning_rate": 8.681923141270863e-05, "loss": 18.7268, "step": 1971 }, { "epoch": 0.26, "learning_rate": 8.680480626103452e-05, "loss": 16.3121, "step": 1972 }, { "epoch": 0.26, "learning_rate": 8.679037441996046e-05, "loss": 16.7653, "step": 1973 }, { "epoch": 0.26, "learning_rate": 8.67759358921095e-05, "loss": 18.5983, "step": 1974 }, { "epoch": 0.26, "learning_rate": 8.67614906801059e-05, "loss": 17.4475, "step": 1975 }, { "epoch": 0.26, "learning_rate": 8.674703878657514e-05, "loss": 17.0181, "step": 1976 }, { "epoch": 0.26, "learning_rate": 8.673258021414386e-05, "loss": 20.285, "step": 1977 }, { "epoch": 0.26, "learning_rate": 8.671811496543996e-05, "loss": 16.5943, "step": 1978 }, { "epoch": 0.26, "learning_rate": 8.670364304309258e-05, "loss": 17.5276, "step": 1979 }, { "epoch": 0.26, "learning_rate": 8.668916444973203e-05, "loss": 15.9465, "step": 1980 }, { "epoch": 0.26, "learning_rate": 8.667467918798984e-05, "loss": 18.6577, "step": 1981 }, { "epoch": 0.26, "learning_rate": 8.666018726049875e-05, "loss": 0.0001, "step": 1982 }, { "epoch": 0.26, "learning_rate": 8.664568866989273e-05, "loss": 17.7911, "step": 1983 }, { "epoch": 0.26, "learning_rate": 8.663118341880692e-05, "loss": 17.2866, "step": 1984 }, { "epoch": 0.26, "learning_rate": 8.661667150987774e-05, "loss": 0.0, "step": 1985 }, { "epoch": 0.26, "learning_rate": 8.660215294574276e-05, "loss": 0.0001, "step": 1986 }, { "epoch": 0.26, "learning_rate": 8.658762772904079e-05, "loss": 17.7316, "step": 1987 }, { "epoch": 0.26, "learning_rate": 8.65730958624118e-05, "loss": 17.5205, "step": 1988 }, { "epoch": 0.26, "learning_rate": 8.655855734849705e-05, "loss": 0.0, "step": 1989 }, { "epoch": 0.26, "learning_rate": 8.654401218993895e-05, "loss": 18.1664, "step": 1990 }, { "epoch": 0.26, "learning_rate": 8.652946038938112e-05, "loss": 18.8755, "step": 1991 }, { "epoch": 0.26, "learning_rate": 8.651490194946843e-05, "loss": 16.0291, "step": 1992 }, { "epoch": 0.26, "learning_rate": 8.65003368728469e-05, "loss": 17.2775, "step": 1993 }, { "epoch": 0.26, "learning_rate": 8.648576516216378e-05, "loss": 20.232, "step": 1994 }, { "epoch": 0.26, "learning_rate": 8.647118682006756e-05, "loss": 17.8624, "step": 1995 }, { "epoch": 0.26, "learning_rate": 8.645660184920787e-05, "loss": 0.0, "step": 1996 }, { "epoch": 0.26, "learning_rate": 8.644201025223558e-05, "loss": 18.2952, "step": 1997 }, { "epoch": 0.26, "learning_rate": 8.642741203180278e-05, "loss": 18.5113, "step": 1998 }, { "epoch": 0.26, "learning_rate": 8.641280719056275e-05, "loss": 20.4075, "step": 1999 }, { "epoch": 0.26, "learning_rate": 8.639819573116997e-05, "loss": 16.6221, "step": 2000 }, { "epoch": 0.26, "learning_rate": 8.638357765628009e-05, "loss": 18.1892, "step": 2001 }, { "epoch": 0.26, "learning_rate": 8.636895296855002e-05, "loss": 16.8428, "step": 2002 }, { "epoch": 0.26, "learning_rate": 8.635432167063786e-05, "loss": 17.2952, "step": 2003 }, { "epoch": 0.26, "learning_rate": 8.633968376520287e-05, "loss": 18.5288, "step": 2004 }, { "epoch": 0.26, "learning_rate": 8.632503925490557e-05, "loss": 18.9406, "step": 2005 }, { "epoch": 0.26, "learning_rate": 8.631038814240761e-05, "loss": 18.7097, "step": 2006 }, { "epoch": 0.26, "learning_rate": 8.629573043037193e-05, "loss": 16.4435, "step": 2007 }, { "epoch": 0.26, "learning_rate": 8.628106612146259e-05, "loss": 0.0002, "step": 2008 }, { "epoch": 0.26, "learning_rate": 8.626639521834487e-05, "loss": 17.9622, "step": 2009 }, { "epoch": 0.26, "learning_rate": 8.625171772368529e-05, "loss": 17.852, "step": 2010 }, { "epoch": 0.26, "learning_rate": 8.62370336401515e-05, "loss": 17.9729, "step": 2011 }, { "epoch": 0.26, "learning_rate": 8.62223429704124e-05, "loss": 18.4163, "step": 2012 }, { "epoch": 0.26, "learning_rate": 8.620764571713807e-05, "loss": 18.0065, "step": 2013 }, { "epoch": 0.27, "learning_rate": 8.619294188299978e-05, "loss": 18.5385, "step": 2014 }, { "epoch": 0.27, "learning_rate": 8.617823147066998e-05, "loss": 18.1565, "step": 2015 }, { "epoch": 0.27, "learning_rate": 8.61635144828224e-05, "loss": 18.1166, "step": 2016 }, { "epoch": 0.27, "learning_rate": 8.614879092213187e-05, "loss": 16.8771, "step": 2017 }, { "epoch": 0.27, "learning_rate": 8.613406079127441e-05, "loss": 18.7215, "step": 2018 }, { "epoch": 0.27, "learning_rate": 8.611932409292733e-05, "loss": 18.2799, "step": 2019 }, { "epoch": 0.27, "learning_rate": 8.610458082976905e-05, "loss": 0.0, "step": 2020 }, { "epoch": 0.27, "learning_rate": 8.60898310044792e-05, "loss": 16.5532, "step": 2021 }, { "epoch": 0.27, "learning_rate": 8.607507461973863e-05, "loss": 0.0001, "step": 2022 }, { "epoch": 0.27, "learning_rate": 8.606031167822935e-05, "loss": 17.8356, "step": 2023 }, { "epoch": 0.27, "learning_rate": 8.604554218263458e-05, "loss": 17.7115, "step": 2024 }, { "epoch": 0.27, "learning_rate": 8.603076613563874e-05, "loss": 0.0, "step": 2025 }, { "epoch": 0.27, "learning_rate": 8.601598353992741e-05, "loss": 19.1945, "step": 2026 }, { "epoch": 0.27, "learning_rate": 8.600119439818737e-05, "loss": 18.4817, "step": 2027 }, { "epoch": 0.27, "learning_rate": 8.598639871310661e-05, "loss": 19.4083, "step": 2028 }, { "epoch": 0.27, "learning_rate": 8.597159648737431e-05, "loss": 17.4265, "step": 2029 }, { "epoch": 0.27, "learning_rate": 8.595678772368081e-05, "loss": 0.0, "step": 2030 }, { "epoch": 0.27, "learning_rate": 8.594197242471765e-05, "loss": 17.8836, "step": 2031 }, { "epoch": 0.27, "learning_rate": 8.592715059317756e-05, "loss": 0.0003, "step": 2032 }, { "epoch": 0.27, "learning_rate": 8.591232223175448e-05, "loss": 16.5761, "step": 2033 }, { "epoch": 0.27, "learning_rate": 8.589748734314348e-05, "loss": 16.9914, "step": 2034 }, { "epoch": 0.27, "learning_rate": 8.588264593004087e-05, "loss": 0.0, "step": 2035 }, { "epoch": 0.27, "learning_rate": 8.586779799514415e-05, "loss": 18.2506, "step": 2036 }, { "epoch": 0.27, "learning_rate": 8.585294354115197e-05, "loss": 16.2108, "step": 2037 }, { "epoch": 0.27, "learning_rate": 8.583808257076415e-05, "loss": 16.4459, "step": 2038 }, { "epoch": 0.27, "learning_rate": 8.582321508668175e-05, "loss": 0.0, "step": 2039 }, { "epoch": 0.27, "learning_rate": 8.580834109160697e-05, "loss": 16.6235, "step": 2040 }, { "epoch": 0.27, "learning_rate": 8.579346058824324e-05, "loss": 19.2919, "step": 2041 }, { "epoch": 0.27, "learning_rate": 8.577857357929513e-05, "loss": 17.3188, "step": 2042 }, { "epoch": 0.27, "learning_rate": 8.576368006746838e-05, "loss": 0.0, "step": 2043 }, { "epoch": 0.27, "learning_rate": 8.574878005546996e-05, "loss": 16.6831, "step": 2044 }, { "epoch": 0.27, "learning_rate": 8.573387354600799e-05, "loss": 17.6547, "step": 2045 }, { "epoch": 0.27, "learning_rate": 8.571896054179178e-05, "loss": 0.0, "step": 2046 }, { "epoch": 0.27, "learning_rate": 8.570404104553182e-05, "loss": 20.0488, "step": 2047 }, { "epoch": 0.27, "learning_rate": 8.568911505993979e-05, "loss": 17.688, "step": 2048 }, { "epoch": 0.27, "learning_rate": 8.567418258772853e-05, "loss": 17.8395, "step": 2049 }, { "epoch": 0.27, "learning_rate": 8.565924363161206e-05, "loss": 18.0358, "step": 2050 }, { "epoch": 0.27, "learning_rate": 8.56442981943056e-05, "loss": 16.4336, "step": 2051 }, { "epoch": 0.27, "learning_rate": 8.562934627852555e-05, "loss": 17.2619, "step": 2052 }, { "epoch": 0.27, "learning_rate": 8.561438788698943e-05, "loss": 19.1882, "step": 2053 }, { "epoch": 0.27, "learning_rate": 8.5599423022416e-05, "loss": 17.3927, "step": 2054 }, { "epoch": 0.27, "learning_rate": 8.558445168752518e-05, "loss": 18.0489, "step": 2055 }, { "epoch": 0.27, "learning_rate": 8.556947388503807e-05, "loss": 16.825, "step": 2056 }, { "epoch": 0.27, "learning_rate": 8.555448961767692e-05, "loss": 18.3751, "step": 2057 }, { "epoch": 0.27, "learning_rate": 8.553949888816518e-05, "loss": 16.5855, "step": 2058 }, { "epoch": 0.27, "learning_rate": 8.552450169922743e-05, "loss": 18.7806, "step": 2059 }, { "epoch": 0.27, "learning_rate": 8.550949805358952e-05, "loss": 0.0002, "step": 2060 }, { "epoch": 0.27, "learning_rate": 8.54944879539784e-05, "loss": 17.619, "step": 2061 }, { "epoch": 0.27, "learning_rate": 8.547947140312218e-05, "loss": 18.6761, "step": 2062 }, { "epoch": 0.27, "learning_rate": 8.546444840375019e-05, "loss": 0.0001, "step": 2063 }, { "epoch": 0.27, "learning_rate": 8.54494189585929e-05, "loss": 0.0, "step": 2064 }, { "epoch": 0.27, "learning_rate": 8.543438307038198e-05, "loss": 0.0, "step": 2065 }, { "epoch": 0.27, "learning_rate": 8.541934074185025e-05, "loss": 20.2573, "step": 2066 }, { "epoch": 0.27, "learning_rate": 8.54042919757317e-05, "loss": 19.6951, "step": 2067 }, { "epoch": 0.27, "learning_rate": 8.538923677476148e-05, "loss": 16.7191, "step": 2068 }, { "epoch": 0.27, "learning_rate": 8.537417514167594e-05, "loss": 18.7235, "step": 2069 }, { "epoch": 0.27, "learning_rate": 8.53591070792126e-05, "loss": 19.1373, "step": 2070 }, { "epoch": 0.27, "learning_rate": 8.53440325901101e-05, "loss": 18.691, "step": 2071 }, { "epoch": 0.27, "learning_rate": 8.532895167710829e-05, "loss": 16.63, "step": 2072 }, { "epoch": 0.27, "learning_rate": 8.53138643429482e-05, "loss": 15.9118, "step": 2073 }, { "epoch": 0.27, "learning_rate": 8.529877059037197e-05, "loss": 18.3157, "step": 2074 }, { "epoch": 0.27, "learning_rate": 8.528367042212296e-05, "loss": 18.6562, "step": 2075 }, { "epoch": 0.27, "learning_rate": 8.526856384094568e-05, "loss": 0.0, "step": 2076 }, { "epoch": 0.27, "learning_rate": 8.52534508495858e-05, "loss": 15.5564, "step": 2077 }, { "epoch": 0.27, "learning_rate": 8.523833145079017e-05, "loss": 0.0, "step": 2078 }, { "epoch": 0.27, "learning_rate": 8.522320564730677e-05, "loss": 19.1491, "step": 2079 }, { "epoch": 0.27, "learning_rate": 8.520807344188478e-05, "loss": 0.0, "step": 2080 }, { "epoch": 0.27, "learning_rate": 8.51929348372745e-05, "loss": 19.1054, "step": 2081 }, { "epoch": 0.27, "learning_rate": 8.517778983622748e-05, "loss": 17.9316, "step": 2082 }, { "epoch": 0.27, "learning_rate": 8.516263844149635e-05, "loss": 20.8389, "step": 2083 }, { "epoch": 0.27, "learning_rate": 8.514748065583489e-05, "loss": 17.705, "step": 2084 }, { "epoch": 0.27, "learning_rate": 8.513231648199816e-05, "loss": 17.9724, "step": 2085 }, { "epoch": 0.27, "learning_rate": 8.511714592274221e-05, "loss": 15.9375, "step": 2086 }, { "epoch": 0.27, "learning_rate": 8.510196898082441e-05, "loss": 18.0454, "step": 2087 }, { "epoch": 0.27, "learning_rate": 8.508678565900319e-05, "loss": 18.848, "step": 2088 }, { "epoch": 0.27, "learning_rate": 8.507159596003818e-05, "loss": 17.2981, "step": 2089 }, { "epoch": 0.28, "learning_rate": 8.505639988669016e-05, "loss": 17.1513, "step": 2090 }, { "epoch": 0.28, "learning_rate": 8.504119744172108e-05, "loss": 0.0002, "step": 2091 }, { "epoch": 0.28, "learning_rate": 8.502598862789401e-05, "loss": 15.3032, "step": 2092 }, { "epoch": 0.28, "learning_rate": 8.501077344797322e-05, "loss": 0.0004, "step": 2093 }, { "epoch": 0.28, "learning_rate": 8.49955519047241e-05, "loss": 17.7806, "step": 2094 }, { "epoch": 0.28, "learning_rate": 8.498032400091325e-05, "loss": 0.0001, "step": 2095 }, { "epoch": 0.28, "learning_rate": 8.496508973930839e-05, "loss": 16.7195, "step": 2096 }, { "epoch": 0.28, "learning_rate": 8.494984912267837e-05, "loss": 18.5492, "step": 2097 }, { "epoch": 0.28, "learning_rate": 8.493460215379326e-05, "loss": 18.4869, "step": 2098 }, { "epoch": 0.28, "learning_rate": 8.491934883542421e-05, "loss": 17.4072, "step": 2099 }, { "epoch": 0.28, "learning_rate": 8.490408917034362e-05, "loss": 18.4832, "step": 2100 }, { "epoch": 0.28, "learning_rate": 8.488882316132493e-05, "loss": 17.8075, "step": 2101 }, { "epoch": 0.28, "learning_rate": 8.487355081114282e-05, "loss": 0.0003, "step": 2102 }, { "epoch": 0.28, "learning_rate": 8.485827212257307e-05, "loss": 20.3693, "step": 2103 }, { "epoch": 0.28, "learning_rate": 8.484298709839267e-05, "loss": 18.3308, "step": 2104 }, { "epoch": 0.28, "learning_rate": 8.482769574137969e-05, "loss": 17.1791, "step": 2105 }, { "epoch": 0.28, "learning_rate": 8.48123980543134e-05, "loss": 18.0184, "step": 2106 }, { "epoch": 0.28, "learning_rate": 8.479709403997421e-05, "loss": 18.4828, "step": 2107 }, { "epoch": 0.28, "learning_rate": 8.478178370114367e-05, "loss": 18.3833, "step": 2108 }, { "epoch": 0.28, "learning_rate": 8.47664670406045e-05, "loss": 17.2698, "step": 2109 }, { "epoch": 0.28, "learning_rate": 8.475114406114053e-05, "loss": 16.4722, "step": 2110 }, { "epoch": 0.28, "learning_rate": 8.47358147655368e-05, "loss": 0.0001, "step": 2111 }, { "epoch": 0.28, "learning_rate": 8.472047915657943e-05, "loss": 16.6503, "step": 2112 }, { "epoch": 0.28, "learning_rate": 8.470513723705574e-05, "loss": 18.9057, "step": 2113 }, { "epoch": 0.28, "learning_rate": 8.468978900975417e-05, "loss": 0.0, "step": 2114 }, { "epoch": 0.28, "learning_rate": 8.467443447746431e-05, "loss": 19.6094, "step": 2115 }, { "epoch": 0.28, "learning_rate": 8.46590736429769e-05, "loss": 0.0, "step": 2116 }, { "epoch": 0.28, "learning_rate": 8.464370650908383e-05, "loss": 20.2802, "step": 2117 }, { "epoch": 0.28, "learning_rate": 8.462833307857812e-05, "loss": 17.7238, "step": 2118 }, { "epoch": 0.28, "learning_rate": 8.461295335425395e-05, "loss": 19.7508, "step": 2119 }, { "epoch": 0.28, "learning_rate": 8.459756733890665e-05, "loss": 16.8567, "step": 2120 }, { "epoch": 0.28, "learning_rate": 8.458217503533266e-05, "loss": 0.0001, "step": 2121 }, { "epoch": 0.28, "learning_rate": 8.456677644632957e-05, "loss": 20.6392, "step": 2122 }, { "epoch": 0.28, "learning_rate": 8.455137157469617e-05, "loss": 0.0001, "step": 2123 }, { "epoch": 0.28, "learning_rate": 8.453596042323233e-05, "loss": 18.6126, "step": 2124 }, { "epoch": 0.28, "learning_rate": 8.452054299473905e-05, "loss": 19.8608, "step": 2125 }, { "epoch": 0.28, "learning_rate": 8.450511929201857e-05, "loss": 16.6899, "step": 2126 }, { "epoch": 0.28, "learning_rate": 8.448968931787413e-05, "loss": 0.0001, "step": 2127 }, { "epoch": 0.28, "learning_rate": 8.447425307511022e-05, "loss": 18.9825, "step": 2128 }, { "epoch": 0.28, "learning_rate": 8.44588105665324e-05, "loss": 18.1343, "step": 2129 }, { "epoch": 0.28, "learning_rate": 8.444336179494743e-05, "loss": 0.0, "step": 2130 }, { "epoch": 0.28, "learning_rate": 8.442790676316317e-05, "loss": 0.0, "step": 2131 }, { "epoch": 0.28, "learning_rate": 8.441244547398861e-05, "loss": 17.509, "step": 2132 }, { "epoch": 0.28, "learning_rate": 8.43969779302339e-05, "loss": 18.2782, "step": 2133 }, { "epoch": 0.28, "learning_rate": 8.438150413471033e-05, "loss": 0.0, "step": 2134 }, { "epoch": 0.28, "learning_rate": 8.43660240902303e-05, "loss": 0.0, "step": 2135 }, { "epoch": 0.28, "learning_rate": 8.435053779960738e-05, "loss": 17.7305, "step": 2136 }, { "epoch": 0.28, "learning_rate": 8.433504526565623e-05, "loss": 0.0005, "step": 2137 }, { "epoch": 0.28, "learning_rate": 8.43195464911927e-05, "loss": 17.2926, "step": 2138 }, { "epoch": 0.28, "learning_rate": 8.430404147903372e-05, "loss": 14.9363, "step": 2139 }, { "epoch": 0.28, "learning_rate": 8.42885302319974e-05, "loss": 17.1528, "step": 2140 }, { "epoch": 0.28, "learning_rate": 8.427301275290295e-05, "loss": 17.1021, "step": 2141 }, { "epoch": 0.28, "learning_rate": 8.425748904457074e-05, "loss": 17.7405, "step": 2142 }, { "epoch": 0.28, "learning_rate": 8.424195910982222e-05, "loss": 18.3842, "step": 2143 }, { "epoch": 0.28, "learning_rate": 8.422642295148005e-05, "loss": 17.066, "step": 2144 }, { "epoch": 0.28, "learning_rate": 8.421088057236796e-05, "loss": 16.4973, "step": 2145 }, { "epoch": 0.28, "learning_rate": 8.419533197531084e-05, "loss": 18.896, "step": 2146 }, { "epoch": 0.28, "learning_rate": 8.41797771631347e-05, "loss": 19.0668, "step": 2147 }, { "epoch": 0.28, "learning_rate": 8.416421613866667e-05, "loss": 0.0, "step": 2148 }, { "epoch": 0.28, "learning_rate": 8.414864890473503e-05, "loss": 0.0003, "step": 2149 }, { "epoch": 0.28, "learning_rate": 8.413307546416918e-05, "loss": 0.0, "step": 2150 }, { "epoch": 0.28, "learning_rate": 8.411749581979964e-05, "loss": 15.9597, "step": 2151 }, { "epoch": 0.28, "learning_rate": 8.410190997445806e-05, "loss": 17.1965, "step": 2152 }, { "epoch": 0.28, "learning_rate": 8.408631793097723e-05, "loss": 18.6197, "step": 2153 }, { "epoch": 0.28, "learning_rate": 8.407071969219104e-05, "loss": 19.1951, "step": 2154 }, { "epoch": 0.28, "learning_rate": 8.405511526093455e-05, "loss": 18.2528, "step": 2155 }, { "epoch": 0.28, "learning_rate": 8.40395046400439e-05, "loss": 0.0001, "step": 2156 }, { "epoch": 0.28, "learning_rate": 8.402388783235639e-05, "loss": 18.1637, "step": 2157 }, { "epoch": 0.28, "learning_rate": 8.400826484071041e-05, "loss": 16.7962, "step": 2158 }, { "epoch": 0.28, "learning_rate": 8.399263566794549e-05, "loss": 17.3273, "step": 2159 }, { "epoch": 0.28, "learning_rate": 8.397700031690232e-05, "loss": 0.0001, "step": 2160 }, { "epoch": 0.28, "learning_rate": 8.396135879042264e-05, "loss": 17.3501, "step": 2161 }, { "epoch": 0.28, "learning_rate": 8.394571109134936e-05, "loss": 18.0169, "step": 2162 }, { "epoch": 0.28, "learning_rate": 8.393005722252653e-05, "loss": 0.0, "step": 2163 }, { "epoch": 0.28, "learning_rate": 8.391439718679926e-05, "loss": 0.0001, "step": 2164 }, { "epoch": 0.28, "learning_rate": 8.389873098701384e-05, "loss": 18.4377, "step": 2165 }, { "epoch": 0.29, "learning_rate": 8.388305862601762e-05, "loss": 0.0, "step": 2166 }, { "epoch": 0.29, "learning_rate": 8.386738010665915e-05, "loss": 0.0001, "step": 2167 }, { "epoch": 0.29, "learning_rate": 8.385169543178804e-05, "loss": 17.0993, "step": 2168 }, { "epoch": 0.29, "learning_rate": 8.383600460425502e-05, "loss": 18.8928, "step": 2169 }, { "epoch": 0.29, "learning_rate": 8.382030762691195e-05, "loss": 17.5283, "step": 2170 }, { "epoch": 0.29, "learning_rate": 8.380460450261184e-05, "loss": 18.8686, "step": 2171 }, { "epoch": 0.29, "learning_rate": 8.378889523420876e-05, "loss": 17.3878, "step": 2172 }, { "epoch": 0.29, "learning_rate": 8.377317982455793e-05, "loss": 17.8747, "step": 2173 }, { "epoch": 0.29, "learning_rate": 8.375745827651567e-05, "loss": 18.3737, "step": 2174 }, { "epoch": 0.29, "learning_rate": 8.374173059293946e-05, "loss": 16.7652, "step": 2175 }, { "epoch": 0.29, "learning_rate": 8.37259967766878e-05, "loss": 18.2455, "step": 2176 }, { "epoch": 0.29, "learning_rate": 8.371025683062042e-05, "loss": 18.7988, "step": 2177 }, { "epoch": 0.29, "learning_rate": 8.369451075759808e-05, "loss": 0.0002, "step": 2178 }, { "epoch": 0.29, "learning_rate": 8.36787585604827e-05, "loss": 17.9236, "step": 2179 }, { "epoch": 0.29, "learning_rate": 8.36630002421373e-05, "loss": 0.0001, "step": 2180 }, { "epoch": 0.29, "learning_rate": 8.364723580542599e-05, "loss": 19.5533, "step": 2181 }, { "epoch": 0.29, "learning_rate": 8.363146525321402e-05, "loss": 0.0003, "step": 2182 }, { "epoch": 0.29, "learning_rate": 8.361568858836773e-05, "loss": 0.0, "step": 2183 }, { "epoch": 0.29, "learning_rate": 8.359990581375458e-05, "loss": 17.4754, "step": 2184 }, { "epoch": 0.29, "learning_rate": 8.358411693224319e-05, "loss": 18.0722, "step": 2185 }, { "epoch": 0.29, "learning_rate": 8.35683219467032e-05, "loss": 18.3658, "step": 2186 }, { "epoch": 0.29, "learning_rate": 8.355252086000538e-05, "loss": 0.0027, "step": 2187 }, { "epoch": 0.29, "learning_rate": 8.35367136750217e-05, "loss": 0.0, "step": 2188 }, { "epoch": 0.29, "learning_rate": 8.352090039462512e-05, "loss": 18.5659, "step": 2189 }, { "epoch": 0.29, "learning_rate": 8.350508102168979e-05, "loss": 17.4618, "step": 2190 }, { "epoch": 0.29, "learning_rate": 8.34892555590909e-05, "loss": 16.4107, "step": 2191 }, { "epoch": 0.29, "learning_rate": 8.347342400970481e-05, "loss": 16.2735, "step": 2192 }, { "epoch": 0.29, "learning_rate": 8.345758637640892e-05, "loss": 19.0149, "step": 2193 }, { "epoch": 0.29, "learning_rate": 8.344174266208185e-05, "loss": 20.037, "step": 2194 }, { "epoch": 0.29, "learning_rate": 8.342589286960316e-05, "loss": 0.0001, "step": 2195 }, { "epoch": 0.29, "learning_rate": 8.341003700185366e-05, "loss": 0.0001, "step": 2196 }, { "epoch": 0.29, "learning_rate": 8.339417506171521e-05, "loss": 0.0, "step": 2197 }, { "epoch": 0.29, "learning_rate": 8.337830705207075e-05, "loss": 19.809, "step": 2198 }, { "epoch": 0.29, "learning_rate": 8.336243297580434e-05, "loss": 17.0879, "step": 2199 }, { "epoch": 0.29, "learning_rate": 8.334655283580117e-05, "loss": 17.9655, "step": 2200 }, { "epoch": 0.29, "learning_rate": 8.33306666349475e-05, "loss": 0.0006, "step": 2201 }, { "epoch": 0.29, "learning_rate": 8.33147743761307e-05, "loss": 17.7957, "step": 2202 }, { "epoch": 0.29, "learning_rate": 8.329887606223923e-05, "loss": 18.1803, "step": 2203 }, { "epoch": 0.29, "learning_rate": 8.328297169616271e-05, "loss": 16.1721, "step": 2204 }, { "epoch": 0.29, "learning_rate": 8.326706128079176e-05, "loss": 17.3926, "step": 2205 }, { "epoch": 0.29, "learning_rate": 8.325114481901817e-05, "loss": 0.0001, "step": 2206 }, { "epoch": 0.29, "learning_rate": 8.323522231373482e-05, "loss": 0.0005, "step": 2207 }, { "epoch": 0.29, "learning_rate": 8.321929376783568e-05, "loss": 17.2136, "step": 2208 }, { "epoch": 0.29, "learning_rate": 8.32033591842158e-05, "loss": 18.7741, "step": 2209 }, { "epoch": 0.29, "learning_rate": 8.318741856577137e-05, "loss": 17.5926, "step": 2210 }, { "epoch": 0.29, "learning_rate": 8.317147191539963e-05, "loss": 18.0176, "step": 2211 }, { "epoch": 0.29, "learning_rate": 8.315551923599896e-05, "loss": 0.0001, "step": 2212 }, { "epoch": 0.29, "learning_rate": 8.313956053046879e-05, "loss": 18.066, "step": 2213 }, { "epoch": 0.29, "learning_rate": 8.31235958017097e-05, "loss": 18.3254, "step": 2214 }, { "epoch": 0.29, "learning_rate": 8.310762505262331e-05, "loss": 16.8251, "step": 2215 }, { "epoch": 0.29, "learning_rate": 8.309164828611235e-05, "loss": 0.0, "step": 2216 }, { "epoch": 0.29, "learning_rate": 8.307566550508068e-05, "loss": 0.0003, "step": 2217 }, { "epoch": 0.29, "learning_rate": 8.305967671243322e-05, "loss": 16.5379, "step": 2218 }, { "epoch": 0.29, "learning_rate": 8.304368191107597e-05, "loss": 0.0, "step": 2219 }, { "epoch": 0.29, "learning_rate": 8.302768110391605e-05, "loss": 16.6446, "step": 2220 }, { "epoch": 0.29, "learning_rate": 8.301167429386167e-05, "loss": 17.449, "step": 2221 }, { "epoch": 0.29, "learning_rate": 8.29956614838221e-05, "loss": 16.888, "step": 2222 }, { "epoch": 0.29, "learning_rate": 8.297964267670776e-05, "loss": 18.963, "step": 2223 }, { "epoch": 0.29, "learning_rate": 8.296361787543007e-05, "loss": 0.0003, "step": 2224 }, { "epoch": 0.29, "learning_rate": 8.294758708290167e-05, "loss": 16.8631, "step": 2225 }, { "epoch": 0.29, "learning_rate": 8.293155030203613e-05, "loss": 17.9766, "step": 2226 }, { "epoch": 0.29, "learning_rate": 8.291550753574824e-05, "loss": 17.0685, "step": 2227 }, { "epoch": 0.29, "learning_rate": 8.28994587869538e-05, "loss": 17.6519, "step": 2228 }, { "epoch": 0.29, "learning_rate": 8.288340405856976e-05, "loss": 17.0847, "step": 2229 }, { "epoch": 0.29, "learning_rate": 8.28673433535141e-05, "loss": 17.1176, "step": 2230 }, { "epoch": 0.29, "learning_rate": 8.285127667470589e-05, "loss": 17.9832, "step": 2231 }, { "epoch": 0.29, "learning_rate": 8.283520402506533e-05, "loss": 15.8503, "step": 2232 }, { "epoch": 0.29, "learning_rate": 8.281912540751367e-05, "loss": 16.9512, "step": 2233 }, { "epoch": 0.29, "learning_rate": 8.280304082497327e-05, "loss": 17.6093, "step": 2234 }, { "epoch": 0.29, "learning_rate": 8.278695028036754e-05, "loss": 0.0, "step": 2235 }, { "epoch": 0.29, "learning_rate": 8.2770853776621e-05, "loss": 0.0003, "step": 2236 }, { "epoch": 0.29, "learning_rate": 8.275475131665922e-05, "loss": 16.0523, "step": 2237 }, { "epoch": 0.29, "learning_rate": 8.273864290340891e-05, "loss": 18.3358, "step": 2238 }, { "epoch": 0.29, "learning_rate": 8.272252853979781e-05, "loss": 17.4317, "step": 2239 }, { "epoch": 0.29, "learning_rate": 8.27064082287548e-05, "loss": 16.8429, "step": 2240 }, { "epoch": 0.29, "learning_rate": 8.269028197320975e-05, "loss": 0.0004, "step": 2241 }, { "epoch": 0.3, "learning_rate": 8.267414977609368e-05, "loss": 17.6375, "step": 2242 }, { "epoch": 0.3, "learning_rate": 8.265801164033868e-05, "loss": 0.0001, "step": 2243 }, { "epoch": 0.3, "learning_rate": 8.26418675688779e-05, "loss": 18.2865, "step": 2244 }, { "epoch": 0.3, "learning_rate": 8.262571756464559e-05, "loss": 17.7776, "step": 2245 }, { "epoch": 0.3, "learning_rate": 8.260956163057707e-05, "loss": 17.7345, "step": 2246 }, { "epoch": 0.3, "learning_rate": 8.259339976960873e-05, "loss": 19.6271, "step": 2247 }, { "epoch": 0.3, "learning_rate": 8.257723198467806e-05, "loss": 0.0001, "step": 2248 }, { "epoch": 0.3, "learning_rate": 8.256105827872357e-05, "loss": 16.3869, "step": 2249 }, { "epoch": 0.3, "learning_rate": 8.254487865468494e-05, "loss": 18.3732, "step": 2250 }, { "epoch": 0.3, "learning_rate": 8.252869311550282e-05, "loss": 16.6221, "step": 2251 }, { "epoch": 0.3, "learning_rate": 8.251250166411903e-05, "loss": 17.5625, "step": 2252 }, { "epoch": 0.3, "learning_rate": 8.249630430347641e-05, "loss": 15.4489, "step": 2253 }, { "epoch": 0.3, "learning_rate": 8.248010103651887e-05, "loss": 0.0, "step": 2254 }, { "epoch": 0.3, "learning_rate": 8.246389186619144e-05, "loss": 16.7001, "step": 2255 }, { "epoch": 0.3, "learning_rate": 8.244767679544014e-05, "loss": 0.0001, "step": 2256 }, { "epoch": 0.3, "learning_rate": 8.243145582721217e-05, "loss": 18.0925, "step": 2257 }, { "epoch": 0.3, "learning_rate": 8.241522896445572e-05, "loss": 18.8103, "step": 2258 }, { "epoch": 0.3, "learning_rate": 8.239899621012007e-05, "loss": 17.5948, "step": 2259 }, { "epoch": 0.3, "learning_rate": 8.238275756715561e-05, "loss": 0.0002, "step": 2260 }, { "epoch": 0.3, "learning_rate": 8.236651303851372e-05, "loss": 17.5543, "step": 2261 }, { "epoch": 0.3, "learning_rate": 8.235026262714695e-05, "loss": 0.0002, "step": 2262 }, { "epoch": 0.3, "learning_rate": 8.233400633600885e-05, "loss": 17.1718, "step": 2263 }, { "epoch": 0.3, "learning_rate": 8.231774416805406e-05, "loss": 18.1749, "step": 2264 }, { "epoch": 0.3, "learning_rate": 8.230147612623826e-05, "loss": 18.8071, "step": 2265 }, { "epoch": 0.3, "learning_rate": 8.228520221351826e-05, "loss": 17.9501, "step": 2266 }, { "epoch": 0.3, "learning_rate": 8.226892243285187e-05, "loss": 19.3103, "step": 2267 }, { "epoch": 0.3, "learning_rate": 8.225263678719802e-05, "loss": 19.5742, "step": 2268 }, { "epoch": 0.3, "learning_rate": 8.223634527951665e-05, "loss": 17.292, "step": 2269 }, { "epoch": 0.3, "learning_rate": 8.222004791276881e-05, "loss": 17.0691, "step": 2270 }, { "epoch": 0.3, "learning_rate": 8.220374468991662e-05, "loss": 17.4391, "step": 2271 }, { "epoch": 0.3, "learning_rate": 8.218743561392323e-05, "loss": 17.5011, "step": 2272 }, { "epoch": 0.3, "learning_rate": 8.21711206877529e-05, "loss": 17.5795, "step": 2273 }, { "epoch": 0.3, "learning_rate": 8.215479991437087e-05, "loss": 16.8782, "step": 2274 }, { "epoch": 0.3, "learning_rate": 8.213847329674353e-05, "loss": 18.4404, "step": 2275 }, { "epoch": 0.3, "learning_rate": 8.212214083783831e-05, "loss": 17.2147, "step": 2276 }, { "epoch": 0.3, "learning_rate": 8.210580254062368e-05, "loss": 17.9516, "step": 2277 }, { "epoch": 0.3, "learning_rate": 8.208945840806916e-05, "loss": 17.0761, "step": 2278 }, { "epoch": 0.3, "learning_rate": 8.207310844314537e-05, "loss": 0.0, "step": 2279 }, { "epoch": 0.3, "learning_rate": 8.205675264882399e-05, "loss": 0.0002, "step": 2280 }, { "epoch": 0.3, "learning_rate": 8.204039102807771e-05, "loss": 18.3901, "step": 2281 }, { "epoch": 0.3, "learning_rate": 8.202402358388033e-05, "loss": 18.3638, "step": 2282 }, { "epoch": 0.3, "learning_rate": 8.200765031920668e-05, "loss": 17.6101, "step": 2283 }, { "epoch": 0.3, "learning_rate": 8.199127123703265e-05, "loss": 0.0001, "step": 2284 }, { "epoch": 0.3, "learning_rate": 8.197488634033522e-05, "loss": 17.2179, "step": 2285 }, { "epoch": 0.3, "learning_rate": 8.195849563209238e-05, "loss": 17.487, "step": 2286 }, { "epoch": 0.3, "learning_rate": 8.194209911528321e-05, "loss": 17.01, "step": 2287 }, { "epoch": 0.3, "learning_rate": 8.192569679288782e-05, "loss": 18.0438, "step": 2288 }, { "epoch": 0.3, "learning_rate": 8.19092886678874e-05, "loss": 16.5858, "step": 2289 }, { "epoch": 0.3, "learning_rate": 8.189287474326417e-05, "loss": 18.0726, "step": 2290 }, { "epoch": 0.3, "learning_rate": 8.187645502200143e-05, "loss": 0.0001, "step": 2291 }, { "epoch": 0.3, "learning_rate": 8.186002950708351e-05, "loss": 17.1045, "step": 2292 }, { "epoch": 0.3, "learning_rate": 8.184359820149583e-05, "loss": 19.5679, "step": 2293 }, { "epoch": 0.3, "learning_rate": 8.18271611082248e-05, "loss": 18.9088, "step": 2294 }, { "epoch": 0.3, "learning_rate": 8.181071823025795e-05, "loss": 17.6339, "step": 2295 }, { "epoch": 0.3, "learning_rate": 8.17942695705838e-05, "loss": 0.0005, "step": 2296 }, { "epoch": 0.3, "learning_rate": 8.177781513219197e-05, "loss": 0.0001, "step": 2297 }, { "epoch": 0.3, "learning_rate": 8.176135491807311e-05, "loss": 16.706, "step": 2298 }, { "epoch": 0.3, "learning_rate": 8.17448889312189e-05, "loss": 17.1111, "step": 2299 }, { "epoch": 0.3, "learning_rate": 8.172841717462212e-05, "loss": 17.5214, "step": 2300 }, { "epoch": 0.3, "learning_rate": 8.171193965127655e-05, "loss": 17.947, "step": 2301 }, { "epoch": 0.3, "learning_rate": 8.169545636417704e-05, "loss": 0.0, "step": 2302 }, { "epoch": 0.3, "learning_rate": 8.167896731631948e-05, "loss": 18.3442, "step": 2303 }, { "epoch": 0.3, "learning_rate": 8.16624725107008e-05, "loss": 19.2519, "step": 2304 }, { "epoch": 0.3, "learning_rate": 8.164597195031904e-05, "loss": 18.0339, "step": 2305 }, { "epoch": 0.3, "learning_rate": 8.162946563817318e-05, "loss": 16.2386, "step": 2306 }, { "epoch": 0.3, "learning_rate": 8.161295357726331e-05, "loss": 0.0, "step": 2307 }, { "epoch": 0.3, "learning_rate": 8.159643577059055e-05, "loss": 17.2589, "step": 2308 }, { "epoch": 0.3, "learning_rate": 8.157991222115708e-05, "loss": 17.9783, "step": 2309 }, { "epoch": 0.3, "learning_rate": 8.156338293196614e-05, "loss": 18.1398, "step": 2310 }, { "epoch": 0.3, "learning_rate": 8.154684790602193e-05, "loss": 18.0701, "step": 2311 }, { "epoch": 0.3, "learning_rate": 8.153030714632976e-05, "loss": 16.9002, "step": 2312 }, { "epoch": 0.3, "learning_rate": 8.1513760655896e-05, "loss": 19.6007, "step": 2313 }, { "epoch": 0.3, "learning_rate": 8.149720843772801e-05, "loss": 18.16, "step": 2314 }, { "epoch": 0.3, "learning_rate": 8.148065049483423e-05, "loss": 0.0001, "step": 2315 }, { "epoch": 0.3, "learning_rate": 8.146408683022407e-05, "loss": 18.198, "step": 2316 }, { "epoch": 0.3, "learning_rate": 8.144751744690811e-05, "loss": 0.0002, "step": 2317 }, { "epoch": 0.31, "learning_rate": 8.143094234789783e-05, "loss": 17.3032, "step": 2318 }, { "epoch": 0.31, "learning_rate": 8.141436153620584e-05, "loss": 17.3947, "step": 2319 }, { "epoch": 0.31, "learning_rate": 8.139777501484575e-05, "loss": 18.2344, "step": 2320 }, { "epoch": 0.31, "learning_rate": 8.138118278683225e-05, "loss": 18.0742, "step": 2321 }, { "epoch": 0.31, "learning_rate": 8.136458485518097e-05, "loss": 19.9073, "step": 2322 }, { "epoch": 0.31, "learning_rate": 8.13479812229087e-05, "loss": 0.0, "step": 2323 }, { "epoch": 0.31, "learning_rate": 8.133137189303317e-05, "loss": 0.0001, "step": 2324 }, { "epoch": 0.31, "learning_rate": 8.13147568685732e-05, "loss": 20.2859, "step": 2325 }, { "epoch": 0.31, "learning_rate": 8.129813615254863e-05, "loss": 18.3246, "step": 2326 }, { "epoch": 0.31, "learning_rate": 8.128150974798033e-05, "loss": 17.0045, "step": 2327 }, { "epoch": 0.31, "learning_rate": 8.12648776578902e-05, "loss": 16.6533, "step": 2328 }, { "epoch": 0.31, "learning_rate": 8.124823988530116e-05, "loss": 0.0001, "step": 2329 }, { "epoch": 0.31, "learning_rate": 8.123159643323724e-05, "loss": 16.9615, "step": 2330 }, { "epoch": 0.31, "learning_rate": 8.12149473047234e-05, "loss": 18.1114, "step": 2331 }, { "epoch": 0.31, "learning_rate": 8.119829250278568e-05, "loss": 17.2383, "step": 2332 }, { "epoch": 0.31, "learning_rate": 8.118163203045115e-05, "loss": 0.0, "step": 2333 }, { "epoch": 0.31, "learning_rate": 8.116496589074793e-05, "loss": 17.4777, "step": 2334 }, { "epoch": 0.31, "learning_rate": 8.11482940867051e-05, "loss": 17.5075, "step": 2335 }, { "epoch": 0.31, "learning_rate": 8.113161662135287e-05, "loss": 17.5204, "step": 2336 }, { "epoch": 0.31, "learning_rate": 8.11149334977224e-05, "loss": 0.0001, "step": 2337 }, { "epoch": 0.31, "learning_rate": 8.109824471884591e-05, "loss": 0.0, "step": 2338 }, { "epoch": 0.31, "learning_rate": 8.108155028775664e-05, "loss": 19.3803, "step": 2339 }, { "epoch": 0.31, "learning_rate": 8.106485020748885e-05, "loss": 19.273, "step": 2340 }, { "epoch": 0.31, "learning_rate": 8.104814448107786e-05, "loss": 19.0781, "step": 2341 }, { "epoch": 0.31, "learning_rate": 8.103143311155998e-05, "loss": 18.4669, "step": 2342 }, { "epoch": 0.31, "learning_rate": 8.101471610197255e-05, "loss": 0.0, "step": 2343 }, { "epoch": 0.31, "learning_rate": 8.099799345535398e-05, "loss": 0.0001, "step": 2344 }, { "epoch": 0.31, "learning_rate": 8.098126517474363e-05, "loss": 0.0, "step": 2345 }, { "epoch": 0.31, "learning_rate": 8.096453126318193e-05, "loss": 18.406, "step": 2346 }, { "epoch": 0.31, "learning_rate": 8.094779172371034e-05, "loss": 15.4046, "step": 2347 }, { "epoch": 0.31, "learning_rate": 8.093104655937131e-05, "loss": 16.8321, "step": 2348 }, { "epoch": 0.31, "learning_rate": 8.091429577320835e-05, "loss": 0.0001, "step": 2349 }, { "epoch": 0.31, "learning_rate": 8.089753936826596e-05, "loss": 17.5248, "step": 2350 }, { "epoch": 0.31, "learning_rate": 8.088077734758968e-05, "loss": 16.3661, "step": 2351 }, { "epoch": 0.31, "learning_rate": 8.086400971422608e-05, "loss": 17.8633, "step": 2352 }, { "epoch": 0.31, "learning_rate": 8.08472364712227e-05, "loss": 0.0006, "step": 2353 }, { "epoch": 0.31, "learning_rate": 8.083045762162814e-05, "loss": 18.5462, "step": 2354 }, { "epoch": 0.31, "learning_rate": 8.081367316849205e-05, "loss": 18.7215, "step": 2355 }, { "epoch": 0.31, "learning_rate": 8.079688311486501e-05, "loss": 17.8045, "step": 2356 }, { "epoch": 0.31, "learning_rate": 8.078008746379873e-05, "loss": 16.9264, "step": 2357 }, { "epoch": 0.31, "learning_rate": 8.076328621834583e-05, "loss": 18.4811, "step": 2358 }, { "epoch": 0.31, "learning_rate": 8.074647938156002e-05, "loss": 0.0001, "step": 2359 }, { "epoch": 0.31, "learning_rate": 8.072966695649598e-05, "loss": 17.5187, "step": 2360 }, { "epoch": 0.31, "learning_rate": 8.071284894620943e-05, "loss": 19.0278, "step": 2361 }, { "epoch": 0.31, "learning_rate": 8.069602535375711e-05, "loss": 0.0001, "step": 2362 }, { "epoch": 0.31, "learning_rate": 8.067919618219677e-05, "loss": 17.4768, "step": 2363 }, { "epoch": 0.31, "learning_rate": 8.066236143458717e-05, "loss": 0.0001, "step": 2364 }, { "epoch": 0.31, "learning_rate": 8.064552111398806e-05, "loss": 19.0005, "step": 2365 }, { "epoch": 0.31, "learning_rate": 8.062867522346025e-05, "loss": 17.7517, "step": 2366 }, { "epoch": 0.31, "learning_rate": 8.061182376606552e-05, "loss": 0.0006, "step": 2367 }, { "epoch": 0.31, "learning_rate": 8.05949667448667e-05, "loss": 17.7742, "step": 2368 }, { "epoch": 0.31, "learning_rate": 8.05781041629276e-05, "loss": 16.1591, "step": 2369 }, { "epoch": 0.31, "learning_rate": 8.056123602331305e-05, "loss": 17.3184, "step": 2370 }, { "epoch": 0.31, "learning_rate": 8.054436232908891e-05, "loss": 17.1623, "step": 2371 }, { "epoch": 0.31, "learning_rate": 8.0527483083322e-05, "loss": 0.0, "step": 2372 }, { "epoch": 0.31, "learning_rate": 8.051059828908021e-05, "loss": 15.7879, "step": 2373 }, { "epoch": 0.31, "learning_rate": 8.049370794943238e-05, "loss": 17.2295, "step": 2374 }, { "epoch": 0.31, "learning_rate": 8.047681206744844e-05, "loss": 17.365, "step": 2375 }, { "epoch": 0.31, "learning_rate": 8.045991064619922e-05, "loss": 0.0001, "step": 2376 }, { "epoch": 0.31, "learning_rate": 8.044300368875663e-05, "loss": 0.0005, "step": 2377 }, { "epoch": 0.31, "learning_rate": 8.042609119819357e-05, "loss": 19.3709, "step": 2378 }, { "epoch": 0.31, "learning_rate": 8.040917317758395e-05, "loss": 19.4806, "step": 2379 }, { "epoch": 0.31, "learning_rate": 8.039224963000268e-05, "loss": 18.2482, "step": 2380 }, { "epoch": 0.31, "learning_rate": 8.037532055852564e-05, "loss": 19.1512, "step": 2381 }, { "epoch": 0.31, "learning_rate": 8.035838596622978e-05, "loss": 16.3513, "step": 2382 }, { "epoch": 0.31, "learning_rate": 8.034144585619301e-05, "loss": 0.0001, "step": 2383 }, { "epoch": 0.31, "learning_rate": 8.032450023149425e-05, "loss": 19.851, "step": 2384 }, { "epoch": 0.31, "learning_rate": 8.030754909521343e-05, "loss": 0.0, "step": 2385 }, { "epoch": 0.31, "learning_rate": 8.029059245043149e-05, "loss": 16.5044, "step": 2386 }, { "epoch": 0.31, "learning_rate": 8.027363030023033e-05, "loss": 17.5279, "step": 2387 }, { "epoch": 0.31, "learning_rate": 8.025666264769289e-05, "loss": 0.0001, "step": 2388 }, { "epoch": 0.31, "learning_rate": 8.023968949590312e-05, "loss": 0.0001, "step": 2389 }, { "epoch": 0.31, "learning_rate": 8.022271084794593e-05, "loss": 0.0, "step": 2390 }, { "epoch": 0.31, "learning_rate": 8.020572670690725e-05, "loss": 0.0001, "step": 2391 }, { "epoch": 0.31, "learning_rate": 8.0188737075874e-05, "loss": 0.0001, "step": 2392 }, { "epoch": 0.31, "learning_rate": 8.017174195793411e-05, "loss": 18.0052, "step": 2393 }, { "epoch": 0.32, "learning_rate": 8.015474135617649e-05, "loss": 17.8817, "step": 2394 }, { "epoch": 0.32, "learning_rate": 8.013773527369109e-05, "loss": 19.6747, "step": 2395 }, { "epoch": 0.32, "learning_rate": 8.012072371356878e-05, "loss": 0.0001, "step": 2396 }, { "epoch": 0.32, "learning_rate": 8.01037066789015e-05, "loss": 16.8548, "step": 2397 }, { "epoch": 0.32, "learning_rate": 8.008668417278215e-05, "loss": 18.6418, "step": 2398 }, { "epoch": 0.32, "learning_rate": 8.006965619830462e-05, "loss": 18.6382, "step": 2399 }, { "epoch": 0.32, "learning_rate": 8.005262275856382e-05, "loss": 16.9006, "step": 2400 }, { "epoch": 0.32, "learning_rate": 8.003558385665561e-05, "loss": 17.9567, "step": 2401 }, { "epoch": 0.32, "learning_rate": 8.00185394956769e-05, "loss": 0.0001, "step": 2402 }, { "epoch": 0.32, "learning_rate": 8.000148967872551e-05, "loss": 0.0001, "step": 2403 }, { "epoch": 0.32, "learning_rate": 7.998443440890037e-05, "loss": 0.0, "step": 2404 }, { "epoch": 0.32, "learning_rate": 7.996737368930128e-05, "loss": 0.0004, "step": 2405 }, { "epoch": 0.32, "learning_rate": 7.995030752302913e-05, "loss": 0.0001, "step": 2406 }, { "epoch": 0.32, "learning_rate": 7.99332359131857e-05, "loss": 19.2503, "step": 2407 }, { "epoch": 0.32, "learning_rate": 7.991615886287386e-05, "loss": 16.5151, "step": 2408 }, { "epoch": 0.32, "learning_rate": 7.98990763751974e-05, "loss": 17.671, "step": 2409 }, { "epoch": 0.32, "learning_rate": 7.988198845326114e-05, "loss": 18.0507, "step": 2410 }, { "epoch": 0.32, "learning_rate": 7.986489510017083e-05, "loss": 19.2665, "step": 2411 }, { "epoch": 0.32, "learning_rate": 7.984779631903328e-05, "loss": 0.0, "step": 2412 }, { "epoch": 0.32, "learning_rate": 7.983069211295625e-05, "loss": 0.0001, "step": 2413 }, { "epoch": 0.32, "learning_rate": 7.981358248504849e-05, "loss": 0.0, "step": 2414 }, { "epoch": 0.32, "learning_rate": 7.979646743841971e-05, "loss": 0.0013, "step": 2415 }, { "epoch": 0.32, "learning_rate": 7.977934697618064e-05, "loss": 18.0672, "step": 2416 }, { "epoch": 0.32, "learning_rate": 7.9762221101443e-05, "loss": 18.2175, "step": 2417 }, { "epoch": 0.32, "learning_rate": 7.974508981731945e-05, "loss": 18.0538, "step": 2418 }, { "epoch": 0.32, "learning_rate": 7.972795312692368e-05, "loss": 0.0006, "step": 2419 }, { "epoch": 0.32, "learning_rate": 7.971081103337033e-05, "loss": 17.7904, "step": 2420 }, { "epoch": 0.32, "learning_rate": 7.969366353977503e-05, "loss": 15.6615, "step": 2421 }, { "epoch": 0.32, "learning_rate": 7.967651064925441e-05, "loss": 17.7797, "step": 2422 }, { "epoch": 0.32, "learning_rate": 7.965935236492607e-05, "loss": 0.0004, "step": 2423 }, { "epoch": 0.32, "learning_rate": 7.964218868990857e-05, "loss": 17.8207, "step": 2424 }, { "epoch": 0.32, "learning_rate": 7.962501962732148e-05, "loss": 17.6929, "step": 2425 }, { "epoch": 0.32, "learning_rate": 7.960784518028534e-05, "loss": 18.2066, "step": 2426 }, { "epoch": 0.32, "learning_rate": 7.959066535192165e-05, "loss": 18.1192, "step": 2427 }, { "epoch": 0.32, "learning_rate": 7.95734801453529e-05, "loss": 0.0005, "step": 2428 }, { "epoch": 0.32, "learning_rate": 7.955628956370257e-05, "loss": 19.3472, "step": 2429 }, { "epoch": 0.32, "learning_rate": 7.953909361009511e-05, "loss": 0.0, "step": 2430 }, { "epoch": 0.32, "learning_rate": 7.952189228765595e-05, "loss": 17.1664, "step": 2431 }, { "epoch": 0.32, "learning_rate": 7.950468559951148e-05, "loss": 17.0534, "step": 2432 }, { "epoch": 0.32, "learning_rate": 7.948747354878906e-05, "loss": 16.6341, "step": 2433 }, { "epoch": 0.32, "learning_rate": 7.947025613861705e-05, "loss": 0.0001, "step": 2434 }, { "epoch": 0.32, "learning_rate": 7.945303337212479e-05, "loss": 18.0049, "step": 2435 }, { "epoch": 0.32, "learning_rate": 7.943580525244257e-05, "loss": 18.5354, "step": 2436 }, { "epoch": 0.32, "learning_rate": 7.941857178270165e-05, "loss": 0.0001, "step": 2437 }, { "epoch": 0.32, "learning_rate": 7.940133296603427e-05, "loss": 16.9234, "step": 2438 }, { "epoch": 0.32, "learning_rate": 7.938408880557364e-05, "loss": 0.0001, "step": 2439 }, { "epoch": 0.32, "learning_rate": 7.936683930445398e-05, "loss": 18.5247, "step": 2440 }, { "epoch": 0.32, "learning_rate": 7.93495844658104e-05, "loss": 0.0, "step": 2441 }, { "epoch": 0.32, "learning_rate": 7.933232429277906e-05, "loss": 0.0001, "step": 2442 }, { "epoch": 0.32, "learning_rate": 7.931505878849706e-05, "loss": 17.7791, "step": 2443 }, { "epoch": 0.32, "learning_rate": 7.929778795610241e-05, "loss": 17.7771, "step": 2444 }, { "epoch": 0.32, "learning_rate": 7.928051179873421e-05, "loss": 16.962, "step": 2445 }, { "epoch": 0.32, "learning_rate": 7.926323031953244e-05, "loss": 17.585, "step": 2446 }, { "epoch": 0.32, "learning_rate": 7.924594352163805e-05, "loss": 0.0001, "step": 2447 }, { "epoch": 0.32, "learning_rate": 7.922865140819298e-05, "loss": 0.0004, "step": 2448 }, { "epoch": 0.32, "learning_rate": 7.921135398234013e-05, "loss": 0.0014, "step": 2449 }, { "epoch": 0.32, "learning_rate": 7.919405124722337e-05, "loss": 18.4164, "step": 2450 }, { "epoch": 0.32, "learning_rate": 7.917674320598756e-05, "loss": 18.132, "step": 2451 }, { "epoch": 0.32, "learning_rate": 7.915942986177845e-05, "loss": 0.0001, "step": 2452 }, { "epoch": 0.32, "learning_rate": 7.914211121774283e-05, "loss": 17.5439, "step": 2453 }, { "epoch": 0.32, "learning_rate": 7.912478727702841e-05, "loss": 0.0002, "step": 2454 }, { "epoch": 0.32, "learning_rate": 7.910745804278388e-05, "loss": 18.6332, "step": 2455 }, { "epoch": 0.32, "learning_rate": 7.90901235181589e-05, "loss": 16.149, "step": 2456 }, { "epoch": 0.32, "learning_rate": 7.907278370630404e-05, "loss": 17.4596, "step": 2457 }, { "epoch": 0.32, "learning_rate": 7.90554386103709e-05, "loss": 17.4926, "step": 2458 }, { "epoch": 0.32, "learning_rate": 7.903808823351202e-05, "loss": 18.3562, "step": 2459 }, { "epoch": 0.32, "learning_rate": 7.902073257888087e-05, "loss": 0.0007, "step": 2460 }, { "epoch": 0.32, "learning_rate": 7.900337164963192e-05, "loss": 18.3151, "step": 2461 }, { "epoch": 0.32, "learning_rate": 7.898600544892056e-05, "loss": 0.0, "step": 2462 }, { "epoch": 0.32, "learning_rate": 7.896863397990317e-05, "loss": 0.0, "step": 2463 }, { "epoch": 0.32, "learning_rate": 7.895125724573705e-05, "loss": 17.4628, "step": 2464 }, { "epoch": 0.32, "learning_rate": 7.893387524958052e-05, "loss": 17.8158, "step": 2465 }, { "epoch": 0.32, "learning_rate": 7.891648799459278e-05, "loss": 16.4245, "step": 2466 }, { "epoch": 0.32, "learning_rate": 7.889909548393404e-05, "loss": 15.6572, "step": 2467 }, { "epoch": 0.32, "learning_rate": 7.888169772076546e-05, "loss": 17.0135, "step": 2468 }, { "epoch": 0.32, "learning_rate": 7.886429470824912e-05, "loss": 17.7, "step": 2469 }, { "epoch": 0.33, "learning_rate": 7.88468864495481e-05, "loss": 0.0001, "step": 2470 }, { "epoch": 0.33, "learning_rate": 7.882947294782638e-05, "loss": 17.4925, "step": 2471 }, { "epoch": 0.33, "learning_rate": 7.881205420624895e-05, "loss": 17.8347, "step": 2472 }, { "epoch": 0.33, "learning_rate": 7.879463022798172e-05, "loss": 15.7841, "step": 2473 }, { "epoch": 0.33, "learning_rate": 7.877720101619155e-05, "loss": 16.8473, "step": 2474 }, { "epoch": 0.33, "learning_rate": 7.875976657404627e-05, "loss": 0.0002, "step": 2475 }, { "epoch": 0.33, "learning_rate": 7.874232690471466e-05, "loss": 15.8325, "step": 2476 }, { "epoch": 0.33, "learning_rate": 7.872488201136641e-05, "loss": 0.0001, "step": 2477 }, { "epoch": 0.33, "learning_rate": 7.87074318971722e-05, "loss": 18.3424, "step": 2478 }, { "epoch": 0.33, "learning_rate": 7.868997656530367e-05, "loss": 16.6568, "step": 2479 }, { "epoch": 0.33, "learning_rate": 7.867251601893336e-05, "loss": 16.468, "step": 2480 }, { "epoch": 0.33, "learning_rate": 7.865505026123479e-05, "loss": 16.4798, "step": 2481 }, { "epoch": 0.33, "learning_rate": 7.863757929538244e-05, "loss": 0.0, "step": 2482 }, { "epoch": 0.33, "learning_rate": 7.862010312455168e-05, "loss": 0.0001, "step": 2483 }, { "epoch": 0.33, "learning_rate": 7.86026217519189e-05, "loss": 17.5605, "step": 2484 }, { "epoch": 0.33, "learning_rate": 7.858513518066139e-05, "loss": 17.4041, "step": 2485 }, { "epoch": 0.33, "learning_rate": 7.856764341395739e-05, "loss": 18.681, "step": 2486 }, { "epoch": 0.33, "learning_rate": 7.855014645498609e-05, "loss": 17.7959, "step": 2487 }, { "epoch": 0.33, "learning_rate": 7.853264430692762e-05, "loss": 18.5895, "step": 2488 }, { "epoch": 0.33, "learning_rate": 7.851513697296304e-05, "loss": 0.0, "step": 2489 }, { "epoch": 0.33, "learning_rate": 7.849762445627441e-05, "loss": 17.6749, "step": 2490 }, { "epoch": 0.33, "learning_rate": 7.848010676004465e-05, "loss": 15.7842, "step": 2491 }, { "epoch": 0.33, "learning_rate": 7.846258388745768e-05, "loss": 18.1254, "step": 2492 }, { "epoch": 0.33, "learning_rate": 7.844505584169834e-05, "loss": 18.16, "step": 2493 }, { "epoch": 0.33, "learning_rate": 7.842752262595243e-05, "loss": 18.3556, "step": 2494 }, { "epoch": 0.33, "learning_rate": 7.840998424340664e-05, "loss": 19.4546, "step": 2495 }, { "epoch": 0.33, "learning_rate": 7.839244069724865e-05, "loss": 0.0001, "step": 2496 }, { "epoch": 0.33, "learning_rate": 7.837489199066706e-05, "loss": 17.6206, "step": 2497 }, { "epoch": 0.33, "learning_rate": 7.835733812685137e-05, "loss": 0.0, "step": 2498 }, { "epoch": 0.33, "learning_rate": 7.833977910899213e-05, "loss": 18.2276, "step": 2499 }, { "epoch": 0.33, "learning_rate": 7.832221494028069e-05, "loss": 19.4281, "step": 2500 }, { "epoch": 0.33, "learning_rate": 7.830464562390941e-05, "loss": 0.0, "step": 2501 }, { "epoch": 0.33, "learning_rate": 7.828707116307159e-05, "loss": 16.5922, "step": 2502 }, { "epoch": 0.33, "learning_rate": 7.826949156096144e-05, "loss": 18.4868, "step": 2503 }, { "epoch": 0.33, "learning_rate": 7.825190682077412e-05, "loss": 17.1739, "step": 2504 }, { "epoch": 0.33, "learning_rate": 7.82343169457057e-05, "loss": 18.9313, "step": 2505 }, { "epoch": 0.33, "learning_rate": 7.821672193895321e-05, "loss": 18.008, "step": 2506 }, { "epoch": 0.33, "learning_rate": 7.81991218037146e-05, "loss": 0.0, "step": 2507 }, { "epoch": 0.33, "learning_rate": 7.818151654318877e-05, "loss": 15.1142, "step": 2508 }, { "epoch": 0.33, "learning_rate": 7.816390616057552e-05, "loss": 17.2779, "step": 2509 }, { "epoch": 0.33, "learning_rate": 7.814629065907559e-05, "loss": 0.0003, "step": 2510 }, { "epoch": 0.33, "learning_rate": 7.812867004189069e-05, "loss": 17.3744, "step": 2511 }, { "epoch": 0.33, "learning_rate": 7.811104431222339e-05, "loss": 18.8871, "step": 2512 }, { "epoch": 0.33, "learning_rate": 7.809341347327725e-05, "loss": 17.9807, "step": 2513 }, { "epoch": 0.33, "learning_rate": 7.807577752825675e-05, "loss": 18.8514, "step": 2514 }, { "epoch": 0.33, "learning_rate": 7.805813648036724e-05, "loss": 17.1706, "step": 2515 }, { "epoch": 0.33, "learning_rate": 7.804049033281507e-05, "loss": 18.185, "step": 2516 }, { "epoch": 0.33, "learning_rate": 7.802283908880749e-05, "loss": 17.254, "step": 2517 }, { "epoch": 0.33, "learning_rate": 7.800518275155267e-05, "loss": 0.0001, "step": 2518 }, { "epoch": 0.33, "learning_rate": 7.798752132425968e-05, "loss": 16.7269, "step": 2519 }, { "epoch": 0.33, "learning_rate": 7.796985481013859e-05, "loss": 17.7533, "step": 2520 }, { "epoch": 0.33, "learning_rate": 7.795218321240033e-05, "loss": 17.1605, "step": 2521 }, { "epoch": 0.33, "learning_rate": 7.793450653425678e-05, "loss": 16.3853, "step": 2522 }, { "epoch": 0.33, "learning_rate": 7.791682477892073e-05, "loss": 0.0, "step": 2523 }, { "epoch": 0.33, "learning_rate": 7.789913794960589e-05, "loss": 18.3925, "step": 2524 }, { "epoch": 0.33, "learning_rate": 7.788144604952691e-05, "loss": 0.0, "step": 2525 }, { "epoch": 0.33, "learning_rate": 7.786374908189938e-05, "loss": 18.0472, "step": 2526 }, { "epoch": 0.33, "learning_rate": 7.784604704993976e-05, "loss": 16.6137, "step": 2527 }, { "epoch": 0.33, "learning_rate": 7.782833995686545e-05, "loss": 15.7519, "step": 2528 }, { "epoch": 0.33, "learning_rate": 7.781062780589477e-05, "loss": 17.9185, "step": 2529 }, { "epoch": 0.33, "learning_rate": 7.779291060024698e-05, "loss": 0.0001, "step": 2530 }, { "epoch": 0.33, "learning_rate": 7.777518834314226e-05, "loss": 16.6178, "step": 2531 }, { "epoch": 0.33, "learning_rate": 7.775746103780165e-05, "loss": 0.0, "step": 2532 }, { "epoch": 0.33, "learning_rate": 7.773972868744718e-05, "loss": 0.0004, "step": 2533 }, { "epoch": 0.33, "learning_rate": 7.772199129530174e-05, "loss": 17.4079, "step": 2534 }, { "epoch": 0.33, "learning_rate": 7.77042488645892e-05, "loss": 17.5419, "step": 2535 }, { "epoch": 0.33, "learning_rate": 7.768650139853427e-05, "loss": 0.0001, "step": 2536 }, { "epoch": 0.33, "learning_rate": 7.766874890036262e-05, "loss": 16.7813, "step": 2537 }, { "epoch": 0.33, "learning_rate": 7.765099137330084e-05, "loss": 0.0001, "step": 2538 }, { "epoch": 0.33, "learning_rate": 7.763322882057643e-05, "loss": 0.0001, "step": 2539 }, { "epoch": 0.33, "learning_rate": 7.761546124541776e-05, "loss": 16.6561, "step": 2540 }, { "epoch": 0.33, "learning_rate": 7.759768865105417e-05, "loss": 17.1559, "step": 2541 }, { "epoch": 0.33, "learning_rate": 7.757991104071591e-05, "loss": 0.0003, "step": 2542 }, { "epoch": 0.33, "learning_rate": 7.756212841763408e-05, "loss": 15.0456, "step": 2543 }, { "epoch": 0.33, "learning_rate": 7.754434078504077e-05, "loss": 16.3381, "step": 2544 }, { "epoch": 0.34, "learning_rate": 7.75265481461689e-05, "loss": 18.8952, "step": 2545 }, { "epoch": 0.34, "learning_rate": 7.75087505042524e-05, "loss": 17.1044, "step": 2546 }, { "epoch": 0.34, "learning_rate": 7.749094786252602e-05, "loss": 19.6517, "step": 2547 }, { "epoch": 0.34, "learning_rate": 7.747314022422544e-05, "loss": 0.0003, "step": 2548 }, { "epoch": 0.34, "learning_rate": 7.745532759258726e-05, "loss": 0.0002, "step": 2549 }, { "epoch": 0.34, "learning_rate": 7.743750997084905e-05, "loss": 0.0006, "step": 2550 }, { "epoch": 0.34, "learning_rate": 7.741968736224912e-05, "loss": 17.1194, "step": 2551 }, { "epoch": 0.34, "learning_rate": 7.740185977002687e-05, "loss": 18.8545, "step": 2552 }, { "epoch": 0.34, "learning_rate": 7.73840271974225e-05, "loss": 16.9259, "step": 2553 }, { "epoch": 0.34, "learning_rate": 7.736618964767715e-05, "loss": 18.7145, "step": 2554 }, { "epoch": 0.34, "learning_rate": 7.734834712403282e-05, "loss": 0.0, "step": 2555 }, { "epoch": 0.34, "learning_rate": 7.733049962973252e-05, "loss": 16.483, "step": 2556 }, { "epoch": 0.34, "learning_rate": 7.731264716802e-05, "loss": 0.0, "step": 2557 }, { "epoch": 0.34, "learning_rate": 7.72947897421401e-05, "loss": 0.0, "step": 2558 }, { "epoch": 0.34, "learning_rate": 7.72769273553384e-05, "loss": 16.9752, "step": 2559 }, { "epoch": 0.34, "learning_rate": 7.72590600108615e-05, "loss": 17.5219, "step": 2560 }, { "epoch": 0.34, "learning_rate": 7.72411877119568e-05, "loss": 0.0004, "step": 2561 }, { "epoch": 0.34, "learning_rate": 7.72233104618727e-05, "loss": 15.8583, "step": 2562 }, { "epoch": 0.34, "learning_rate": 7.720542826385842e-05, "loss": 16.7147, "step": 2563 }, { "epoch": 0.34, "learning_rate": 7.718754112116412e-05, "loss": 0.0022, "step": 2564 }, { "epoch": 0.34, "learning_rate": 7.716964903704085e-05, "loss": 16.8751, "step": 2565 }, { "epoch": 0.34, "learning_rate": 7.715175201474057e-05, "loss": 17.4272, "step": 2566 }, { "epoch": 0.34, "learning_rate": 7.713385005751611e-05, "loss": 0.0002, "step": 2567 }, { "epoch": 0.34, "learning_rate": 7.71159431686212e-05, "loss": 17.6867, "step": 2568 }, { "epoch": 0.34, "learning_rate": 7.709803135131053e-05, "loss": 16.6829, "step": 2569 }, { "epoch": 0.34, "learning_rate": 7.708011460883958e-05, "loss": 0.0, "step": 2570 }, { "epoch": 0.34, "learning_rate": 7.70621929444648e-05, "loss": 17.6394, "step": 2571 }, { "epoch": 0.34, "learning_rate": 7.70442663614435e-05, "loss": 0.0, "step": 2572 }, { "epoch": 0.34, "learning_rate": 7.702633486303393e-05, "loss": 0.0001, "step": 2573 }, { "epoch": 0.34, "learning_rate": 7.700839845249517e-05, "loss": 18.1044, "step": 2574 }, { "epoch": 0.34, "learning_rate": 7.699045713308722e-05, "loss": 0.0001, "step": 2575 }, { "epoch": 0.34, "learning_rate": 7.697251090807101e-05, "loss": 0.0, "step": 2576 }, { "epoch": 0.34, "learning_rate": 7.695455978070833e-05, "loss": 16.7943, "step": 2577 }, { "epoch": 0.34, "learning_rate": 7.693660375426182e-05, "loss": 16.2851, "step": 2578 }, { "epoch": 0.34, "learning_rate": 7.691864283199506e-05, "loss": 16.7593, "step": 2579 }, { "epoch": 0.34, "learning_rate": 7.690067701717253e-05, "loss": 0.0, "step": 2580 }, { "epoch": 0.34, "learning_rate": 7.688270631305957e-05, "loss": 17.6759, "step": 2581 }, { "epoch": 0.34, "learning_rate": 7.686473072292241e-05, "loss": 0.0, "step": 2582 }, { "epoch": 0.34, "learning_rate": 7.684675025002819e-05, "loss": 0.0, "step": 2583 }, { "epoch": 0.34, "learning_rate": 7.68287648976449e-05, "loss": 17.1255, "step": 2584 }, { "epoch": 0.34, "learning_rate": 7.681077466904146e-05, "loss": 0.0001, "step": 2585 }, { "epoch": 0.34, "learning_rate": 7.679277956748764e-05, "loss": 0.0, "step": 2586 }, { "epoch": 0.34, "learning_rate": 7.677477959625412e-05, "loss": 0.0, "step": 2587 }, { "epoch": 0.34, "learning_rate": 7.675677475861247e-05, "loss": 0.0001, "step": 2588 }, { "epoch": 0.34, "learning_rate": 7.67387650578351e-05, "loss": 17.7938, "step": 2589 }, { "epoch": 0.34, "learning_rate": 7.672075049719537e-05, "loss": 18.5737, "step": 2590 }, { "epoch": 0.34, "learning_rate": 7.670273107996744e-05, "loss": 15.9628, "step": 2591 }, { "epoch": 0.34, "learning_rate": 7.668470680942646e-05, "loss": 17.1913, "step": 2592 }, { "epoch": 0.34, "learning_rate": 7.666667768884836e-05, "loss": 0.0, "step": 2593 }, { "epoch": 0.34, "learning_rate": 7.664864372151e-05, "loss": 16.0867, "step": 2594 }, { "epoch": 0.34, "learning_rate": 7.663060491068914e-05, "loss": 17.896, "step": 2595 }, { "epoch": 0.34, "learning_rate": 7.661256125966437e-05, "loss": 16.6044, "step": 2596 }, { "epoch": 0.34, "learning_rate": 7.65945127717152e-05, "loss": 0.0004, "step": 2597 }, { "epoch": 0.34, "learning_rate": 7.657645945012201e-05, "loss": 0.0001, "step": 2598 }, { "epoch": 0.34, "learning_rate": 7.655840129816602e-05, "loss": 16.2019, "step": 2599 }, { "epoch": 0.34, "learning_rate": 7.654033831912941e-05, "loss": 0.0, "step": 2600 }, { "epoch": 0.34, "learning_rate": 7.652227051629515e-05, "loss": 17.8771, "step": 2601 }, { "epoch": 0.34, "learning_rate": 7.650419789294714e-05, "loss": 0.0002, "step": 2602 }, { "epoch": 0.34, "learning_rate": 7.648612045237014e-05, "loss": 0.0, "step": 2603 }, { "epoch": 0.34, "learning_rate": 7.646803819784978e-05, "loss": 17.2579, "step": 2604 }, { "epoch": 0.34, "learning_rate": 7.64499511326726e-05, "loss": 17.5051, "step": 2605 }, { "epoch": 0.34, "learning_rate": 7.643185926012594e-05, "loss": 17.3849, "step": 2606 }, { "epoch": 0.34, "learning_rate": 7.641376258349809e-05, "loss": 18.7533, "step": 2607 }, { "epoch": 0.34, "learning_rate": 7.639566110607818e-05, "loss": 0.0003, "step": 2608 }, { "epoch": 0.34, "learning_rate": 7.637755483115622e-05, "loss": 15.3475, "step": 2609 }, { "epoch": 0.34, "learning_rate": 7.635944376202308e-05, "loss": 17.8881, "step": 2610 }, { "epoch": 0.34, "learning_rate": 7.634132790197049e-05, "loss": 0.0, "step": 2611 }, { "epoch": 0.34, "learning_rate": 7.632320725429112e-05, "loss": 17.4957, "step": 2612 }, { "epoch": 0.34, "learning_rate": 7.630508182227841e-05, "loss": 19.1026, "step": 2613 }, { "epoch": 0.34, "learning_rate": 7.628695160922674e-05, "loss": 0.0, "step": 2614 }, { "epoch": 0.34, "learning_rate": 7.626881661843135e-05, "loss": 17.4981, "step": 2615 }, { "epoch": 0.34, "learning_rate": 7.625067685318832e-05, "loss": 15.4972, "step": 2616 }, { "epoch": 0.34, "learning_rate": 7.623253231679462e-05, "loss": 18.2155, "step": 2617 }, { "epoch": 0.34, "learning_rate": 7.621438301254807e-05, "loss": 16.7684, "step": 2618 }, { "epoch": 0.34, "learning_rate": 7.619622894374738e-05, "loss": 18.5113, "step": 2619 }, { "epoch": 0.34, "learning_rate": 7.617807011369213e-05, "loss": 18.2114, "step": 2620 }, { "epoch": 0.35, "learning_rate": 7.615990652568273e-05, "loss": 0.0, "step": 2621 }, { "epoch": 0.35, "learning_rate": 7.614173818302046e-05, "loss": 19.6138, "step": 2622 }, { "epoch": 0.35, "learning_rate": 7.61235650890075e-05, "loss": 0.0, "step": 2623 }, { "epoch": 0.35, "learning_rate": 7.610538724694689e-05, "loss": 0.0001, "step": 2624 }, { "epoch": 0.35, "learning_rate": 7.608720466014248e-05, "loss": 18.4505, "step": 2625 }, { "epoch": 0.35, "learning_rate": 7.606901733189904e-05, "loss": 16.5605, "step": 2626 }, { "epoch": 0.35, "learning_rate": 7.605082526552216e-05, "loss": 17.6081, "step": 2627 }, { "epoch": 0.35, "learning_rate": 7.603262846431833e-05, "loss": 16.2905, "step": 2628 }, { "epoch": 0.35, "learning_rate": 7.601442693159486e-05, "loss": 0.0001, "step": 2629 }, { "epoch": 0.35, "learning_rate": 7.599622067065996e-05, "loss": 18.1701, "step": 2630 }, { "epoch": 0.35, "learning_rate": 7.59780096848227e-05, "loss": 17.8308, "step": 2631 }, { "epoch": 0.35, "learning_rate": 7.595979397739293e-05, "loss": 0.0001, "step": 2632 }, { "epoch": 0.35, "learning_rate": 7.594157355168146e-05, "loss": 19.3647, "step": 2633 }, { "epoch": 0.35, "learning_rate": 7.59233484109999e-05, "loss": 15.6768, "step": 2634 }, { "epoch": 0.35, "learning_rate": 7.590511855866076e-05, "loss": 17.892, "step": 2635 }, { "epoch": 0.35, "learning_rate": 7.588688399797733e-05, "loss": 16.7538, "step": 2636 }, { "epoch": 0.35, "learning_rate": 7.586864473226385e-05, "loss": 17.272, "step": 2637 }, { "epoch": 0.35, "learning_rate": 7.585040076483534e-05, "loss": 17.7665, "step": 2638 }, { "epoch": 0.35, "learning_rate": 7.583215209900772e-05, "loss": 17.6904, "step": 2639 }, { "epoch": 0.35, "learning_rate": 7.581389873809774e-05, "loss": 0.0001, "step": 2640 }, { "epoch": 0.35, "learning_rate": 7.5795640685423e-05, "loss": 17.1558, "step": 2641 }, { "epoch": 0.35, "learning_rate": 7.577737794430197e-05, "loss": 0.0, "step": 2642 }, { "epoch": 0.35, "learning_rate": 7.575911051805398e-05, "loss": 18.0623, "step": 2643 }, { "epoch": 0.35, "learning_rate": 7.57408384099992e-05, "loss": 0.0, "step": 2644 }, { "epoch": 0.35, "learning_rate": 7.572256162345863e-05, "loss": 18.2358, "step": 2645 }, { "epoch": 0.35, "learning_rate": 7.570428016175415e-05, "loss": 0.0, "step": 2646 }, { "epoch": 0.35, "learning_rate": 7.568599402820846e-05, "loss": 18.989, "step": 2647 }, { "epoch": 0.35, "learning_rate": 7.566770322614515e-05, "loss": 16.7682, "step": 2648 }, { "epoch": 0.35, "learning_rate": 7.564940775888863e-05, "loss": 17.1936, "step": 2649 }, { "epoch": 0.35, "learning_rate": 7.563110762976417e-05, "loss": 16.6139, "step": 2650 }, { "epoch": 0.35, "learning_rate": 7.561280284209788e-05, "loss": 0.0, "step": 2651 }, { "epoch": 0.35, "learning_rate": 7.559449339921669e-05, "loss": 16.2856, "step": 2652 }, { "epoch": 0.35, "learning_rate": 7.557617930444843e-05, "loss": 0.0022, "step": 2653 }, { "epoch": 0.35, "learning_rate": 7.555786056112175e-05, "loss": 16.2894, "step": 2654 }, { "epoch": 0.35, "learning_rate": 7.553953717256615e-05, "loss": 0.0001, "step": 2655 }, { "epoch": 0.35, "learning_rate": 7.552120914211192e-05, "loss": 16.7316, "step": 2656 }, { "epoch": 0.35, "learning_rate": 7.550287647309031e-05, "loss": 17.8747, "step": 2657 }, { "epoch": 0.35, "learning_rate": 7.548453916883333e-05, "loss": 16.8379, "step": 2658 }, { "epoch": 0.35, "learning_rate": 7.546619723267381e-05, "loss": 0.0001, "step": 2659 }, { "epoch": 0.35, "learning_rate": 7.544785066794549e-05, "loss": 16.9887, "step": 2660 }, { "epoch": 0.35, "learning_rate": 7.542949947798293e-05, "loss": 16.7155, "step": 2661 }, { "epoch": 0.35, "learning_rate": 7.541114366612148e-05, "loss": 0.0001, "step": 2662 }, { "epoch": 0.35, "learning_rate": 7.539278323569742e-05, "loss": 0.0, "step": 2663 }, { "epoch": 0.35, "learning_rate": 7.53744181900478e-05, "loss": 0.0003, "step": 2664 }, { "epoch": 0.35, "learning_rate": 7.535604853251052e-05, "loss": 17.9416, "step": 2665 }, { "epoch": 0.35, "learning_rate": 7.533767426642434e-05, "loss": 17.8643, "step": 2666 }, { "epoch": 0.35, "learning_rate": 7.531929539512885e-05, "loss": 18.5284, "step": 2667 }, { "epoch": 0.35, "learning_rate": 7.530091192196447e-05, "loss": 0.0003, "step": 2668 }, { "epoch": 0.35, "learning_rate": 7.528252385027246e-05, "loss": 19.69, "step": 2669 }, { "epoch": 0.35, "learning_rate": 7.52641311833949e-05, "loss": 17.5952, "step": 2670 }, { "epoch": 0.35, "learning_rate": 7.524573392467474e-05, "loss": 19.1431, "step": 2671 }, { "epoch": 0.35, "learning_rate": 7.522733207745573e-05, "loss": 18.3773, "step": 2672 }, { "epoch": 0.35, "learning_rate": 7.520892564508249e-05, "loss": 17.4885, "step": 2673 }, { "epoch": 0.35, "learning_rate": 7.519051463090044e-05, "loss": 17.2507, "step": 2674 }, { "epoch": 0.35, "learning_rate": 7.517209903825585e-05, "loss": 19.1544, "step": 2675 }, { "epoch": 0.35, "learning_rate": 7.515367887049578e-05, "loss": 17.6405, "step": 2676 }, { "epoch": 0.35, "learning_rate": 7.51352541309682e-05, "loss": 0.0001, "step": 2677 }, { "epoch": 0.35, "learning_rate": 7.511682482302187e-05, "loss": 18.0641, "step": 2678 }, { "epoch": 0.35, "learning_rate": 7.509839095000637e-05, "loss": 17.3194, "step": 2679 }, { "epoch": 0.35, "learning_rate": 7.507995251527213e-05, "loss": 0.0001, "step": 2680 }, { "epoch": 0.35, "learning_rate": 7.506150952217036e-05, "loss": 17.7565, "step": 2681 }, { "epoch": 0.35, "learning_rate": 7.50430619740532e-05, "loss": 0.0001, "step": 2682 }, { "epoch": 0.35, "learning_rate": 7.502460987427348e-05, "loss": 16.8615, "step": 2683 }, { "epoch": 0.35, "learning_rate": 7.5006153226185e-05, "loss": 17.7614, "step": 2684 }, { "epoch": 0.35, "learning_rate": 7.49876920331423e-05, "loss": 17.2922, "step": 2685 }, { "epoch": 0.35, "learning_rate": 7.496922629850074e-05, "loss": 0.0001, "step": 2686 }, { "epoch": 0.35, "learning_rate": 7.495075602561656e-05, "loss": 0.0014, "step": 2687 }, { "epoch": 0.35, "learning_rate": 7.493228121784677e-05, "loss": 19.4541, "step": 2688 }, { "epoch": 0.35, "learning_rate": 7.491380187854926e-05, "loss": 18.1784, "step": 2689 }, { "epoch": 0.35, "learning_rate": 7.489531801108268e-05, "loss": 16.3277, "step": 2690 }, { "epoch": 0.35, "learning_rate": 7.487682961880657e-05, "loss": 17.4002, "step": 2691 }, { "epoch": 0.35, "learning_rate": 7.485833670508125e-05, "loss": 18.251, "step": 2692 }, { "epoch": 0.35, "learning_rate": 7.483983927326785e-05, "loss": 17.593, "step": 2693 }, { "epoch": 0.35, "learning_rate": 7.482133732672834e-05, "loss": 0.0, "step": 2694 }, { "epoch": 0.35, "learning_rate": 7.480283086882553e-05, "loss": 16.4151, "step": 2695 }, { "epoch": 0.35, "learning_rate": 7.478431990292305e-05, "loss": 0.0001, "step": 2696 }, { "epoch": 0.36, "learning_rate": 7.47658044323853e-05, "loss": 0.0, "step": 2697 }, { "epoch": 0.36, "learning_rate": 7.474728446057752e-05, "loss": 0.0, "step": 2698 }, { "epoch": 0.36, "learning_rate": 7.472875999086583e-05, "loss": 16.157, "step": 2699 }, { "epoch": 0.36, "learning_rate": 7.471023102661709e-05, "loss": 17.2375, "step": 2700 }, { "epoch": 0.36, "learning_rate": 7.469169757119899e-05, "loss": 14.7437, "step": 2701 }, { "epoch": 0.36, "learning_rate": 7.467315962798003e-05, "loss": 0.0001, "step": 2702 }, { "epoch": 0.36, "learning_rate": 7.46546172003296e-05, "loss": 17.2433, "step": 2703 }, { "epoch": 0.36, "learning_rate": 7.463607029161783e-05, "loss": 0.0001, "step": 2704 }, { "epoch": 0.36, "learning_rate": 7.461751890521567e-05, "loss": 16.352, "step": 2705 }, { "epoch": 0.36, "learning_rate": 7.459896304449488e-05, "loss": 16.8436, "step": 2706 }, { "epoch": 0.36, "learning_rate": 7.45804027128281e-05, "loss": 18.9493, "step": 2707 }, { "epoch": 0.36, "learning_rate": 7.456183791358871e-05, "loss": 18.1594, "step": 2708 }, { "epoch": 0.36, "learning_rate": 7.454326865015093e-05, "loss": 17.1185, "step": 2709 }, { "epoch": 0.36, "learning_rate": 7.452469492588979e-05, "loss": 16.7899, "step": 2710 }, { "epoch": 0.36, "learning_rate": 7.450611674418109e-05, "loss": 17.4926, "step": 2711 }, { "epoch": 0.36, "learning_rate": 7.448753410840153e-05, "loss": 16.6868, "step": 2712 }, { "epoch": 0.36, "learning_rate": 7.446894702192855e-05, "loss": 16.5325, "step": 2713 }, { "epoch": 0.36, "learning_rate": 7.445035548814042e-05, "loss": 17.9506, "step": 2714 }, { "epoch": 0.36, "learning_rate": 7.44317595104162e-05, "loss": 16.0928, "step": 2715 }, { "epoch": 0.36, "learning_rate": 7.441315909213578e-05, "loss": 18.6113, "step": 2716 }, { "epoch": 0.36, "learning_rate": 7.439455423667987e-05, "loss": 18.8699, "step": 2717 }, { "epoch": 0.36, "learning_rate": 7.437594494742995e-05, "loss": 18.4542, "step": 2718 }, { "epoch": 0.36, "learning_rate": 7.43573312277683e-05, "loss": 18.7397, "step": 2719 }, { "epoch": 0.36, "learning_rate": 7.433871308107806e-05, "loss": 17.2009, "step": 2720 }, { "epoch": 0.36, "learning_rate": 7.432009051074314e-05, "loss": 17.8634, "step": 2721 }, { "epoch": 0.36, "learning_rate": 7.430146352014821e-05, "loss": 18.0274, "step": 2722 }, { "epoch": 0.36, "learning_rate": 7.428283211267887e-05, "loss": 17.0223, "step": 2723 }, { "epoch": 0.36, "learning_rate": 7.42641962917214e-05, "loss": 0.0005, "step": 2724 }, { "epoch": 0.36, "learning_rate": 7.42455560606629e-05, "loss": 19.5364, "step": 2725 }, { "epoch": 0.36, "learning_rate": 7.42269114228913e-05, "loss": 0.0, "step": 2726 }, { "epoch": 0.36, "learning_rate": 7.420826238179539e-05, "loss": 17.0746, "step": 2727 }, { "epoch": 0.36, "learning_rate": 7.418960894076464e-05, "loss": 20.0688, "step": 2728 }, { "epoch": 0.36, "learning_rate": 7.417095110318939e-05, "loss": 18.3768, "step": 2729 }, { "epoch": 0.36, "learning_rate": 7.415228887246075e-05, "loss": 0.0, "step": 2730 }, { "epoch": 0.36, "learning_rate": 7.413362225197068e-05, "loss": 18.0333, "step": 2731 }, { "epoch": 0.36, "learning_rate": 7.411495124511188e-05, "loss": 19.8212, "step": 2732 }, { "epoch": 0.36, "learning_rate": 7.409627585527787e-05, "loss": 17.4327, "step": 2733 }, { "epoch": 0.36, "learning_rate": 7.407759608586298e-05, "loss": 0.0, "step": 2734 }, { "epoch": 0.36, "learning_rate": 7.405891194026229e-05, "loss": 0.0001, "step": 2735 }, { "epoch": 0.36, "learning_rate": 7.404022342187174e-05, "loss": 20.4128, "step": 2736 }, { "epoch": 0.36, "learning_rate": 7.402153053408803e-05, "loss": 18.0527, "step": 2737 }, { "epoch": 0.36, "learning_rate": 7.400283328030865e-05, "loss": 17.4274, "step": 2738 }, { "epoch": 0.36, "learning_rate": 7.398413166393187e-05, "loss": 0.0001, "step": 2739 }, { "epoch": 0.36, "learning_rate": 7.39654256883568e-05, "loss": 0.0001, "step": 2740 }, { "epoch": 0.36, "learning_rate": 7.394671535698333e-05, "loss": 0.0, "step": 2741 }, { "epoch": 0.36, "learning_rate": 7.392800067321207e-05, "loss": 16.7764, "step": 2742 }, { "epoch": 0.36, "learning_rate": 7.390928164044453e-05, "loss": 0.0, "step": 2743 }, { "epoch": 0.36, "learning_rate": 7.389055826208294e-05, "loss": 18.894, "step": 2744 }, { "epoch": 0.36, "learning_rate": 7.387183054153032e-05, "loss": 17.6416, "step": 2745 }, { "epoch": 0.36, "learning_rate": 7.385309848219053e-05, "loss": 16.2967, "step": 2746 }, { "epoch": 0.36, "learning_rate": 7.383436208746816e-05, "loss": 16.5672, "step": 2747 }, { "epoch": 0.36, "learning_rate": 7.381562136076865e-05, "loss": 17.7558, "step": 2748 }, { "epoch": 0.36, "learning_rate": 7.379687630549816e-05, "loss": 0.0001, "step": 2749 }, { "epoch": 0.36, "learning_rate": 7.377812692506368e-05, "loss": 0.0001, "step": 2750 }, { "epoch": 0.36, "learning_rate": 7.375937322287295e-05, "loss": 18.8396, "step": 2751 }, { "epoch": 0.36, "learning_rate": 7.374061520233455e-05, "loss": 0.0019, "step": 2752 }, { "epoch": 0.36, "learning_rate": 7.37218528668578e-05, "loss": 19.1016, "step": 2753 }, { "epoch": 0.36, "learning_rate": 7.370308621985284e-05, "loss": 0.0002, "step": 2754 }, { "epoch": 0.36, "learning_rate": 7.368431526473054e-05, "loss": 0.0006, "step": 2755 }, { "epoch": 0.36, "learning_rate": 7.366554000490262e-05, "loss": 18.0955, "step": 2756 }, { "epoch": 0.36, "learning_rate": 7.36467604437815e-05, "loss": 18.0656, "step": 2757 }, { "epoch": 0.36, "learning_rate": 7.362797658478048e-05, "loss": 0.0001, "step": 2758 }, { "epoch": 0.36, "learning_rate": 7.360918843131357e-05, "loss": 16.6126, "step": 2759 }, { "epoch": 0.36, "learning_rate": 7.359039598679558e-05, "loss": 17.6063, "step": 2760 }, { "epoch": 0.36, "learning_rate": 7.35715992546421e-05, "loss": 18.4472, "step": 2761 }, { "epoch": 0.36, "learning_rate": 7.35527982382695e-05, "loss": 15.6336, "step": 2762 }, { "epoch": 0.36, "learning_rate": 7.353399294109495e-05, "loss": 18.936, "step": 2763 }, { "epoch": 0.36, "learning_rate": 7.351518336653633e-05, "loss": 0.0001, "step": 2764 }, { "epoch": 0.36, "learning_rate": 7.34963695180124e-05, "loss": 16.1549, "step": 2765 }, { "epoch": 0.36, "learning_rate": 7.34775513989426e-05, "loss": 17.6498, "step": 2766 }, { "epoch": 0.36, "learning_rate": 7.345872901274722e-05, "loss": 0.0, "step": 2767 }, { "epoch": 0.36, "learning_rate": 7.343990236284727e-05, "loss": 18.3285, "step": 2768 }, { "epoch": 0.36, "learning_rate": 7.342107145266457e-05, "loss": 17.5401, "step": 2769 }, { "epoch": 0.36, "learning_rate": 7.34022362856217e-05, "loss": 16.5191, "step": 2770 }, { "epoch": 0.36, "learning_rate": 7.338339686514201e-05, "loss": 17.0405, "step": 2771 }, { "epoch": 0.36, "learning_rate": 7.336455319464963e-05, "loss": 0.0, "step": 2772 }, { "epoch": 0.37, "learning_rate": 7.334570527756947e-05, "loss": 0.0, "step": 2773 }, { "epoch": 0.37, "learning_rate": 7.33268531173272e-05, "loss": 18.6791, "step": 2774 }, { "epoch": 0.37, "learning_rate": 7.330799671734927e-05, "loss": 17.7002, "step": 2775 }, { "epoch": 0.37, "learning_rate": 7.328913608106286e-05, "loss": 0.0001, "step": 2776 }, { "epoch": 0.37, "learning_rate": 7.327027121189602e-05, "loss": 17.6236, "step": 2777 }, { "epoch": 0.37, "learning_rate": 7.325140211327747e-05, "loss": 0.0, "step": 2778 }, { "epoch": 0.37, "learning_rate": 7.323252878863672e-05, "loss": 0.0, "step": 2779 }, { "epoch": 0.37, "learning_rate": 7.321365124140408e-05, "loss": 17.6926, "step": 2780 }, { "epoch": 0.37, "learning_rate": 7.319476947501061e-05, "loss": 0.0, "step": 2781 }, { "epoch": 0.37, "learning_rate": 7.317588349288812e-05, "loss": 18.1256, "step": 2782 }, { "epoch": 0.37, "learning_rate": 7.315699329846921e-05, "loss": 18.3388, "step": 2783 }, { "epoch": 0.37, "learning_rate": 7.313809889518726e-05, "loss": 19.783, "step": 2784 }, { "epoch": 0.37, "learning_rate": 7.311920028647634e-05, "loss": 17.6751, "step": 2785 }, { "epoch": 0.37, "learning_rate": 7.31002974757714e-05, "loss": 17.0054, "step": 2786 }, { "epoch": 0.37, "learning_rate": 7.308139046650806e-05, "loss": 17.458, "step": 2787 }, { "epoch": 0.37, "learning_rate": 7.30624792621227e-05, "loss": 17.6842, "step": 2788 }, { "epoch": 0.37, "learning_rate": 7.304356386605256e-05, "loss": 0.001, "step": 2789 }, { "epoch": 0.37, "learning_rate": 7.302464428173556e-05, "loss": 17.1943, "step": 2790 }, { "epoch": 0.37, "learning_rate": 7.300572051261036e-05, "loss": 18.3305, "step": 2791 }, { "epoch": 0.37, "learning_rate": 7.298679256211646e-05, "loss": 0.0001, "step": 2792 }, { "epoch": 0.37, "learning_rate": 7.296786043369407e-05, "loss": 16.1799, "step": 2793 }, { "epoch": 0.37, "learning_rate": 7.294892413078415e-05, "loss": 0.0, "step": 2794 }, { "epoch": 0.37, "learning_rate": 7.292998365682848e-05, "loss": 17.0072, "step": 2795 }, { "epoch": 0.37, "learning_rate": 7.291103901526952e-05, "loss": 0.0001, "step": 2796 }, { "epoch": 0.37, "learning_rate": 7.289209020955054e-05, "loss": 18.0817, "step": 2797 }, { "epoch": 0.37, "learning_rate": 7.287313724311556e-05, "loss": 16.3925, "step": 2798 }, { "epoch": 0.37, "learning_rate": 7.285418011940931e-05, "loss": 16.6049, "step": 2799 }, { "epoch": 0.37, "learning_rate": 7.283521884187733e-05, "loss": 14.9122, "step": 2800 }, { "epoch": 0.37, "learning_rate": 7.281625341396592e-05, "loss": 15.764, "step": 2801 }, { "epoch": 0.37, "learning_rate": 7.27972838391221e-05, "loss": 16.377, "step": 2802 }, { "epoch": 0.37, "learning_rate": 7.277831012079364e-05, "loss": 17.9017, "step": 2803 }, { "epoch": 0.37, "learning_rate": 7.275933226242908e-05, "loss": 16.361, "step": 2804 }, { "epoch": 0.37, "learning_rate": 7.274035026747771e-05, "loss": 16.4999, "step": 2805 }, { "epoch": 0.37, "learning_rate": 7.272136413938962e-05, "loss": 19.1235, "step": 2806 }, { "epoch": 0.37, "learning_rate": 7.270237388161554e-05, "loss": 17.7601, "step": 2807 }, { "epoch": 0.37, "learning_rate": 7.268337949760704e-05, "loss": 17.8323, "step": 2808 }, { "epoch": 0.37, "learning_rate": 7.26643809908164e-05, "loss": 16.9101, "step": 2809 }, { "epoch": 0.37, "learning_rate": 7.264537836469672e-05, "loss": 16.6441, "step": 2810 }, { "epoch": 0.37, "learning_rate": 7.262637162270172e-05, "loss": 18.6369, "step": 2811 }, { "epoch": 0.37, "learning_rate": 7.260736076828598e-05, "loss": 0.0007, "step": 2812 }, { "epoch": 0.37, "learning_rate": 7.258834580490477e-05, "loss": 0.0, "step": 2813 }, { "epoch": 0.37, "learning_rate": 7.256932673601416e-05, "loss": 0.0001, "step": 2814 }, { "epoch": 0.37, "learning_rate": 7.255030356507088e-05, "loss": 17.1214, "step": 2815 }, { "epoch": 0.37, "learning_rate": 7.25312762955325e-05, "loss": 0.0001, "step": 2816 }, { "epoch": 0.37, "learning_rate": 7.251224493085727e-05, "loss": 18.7271, "step": 2817 }, { "epoch": 0.37, "learning_rate": 7.24932094745042e-05, "loss": 17.0069, "step": 2818 }, { "epoch": 0.37, "learning_rate": 7.247416992993306e-05, "loss": 17.0695, "step": 2819 }, { "epoch": 0.37, "learning_rate": 7.245512630060437e-05, "loss": 0.0, "step": 2820 }, { "epoch": 0.37, "learning_rate": 7.243607858997934e-05, "loss": 16.9922, "step": 2821 }, { "epoch": 0.37, "learning_rate": 7.241702680151998e-05, "loss": 18.9932, "step": 2822 }, { "epoch": 0.37, "learning_rate": 7.239797093868902e-05, "loss": 17.2912, "step": 2823 }, { "epoch": 0.37, "learning_rate": 7.237891100494991e-05, "loss": 18.934, "step": 2824 }, { "epoch": 0.37, "learning_rate": 7.235984700376686e-05, "loss": 0.0008, "step": 2825 }, { "epoch": 0.37, "learning_rate": 7.234077893860485e-05, "loss": 16.7243, "step": 2826 }, { "epoch": 0.37, "learning_rate": 7.232170681292953e-05, "loss": 15.4478, "step": 2827 }, { "epoch": 0.37, "learning_rate": 7.230263063020734e-05, "loss": 17.5681, "step": 2828 }, { "epoch": 0.37, "learning_rate": 7.228355039390545e-05, "loss": 16.9495, "step": 2829 }, { "epoch": 0.37, "learning_rate": 7.226446610749173e-05, "loss": 16.8779, "step": 2830 }, { "epoch": 0.37, "learning_rate": 7.224537777443483e-05, "loss": 17.6738, "step": 2831 }, { "epoch": 0.37, "learning_rate": 7.222628539820413e-05, "loss": 0.0001, "step": 2832 }, { "epoch": 0.37, "learning_rate": 7.220718898226974e-05, "loss": 16.8392, "step": 2833 }, { "epoch": 0.37, "learning_rate": 7.218808853010246e-05, "loss": 0.0, "step": 2834 }, { "epoch": 0.37, "learning_rate": 7.216898404517391e-05, "loss": 16.6114, "step": 2835 }, { "epoch": 0.37, "learning_rate": 7.214987553095637e-05, "loss": 0.0, "step": 2836 }, { "epoch": 0.37, "learning_rate": 7.213076299092287e-05, "loss": 16.8757, "step": 2837 }, { "epoch": 0.37, "learning_rate": 7.211164642854721e-05, "loss": 17.0686, "step": 2838 }, { "epoch": 0.37, "learning_rate": 7.209252584730387e-05, "loss": 15.7267, "step": 2839 }, { "epoch": 0.37, "learning_rate": 7.207340125066808e-05, "loss": 18.1447, "step": 2840 }, { "epoch": 0.37, "learning_rate": 7.20542726421158e-05, "loss": 17.1484, "step": 2841 }, { "epoch": 0.37, "learning_rate": 7.203514002512374e-05, "loss": 15.5823, "step": 2842 }, { "epoch": 0.37, "learning_rate": 7.201600340316929e-05, "loss": 0.0012, "step": 2843 }, { "epoch": 0.37, "learning_rate": 7.199686277973063e-05, "loss": 0.0001, "step": 2844 }, { "epoch": 0.37, "learning_rate": 7.197771815828662e-05, "loss": 18.5602, "step": 2845 }, { "epoch": 0.37, "learning_rate": 7.195856954231682e-05, "loss": 16.7157, "step": 2846 }, { "epoch": 0.37, "learning_rate": 7.193941693530164e-05, "loss": 17.2921, "step": 2847 }, { "epoch": 0.37, "learning_rate": 7.192026034072206e-05, "loss": 18.6336, "step": 2848 }, { "epoch": 0.38, "learning_rate": 7.190109976205987e-05, "loss": 18.022, "step": 2849 }, { "epoch": 0.38, "learning_rate": 7.188193520279761e-05, "loss": 17.7115, "step": 2850 }, { "epoch": 0.38, "learning_rate": 7.186276666641846e-05, "loss": 0.0, "step": 2851 }, { "epoch": 0.38, "learning_rate": 7.184359415640639e-05, "loss": 16.2098, "step": 2852 }, { "epoch": 0.38, "learning_rate": 7.182441767624607e-05, "loss": 0.0001, "step": 2853 }, { "epoch": 0.38, "learning_rate": 7.180523722942288e-05, "loss": 19.3599, "step": 2854 }, { "epoch": 0.38, "learning_rate": 7.178605281942294e-05, "loss": 17.4972, "step": 2855 }, { "epoch": 0.38, "learning_rate": 7.176686444973309e-05, "loss": 19.452, "step": 2856 }, { "epoch": 0.38, "learning_rate": 7.174767212384085e-05, "loss": 0.0002, "step": 2857 }, { "epoch": 0.38, "learning_rate": 7.172847584523453e-05, "loss": 15.4981, "step": 2858 }, { "epoch": 0.38, "learning_rate": 7.17092756174031e-05, "loss": 16.4666, "step": 2859 }, { "epoch": 0.38, "learning_rate": 7.169007144383627e-05, "loss": 0.0003, "step": 2860 }, { "epoch": 0.38, "learning_rate": 7.167086332802446e-05, "loss": 0.0, "step": 2861 }, { "epoch": 0.38, "learning_rate": 7.165165127345882e-05, "loss": 17.388, "step": 2862 }, { "epoch": 0.38, "learning_rate": 7.163243528363121e-05, "loss": 16.0338, "step": 2863 }, { "epoch": 0.38, "learning_rate": 7.161321536203418e-05, "loss": 17.6249, "step": 2864 }, { "epoch": 0.38, "learning_rate": 7.159399151216105e-05, "loss": 0.0, "step": 2865 }, { "epoch": 0.38, "learning_rate": 7.157476373750578e-05, "loss": 16.8573, "step": 2866 }, { "epoch": 0.38, "learning_rate": 7.155553204156312e-05, "loss": 17.3691, "step": 2867 }, { "epoch": 0.38, "learning_rate": 7.153629642782849e-05, "loss": 17.1545, "step": 2868 }, { "epoch": 0.38, "learning_rate": 7.151705689979802e-05, "loss": 17.6143, "step": 2869 }, { "epoch": 0.38, "learning_rate": 7.149781346096854e-05, "loss": 18.7476, "step": 2870 }, { "epoch": 0.38, "learning_rate": 7.147856611483765e-05, "loss": 0.0002, "step": 2871 }, { "epoch": 0.38, "learning_rate": 7.145931486490361e-05, "loss": 0.0002, "step": 2872 }, { "epoch": 0.38, "learning_rate": 7.14400597146654e-05, "loss": 18.2718, "step": 2873 }, { "epoch": 0.38, "learning_rate": 7.142080066762267e-05, "loss": 18.1067, "step": 2874 }, { "epoch": 0.38, "learning_rate": 7.140153772727589e-05, "loss": 17.6215, "step": 2875 }, { "epoch": 0.38, "learning_rate": 7.138227089712613e-05, "loss": 18.3823, "step": 2876 }, { "epoch": 0.38, "learning_rate": 7.136300018067519e-05, "loss": 0.0, "step": 2877 }, { "epoch": 0.38, "learning_rate": 7.134372558142559e-05, "loss": 0.0, "step": 2878 }, { "epoch": 0.38, "learning_rate": 7.132444710288058e-05, "loss": 18.0464, "step": 2879 }, { "epoch": 0.38, "learning_rate": 7.130516474854407e-05, "loss": 16.7106, "step": 2880 }, { "epoch": 0.38, "learning_rate": 7.128587852192069e-05, "loss": 0.0, "step": 2881 }, { "epoch": 0.38, "learning_rate": 7.12665884265158e-05, "loss": 18.3706, "step": 2882 }, { "epoch": 0.38, "learning_rate": 7.124729446583541e-05, "loss": 16.6549, "step": 2883 }, { "epoch": 0.38, "learning_rate": 7.12279966433863e-05, "loss": 17.5501, "step": 2884 }, { "epoch": 0.38, "learning_rate": 7.120869496267588e-05, "loss": 18.955, "step": 2885 }, { "epoch": 0.38, "learning_rate": 7.118938942721233e-05, "loss": 0.0, "step": 2886 }, { "epoch": 0.38, "learning_rate": 7.117008004050447e-05, "loss": 16.0065, "step": 2887 }, { "epoch": 0.38, "learning_rate": 7.115076680606187e-05, "loss": 18.4124, "step": 2888 }, { "epoch": 0.38, "learning_rate": 7.113144972739475e-05, "loss": 18.3328, "step": 2889 }, { "epoch": 0.38, "learning_rate": 7.111212880801408e-05, "loss": 15.8489, "step": 2890 }, { "epoch": 0.38, "learning_rate": 7.109280405143148e-05, "loss": 16.8512, "step": 2891 }, { "epoch": 0.38, "learning_rate": 7.107347546115932e-05, "loss": 18.2512, "step": 2892 }, { "epoch": 0.38, "learning_rate": 7.105414304071062e-05, "loss": 0.0002, "step": 2893 }, { "epoch": 0.38, "learning_rate": 7.103480679359912e-05, "loss": 19.109, "step": 2894 }, { "epoch": 0.38, "learning_rate": 7.101546672333923e-05, "loss": 0.0003, "step": 2895 }, { "epoch": 0.38, "learning_rate": 7.09961228334461e-05, "loss": 17.8141, "step": 2896 }, { "epoch": 0.38, "learning_rate": 7.097677512743555e-05, "loss": 0.0001, "step": 2897 }, { "epoch": 0.38, "learning_rate": 7.095742360882408e-05, "loss": 16.9342, "step": 2898 }, { "epoch": 0.38, "learning_rate": 7.093806828112886e-05, "loss": 15.9247, "step": 2899 }, { "epoch": 0.38, "learning_rate": 7.091870914786784e-05, "loss": 17.4274, "step": 2900 }, { "epoch": 0.38, "learning_rate": 7.08993462125596e-05, "loss": 17.3958, "step": 2901 }, { "epoch": 0.38, "learning_rate": 7.087997947872341e-05, "loss": 16.1878, "step": 2902 }, { "epoch": 0.38, "learning_rate": 7.086060894987925e-05, "loss": 16.3399, "step": 2903 }, { "epoch": 0.38, "learning_rate": 7.084123462954775e-05, "loss": 17.8589, "step": 2904 }, { "epoch": 0.38, "learning_rate": 7.082185652125029e-05, "loss": 18.0822, "step": 2905 }, { "epoch": 0.38, "learning_rate": 7.08024746285089e-05, "loss": 0.0003, "step": 2906 }, { "epoch": 0.38, "learning_rate": 7.078308895484631e-05, "loss": 0.0003, "step": 2907 }, { "epoch": 0.38, "learning_rate": 7.076369950378592e-05, "loss": 18.1325, "step": 2908 }, { "epoch": 0.38, "learning_rate": 7.074430627885185e-05, "loss": 16.5823, "step": 2909 }, { "epoch": 0.38, "learning_rate": 7.072490928356885e-05, "loss": 18.1484, "step": 2910 }, { "epoch": 0.38, "learning_rate": 7.070550852146243e-05, "loss": 0.0, "step": 2911 }, { "epoch": 0.38, "learning_rate": 7.06861039960587e-05, "loss": 17.3297, "step": 2912 }, { "epoch": 0.38, "learning_rate": 7.066669571088456e-05, "loss": 17.0195, "step": 2913 }, { "epoch": 0.38, "learning_rate": 7.064728366946749e-05, "loss": 0.0, "step": 2914 }, { "epoch": 0.38, "learning_rate": 7.06278678753357e-05, "loss": 0.0, "step": 2915 }, { "epoch": 0.38, "learning_rate": 7.060844833201808e-05, "loss": 18.2596, "step": 2916 }, { "epoch": 0.38, "learning_rate": 7.058902504304419e-05, "loss": 15.0068, "step": 2917 }, { "epoch": 0.38, "learning_rate": 7.056959801194431e-05, "loss": 17.9713, "step": 2918 }, { "epoch": 0.38, "learning_rate": 7.055016724224934e-05, "loss": 17.7185, "step": 2919 }, { "epoch": 0.38, "learning_rate": 7.053073273749089e-05, "loss": 19.2593, "step": 2920 }, { "epoch": 0.38, "learning_rate": 7.051129450120126e-05, "loss": 16.2338, "step": 2921 }, { "epoch": 0.38, "learning_rate": 7.049185253691342e-05, "loss": 17.4277, "step": 2922 }, { "epoch": 0.38, "learning_rate": 7.047240684816101e-05, "loss": 18.8943, "step": 2923 }, { "epoch": 0.38, "learning_rate": 7.045295743847834e-05, "loss": 0.0, "step": 2924 }, { "epoch": 0.39, "learning_rate": 7.043350431140041e-05, "loss": 17.7602, "step": 2925 }, { "epoch": 0.39, "learning_rate": 7.041404747046291e-05, "loss": 17.714, "step": 2926 }, { "epoch": 0.39, "learning_rate": 7.039458691920217e-05, "loss": 16.3262, "step": 2927 }, { "epoch": 0.39, "learning_rate": 7.03751226611552e-05, "loss": 17.7313, "step": 2928 }, { "epoch": 0.39, "learning_rate": 7.035565469985971e-05, "loss": 17.2069, "step": 2929 }, { "epoch": 0.39, "learning_rate": 7.033618303885409e-05, "loss": 18.3754, "step": 2930 }, { "epoch": 0.39, "learning_rate": 7.031670768167737e-05, "loss": 16.3516, "step": 2931 }, { "epoch": 0.39, "learning_rate": 7.029722863186922e-05, "loss": 18.5456, "step": 2932 }, { "epoch": 0.39, "learning_rate": 7.027774589297009e-05, "loss": 18.0071, "step": 2933 }, { "epoch": 0.39, "learning_rate": 7.0258259468521e-05, "loss": 17.6504, "step": 2934 }, { "epoch": 0.39, "learning_rate": 7.023876936206368e-05, "loss": 17.8166, "step": 2935 }, { "epoch": 0.39, "learning_rate": 7.021927557714051e-05, "loss": 17.3647, "step": 2936 }, { "epoch": 0.39, "learning_rate": 7.019977811729457e-05, "loss": 19.4772, "step": 2937 }, { "epoch": 0.39, "learning_rate": 7.018027698606959e-05, "loss": 17.9904, "step": 2938 }, { "epoch": 0.39, "learning_rate": 7.016077218700996e-05, "loss": 0.0, "step": 2939 }, { "epoch": 0.39, "learning_rate": 7.014126372366074e-05, "loss": 0.0001, "step": 2940 }, { "epoch": 0.39, "learning_rate": 7.012175159956767e-05, "loss": 17.0041, "step": 2941 }, { "epoch": 0.39, "learning_rate": 7.010223581827715e-05, "loss": 19.0755, "step": 2942 }, { "epoch": 0.39, "learning_rate": 7.008271638333624e-05, "loss": 19.619, "step": 2943 }, { "epoch": 0.39, "learning_rate": 7.006319329829262e-05, "loss": 17.1491, "step": 2944 }, { "epoch": 0.39, "learning_rate": 7.004366656669475e-05, "loss": 0.0001, "step": 2945 }, { "epoch": 0.39, "learning_rate": 7.002413619209164e-05, "loss": 0.0001, "step": 2946 }, { "epoch": 0.39, "learning_rate": 7.000460217803301e-05, "loss": 16.4474, "step": 2947 }, { "epoch": 0.39, "learning_rate": 6.998506452806924e-05, "loss": 16.4923, "step": 2948 }, { "epoch": 0.39, "learning_rate": 6.996552324575132e-05, "loss": 15.7922, "step": 2949 }, { "epoch": 0.39, "learning_rate": 6.994597833463101e-05, "loss": 17.6278, "step": 2950 }, { "epoch": 0.39, "learning_rate": 6.992642979826064e-05, "loss": 17.3098, "step": 2951 }, { "epoch": 0.39, "learning_rate": 6.990687764019322e-05, "loss": 15.3099, "step": 2952 }, { "epoch": 0.39, "learning_rate": 6.98873218639824e-05, "loss": 16.9829, "step": 2953 }, { "epoch": 0.39, "learning_rate": 6.986776247318256e-05, "loss": 19.5315, "step": 2954 }, { "epoch": 0.39, "learning_rate": 6.984819947134866e-05, "loss": 16.6466, "step": 2955 }, { "epoch": 0.39, "learning_rate": 6.982863286203632e-05, "loss": 0.0002, "step": 2956 }, { "epoch": 0.39, "learning_rate": 6.980906264880186e-05, "loss": 16.0429, "step": 2957 }, { "epoch": 0.39, "learning_rate": 6.978948883520225e-05, "loss": 17.3174, "step": 2958 }, { "epoch": 0.39, "learning_rate": 6.976991142479508e-05, "loss": 17.6045, "step": 2959 }, { "epoch": 0.39, "learning_rate": 6.97503304211386e-05, "loss": 0.0, "step": 2960 }, { "epoch": 0.39, "learning_rate": 6.973074582779174e-05, "loss": 0.0002, "step": 2961 }, { "epoch": 0.39, "learning_rate": 6.971115764831408e-05, "loss": 17.5046, "step": 2962 }, { "epoch": 0.39, "learning_rate": 6.96915658862658e-05, "loss": 18.0533, "step": 2963 }, { "epoch": 0.39, "learning_rate": 6.96719705452078e-05, "loss": 18.7515, "step": 2964 }, { "epoch": 0.39, "learning_rate": 6.965237162870161e-05, "loss": 0.0001, "step": 2965 }, { "epoch": 0.39, "learning_rate": 6.963276914030937e-05, "loss": 0.0, "step": 2966 }, { "epoch": 0.39, "learning_rate": 6.961316308359392e-05, "loss": 18.0415, "step": 2967 }, { "epoch": 0.39, "learning_rate": 6.959355346211871e-05, "loss": 18.0945, "step": 2968 }, { "epoch": 0.39, "learning_rate": 6.957394027944788e-05, "loss": 16.5139, "step": 2969 }, { "epoch": 0.39, "learning_rate": 6.955432353914618e-05, "loss": 0.0, "step": 2970 }, { "epoch": 0.39, "learning_rate": 6.953470324477904e-05, "loss": 18.2916, "step": 2971 }, { "epoch": 0.39, "learning_rate": 6.951507939991247e-05, "loss": 0.0001, "step": 2972 }, { "epoch": 0.39, "learning_rate": 6.94954520081132e-05, "loss": 17.704, "step": 2973 }, { "epoch": 0.39, "learning_rate": 6.947582107294859e-05, "loss": 18.981, "step": 2974 }, { "epoch": 0.39, "learning_rate": 6.945618659798661e-05, "loss": 18.0239, "step": 2975 }, { "epoch": 0.39, "learning_rate": 6.94365485867959e-05, "loss": 17.0418, "step": 2976 }, { "epoch": 0.39, "learning_rate": 6.941690704294572e-05, "loss": 16.939, "step": 2977 }, { "epoch": 0.39, "learning_rate": 6.9397261970006e-05, "loss": 17.82, "step": 2978 }, { "epoch": 0.39, "learning_rate": 6.93776133715473e-05, "loss": 17.2862, "step": 2979 }, { "epoch": 0.39, "learning_rate": 6.935796125114084e-05, "loss": 17.7098, "step": 2980 }, { "epoch": 0.39, "learning_rate": 6.933830561235844e-05, "loss": 16.8862, "step": 2981 }, { "epoch": 0.39, "learning_rate": 6.931864645877256e-05, "loss": 0.0007, "step": 2982 }, { "epoch": 0.39, "learning_rate": 6.929898379395635e-05, "loss": 16.4094, "step": 2983 }, { "epoch": 0.39, "learning_rate": 6.927931762148357e-05, "loss": 18.3736, "step": 2984 }, { "epoch": 0.39, "learning_rate": 6.925964794492858e-05, "loss": 15.1741, "step": 2985 }, { "epoch": 0.39, "learning_rate": 6.923997476786645e-05, "loss": 17.8336, "step": 2986 }, { "epoch": 0.39, "learning_rate": 6.922029809387282e-05, "loss": 16.3246, "step": 2987 }, { "epoch": 0.39, "learning_rate": 6.920061792652401e-05, "loss": 17.891, "step": 2988 }, { "epoch": 0.39, "learning_rate": 6.918093426939695e-05, "loss": 15.0534, "step": 2989 }, { "epoch": 0.39, "learning_rate": 6.91612471260692e-05, "loss": 17.2681, "step": 2990 }, { "epoch": 0.39, "learning_rate": 6.9141556500119e-05, "loss": 16.7619, "step": 2991 }, { "epoch": 0.39, "learning_rate": 6.912186239512516e-05, "loss": 0.0001, "step": 2992 }, { "epoch": 0.39, "learning_rate": 6.910216481466717e-05, "loss": 17.4376, "step": 2993 }, { "epoch": 0.39, "learning_rate": 6.90824637623251e-05, "loss": 0.0, "step": 2994 }, { "epoch": 0.39, "learning_rate": 6.906275924167972e-05, "loss": 0.0001, "step": 2995 }, { "epoch": 0.39, "learning_rate": 6.904305125631238e-05, "loss": 0.0, "step": 2996 }, { "epoch": 0.39, "learning_rate": 6.902333980980507e-05, "loss": 0.0, "step": 2997 }, { "epoch": 0.39, "learning_rate": 6.900362490574041e-05, "loss": 17.3068, "step": 2998 }, { "epoch": 0.39, "learning_rate": 6.898390654770168e-05, "loss": 16.6532, "step": 2999 }, { "epoch": 0.39, "learning_rate": 6.896418473927273e-05, "loss": 17.7732, "step": 3000 }, { "epoch": 0.4, "learning_rate": 6.894445948403808e-05, "loss": 0.0, "step": 3001 }, { "epoch": 0.4, "learning_rate": 6.892473078558285e-05, "loss": 15.7553, "step": 3002 }, { "epoch": 0.4, "learning_rate": 6.89049986474928e-05, "loss": 17.8248, "step": 3003 }, { "epoch": 0.4, "learning_rate": 6.888526307335434e-05, "loss": 0.0, "step": 3004 }, { "epoch": 0.4, "learning_rate": 6.886552406675444e-05, "loss": 17.4849, "step": 3005 }, { "epoch": 0.4, "learning_rate": 6.884578163128075e-05, "loss": 18.8482, "step": 3006 }, { "epoch": 0.4, "learning_rate": 6.882603577052154e-05, "loss": 18.2992, "step": 3007 }, { "epoch": 0.4, "learning_rate": 6.880628648806567e-05, "loss": 16.4032, "step": 3008 }, { "epoch": 0.4, "learning_rate": 6.878653378750264e-05, "loss": 17.3425, "step": 3009 }, { "epoch": 0.4, "learning_rate": 6.876677767242258e-05, "loss": 20.2481, "step": 3010 }, { "epoch": 0.4, "learning_rate": 6.874701814641622e-05, "loss": 17.2738, "step": 3011 }, { "epoch": 0.4, "learning_rate": 6.872725521307495e-05, "loss": 18.8094, "step": 3012 }, { "epoch": 0.4, "learning_rate": 6.87074888759907e-05, "loss": 16.6815, "step": 3013 }, { "epoch": 0.4, "learning_rate": 6.868771913875611e-05, "loss": 16.1427, "step": 3014 }, { "epoch": 0.4, "learning_rate": 6.86679460049644e-05, "loss": 0.0012, "step": 3015 }, { "epoch": 0.4, "learning_rate": 6.864816947820939e-05, "loss": 0.0, "step": 3016 }, { "epoch": 0.4, "learning_rate": 6.862838956208553e-05, "loss": 0.0, "step": 3017 }, { "epoch": 0.4, "learning_rate": 6.860860626018789e-05, "loss": 17.5972, "step": 3018 }, { "epoch": 0.4, "learning_rate": 6.858881957611215e-05, "loss": 18.0898, "step": 3019 }, { "epoch": 0.4, "learning_rate": 6.856902951345461e-05, "loss": 18.6862, "step": 3020 }, { "epoch": 0.4, "learning_rate": 6.85492360758122e-05, "loss": 0.0001, "step": 3021 }, { "epoch": 0.4, "learning_rate": 6.852943926678242e-05, "loss": 20.0157, "step": 3022 }, { "epoch": 0.4, "learning_rate": 6.850963908996342e-05, "loss": 0.0, "step": 3023 }, { "epoch": 0.4, "learning_rate": 6.848983554895395e-05, "loss": 18.5176, "step": 3024 }, { "epoch": 0.4, "learning_rate": 6.847002864735337e-05, "loss": 18.183, "step": 3025 }, { "epoch": 0.4, "learning_rate": 6.845021838876165e-05, "loss": 16.4695, "step": 3026 }, { "epoch": 0.4, "learning_rate": 6.843040477677935e-05, "loss": 16.0815, "step": 3027 }, { "epoch": 0.4, "learning_rate": 6.841058781500772e-05, "loss": 0.0001, "step": 3028 }, { "epoch": 0.4, "learning_rate": 6.839076750704852e-05, "loss": 0.0011, "step": 3029 }, { "epoch": 0.4, "learning_rate": 6.837094385650416e-05, "loss": 18.7593, "step": 3030 }, { "epoch": 0.4, "learning_rate": 6.835111686697767e-05, "loss": 0.0002, "step": 3031 }, { "epoch": 0.4, "learning_rate": 6.833128654207264e-05, "loss": 0.0002, "step": 3032 }, { "epoch": 0.4, "learning_rate": 6.831145288539336e-05, "loss": 0.0001, "step": 3033 }, { "epoch": 0.4, "learning_rate": 6.829161590054462e-05, "loss": 0.0001, "step": 3034 }, { "epoch": 0.4, "learning_rate": 6.827177559113186e-05, "loss": 18.0803, "step": 3035 }, { "epoch": 0.4, "learning_rate": 6.825193196076115e-05, "loss": 18.9919, "step": 3036 }, { "epoch": 0.4, "learning_rate": 6.823208501303911e-05, "loss": 18.2453, "step": 3037 }, { "epoch": 0.4, "learning_rate": 6.821223475157303e-05, "loss": 15.8566, "step": 3038 }, { "epoch": 0.4, "learning_rate": 6.819238117997071e-05, "loss": 17.4093, "step": 3039 }, { "epoch": 0.4, "learning_rate": 6.817252430184064e-05, "loss": 17.8163, "step": 3040 }, { "epoch": 0.4, "learning_rate": 6.815266412079188e-05, "loss": 15.4329, "step": 3041 }, { "epoch": 0.4, "learning_rate": 6.813280064043406e-05, "loss": 17.9604, "step": 3042 }, { "epoch": 0.4, "learning_rate": 6.811293386437746e-05, "loss": 15.5758, "step": 3043 }, { "epoch": 0.4, "learning_rate": 6.80930637962329e-05, "loss": 0.0, "step": 3044 }, { "epoch": 0.4, "learning_rate": 6.807319043961189e-05, "loss": 0.0006, "step": 3045 }, { "epoch": 0.4, "learning_rate": 6.805331379812643e-05, "loss": 18.6554, "step": 3046 }, { "epoch": 0.4, "learning_rate": 6.80334338753892e-05, "loss": 0.0, "step": 3047 }, { "epoch": 0.4, "learning_rate": 6.801355067501343e-05, "loss": 17.125, "step": 3048 }, { "epoch": 0.4, "learning_rate": 6.799366420061295e-05, "loss": 16.998, "step": 3049 }, { "epoch": 0.4, "learning_rate": 6.797377445580222e-05, "loss": 17.137, "step": 3050 }, { "epoch": 0.4, "learning_rate": 6.795388144419624e-05, "loss": 0.0, "step": 3051 }, { "epoch": 0.4, "learning_rate": 6.793398516941065e-05, "loss": 17.8472, "step": 3052 }, { "epoch": 0.4, "learning_rate": 6.791408563506168e-05, "loss": 17.2012, "step": 3053 }, { "epoch": 0.4, "learning_rate": 6.789418284476611e-05, "loss": 0.0001, "step": 3054 }, { "epoch": 0.4, "learning_rate": 6.787427680214136e-05, "loss": 0.0, "step": 3055 }, { "epoch": 0.4, "learning_rate": 6.78543675108054e-05, "loss": 16.9088, "step": 3056 }, { "epoch": 0.4, "learning_rate": 6.783445497437685e-05, "loss": 0.0002, "step": 3057 }, { "epoch": 0.4, "learning_rate": 6.781453919647485e-05, "loss": 0.0, "step": 3058 }, { "epoch": 0.4, "learning_rate": 6.779462018071917e-05, "loss": 16.8043, "step": 3059 }, { "epoch": 0.4, "learning_rate": 6.777469793073017e-05, "loss": 16.9368, "step": 3060 }, { "epoch": 0.4, "learning_rate": 6.775477245012877e-05, "loss": 17.0178, "step": 3061 }, { "epoch": 0.4, "learning_rate": 6.77348437425365e-05, "loss": 18.7137, "step": 3062 }, { "epoch": 0.4, "learning_rate": 6.771491181157547e-05, "loss": 16.6844, "step": 3063 }, { "epoch": 0.4, "learning_rate": 6.769497666086838e-05, "loss": 17.3671, "step": 3064 }, { "epoch": 0.4, "learning_rate": 6.767503829403852e-05, "loss": 19.2991, "step": 3065 }, { "epoch": 0.4, "learning_rate": 6.765509671470976e-05, "loss": 16.9393, "step": 3066 }, { "epoch": 0.4, "learning_rate": 6.763515192650652e-05, "loss": 0.0, "step": 3067 }, { "epoch": 0.4, "learning_rate": 6.761520393305384e-05, "loss": 19.4459, "step": 3068 }, { "epoch": 0.4, "learning_rate": 6.759525273797738e-05, "loss": 17.1473, "step": 3069 }, { "epoch": 0.4, "learning_rate": 6.757529834490328e-05, "loss": 0.0001, "step": 3070 }, { "epoch": 0.4, "learning_rate": 6.755534075745837e-05, "loss": 17.2945, "step": 3071 }, { "epoch": 0.4, "learning_rate": 6.753537997926997e-05, "loss": 17.6984, "step": 3072 }, { "epoch": 0.4, "learning_rate": 6.751541601396605e-05, "loss": 0.0005, "step": 3073 }, { "epoch": 0.4, "learning_rate": 6.749544886517511e-05, "loss": 16.9074, "step": 3074 }, { "epoch": 0.4, "learning_rate": 6.747547853652626e-05, "loss": 0.0, "step": 3075 }, { "epoch": 0.4, "learning_rate": 6.745550503164915e-05, "loss": 18.4603, "step": 3076 }, { "epoch": 0.41, "learning_rate": 6.743552835417407e-05, "loss": 16.7098, "step": 3077 }, { "epoch": 0.41, "learning_rate": 6.741554850773185e-05, "loss": 17.0448, "step": 3078 }, { "epoch": 0.41, "learning_rate": 6.739556549595384e-05, "loss": 19.4302, "step": 3079 }, { "epoch": 0.41, "learning_rate": 6.737557932247207e-05, "loss": 17.0803, "step": 3080 }, { "epoch": 0.41, "learning_rate": 6.735558999091911e-05, "loss": 17.2291, "step": 3081 }, { "epoch": 0.41, "learning_rate": 6.733559750492804e-05, "loss": 15.9044, "step": 3082 }, { "epoch": 0.41, "learning_rate": 6.73156018681326e-05, "loss": 19.3437, "step": 3083 }, { "epoch": 0.41, "learning_rate": 6.729560308416705e-05, "loss": 17.3333, "step": 3084 }, { "epoch": 0.41, "learning_rate": 6.727560115666624e-05, "loss": 17.0297, "step": 3085 }, { "epoch": 0.41, "learning_rate": 6.72555960892656e-05, "loss": 16.7252, "step": 3086 }, { "epoch": 0.41, "learning_rate": 6.72355878856011e-05, "loss": 18.6306, "step": 3087 }, { "epoch": 0.41, "learning_rate": 6.72155765493093e-05, "loss": 16.7795, "step": 3088 }, { "epoch": 0.41, "learning_rate": 6.719556208402735e-05, "loss": 0.0003, "step": 3089 }, { "epoch": 0.41, "learning_rate": 6.717554449339294e-05, "loss": 16.3496, "step": 3090 }, { "epoch": 0.41, "learning_rate": 6.715552378104432e-05, "loss": 18.4198, "step": 3091 }, { "epoch": 0.41, "learning_rate": 6.713549995062036e-05, "loss": 17.5656, "step": 3092 }, { "epoch": 0.41, "learning_rate": 6.711547300576041e-05, "loss": 16.0696, "step": 3093 }, { "epoch": 0.41, "learning_rate": 6.709544295010449e-05, "loss": 17.7727, "step": 3094 }, { "epoch": 0.41, "learning_rate": 6.707540978729308e-05, "loss": 16.9968, "step": 3095 }, { "epoch": 0.41, "learning_rate": 6.70553735209673e-05, "loss": 16.1418, "step": 3096 }, { "epoch": 0.41, "learning_rate": 6.703533415476881e-05, "loss": 16.8301, "step": 3097 }, { "epoch": 0.41, "learning_rate": 6.701529169233983e-05, "loss": 0.0, "step": 3098 }, { "epoch": 0.41, "learning_rate": 6.699524613732315e-05, "loss": 19.2597, "step": 3099 }, { "epoch": 0.41, "learning_rate": 6.697519749336215e-05, "loss": 0.0001, "step": 3100 }, { "epoch": 0.41, "learning_rate": 6.695514576410066e-05, "loss": 15.9058, "step": 3101 }, { "epoch": 0.41, "learning_rate": 6.693509095318323e-05, "loss": 16.7102, "step": 3102 }, { "epoch": 0.41, "learning_rate": 6.691503306425484e-05, "loss": 17.7706, "step": 3103 }, { "epoch": 0.41, "learning_rate": 6.68949721009611e-05, "loss": 17.505, "step": 3104 }, { "epoch": 0.41, "learning_rate": 6.687490806694816e-05, "loss": 0.0009, "step": 3105 }, { "epoch": 0.41, "learning_rate": 6.685484096586274e-05, "loss": 0.0001, "step": 3106 }, { "epoch": 0.41, "learning_rate": 6.683477080135207e-05, "loss": 16.6252, "step": 3107 }, { "epoch": 0.41, "learning_rate": 6.681469757706403e-05, "loss": 16.4663, "step": 3108 }, { "epoch": 0.41, "learning_rate": 6.679462129664691e-05, "loss": 0.0003, "step": 3109 }, { "epoch": 0.41, "learning_rate": 6.677454196374971e-05, "loss": 16.4015, "step": 3110 }, { "epoch": 0.41, "learning_rate": 6.675445958202191e-05, "loss": 17.0587, "step": 3111 }, { "epoch": 0.41, "learning_rate": 6.673437415511354e-05, "loss": 18.1069, "step": 3112 }, { "epoch": 0.41, "learning_rate": 6.671428568667518e-05, "loss": 17.3679, "step": 3113 }, { "epoch": 0.41, "learning_rate": 6.6694194180358e-05, "loss": 15.8354, "step": 3114 }, { "epoch": 0.41, "learning_rate": 6.66740996398137e-05, "loss": 18.209, "step": 3115 }, { "epoch": 0.41, "learning_rate": 6.665400206869453e-05, "loss": 18.1771, "step": 3116 }, { "epoch": 0.41, "learning_rate": 6.663390147065328e-05, "loss": 0.0011, "step": 3117 }, { "epoch": 0.41, "learning_rate": 6.661379784934332e-05, "loss": 0.0, "step": 3118 }, { "epoch": 0.41, "learning_rate": 6.659369120841854e-05, "loss": 17.5403, "step": 3119 }, { "epoch": 0.41, "learning_rate": 6.65735815515334e-05, "loss": 18.489, "step": 3120 }, { "epoch": 0.41, "learning_rate": 6.655346888234289e-05, "loss": 0.0001, "step": 3121 }, { "epoch": 0.41, "learning_rate": 6.653335320450255e-05, "loss": 16.8243, "step": 3122 }, { "epoch": 0.41, "learning_rate": 6.65132345216685e-05, "loss": 17.0554, "step": 3123 }, { "epoch": 0.41, "learning_rate": 6.649311283749736e-05, "loss": 16.3139, "step": 3124 }, { "epoch": 0.41, "learning_rate": 6.647298815564632e-05, "loss": 17.3875, "step": 3125 }, { "epoch": 0.41, "learning_rate": 6.645286047977311e-05, "loss": 18.1109, "step": 3126 }, { "epoch": 0.41, "learning_rate": 6.6432729813536e-05, "loss": 0.0, "step": 3127 }, { "epoch": 0.41, "learning_rate": 6.641259616059383e-05, "loss": 15.8164, "step": 3128 }, { "epoch": 0.41, "learning_rate": 6.63924595246059e-05, "loss": 17.7108, "step": 3129 }, { "epoch": 0.41, "learning_rate": 6.63723199092322e-05, "loss": 17.8996, "step": 3130 }, { "epoch": 0.41, "learning_rate": 6.635217731813311e-05, "loss": 18.3701, "step": 3131 }, { "epoch": 0.41, "learning_rate": 6.633203175496964e-05, "loss": 0.0001, "step": 3132 }, { "epoch": 0.41, "learning_rate": 6.631188322340329e-05, "loss": 0.0001, "step": 3133 }, { "epoch": 0.41, "learning_rate": 6.629173172709616e-05, "loss": 17.7071, "step": 3134 }, { "epoch": 0.41, "learning_rate": 6.627157726971084e-05, "loss": 0.0001, "step": 3135 }, { "epoch": 0.41, "learning_rate": 6.625141985491047e-05, "loss": 19.6346, "step": 3136 }, { "epoch": 0.41, "learning_rate": 6.623125948635873e-05, "loss": 17.4034, "step": 3137 }, { "epoch": 0.41, "learning_rate": 6.621109616771982e-05, "loss": 0.0001, "step": 3138 }, { "epoch": 0.41, "learning_rate": 6.619092990265851e-05, "loss": 18.181, "step": 3139 }, { "epoch": 0.41, "learning_rate": 6.617076069484011e-05, "loss": 0.0004, "step": 3140 }, { "epoch": 0.41, "learning_rate": 6.61505885479304e-05, "loss": 17.6704, "step": 3141 }, { "epoch": 0.41, "learning_rate": 6.613041346559575e-05, "loss": 17.8125, "step": 3142 }, { "epoch": 0.41, "learning_rate": 6.611023545150306e-05, "loss": 0.0, "step": 3143 }, { "epoch": 0.41, "learning_rate": 6.609005450931976e-05, "loss": 0.0, "step": 3144 }, { "epoch": 0.41, "learning_rate": 6.606987064271379e-05, "loss": 17.256, "step": 3145 }, { "epoch": 0.41, "learning_rate": 6.604968385535364e-05, "loss": 18.5355, "step": 3146 }, { "epoch": 0.41, "learning_rate": 6.602949415090832e-05, "loss": 0.0002, "step": 3147 }, { "epoch": 0.41, "learning_rate": 6.60093015330474e-05, "loss": 0.0002, "step": 3148 }, { "epoch": 0.41, "learning_rate": 6.598910600544095e-05, "loss": 18.6402, "step": 3149 }, { "epoch": 0.41, "learning_rate": 6.596890757175956e-05, "loss": 17.7125, "step": 3150 }, { "epoch": 0.41, "learning_rate": 6.594870623567438e-05, "loss": 0.0, "step": 3151 }, { "epoch": 0.41, "learning_rate": 6.592850200085707e-05, "loss": 17.2145, "step": 3152 }, { "epoch": 0.42, "learning_rate": 6.590829487097983e-05, "loss": 18.1634, "step": 3153 }, { "epoch": 0.42, "learning_rate": 6.588808484971535e-05, "loss": 18.529, "step": 3154 }, { "epoch": 0.42, "learning_rate": 6.586787194073688e-05, "loss": 19.2547, "step": 3155 }, { "epoch": 0.42, "learning_rate": 6.584765614771821e-05, "loss": 18.5087, "step": 3156 }, { "epoch": 0.42, "learning_rate": 6.582743747433359e-05, "loss": 16.8477, "step": 3157 }, { "epoch": 0.42, "learning_rate": 6.580721592425785e-05, "loss": 17.5619, "step": 3158 }, { "epoch": 0.42, "learning_rate": 6.578699150116634e-05, "loss": 16.7291, "step": 3159 }, { "epoch": 0.42, "learning_rate": 6.576676420873492e-05, "loss": 18.3378, "step": 3160 }, { "epoch": 0.42, "learning_rate": 6.574653405063992e-05, "loss": 16.7838, "step": 3161 }, { "epoch": 0.42, "learning_rate": 6.572630103055832e-05, "loss": 16.9157, "step": 3162 }, { "epoch": 0.42, "learning_rate": 6.570606515216747e-05, "loss": 18.2234, "step": 3163 }, { "epoch": 0.42, "learning_rate": 6.568582641914536e-05, "loss": 16.2896, "step": 3164 }, { "epoch": 0.42, "learning_rate": 6.56655848351704e-05, "loss": 0.0016, "step": 3165 }, { "epoch": 0.42, "learning_rate": 6.564534040392163e-05, "loss": 0.0001, "step": 3166 }, { "epoch": 0.42, "learning_rate": 6.562509312907849e-05, "loss": 16.4947, "step": 3167 }, { "epoch": 0.42, "learning_rate": 6.560484301432102e-05, "loss": 17.7536, "step": 3168 }, { "epoch": 0.42, "learning_rate": 6.558459006332973e-05, "loss": 18.0868, "step": 3169 }, { "epoch": 0.42, "learning_rate": 6.55643342797857e-05, "loss": 0.0001, "step": 3170 }, { "epoch": 0.42, "learning_rate": 6.554407566737042e-05, "loss": 16.1288, "step": 3171 }, { "epoch": 0.42, "learning_rate": 6.552381422976604e-05, "loss": 20.167, "step": 3172 }, { "epoch": 0.42, "learning_rate": 6.550354997065509e-05, "loss": 17.9931, "step": 3173 }, { "epoch": 0.42, "learning_rate": 6.548328289372069e-05, "loss": 0.0, "step": 3174 }, { "epoch": 0.42, "learning_rate": 6.546301300264645e-05, "loss": 17.2672, "step": 3175 }, { "epoch": 0.42, "learning_rate": 6.544274030111651e-05, "loss": 17.5365, "step": 3176 }, { "epoch": 0.42, "learning_rate": 6.542246479281548e-05, "loss": 16.8565, "step": 3177 }, { "epoch": 0.42, "learning_rate": 6.54021864814285e-05, "loss": 17.3229, "step": 3178 }, { "epoch": 0.42, "learning_rate": 6.538190537064123e-05, "loss": 17.6007, "step": 3179 }, { "epoch": 0.42, "learning_rate": 6.536162146413985e-05, "loss": 17.6751, "step": 3180 }, { "epoch": 0.42, "learning_rate": 6.5341334765611e-05, "loss": 16.8897, "step": 3181 }, { "epoch": 0.42, "learning_rate": 6.532104527874186e-05, "loss": 0.0003, "step": 3182 }, { "epoch": 0.42, "learning_rate": 6.530075300722012e-05, "loss": 0.0001, "step": 3183 }, { "epoch": 0.42, "learning_rate": 6.5280457954734e-05, "loss": 0.0001, "step": 3184 }, { "epoch": 0.42, "learning_rate": 6.526016012497216e-05, "loss": 17.2792, "step": 3185 }, { "epoch": 0.42, "learning_rate": 6.52398595216238e-05, "loss": 17.8994, "step": 3186 }, { "epoch": 0.42, "learning_rate": 6.521955614837865e-05, "loss": 0.0, "step": 3187 }, { "epoch": 0.42, "learning_rate": 6.519925000892687e-05, "loss": 16.0961, "step": 3188 }, { "epoch": 0.42, "learning_rate": 6.517894110695923e-05, "loss": 16.7367, "step": 3189 }, { "epoch": 0.42, "learning_rate": 6.51586294461669e-05, "loss": 16.8967, "step": 3190 }, { "epoch": 0.42, "learning_rate": 6.513831503024162e-05, "loss": 15.8682, "step": 3191 }, { "epoch": 0.42, "learning_rate": 6.511799786287557e-05, "loss": 15.819, "step": 3192 }, { "epoch": 0.42, "learning_rate": 6.509767794776153e-05, "loss": 0.0001, "step": 3193 }, { "epoch": 0.42, "learning_rate": 6.507735528859264e-05, "loss": 0.0005, "step": 3194 }, { "epoch": 0.42, "learning_rate": 6.505702988906267e-05, "loss": 18.5331, "step": 3195 }, { "epoch": 0.42, "learning_rate": 6.503670175286578e-05, "loss": 16.64, "step": 3196 }, { "epoch": 0.42, "learning_rate": 6.501637088369674e-05, "loss": 18.1111, "step": 3197 }, { "epoch": 0.42, "learning_rate": 6.49960372852507e-05, "loss": 16.3179, "step": 3198 }, { "epoch": 0.42, "learning_rate": 6.497570096122339e-05, "loss": 18.1052, "step": 3199 }, { "epoch": 0.42, "learning_rate": 6.495536191531098e-05, "loss": 0.0009, "step": 3200 }, { "epoch": 0.42, "learning_rate": 6.49350201512102e-05, "loss": 17.9967, "step": 3201 }, { "epoch": 0.42, "learning_rate": 6.491467567261822e-05, "loss": 0.0, "step": 3202 }, { "epoch": 0.42, "learning_rate": 6.489432848323269e-05, "loss": 0.0001, "step": 3203 }, { "epoch": 0.42, "learning_rate": 6.487397858675182e-05, "loss": 17.864, "step": 3204 }, { "epoch": 0.42, "learning_rate": 6.485362598687427e-05, "loss": 17.6039, "step": 3205 }, { "epoch": 0.42, "learning_rate": 6.483327068729919e-05, "loss": 16.3835, "step": 3206 }, { "epoch": 0.42, "learning_rate": 6.48129126917262e-05, "loss": 0.0, "step": 3207 }, { "epoch": 0.42, "learning_rate": 6.479255200385545e-05, "loss": 0.0, "step": 3208 }, { "epoch": 0.42, "learning_rate": 6.47721886273876e-05, "loss": 19.288, "step": 3209 }, { "epoch": 0.42, "learning_rate": 6.475182256602372e-05, "loss": 0.0, "step": 3210 }, { "epoch": 0.42, "learning_rate": 6.473145382346542e-05, "loss": 16.3268, "step": 3211 }, { "epoch": 0.42, "learning_rate": 6.47110824034148e-05, "loss": 16.6395, "step": 3212 }, { "epoch": 0.42, "learning_rate": 6.469070830957444e-05, "loss": 17.2347, "step": 3213 }, { "epoch": 0.42, "learning_rate": 6.467033154564738e-05, "loss": 17.1478, "step": 3214 }, { "epoch": 0.42, "learning_rate": 6.46499521153372e-05, "loss": 18.0283, "step": 3215 }, { "epoch": 0.42, "learning_rate": 6.462957002234788e-05, "loss": 16.7159, "step": 3216 }, { "epoch": 0.42, "learning_rate": 6.460918527038397e-05, "loss": 17.0706, "step": 3217 }, { "epoch": 0.42, "learning_rate": 6.458879786315048e-05, "loss": 0.0003, "step": 3218 }, { "epoch": 0.42, "learning_rate": 6.456840780435289e-05, "loss": 17.9569, "step": 3219 }, { "epoch": 0.42, "learning_rate": 6.454801509769712e-05, "loss": 18.8649, "step": 3220 }, { "epoch": 0.42, "learning_rate": 6.452761974688966e-05, "loss": 0.0001, "step": 3221 }, { "epoch": 0.42, "learning_rate": 6.450722175563741e-05, "loss": 19.2665, "step": 3222 }, { "epoch": 0.42, "learning_rate": 6.448682112764779e-05, "loss": 0.0001, "step": 3223 }, { "epoch": 0.42, "learning_rate": 6.446641786662867e-05, "loss": 0.0, "step": 3224 }, { "epoch": 0.42, "learning_rate": 6.444601197628844e-05, "loss": 0.0001, "step": 3225 }, { "epoch": 0.42, "learning_rate": 6.442560346033591e-05, "loss": 18.387, "step": 3226 }, { "epoch": 0.42, "learning_rate": 6.44051923224804e-05, "loss": 0.0, "step": 3227 }, { "epoch": 0.42, "learning_rate": 6.438477856643172e-05, "loss": 0.0, "step": 3228 }, { "epoch": 0.43, "learning_rate": 6.436436219590014e-05, "loss": 17.876, "step": 3229 }, { "epoch": 0.43, "learning_rate": 6.43439432145964e-05, "loss": 17.3763, "step": 3230 }, { "epoch": 0.43, "learning_rate": 6.432352162623171e-05, "loss": 16.5978, "step": 3231 }, { "epoch": 0.43, "learning_rate": 6.430309743451777e-05, "loss": 0.0001, "step": 3232 }, { "epoch": 0.43, "learning_rate": 6.428267064316675e-05, "loss": 0.0002, "step": 3233 }, { "epoch": 0.43, "learning_rate": 6.426224125589131e-05, "loss": 17.2227, "step": 3234 }, { "epoch": 0.43, "learning_rate": 6.424180927640453e-05, "loss": 15.9547, "step": 3235 }, { "epoch": 0.43, "learning_rate": 6.422137470842001e-05, "loss": 17.0326, "step": 3236 }, { "epoch": 0.43, "learning_rate": 6.420093755565177e-05, "loss": 17.093, "step": 3237 }, { "epoch": 0.43, "learning_rate": 6.418049782181438e-05, "loss": 18.2775, "step": 3238 }, { "epoch": 0.43, "learning_rate": 6.416005551062281e-05, "loss": 15.2083, "step": 3239 }, { "epoch": 0.43, "learning_rate": 6.413961062579253e-05, "loss": 16.2239, "step": 3240 }, { "epoch": 0.43, "learning_rate": 6.411916317103942e-05, "loss": 17.0984, "step": 3241 }, { "epoch": 0.43, "learning_rate": 6.409871315007995e-05, "loss": 0.0002, "step": 3242 }, { "epoch": 0.43, "learning_rate": 6.407826056663094e-05, "loss": 18.0357, "step": 3243 }, { "epoch": 0.43, "learning_rate": 6.405780542440972e-05, "loss": 16.7098, "step": 3244 }, { "epoch": 0.43, "learning_rate": 6.403734772713408e-05, "loss": 16.4802, "step": 3245 }, { "epoch": 0.43, "learning_rate": 6.401688747852229e-05, "loss": 17.5722, "step": 3246 }, { "epoch": 0.43, "learning_rate": 6.399642468229306e-05, "loss": 16.2143, "step": 3247 }, { "epoch": 0.43, "learning_rate": 6.397595934216559e-05, "loss": 17.1038, "step": 3248 }, { "epoch": 0.43, "learning_rate": 6.395549146185948e-05, "loss": 17.7738, "step": 3249 }, { "epoch": 0.43, "learning_rate": 6.39350210450949e-05, "loss": 19.1923, "step": 3250 }, { "epoch": 0.43, "learning_rate": 6.391454809559237e-05, "loss": 18.4887, "step": 3251 }, { "epoch": 0.43, "learning_rate": 6.389407261707295e-05, "loss": 0.0, "step": 3252 }, { "epoch": 0.43, "learning_rate": 6.38735946132581e-05, "loss": 0.0, "step": 3253 }, { "epoch": 0.43, "learning_rate": 6.38531140878698e-05, "loss": 18.9639, "step": 3254 }, { "epoch": 0.43, "learning_rate": 6.383263104463043e-05, "loss": 17.3878, "step": 3255 }, { "epoch": 0.43, "learning_rate": 6.381214548726287e-05, "loss": 17.0834, "step": 3256 }, { "epoch": 0.43, "learning_rate": 6.379165741949041e-05, "loss": 16.3625, "step": 3257 }, { "epoch": 0.43, "learning_rate": 6.377116684503686e-05, "loss": 17.5142, "step": 3258 }, { "epoch": 0.43, "learning_rate": 6.375067376762645e-05, "loss": 17.0325, "step": 3259 }, { "epoch": 0.43, "learning_rate": 6.373017819098386e-05, "loss": 0.0002, "step": 3260 }, { "epoch": 0.43, "learning_rate": 6.370968011883423e-05, "loss": 16.1099, "step": 3261 }, { "epoch": 0.43, "learning_rate": 6.368917955490314e-05, "loss": 16.4182, "step": 3262 }, { "epoch": 0.43, "learning_rate": 6.366867650291667e-05, "loss": 17.9495, "step": 3263 }, { "epoch": 0.43, "learning_rate": 6.36481709666013e-05, "loss": 15.9926, "step": 3264 }, { "epoch": 0.43, "learning_rate": 6.362766294968399e-05, "loss": 16.5847, "step": 3265 }, { "epoch": 0.43, "learning_rate": 6.360715245589212e-05, "loss": 0.0001, "step": 3266 }, { "epoch": 0.43, "learning_rate": 6.358663948895358e-05, "loss": 0.0006, "step": 3267 }, { "epoch": 0.43, "learning_rate": 6.356612405259664e-05, "loss": 0.0002, "step": 3268 }, { "epoch": 0.43, "learning_rate": 6.354560615055006e-05, "loss": 17.2299, "step": 3269 }, { "epoch": 0.43, "learning_rate": 6.352508578654304e-05, "loss": 18.2517, "step": 3270 }, { "epoch": 0.43, "learning_rate": 6.350456296430525e-05, "loss": 17.0085, "step": 3271 }, { "epoch": 0.43, "learning_rate": 6.348403768756675e-05, "loss": 17.5334, "step": 3272 }, { "epoch": 0.43, "learning_rate": 6.346350996005808e-05, "loss": 17.8341, "step": 3273 }, { "epoch": 0.43, "learning_rate": 6.344297978551025e-05, "loss": 0.0003, "step": 3274 }, { "epoch": 0.43, "learning_rate": 6.342244716765469e-05, "loss": 0.0, "step": 3275 }, { "epoch": 0.43, "learning_rate": 6.340191211022323e-05, "loss": 16.734, "step": 3276 }, { "epoch": 0.43, "learning_rate": 6.338137461694823e-05, "loss": 17.634, "step": 3277 }, { "epoch": 0.43, "learning_rate": 6.336083469156244e-05, "loss": 17.2622, "step": 3278 }, { "epoch": 0.43, "learning_rate": 6.334029233779907e-05, "loss": 0.0, "step": 3279 }, { "epoch": 0.43, "learning_rate": 6.331974755939174e-05, "loss": 18.9826, "step": 3280 }, { "epoch": 0.43, "learning_rate": 6.329920036007457e-05, "loss": 16.8465, "step": 3281 }, { "epoch": 0.43, "learning_rate": 6.327865074358203e-05, "loss": 18.4603, "step": 3282 }, { "epoch": 0.43, "learning_rate": 6.325809871364912e-05, "loss": 18.196, "step": 3283 }, { "epoch": 0.43, "learning_rate": 6.323754427401125e-05, "loss": 15.3884, "step": 3284 }, { "epoch": 0.43, "learning_rate": 6.321698742840425e-05, "loss": 18.2902, "step": 3285 }, { "epoch": 0.43, "learning_rate": 6.319642818056436e-05, "loss": 17.117, "step": 3286 }, { "epoch": 0.43, "learning_rate": 6.317586653422836e-05, "loss": 16.5607, "step": 3287 }, { "epoch": 0.43, "learning_rate": 6.315530249313337e-05, "loss": 17.8595, "step": 3288 }, { "epoch": 0.43, "learning_rate": 6.313473606101696e-05, "loss": 17.3084, "step": 3289 }, { "epoch": 0.43, "learning_rate": 6.311416724161715e-05, "loss": 17.8358, "step": 3290 }, { "epoch": 0.43, "learning_rate": 6.309359603867243e-05, "loss": 0.0002, "step": 3291 }, { "epoch": 0.43, "learning_rate": 6.307302245592165e-05, "loss": 18.7624, "step": 3292 }, { "epoch": 0.43, "learning_rate": 6.305244649710415e-05, "loss": 16.8595, "step": 3293 }, { "epoch": 0.43, "learning_rate": 6.303186816595965e-05, "loss": 17.0099, "step": 3294 }, { "epoch": 0.43, "learning_rate": 6.301128746622838e-05, "loss": 15.8256, "step": 3295 }, { "epoch": 0.43, "learning_rate": 6.299070440165092e-05, "loss": 18.3445, "step": 3296 }, { "epoch": 0.43, "learning_rate": 6.297011897596833e-05, "loss": 16.7923, "step": 3297 }, { "epoch": 0.43, "learning_rate": 6.294953119292206e-05, "loss": 17.1987, "step": 3298 }, { "epoch": 0.43, "learning_rate": 6.292894105625402e-05, "loss": 19.6952, "step": 3299 }, { "epoch": 0.43, "learning_rate": 6.290834856970656e-05, "loss": 17.7913, "step": 3300 }, { "epoch": 0.43, "learning_rate": 6.28877537370224e-05, "loss": 17.5737, "step": 3301 }, { "epoch": 0.43, "learning_rate": 6.286715656194474e-05, "loss": 0.0003, "step": 3302 }, { "epoch": 0.43, "learning_rate": 6.284655704821719e-05, "loss": 0.0, "step": 3303 }, { "epoch": 0.43, "learning_rate": 6.282595519958377e-05, "loss": 17.3397, "step": 3304 }, { "epoch": 0.44, "learning_rate": 6.280535101978897e-05, "loss": 0.0001, "step": 3305 }, { "epoch": 0.44, "learning_rate": 6.278474451257762e-05, "loss": 17.1745, "step": 3306 }, { "epoch": 0.44, "learning_rate": 6.276413568169504e-05, "loss": 17.019, "step": 3307 }, { "epoch": 0.44, "learning_rate": 6.274352453088699e-05, "loss": 16.8869, "step": 3308 }, { "epoch": 0.44, "learning_rate": 6.272291106389957e-05, "loss": 16.0272, "step": 3309 }, { "epoch": 0.44, "learning_rate": 6.270229528447939e-05, "loss": 18.5898, "step": 3310 }, { "epoch": 0.44, "learning_rate": 6.268167719637339e-05, "loss": 0.0001, "step": 3311 }, { "epoch": 0.44, "learning_rate": 6.266105680332904e-05, "loss": 17.6625, "step": 3312 }, { "epoch": 0.44, "learning_rate": 6.264043410909411e-05, "loss": 17.6744, "step": 3313 }, { "epoch": 0.44, "learning_rate": 6.261980911741688e-05, "loss": 0.0, "step": 3314 }, { "epoch": 0.44, "learning_rate": 6.259918183204599e-05, "loss": 16.5928, "step": 3315 }, { "epoch": 0.44, "learning_rate": 6.257855225673055e-05, "loss": 18.2588, "step": 3316 }, { "epoch": 0.44, "learning_rate": 6.255792039522003e-05, "loss": 17.6055, "step": 3317 }, { "epoch": 0.44, "learning_rate": 6.253728625126437e-05, "loss": 16.9395, "step": 3318 }, { "epoch": 0.44, "learning_rate": 6.251664982861385e-05, "loss": 0.0006, "step": 3319 }, { "epoch": 0.44, "learning_rate": 6.249601113101924e-05, "loss": 0.0002, "step": 3320 }, { "epoch": 0.44, "learning_rate": 6.24753701622317e-05, "loss": 16.5157, "step": 3321 }, { "epoch": 0.44, "learning_rate": 6.24547269260028e-05, "loss": 0.0001, "step": 3322 }, { "epoch": 0.44, "learning_rate": 6.24340814260845e-05, "loss": 16.8459, "step": 3323 }, { "epoch": 0.44, "learning_rate": 6.241343366622918e-05, "loss": 17.1656, "step": 3324 }, { "epoch": 0.44, "learning_rate": 6.239278365018968e-05, "loss": 0.0, "step": 3325 }, { "epoch": 0.44, "learning_rate": 6.237213138171919e-05, "loss": 18.9452, "step": 3326 }, { "epoch": 0.44, "learning_rate": 6.235147686457131e-05, "loss": 0.0001, "step": 3327 }, { "epoch": 0.44, "learning_rate": 6.233082010250012e-05, "loss": 0.0, "step": 3328 }, { "epoch": 0.44, "learning_rate": 6.231016109926002e-05, "loss": 16.7854, "step": 3329 }, { "epoch": 0.44, "learning_rate": 6.228949985860585e-05, "loss": 16.2904, "step": 3330 }, { "epoch": 0.44, "learning_rate": 6.22688363842929e-05, "loss": 18.9962, "step": 3331 }, { "epoch": 0.44, "learning_rate": 6.224817068007678e-05, "loss": 16.7595, "step": 3332 }, { "epoch": 0.44, "learning_rate": 6.222750274971359e-05, "loss": 16.1718, "step": 3333 }, { "epoch": 0.44, "learning_rate": 6.220683259695978e-05, "loss": 17.2107, "step": 3334 }, { "epoch": 0.44, "learning_rate": 6.218616022557221e-05, "loss": 17.4942, "step": 3335 }, { "epoch": 0.44, "learning_rate": 6.216548563930819e-05, "loss": 18.1304, "step": 3336 }, { "epoch": 0.44, "learning_rate": 6.214480884192536e-05, "loss": 19.4338, "step": 3337 }, { "epoch": 0.44, "learning_rate": 6.212412983718181e-05, "loss": 0.0001, "step": 3338 }, { "epoch": 0.44, "learning_rate": 6.210344862883602e-05, "loss": 16.3178, "step": 3339 }, { "epoch": 0.44, "learning_rate": 6.208276522064689e-05, "loss": 18.1417, "step": 3340 }, { "epoch": 0.44, "learning_rate": 6.206207961637369e-05, "loss": 19.5051, "step": 3341 }, { "epoch": 0.44, "learning_rate": 6.204139181977609e-05, "loss": 18.153, "step": 3342 }, { "epoch": 0.44, "learning_rate": 6.202070183461415e-05, "loss": 18.3187, "step": 3343 }, { "epoch": 0.44, "learning_rate": 6.20000096646484e-05, "loss": 16.9582, "step": 3344 }, { "epoch": 0.44, "learning_rate": 6.197931531363966e-05, "loss": 0.0001, "step": 3345 }, { "epoch": 0.44, "learning_rate": 6.195861878534925e-05, "loss": 0.0001, "step": 3346 }, { "epoch": 0.44, "learning_rate": 6.193792008353876e-05, "loss": 17.0467, "step": 3347 }, { "epoch": 0.44, "learning_rate": 6.191721921197034e-05, "loss": 18.2586, "step": 3348 }, { "epoch": 0.44, "learning_rate": 6.189651617440638e-05, "loss": 0.0, "step": 3349 }, { "epoch": 0.44, "learning_rate": 6.187581097460975e-05, "loss": 16.0409, "step": 3350 }, { "epoch": 0.44, "learning_rate": 6.185510361634369e-05, "loss": 18.694, "step": 3351 }, { "epoch": 0.44, "learning_rate": 6.183439410337184e-05, "loss": 17.2919, "step": 3352 }, { "epoch": 0.44, "learning_rate": 6.181368243945821e-05, "loss": 17.2913, "step": 3353 }, { "epoch": 0.44, "learning_rate": 6.179296862836725e-05, "loss": 17.3727, "step": 3354 }, { "epoch": 0.44, "learning_rate": 6.177225267386372e-05, "loss": 0.0001, "step": 3355 }, { "epoch": 0.44, "learning_rate": 6.175153457971282e-05, "loss": 18.5861, "step": 3356 }, { "epoch": 0.44, "learning_rate": 6.173081434968017e-05, "loss": 17.1505, "step": 3357 }, { "epoch": 0.44, "learning_rate": 6.171009198753173e-05, "loss": 16.0068, "step": 3358 }, { "epoch": 0.44, "learning_rate": 6.168936749703386e-05, "loss": 20.0759, "step": 3359 }, { "epoch": 0.44, "learning_rate": 6.16686408819533e-05, "loss": 17.5112, "step": 3360 }, { "epoch": 0.44, "learning_rate": 6.164791214605716e-05, "loss": 17.9532, "step": 3361 }, { "epoch": 0.44, "learning_rate": 6.162718129311301e-05, "loss": 18.0787, "step": 3362 }, { "epoch": 0.44, "learning_rate": 6.160644832688874e-05, "loss": 17.6997, "step": 3363 }, { "epoch": 0.44, "learning_rate": 6.15857132511526e-05, "loss": 18.3664, "step": 3364 }, { "epoch": 0.44, "learning_rate": 6.15649760696733e-05, "loss": 0.0, "step": 3365 }, { "epoch": 0.44, "learning_rate": 6.154423678621988e-05, "loss": 17.312, "step": 3366 }, { "epoch": 0.44, "learning_rate": 6.152349540456179e-05, "loss": 0.0, "step": 3367 }, { "epoch": 0.44, "learning_rate": 6.15027519284688e-05, "loss": 0.0, "step": 3368 }, { "epoch": 0.44, "learning_rate": 6.148200636171117e-05, "loss": 17.4731, "step": 3369 }, { "epoch": 0.44, "learning_rate": 6.146125870805945e-05, "loss": 18.1933, "step": 3370 }, { "epoch": 0.44, "learning_rate": 6.144050897128459e-05, "loss": 18.7718, "step": 3371 }, { "epoch": 0.44, "learning_rate": 6.141975715515794e-05, "loss": 17.5664, "step": 3372 }, { "epoch": 0.44, "learning_rate": 6.139900326345121e-05, "loss": 0.0, "step": 3373 }, { "epoch": 0.44, "learning_rate": 6.137824729993649e-05, "loss": 15.8959, "step": 3374 }, { "epoch": 0.44, "learning_rate": 6.135748926838625e-05, "loss": 19.1232, "step": 3375 }, { "epoch": 0.44, "learning_rate": 6.133672917257331e-05, "loss": 0.0, "step": 3376 }, { "epoch": 0.44, "learning_rate": 6.131596701627092e-05, "loss": 0.0001, "step": 3377 }, { "epoch": 0.44, "learning_rate": 6.129520280325267e-05, "loss": 0.0003, "step": 3378 }, { "epoch": 0.44, "learning_rate": 6.127443653729252e-05, "loss": 17.0504, "step": 3379 }, { "epoch": 0.44, "learning_rate": 6.125366822216481e-05, "loss": 0.0001, "step": 3380 }, { "epoch": 0.45, "learning_rate": 6.123289786164423e-05, "loss": 16.5224, "step": 3381 }, { "epoch": 0.45, "learning_rate": 6.121212545950593e-05, "loss": 16.5886, "step": 3382 }, { "epoch": 0.45, "learning_rate": 6.119135101952529e-05, "loss": 17.0085, "step": 3383 }, { "epoch": 0.45, "learning_rate": 6.117057454547817e-05, "loss": 16.2744, "step": 3384 }, { "epoch": 0.45, "learning_rate": 6.114979604114077e-05, "loss": 17.2346, "step": 3385 }, { "epoch": 0.45, "learning_rate": 6.112901551028963e-05, "loss": 16.5947, "step": 3386 }, { "epoch": 0.45, "learning_rate": 6.110823295670172e-05, "loss": 17.7804, "step": 3387 }, { "epoch": 0.45, "learning_rate": 6.108744838415432e-05, "loss": 21.3931, "step": 3388 }, { "epoch": 0.45, "learning_rate": 6.106666179642509e-05, "loss": 17.871, "step": 3389 }, { "epoch": 0.45, "learning_rate": 6.104587319729208e-05, "loss": 0.0001, "step": 3390 }, { "epoch": 0.45, "learning_rate": 6.1025082590533675e-05, "loss": 0.0, "step": 3391 }, { "epoch": 0.45, "learning_rate": 6.1004289979928654e-05, "loss": 17.8137, "step": 3392 }, { "epoch": 0.45, "learning_rate": 6.098349536925613e-05, "loss": 16.2787, "step": 3393 }, { "epoch": 0.45, "learning_rate": 6.09626987622956e-05, "loss": 17.2393, "step": 3394 }, { "epoch": 0.45, "learning_rate": 6.0941900162826926e-05, "loss": 18.4374, "step": 3395 }, { "epoch": 0.45, "learning_rate": 6.092109957463032e-05, "loss": 17.4552, "step": 3396 }, { "epoch": 0.45, "learning_rate": 6.090029700148636e-05, "loss": 15.7759, "step": 3397 }, { "epoch": 0.45, "learning_rate": 6.087949244717598e-05, "loss": 0.0, "step": 3398 }, { "epoch": 0.45, "learning_rate": 6.0858685915480493e-05, "loss": 17.4506, "step": 3399 }, { "epoch": 0.45, "learning_rate": 6.0837877410181545e-05, "loss": 16.7326, "step": 3400 }, { "epoch": 0.45, "learning_rate": 6.081706693506116e-05, "loss": 18.9313, "step": 3401 }, { "epoch": 0.45, "learning_rate": 6.079625449390171e-05, "loss": 19.0508, "step": 3402 }, { "epoch": 0.45, "learning_rate": 6.0775440090485925e-05, "loss": 18.0232, "step": 3403 }, { "epoch": 0.45, "learning_rate": 6.07546237285969e-05, "loss": 17.3484, "step": 3404 }, { "epoch": 0.45, "learning_rate": 6.0733805412018074e-05, "loss": 18.5115, "step": 3405 }, { "epoch": 0.45, "learning_rate": 6.071298514453325e-05, "loss": 17.6111, "step": 3406 }, { "epoch": 0.45, "learning_rate": 6.069216292992659e-05, "loss": 17.0403, "step": 3407 }, { "epoch": 0.45, "learning_rate": 6.06713387719826e-05, "loss": 15.2209, "step": 3408 }, { "epoch": 0.45, "learning_rate": 6.0650512674486126e-05, "loss": 18.011, "step": 3409 }, { "epoch": 0.45, "learning_rate": 6.062968464122241e-05, "loss": 15.6955, "step": 3410 }, { "epoch": 0.45, "learning_rate": 6.0608854675977e-05, "loss": 17.4541, "step": 3411 }, { "epoch": 0.45, "learning_rate": 6.058802278253582e-05, "loss": 0.0, "step": 3412 }, { "epoch": 0.45, "learning_rate": 6.056718896468512e-05, "loss": 17.7549, "step": 3413 }, { "epoch": 0.45, "learning_rate": 6.054635322621156e-05, "loss": 18.1881, "step": 3414 }, { "epoch": 0.45, "learning_rate": 6.0525515570902066e-05, "loss": 18.0026, "step": 3415 }, { "epoch": 0.45, "learning_rate": 6.0504676002543983e-05, "loss": 0.0001, "step": 3416 }, { "epoch": 0.45, "learning_rate": 6.0483834524924944e-05, "loss": 17.4229, "step": 3417 }, { "epoch": 0.45, "learning_rate": 6.046299114183298e-05, "loss": 16.7689, "step": 3418 }, { "epoch": 0.45, "learning_rate": 6.0442145857056456e-05, "loss": 0.0001, "step": 3419 }, { "epoch": 0.45, "learning_rate": 6.042129867438405e-05, "loss": 16.049, "step": 3420 }, { "epoch": 0.45, "learning_rate": 6.040044959760482e-05, "loss": 17.1472, "step": 3421 }, { "epoch": 0.45, "learning_rate": 6.037959863050815e-05, "loss": 17.7624, "step": 3422 }, { "epoch": 0.45, "learning_rate": 6.03587457768838e-05, "loss": 0.0004, "step": 3423 }, { "epoch": 0.45, "learning_rate": 6.033789104052182e-05, "loss": 0.0002, "step": 3424 }, { "epoch": 0.45, "learning_rate": 6.0317034425212645e-05, "loss": 0.0001, "step": 3425 }, { "epoch": 0.45, "learning_rate": 6.029617593474702e-05, "loss": 0.0001, "step": 3426 }, { "epoch": 0.45, "learning_rate": 6.027531557291607e-05, "loss": 17.4008, "step": 3427 }, { "epoch": 0.45, "learning_rate": 6.025445334351122e-05, "loss": 0.0, "step": 3428 }, { "epoch": 0.45, "learning_rate": 6.0233589250324266e-05, "loss": 19.1, "step": 3429 }, { "epoch": 0.45, "learning_rate": 6.021272329714731e-05, "loss": 18.1077, "step": 3430 }, { "epoch": 0.45, "learning_rate": 6.019185548777282e-05, "loss": 16.0625, "step": 3431 }, { "epoch": 0.45, "learning_rate": 6.017098582599361e-05, "loss": 15.9816, "step": 3432 }, { "epoch": 0.45, "learning_rate": 6.015011431560278e-05, "loss": 18.8666, "step": 3433 }, { "epoch": 0.45, "learning_rate": 6.0129240960393816e-05, "loss": 0.0003, "step": 3434 }, { "epoch": 0.45, "learning_rate": 6.0108365764160524e-05, "loss": 16.8125, "step": 3435 }, { "epoch": 0.45, "learning_rate": 6.008748873069705e-05, "loss": 20.0543, "step": 3436 }, { "epoch": 0.45, "learning_rate": 6.006660986379784e-05, "loss": 0.0001, "step": 3437 }, { "epoch": 0.45, "learning_rate": 6.0045729167257714e-05, "loss": 16.9758, "step": 3438 }, { "epoch": 0.45, "learning_rate": 6.0024846644871825e-05, "loss": 15.7631, "step": 3439 }, { "epoch": 0.45, "learning_rate": 6.000396230043562e-05, "loss": 16.0156, "step": 3440 }, { "epoch": 0.45, "learning_rate": 5.998307613774492e-05, "loss": 17.3724, "step": 3441 }, { "epoch": 0.45, "learning_rate": 5.9962188160595835e-05, "loss": 16.1897, "step": 3442 }, { "epoch": 0.45, "learning_rate": 5.994129837278485e-05, "loss": 0.0001, "step": 3443 }, { "epoch": 0.45, "learning_rate": 5.992040677810874e-05, "loss": 0.0, "step": 3444 }, { "epoch": 0.45, "learning_rate": 5.9899513380364624e-05, "loss": 17.9638, "step": 3445 }, { "epoch": 0.45, "learning_rate": 5.987861818334995e-05, "loss": 0.0, "step": 3446 }, { "epoch": 0.45, "learning_rate": 5.98577211908625e-05, "loss": 18.8836, "step": 3447 }, { "epoch": 0.45, "learning_rate": 5.983682240670038e-05, "loss": 0.0, "step": 3448 }, { "epoch": 0.45, "learning_rate": 5.9815921834662006e-05, "loss": 16.1196, "step": 3449 }, { "epoch": 0.45, "learning_rate": 5.979501947854611e-05, "loss": 19.1113, "step": 3450 }, { "epoch": 0.45, "learning_rate": 5.9774115342151785e-05, "loss": 0.0, "step": 3451 }, { "epoch": 0.45, "learning_rate": 5.9753209429278446e-05, "loss": 18.7493, "step": 3452 }, { "epoch": 0.45, "learning_rate": 5.973230174372578e-05, "loss": 0.0, "step": 3453 }, { "epoch": 0.45, "learning_rate": 5.971139228929387e-05, "loss": 17.2135, "step": 3454 }, { "epoch": 0.45, "learning_rate": 5.9690481069783035e-05, "loss": 17.7443, "step": 3455 }, { "epoch": 0.45, "learning_rate": 5.9669568088994e-05, "loss": 0.0002, "step": 3456 }, { "epoch": 0.46, "learning_rate": 5.9648653350727756e-05, "loss": 18.1431, "step": 3457 }, { "epoch": 0.46, "learning_rate": 5.9627736858785635e-05, "loss": 17.8141, "step": 3458 }, { "epoch": 0.46, "learning_rate": 5.9606818616969254e-05, "loss": 17.304, "step": 3459 }, { "epoch": 0.46, "learning_rate": 5.958589862908062e-05, "loss": 15.6677, "step": 3460 }, { "epoch": 0.46, "learning_rate": 5.956497689892197e-05, "loss": 18.6764, "step": 3461 }, { "epoch": 0.46, "learning_rate": 5.954405343029593e-05, "loss": 16.3573, "step": 3462 }, { "epoch": 0.46, "learning_rate": 5.9523128227005395e-05, "loss": 17.9287, "step": 3463 }, { "epoch": 0.46, "learning_rate": 5.950220129285361e-05, "loss": 0.0, "step": 3464 }, { "epoch": 0.46, "learning_rate": 5.94812726316441e-05, "loss": 16.1451, "step": 3465 }, { "epoch": 0.46, "learning_rate": 5.946034224718073e-05, "loss": 0.0001, "step": 3466 }, { "epoch": 0.46, "learning_rate": 5.943941014326766e-05, "loss": 18.5691, "step": 3467 }, { "epoch": 0.46, "learning_rate": 5.941847632370939e-05, "loss": 0.0001, "step": 3468 }, { "epoch": 0.46, "learning_rate": 5.9397540792310714e-05, "loss": 17.2479, "step": 3469 }, { "epoch": 0.46, "learning_rate": 5.9376603552876706e-05, "loss": 16.275, "step": 3470 }, { "epoch": 0.46, "learning_rate": 5.93556646092128e-05, "loss": 17.1757, "step": 3471 }, { "epoch": 0.46, "learning_rate": 5.9334723965124725e-05, "loss": 17.8737, "step": 3472 }, { "epoch": 0.46, "learning_rate": 5.9313781624418515e-05, "loss": 0.0, "step": 3473 }, { "epoch": 0.46, "learning_rate": 5.929283759090051e-05, "loss": 0.0, "step": 3474 }, { "epoch": 0.46, "learning_rate": 5.927189186837734e-05, "loss": 18.1504, "step": 3475 }, { "epoch": 0.46, "learning_rate": 5.9250944460656e-05, "loss": 0.0, "step": 3476 }, { "epoch": 0.46, "learning_rate": 5.9229995371543725e-05, "loss": 0.0, "step": 3477 }, { "epoch": 0.46, "learning_rate": 5.9209044604848097e-05, "loss": 17.3605, "step": 3478 }, { "epoch": 0.46, "learning_rate": 5.918809216437698e-05, "loss": 15.8108, "step": 3479 }, { "epoch": 0.46, "learning_rate": 5.916713805393855e-05, "loss": 15.2969, "step": 3480 }, { "epoch": 0.46, "learning_rate": 5.914618227734131e-05, "loss": 18.5488, "step": 3481 }, { "epoch": 0.46, "learning_rate": 5.912522483839403e-05, "loss": 16.765, "step": 3482 }, { "epoch": 0.46, "learning_rate": 5.9104265740905784e-05, "loss": 17.2049, "step": 3483 }, { "epoch": 0.46, "learning_rate": 5.908330498868598e-05, "loss": 17.24, "step": 3484 }, { "epoch": 0.46, "learning_rate": 5.906234258554431e-05, "loss": 0.0, "step": 3485 }, { "epoch": 0.46, "learning_rate": 5.9041378535290745e-05, "loss": 16.8734, "step": 3486 }, { "epoch": 0.46, "learning_rate": 5.902041284173557e-05, "loss": 0.0002, "step": 3487 }, { "epoch": 0.46, "learning_rate": 5.89994455086894e-05, "loss": 17.2937, "step": 3488 }, { "epoch": 0.46, "learning_rate": 5.897847653996309e-05, "loss": 15.7396, "step": 3489 }, { "epoch": 0.46, "learning_rate": 5.895750593936784e-05, "loss": 0.0, "step": 3490 }, { "epoch": 0.46, "learning_rate": 5.8936533710715115e-05, "loss": 19.2667, "step": 3491 }, { "epoch": 0.46, "learning_rate": 5.89155598578167e-05, "loss": 0.0, "step": 3492 }, { "epoch": 0.46, "learning_rate": 5.8894584384484673e-05, "loss": 16.2838, "step": 3493 }, { "epoch": 0.46, "learning_rate": 5.887360729453137e-05, "loss": 16.4766, "step": 3494 }, { "epoch": 0.46, "learning_rate": 5.885262859176949e-05, "loss": 16.6605, "step": 3495 }, { "epoch": 0.46, "learning_rate": 5.8831648280011934e-05, "loss": 18.1683, "step": 3496 }, { "epoch": 0.46, "learning_rate": 5.881066636307199e-05, "loss": 16.9593, "step": 3497 }, { "epoch": 0.46, "learning_rate": 5.878968284476317e-05, "loss": 16.7209, "step": 3498 }, { "epoch": 0.46, "learning_rate": 5.876869772889931e-05, "loss": 0.0001, "step": 3499 }, { "epoch": 0.46, "learning_rate": 5.874771101929451e-05, "loss": 16.3656, "step": 3500 }, { "epoch": 0.46, "learning_rate": 5.8726722719763206e-05, "loss": 0.0, "step": 3501 }, { "epoch": 0.46, "learning_rate": 5.870573283412009e-05, "loss": 0.0002, "step": 3502 }, { "epoch": 0.46, "learning_rate": 5.8684741366180105e-05, "loss": 16.4457, "step": 3503 }, { "epoch": 0.46, "learning_rate": 5.866374831975856e-05, "loss": 16.894, "step": 3504 }, { "epoch": 0.46, "learning_rate": 5.8642753698671005e-05, "loss": 17.5099, "step": 3505 }, { "epoch": 0.46, "learning_rate": 5.862175750673329e-05, "loss": 15.8168, "step": 3506 }, { "epoch": 0.46, "learning_rate": 5.860075974776154e-05, "loss": 15.5075, "step": 3507 }, { "epoch": 0.46, "learning_rate": 5.857976042557215e-05, "loss": 17.8369, "step": 3508 }, { "epoch": 0.46, "learning_rate": 5.8558759543981845e-05, "loss": 15.9209, "step": 3509 }, { "epoch": 0.46, "learning_rate": 5.85377571068076e-05, "loss": 17.3808, "step": 3510 }, { "epoch": 0.46, "learning_rate": 5.851675311786669e-05, "loss": 17.7152, "step": 3511 }, { "epoch": 0.46, "learning_rate": 5.849574758097664e-05, "loss": 0.0, "step": 3512 }, { "epoch": 0.46, "learning_rate": 5.8474740499955295e-05, "loss": 18.5697, "step": 3513 }, { "epoch": 0.46, "learning_rate": 5.8453731878620755e-05, "loss": 17.724, "step": 3514 }, { "epoch": 0.46, "learning_rate": 5.8432721720791426e-05, "loss": 17.3675, "step": 3515 }, { "epoch": 0.46, "learning_rate": 5.841171003028595e-05, "loss": 17.6692, "step": 3516 }, { "epoch": 0.46, "learning_rate": 5.839069681092328e-05, "loss": 17.9715, "step": 3517 }, { "epoch": 0.46, "learning_rate": 5.836968206652267e-05, "loss": 0.0, "step": 3518 }, { "epoch": 0.46, "learning_rate": 5.8348665800903604e-05, "loss": 16.6555, "step": 3519 }, { "epoch": 0.46, "learning_rate": 5.832764801788584e-05, "loss": 18.5354, "step": 3520 }, { "epoch": 0.46, "learning_rate": 5.830662872128945e-05, "loss": 0.0, "step": 3521 }, { "epoch": 0.46, "learning_rate": 5.8285607914934756e-05, "loss": 19.4942, "step": 3522 }, { "epoch": 0.46, "learning_rate": 5.8264585602642383e-05, "loss": 16.4119, "step": 3523 }, { "epoch": 0.46, "learning_rate": 5.824356178823319e-05, "loss": 15.6307, "step": 3524 }, { "epoch": 0.46, "learning_rate": 5.8222536475528334e-05, "loss": 17.5135, "step": 3525 }, { "epoch": 0.46, "learning_rate": 5.820150966834923e-05, "loss": 0.0001, "step": 3526 }, { "epoch": 0.46, "learning_rate": 5.818048137051758e-05, "loss": 16.4332, "step": 3527 }, { "epoch": 0.46, "learning_rate": 5.815945158585534e-05, "loss": 0.0002, "step": 3528 }, { "epoch": 0.46, "learning_rate": 5.813842031818475e-05, "loss": 17.6426, "step": 3529 }, { "epoch": 0.46, "learning_rate": 5.811738757132833e-05, "loss": 0.0, "step": 3530 }, { "epoch": 0.46, "learning_rate": 5.809635334910884e-05, "loss": 17.9081, "step": 3531 }, { "epoch": 0.46, "learning_rate": 5.8075317655349326e-05, "loss": 17.4328, "step": 3532 }, { "epoch": 0.47, "learning_rate": 5.805428049387307e-05, "loss": 0.0, "step": 3533 }, { "epoch": 0.47, "learning_rate": 5.80332418685037e-05, "loss": 16.3763, "step": 3534 }, { "epoch": 0.47, "learning_rate": 5.801220178306502e-05, "loss": 19.293, "step": 3535 }, { "epoch": 0.47, "learning_rate": 5.799116024138116e-05, "loss": 0.0001, "step": 3536 }, { "epoch": 0.47, "learning_rate": 5.7970117247276465e-05, "loss": 17.1326, "step": 3537 }, { "epoch": 0.47, "learning_rate": 5.7949072804575625e-05, "loss": 17.381, "step": 3538 }, { "epoch": 0.47, "learning_rate": 5.792802691710347e-05, "loss": 0.0, "step": 3539 }, { "epoch": 0.47, "learning_rate": 5.79069795886852e-05, "loss": 17.5912, "step": 3540 }, { "epoch": 0.47, "learning_rate": 5.788593082314624e-05, "loss": 0.0, "step": 3541 }, { "epoch": 0.47, "learning_rate": 5.786488062431227e-05, "loss": 18.4018, "step": 3542 }, { "epoch": 0.47, "learning_rate": 5.784382899600923e-05, "loss": 18.2346, "step": 3543 }, { "epoch": 0.47, "learning_rate": 5.7822775942063345e-05, "loss": 15.9052, "step": 3544 }, { "epoch": 0.47, "learning_rate": 5.780172146630105e-05, "loss": 18.4584, "step": 3545 }, { "epoch": 0.47, "learning_rate": 5.7780665572549086e-05, "loss": 18.0119, "step": 3546 }, { "epoch": 0.47, "learning_rate": 5.775960826463445e-05, "loss": 18.2031, "step": 3547 }, { "epoch": 0.47, "learning_rate": 5.773854954638436e-05, "loss": 17.6532, "step": 3548 }, { "epoch": 0.47, "learning_rate": 5.771748942162629e-05, "loss": 18.077, "step": 3549 }, { "epoch": 0.47, "learning_rate": 5.769642789418803e-05, "loss": 16.631, "step": 3550 }, { "epoch": 0.47, "learning_rate": 5.767536496789757e-05, "loss": 17.2485, "step": 3551 }, { "epoch": 0.47, "learning_rate": 5.765430064658317e-05, "loss": 18.7015, "step": 3552 }, { "epoch": 0.47, "learning_rate": 5.763323493407333e-05, "loss": 17.4673, "step": 3553 }, { "epoch": 0.47, "learning_rate": 5.761216783419684e-05, "loss": 17.3335, "step": 3554 }, { "epoch": 0.47, "learning_rate": 5.75910993507827e-05, "loss": 0.0001, "step": 3555 }, { "epoch": 0.47, "learning_rate": 5.757002948766017e-05, "loss": 18.8419, "step": 3556 }, { "epoch": 0.47, "learning_rate": 5.754895824865879e-05, "loss": 17.8335, "step": 3557 }, { "epoch": 0.47, "learning_rate": 5.752788563760833e-05, "loss": 16.2446, "step": 3558 }, { "epoch": 0.47, "learning_rate": 5.75068116583388e-05, "loss": 18.6967, "step": 3559 }, { "epoch": 0.47, "learning_rate": 5.748573631468046e-05, "loss": 17.0557, "step": 3560 }, { "epoch": 0.47, "learning_rate": 5.746465961046384e-05, "loss": 16.3316, "step": 3561 }, { "epoch": 0.47, "learning_rate": 5.744358154951971e-05, "loss": 17.1138, "step": 3562 }, { "epoch": 0.47, "learning_rate": 5.742250213567906e-05, "loss": 16.7941, "step": 3563 }, { "epoch": 0.47, "learning_rate": 5.740142137277317e-05, "loss": 16.9928, "step": 3564 }, { "epoch": 0.47, "learning_rate": 5.73803392646335e-05, "loss": 18.4789, "step": 3565 }, { "epoch": 0.47, "learning_rate": 5.735925581509184e-05, "loss": 0.0, "step": 3566 }, { "epoch": 0.47, "learning_rate": 5.733817102798017e-05, "loss": 17.4642, "step": 3567 }, { "epoch": 0.47, "learning_rate": 5.7317084907130704e-05, "loss": 16.4862, "step": 3568 }, { "epoch": 0.47, "learning_rate": 5.7295997456375924e-05, "loss": 18.5267, "step": 3569 }, { "epoch": 0.47, "learning_rate": 5.7274908679548545e-05, "loss": 17.3916, "step": 3570 }, { "epoch": 0.47, "learning_rate": 5.7253818580481545e-05, "loss": 18.2419, "step": 3571 }, { "epoch": 0.47, "learning_rate": 5.72327271630081e-05, "loss": 17.0557, "step": 3572 }, { "epoch": 0.47, "learning_rate": 5.7211634430961655e-05, "loss": 17.7328, "step": 3573 }, { "epoch": 0.47, "learning_rate": 5.719054038817587e-05, "loss": 16.7025, "step": 3574 }, { "epoch": 0.47, "learning_rate": 5.716944503848469e-05, "loss": 0.0001, "step": 3575 }, { "epoch": 0.47, "learning_rate": 5.7148348385722264e-05, "loss": 17.6137, "step": 3576 }, { "epoch": 0.47, "learning_rate": 5.712725043372296e-05, "loss": 16.8484, "step": 3577 }, { "epoch": 0.47, "learning_rate": 5.710615118632139e-05, "loss": 18.2023, "step": 3578 }, { "epoch": 0.47, "learning_rate": 5.7085050647352455e-05, "loss": 0.0001, "step": 3579 }, { "epoch": 0.47, "learning_rate": 5.706394882065124e-05, "loss": 0.0, "step": 3580 }, { "epoch": 0.47, "learning_rate": 5.704284571005306e-05, "loss": 16.9132, "step": 3581 }, { "epoch": 0.47, "learning_rate": 5.702174131939349e-05, "loss": 18.6928, "step": 3582 }, { "epoch": 0.47, "learning_rate": 5.700063565250831e-05, "loss": 0.0001, "step": 3583 }, { "epoch": 0.47, "learning_rate": 5.6979528713233565e-05, "loss": 17.4447, "step": 3584 }, { "epoch": 0.47, "learning_rate": 5.6958420505405506e-05, "loss": 0.0, "step": 3585 }, { "epoch": 0.47, "learning_rate": 5.693731103286062e-05, "loss": 0.0, "step": 3586 }, { "epoch": 0.47, "learning_rate": 5.691620029943563e-05, "loss": 17.3333, "step": 3587 }, { "epoch": 0.47, "learning_rate": 5.689508830896748e-05, "loss": 0.0001, "step": 3588 }, { "epoch": 0.47, "learning_rate": 5.687397506529336e-05, "loss": 0.0001, "step": 3589 }, { "epoch": 0.47, "learning_rate": 5.685286057225066e-05, "loss": 16.6448, "step": 3590 }, { "epoch": 0.47, "learning_rate": 5.6831744833677e-05, "loss": 0.0003, "step": 3591 }, { "epoch": 0.47, "learning_rate": 5.681062785341028e-05, "loss": 17.0249, "step": 3592 }, { "epoch": 0.47, "learning_rate": 5.6789509635288555e-05, "loss": 15.9962, "step": 3593 }, { "epoch": 0.47, "learning_rate": 5.676839018315012e-05, "loss": 17.7445, "step": 3594 }, { "epoch": 0.47, "learning_rate": 5.674726950083353e-05, "loss": 17.869, "step": 3595 }, { "epoch": 0.47, "learning_rate": 5.672614759217754e-05, "loss": 16.9927, "step": 3596 }, { "epoch": 0.47, "learning_rate": 5.6705024461021126e-05, "loss": 18.6248, "step": 3597 }, { "epoch": 0.47, "learning_rate": 5.6683900111203494e-05, "loss": 17.6295, "step": 3598 }, { "epoch": 0.47, "learning_rate": 5.6662774546564046e-05, "loss": 17.0987, "step": 3599 }, { "epoch": 0.47, "learning_rate": 5.664164777094245e-05, "loss": 0.0, "step": 3600 }, { "epoch": 0.47, "learning_rate": 5.6620519788178574e-05, "loss": 16.0381, "step": 3601 }, { "epoch": 0.47, "learning_rate": 5.659939060211248e-05, "loss": 16.9667, "step": 3602 }, { "epoch": 0.47, "learning_rate": 5.657826021658448e-05, "loss": 16.9973, "step": 3603 }, { "epoch": 0.47, "learning_rate": 5.655712863543509e-05, "loss": 17.3692, "step": 3604 }, { "epoch": 0.47, "learning_rate": 5.6535995862505056e-05, "loss": 16.928, "step": 3605 }, { "epoch": 0.47, "learning_rate": 5.651486190163533e-05, "loss": 17.488, "step": 3606 }, { "epoch": 0.47, "learning_rate": 5.649372675666708e-05, "loss": 16.5496, "step": 3607 }, { "epoch": 0.47, "learning_rate": 5.6472590431441685e-05, "loss": 0.0001, "step": 3608 }, { "epoch": 0.48, "learning_rate": 5.645145292980075e-05, "loss": 0.0007, "step": 3609 }, { "epoch": 0.48, "learning_rate": 5.643031425558609e-05, "loss": 17.0474, "step": 3610 }, { "epoch": 0.48, "learning_rate": 5.6409174412639735e-05, "loss": 17.3417, "step": 3611 }, { "epoch": 0.48, "learning_rate": 5.6388033404803916e-05, "loss": 16.8493, "step": 3612 }, { "epoch": 0.48, "learning_rate": 5.636689123592109e-05, "loss": 16.5574, "step": 3613 }, { "epoch": 0.48, "learning_rate": 5.6345747909833915e-05, "loss": 0.0, "step": 3614 }, { "epoch": 0.48, "learning_rate": 5.632460343038526e-05, "loss": 17.0118, "step": 3615 }, { "epoch": 0.48, "learning_rate": 5.6303457801418213e-05, "loss": 18.0378, "step": 3616 }, { "epoch": 0.48, "learning_rate": 5.628231102677606e-05, "loss": 0.0002, "step": 3617 }, { "epoch": 0.48, "learning_rate": 5.6261163110302306e-05, "loss": 0.0, "step": 3618 }, { "epoch": 0.48, "learning_rate": 5.624001405584065e-05, "loss": 0.0, "step": 3619 }, { "epoch": 0.48, "learning_rate": 5.621886386723502e-05, "loss": 17.6, "step": 3620 }, { "epoch": 0.48, "learning_rate": 5.6197712548329514e-05, "loss": 19.4885, "step": 3621 }, { "epoch": 0.48, "learning_rate": 5.6176560102968467e-05, "loss": 0.0, "step": 3622 }, { "epoch": 0.48, "learning_rate": 5.6155406534996415e-05, "loss": 0.0004, "step": 3623 }, { "epoch": 0.48, "learning_rate": 5.613425184825808e-05, "loss": 18.6464, "step": 3624 }, { "epoch": 0.48, "learning_rate": 5.611309604659841e-05, "loss": 0.0, "step": 3625 }, { "epoch": 0.48, "learning_rate": 5.6091939133862537e-05, "loss": 16.5877, "step": 3626 }, { "epoch": 0.48, "learning_rate": 5.60707811138958e-05, "loss": 18.0418, "step": 3627 }, { "epoch": 0.48, "learning_rate": 5.604962199054376e-05, "loss": 17.0059, "step": 3628 }, { "epoch": 0.48, "learning_rate": 5.6028461767652144e-05, "loss": 16.7053, "step": 3629 }, { "epoch": 0.48, "learning_rate": 5.600730044906689e-05, "loss": 19.2144, "step": 3630 }, { "epoch": 0.48, "learning_rate": 5.598613803863414e-05, "loss": 18.0381, "step": 3631 }, { "epoch": 0.48, "learning_rate": 5.596497454020027e-05, "loss": 0.0, "step": 3632 }, { "epoch": 0.48, "learning_rate": 5.594380995761177e-05, "loss": 16.3199, "step": 3633 }, { "epoch": 0.48, "learning_rate": 5.592264429471541e-05, "loss": 16.2369, "step": 3634 }, { "epoch": 0.48, "learning_rate": 5.59014775553581e-05, "loss": 16.7618, "step": 3635 }, { "epoch": 0.48, "learning_rate": 5.588030974338698e-05, "loss": 16.7121, "step": 3636 }, { "epoch": 0.48, "learning_rate": 5.585914086264937e-05, "loss": 18.6827, "step": 3637 }, { "epoch": 0.48, "learning_rate": 5.583797091699279e-05, "loss": 17.1443, "step": 3638 }, { "epoch": 0.48, "learning_rate": 5.581679991026494e-05, "loss": 15.8677, "step": 3639 }, { "epoch": 0.48, "learning_rate": 5.579562784631373e-05, "loss": 17.403, "step": 3640 }, { "epoch": 0.48, "learning_rate": 5.577445472898726e-05, "loss": 17.1275, "step": 3641 }, { "epoch": 0.48, "learning_rate": 5.5753280562133814e-05, "loss": 16.2139, "step": 3642 }, { "epoch": 0.48, "learning_rate": 5.573210534960186e-05, "loss": 18.0243, "step": 3643 }, { "epoch": 0.48, "learning_rate": 5.5710929095240074e-05, "loss": 17.5901, "step": 3644 }, { "epoch": 0.48, "learning_rate": 5.56897518028973e-05, "loss": 17.9376, "step": 3645 }, { "epoch": 0.48, "learning_rate": 5.566857347642262e-05, "loss": 0.0001, "step": 3646 }, { "epoch": 0.48, "learning_rate": 5.564739411966524e-05, "loss": 17.6294, "step": 3647 }, { "epoch": 0.48, "learning_rate": 5.562621373647456e-05, "loss": 0.0009, "step": 3648 }, { "epoch": 0.48, "learning_rate": 5.560503233070024e-05, "loss": 17.6628, "step": 3649 }, { "epoch": 0.48, "learning_rate": 5.5583849906192034e-05, "loss": 0.0017, "step": 3650 }, { "epoch": 0.48, "learning_rate": 5.556266646679993e-05, "loss": 17.9471, "step": 3651 }, { "epoch": 0.48, "learning_rate": 5.554148201637408e-05, "loss": 16.2749, "step": 3652 }, { "epoch": 0.48, "learning_rate": 5.5520296558764854e-05, "loss": 17.2028, "step": 3653 }, { "epoch": 0.48, "learning_rate": 5.549911009782277e-05, "loss": 0.0001, "step": 3654 }, { "epoch": 0.48, "learning_rate": 5.547792263739854e-05, "loss": 0.0004, "step": 3655 }, { "epoch": 0.48, "learning_rate": 5.545673418134305e-05, "loss": 16.5986, "step": 3656 }, { "epoch": 0.48, "learning_rate": 5.543554473350739e-05, "loss": 18.6793, "step": 3657 }, { "epoch": 0.48, "learning_rate": 5.541435429774281e-05, "loss": 17.3828, "step": 3658 }, { "epoch": 0.48, "learning_rate": 5.539316287790074e-05, "loss": 15.5344, "step": 3659 }, { "epoch": 0.48, "learning_rate": 5.537197047783279e-05, "loss": 16.4513, "step": 3660 }, { "epoch": 0.48, "learning_rate": 5.535077710139076e-05, "loss": 18.2516, "step": 3661 }, { "epoch": 0.48, "learning_rate": 5.532958275242662e-05, "loss": 0.0, "step": 3662 }, { "epoch": 0.48, "learning_rate": 5.5308387434792507e-05, "loss": 16.4374, "step": 3663 }, { "epoch": 0.48, "learning_rate": 5.5287191152340756e-05, "loss": 16.6192, "step": 3664 }, { "epoch": 0.48, "learning_rate": 5.526599390892384e-05, "loss": 0.0, "step": 3665 }, { "epoch": 0.48, "learning_rate": 5.5244795708394446e-05, "loss": 16.1729, "step": 3666 }, { "epoch": 0.48, "learning_rate": 5.522359655460544e-05, "loss": 17.9344, "step": 3667 }, { "epoch": 0.48, "learning_rate": 5.520239645140981e-05, "loss": 16.2168, "step": 3668 }, { "epoch": 0.48, "learning_rate": 5.5181195402660746e-05, "loss": 0.0001, "step": 3669 }, { "epoch": 0.48, "learning_rate": 5.515999341221163e-05, "loss": 0.0003, "step": 3670 }, { "epoch": 0.48, "learning_rate": 5.5138790483916e-05, "loss": 17.2432, "step": 3671 }, { "epoch": 0.48, "learning_rate": 5.511758662162755e-05, "loss": 16.7496, "step": 3672 }, { "epoch": 0.48, "learning_rate": 5.509638182920014e-05, "loss": 16.7837, "step": 3673 }, { "epoch": 0.48, "learning_rate": 5.507517611048786e-05, "loss": 0.0002, "step": 3674 }, { "epoch": 0.48, "learning_rate": 5.505396946934488e-05, "loss": 16.6795, "step": 3675 }, { "epoch": 0.48, "learning_rate": 5.503276190962557e-05, "loss": 17.2377, "step": 3676 }, { "epoch": 0.48, "learning_rate": 5.5011553435184513e-05, "loss": 17.2076, "step": 3677 }, { "epoch": 0.48, "learning_rate": 5.499034404987642e-05, "loss": 17.332, "step": 3678 }, { "epoch": 0.48, "learning_rate": 5.4969133757556146e-05, "loss": 17.0314, "step": 3679 }, { "epoch": 0.48, "learning_rate": 5.494792256207875e-05, "loss": 15.8183, "step": 3680 }, { "epoch": 0.48, "learning_rate": 5.492671046729942e-05, "loss": 18.5463, "step": 3681 }, { "epoch": 0.48, "learning_rate": 5.4905497477073564e-05, "loss": 17.6831, "step": 3682 }, { "epoch": 0.48, "learning_rate": 5.488428359525669e-05, "loss": 0.0001, "step": 3683 }, { "epoch": 0.48, "learning_rate": 5.486306882570449e-05, "loss": 16.1267, "step": 3684 }, { "epoch": 0.49, "learning_rate": 5.484185317227284e-05, "loss": 17.327, "step": 3685 }, { "epoch": 0.49, "learning_rate": 5.482063663881775e-05, "loss": 0.0004, "step": 3686 }, { "epoch": 0.49, "learning_rate": 5.479941922919538e-05, "loss": 17.5001, "step": 3687 }, { "epoch": 0.49, "learning_rate": 5.4778200947262095e-05, "loss": 17.6083, "step": 3688 }, { "epoch": 0.49, "learning_rate": 5.4756981796874366e-05, "loss": 16.2221, "step": 3689 }, { "epoch": 0.49, "learning_rate": 5.4735761781888875e-05, "loss": 0.0, "step": 3690 }, { "epoch": 0.49, "learning_rate": 5.47145409061624e-05, "loss": 0.0, "step": 3691 }, { "epoch": 0.49, "learning_rate": 5.469331917355194e-05, "loss": 17.3937, "step": 3692 }, { "epoch": 0.49, "learning_rate": 5.467209658791458e-05, "loss": 16.4906, "step": 3693 }, { "epoch": 0.49, "learning_rate": 5.465087315310764e-05, "loss": 0.0004, "step": 3694 }, { "epoch": 0.49, "learning_rate": 5.4629648872988525e-05, "loss": 17.5121, "step": 3695 }, { "epoch": 0.49, "learning_rate": 5.460842375141483e-05, "loss": 16.3528, "step": 3696 }, { "epoch": 0.49, "learning_rate": 5.4587197792244285e-05, "loss": 0.0004, "step": 3697 }, { "epoch": 0.49, "learning_rate": 5.4565970999334794e-05, "loss": 17.0073, "step": 3698 }, { "epoch": 0.49, "learning_rate": 5.4544743376544394e-05, "loss": 0.0, "step": 3699 }, { "epoch": 0.49, "learning_rate": 5.452351492773128e-05, "loss": 0.0001, "step": 3700 }, { "epoch": 0.49, "learning_rate": 5.450228565675377e-05, "loss": 17.1345, "step": 3701 }, { "epoch": 0.49, "learning_rate": 5.448105556747041e-05, "loss": 17.1754, "step": 3702 }, { "epoch": 0.49, "learning_rate": 5.445982466373979e-05, "loss": 0.0001, "step": 3703 }, { "epoch": 0.49, "learning_rate": 5.443859294942073e-05, "loss": 16.2713, "step": 3704 }, { "epoch": 0.49, "learning_rate": 5.441736042837216e-05, "loss": 0.0, "step": 3705 }, { "epoch": 0.49, "learning_rate": 5.4396127104453155e-05, "loss": 17.2187, "step": 3706 }, { "epoch": 0.49, "learning_rate": 5.437489298152295e-05, "loss": 17.8335, "step": 3707 }, { "epoch": 0.49, "learning_rate": 5.4353658063440924e-05, "loss": 0.0, "step": 3708 }, { "epoch": 0.49, "learning_rate": 5.433242235406659e-05, "loss": 16.7675, "step": 3709 }, { "epoch": 0.49, "learning_rate": 5.4311185857259625e-05, "loss": 16.6389, "step": 3710 }, { "epoch": 0.49, "learning_rate": 5.428994857687982e-05, "loss": 0.0002, "step": 3711 }, { "epoch": 0.49, "learning_rate": 5.426871051678714e-05, "loss": 16.9057, "step": 3712 }, { "epoch": 0.49, "learning_rate": 5.424747168084165e-05, "loss": 17.4299, "step": 3713 }, { "epoch": 0.49, "learning_rate": 5.4226232072903584e-05, "loss": 17.6032, "step": 3714 }, { "epoch": 0.49, "learning_rate": 5.420499169683333e-05, "loss": 17.4481, "step": 3715 }, { "epoch": 0.49, "learning_rate": 5.418375055649141e-05, "loss": 16.3902, "step": 3716 }, { "epoch": 0.49, "learning_rate": 5.416250865573844e-05, "loss": 0.0, "step": 3717 }, { "epoch": 0.49, "learning_rate": 5.414126599843523e-05, "loss": 0.0, "step": 3718 }, { "epoch": 0.49, "learning_rate": 5.412002258844271e-05, "loss": 16.5861, "step": 3719 }, { "epoch": 0.49, "learning_rate": 5.409877842962193e-05, "loss": 0.0001, "step": 3720 }, { "epoch": 0.49, "learning_rate": 5.40775335258341e-05, "loss": 0.0005, "step": 3721 }, { "epoch": 0.49, "learning_rate": 5.4056287880940534e-05, "loss": 19.1558, "step": 3722 }, { "epoch": 0.49, "learning_rate": 5.403504149880273e-05, "loss": 0.0001, "step": 3723 }, { "epoch": 0.49, "learning_rate": 5.401379438328228e-05, "loss": 0.0001, "step": 3724 }, { "epoch": 0.49, "learning_rate": 5.399254653824091e-05, "loss": 0.0, "step": 3725 }, { "epoch": 0.49, "learning_rate": 5.397129796754048e-05, "loss": 0.0, "step": 3726 }, { "epoch": 0.49, "learning_rate": 5.395004867504302e-05, "loss": 17.2583, "step": 3727 }, { "epoch": 0.49, "learning_rate": 5.3928798664610645e-05, "loss": 0.0, "step": 3728 }, { "epoch": 0.49, "learning_rate": 5.390754794010563e-05, "loss": 17.0791, "step": 3729 }, { "epoch": 0.49, "learning_rate": 5.388629650539034e-05, "loss": 18.3174, "step": 3730 }, { "epoch": 0.49, "learning_rate": 5.386504436432732e-05, "loss": 0.0, "step": 3731 }, { "epoch": 0.49, "learning_rate": 5.384379152077922e-05, "loss": 18.5766, "step": 3732 }, { "epoch": 0.49, "learning_rate": 5.382253797860881e-05, "loss": 16.3533, "step": 3733 }, { "epoch": 0.49, "learning_rate": 5.380128374167898e-05, "loss": 16.551, "step": 3734 }, { "epoch": 0.49, "learning_rate": 5.378002881385279e-05, "loss": 15.8035, "step": 3735 }, { "epoch": 0.49, "learning_rate": 5.375877319899337e-05, "loss": 15.9863, "step": 3736 }, { "epoch": 0.49, "learning_rate": 5.3737516900964025e-05, "loss": 15.3648, "step": 3737 }, { "epoch": 0.49, "learning_rate": 5.3716259923628145e-05, "loss": 17.1902, "step": 3738 }, { "epoch": 0.49, "learning_rate": 5.3695002270849246e-05, "loss": 17.6988, "step": 3739 }, { "epoch": 0.49, "learning_rate": 5.367374394649102e-05, "loss": 0.0001, "step": 3740 }, { "epoch": 0.49, "learning_rate": 5.3652484954417194e-05, "loss": 16.3675, "step": 3741 }, { "epoch": 0.49, "learning_rate": 5.363122529849169e-05, "loss": 18.1841, "step": 3742 }, { "epoch": 0.49, "learning_rate": 5.360996498257853e-05, "loss": 16.532, "step": 3743 }, { "epoch": 0.49, "learning_rate": 5.358870401054182e-05, "loss": 16.2502, "step": 3744 }, { "epoch": 0.49, "learning_rate": 5.3567442386245845e-05, "loss": 16.4578, "step": 3745 }, { "epoch": 0.49, "learning_rate": 5.354618011355494e-05, "loss": 17.6787, "step": 3746 }, { "epoch": 0.49, "learning_rate": 5.3524917196333644e-05, "loss": 17.3484, "step": 3747 }, { "epoch": 0.49, "learning_rate": 5.3503653638446536e-05, "loss": 17.3696, "step": 3748 }, { "epoch": 0.49, "learning_rate": 5.348238944375835e-05, "loss": 16.8245, "step": 3749 }, { "epoch": 0.49, "learning_rate": 5.346112461613391e-05, "loss": 18.2059, "step": 3750 }, { "epoch": 0.49, "learning_rate": 5.343985915943819e-05, "loss": 16.6423, "step": 3751 }, { "epoch": 0.49, "learning_rate": 5.341859307753626e-05, "loss": 0.0, "step": 3752 }, { "epoch": 0.49, "learning_rate": 5.33973263742933e-05, "loss": 15.1859, "step": 3753 }, { "epoch": 0.49, "learning_rate": 5.337605905357459e-05, "loss": 19.5658, "step": 3754 }, { "epoch": 0.49, "learning_rate": 5.3354791119245564e-05, "loss": 16.4898, "step": 3755 }, { "epoch": 0.49, "learning_rate": 5.3333522575171726e-05, "loss": 0.0001, "step": 3756 }, { "epoch": 0.49, "learning_rate": 5.331225342521872e-05, "loss": 15.6768, "step": 3757 }, { "epoch": 0.49, "learning_rate": 5.329098367325227e-05, "loss": 16.1829, "step": 3758 }, { "epoch": 0.49, "learning_rate": 5.3269713323138236e-05, "loss": 17.7671, "step": 3759 }, { "epoch": 0.49, "learning_rate": 5.324844237874258e-05, "loss": 17.1935, "step": 3760 }, { "epoch": 0.5, "learning_rate": 5.322717084393136e-05, "loss": 0.0, "step": 3761 }, { "epoch": 0.5, "learning_rate": 5.3205898722570766e-05, "loss": 17.6906, "step": 3762 }, { "epoch": 0.5, "learning_rate": 5.3184626018527054e-05, "loss": 17.1952, "step": 3763 }, { "epoch": 0.5, "learning_rate": 5.3163352735666636e-05, "loss": 17.1628, "step": 3764 }, { "epoch": 0.5, "learning_rate": 5.3142078877855996e-05, "loss": 16.6277, "step": 3765 }, { "epoch": 0.5, "learning_rate": 5.312080444896172e-05, "loss": 19.1053, "step": 3766 }, { "epoch": 0.5, "learning_rate": 5.309952945285053e-05, "loss": 0.0, "step": 3767 }, { "epoch": 0.5, "learning_rate": 5.3078253893389205e-05, "loss": 0.0, "step": 3768 }, { "epoch": 0.5, "learning_rate": 5.305697777444467e-05, "loss": 0.0, "step": 3769 }, { "epoch": 0.5, "learning_rate": 5.303570109988393e-05, "loss": 17.743, "step": 3770 }, { "epoch": 0.5, "learning_rate": 5.301442387357407e-05, "loss": 16.5017, "step": 3771 }, { "epoch": 0.5, "learning_rate": 5.299314609938234e-05, "loss": 15.7256, "step": 3772 }, { "epoch": 0.5, "learning_rate": 5.2971867781176024e-05, "loss": 18.3374, "step": 3773 }, { "epoch": 0.5, "learning_rate": 5.295058892282253e-05, "loss": 17.3046, "step": 3774 }, { "epoch": 0.5, "learning_rate": 5.2929309528189356e-05, "loss": 17.7687, "step": 3775 }, { "epoch": 0.5, "learning_rate": 5.290802960114413e-05, "loss": 0.0002, "step": 3776 }, { "epoch": 0.5, "learning_rate": 5.288674914555454e-05, "loss": 0.0001, "step": 3777 }, { "epoch": 0.5, "learning_rate": 5.2865468165288366e-05, "loss": 19.3181, "step": 3778 }, { "epoch": 0.5, "learning_rate": 5.28441866642135e-05, "loss": 19.3971, "step": 3779 }, { "epoch": 0.5, "learning_rate": 5.282290464619797e-05, "loss": 18.9552, "step": 3780 }, { "epoch": 0.5, "learning_rate": 5.2801622115109805e-05, "loss": 17.6958, "step": 3781 }, { "epoch": 0.5, "learning_rate": 5.278033907481721e-05, "loss": 16.1989, "step": 3782 }, { "epoch": 0.5, "learning_rate": 5.275905552918844e-05, "loss": 0.0001, "step": 3783 }, { "epoch": 0.5, "learning_rate": 5.2737771482091833e-05, "loss": 16.9703, "step": 3784 }, { "epoch": 0.5, "learning_rate": 5.271648693739588e-05, "loss": 17.7233, "step": 3785 }, { "epoch": 0.5, "learning_rate": 5.269520189896909e-05, "loss": 16.8067, "step": 3786 }, { "epoch": 0.5, "learning_rate": 5.26739163706801e-05, "loss": 0.0, "step": 3787 }, { "epoch": 0.5, "learning_rate": 5.265263035639762e-05, "loss": 0.0, "step": 3788 }, { "epoch": 0.5, "learning_rate": 5.263134385999047e-05, "loss": 17.4351, "step": 3789 }, { "epoch": 0.5, "learning_rate": 5.261005688532753e-05, "loss": 0.0015, "step": 3790 }, { "epoch": 0.5, "learning_rate": 5.258876943627779e-05, "loss": 15.0431, "step": 3791 }, { "epoch": 0.5, "learning_rate": 5.256748151671031e-05, "loss": 17.135, "step": 3792 }, { "epoch": 0.5, "learning_rate": 5.254619313049425e-05, "loss": 16.8755, "step": 3793 }, { "epoch": 0.5, "learning_rate": 5.252490428149886e-05, "loss": 0.0002, "step": 3794 }, { "epoch": 0.5, "learning_rate": 5.250361497359343e-05, "loss": 0.0, "step": 3795 }, { "epoch": 0.5, "learning_rate": 5.248232521064736e-05, "loss": 15.7748, "step": 3796 }, { "epoch": 0.5, "learning_rate": 5.246103499653019e-05, "loss": 16.494, "step": 3797 }, { "epoch": 0.5, "learning_rate": 5.2439744335111454e-05, "loss": 17.7846, "step": 3798 }, { "epoch": 0.5, "learning_rate": 5.24184532302608e-05, "loss": 0.0001, "step": 3799 }, { "epoch": 0.5, "learning_rate": 5.239716168584795e-05, "loss": 15.6047, "step": 3800 }, { "epoch": 0.5, "learning_rate": 5.237586970574275e-05, "loss": 17.3469, "step": 3801 }, { "epoch": 0.5, "learning_rate": 5.2354577293815074e-05, "loss": 15.1687, "step": 3802 }, { "epoch": 0.5, "learning_rate": 5.233328445393488e-05, "loss": 16.402, "step": 3803 }, { "epoch": 0.5, "learning_rate": 5.231199118997222e-05, "loss": 19.152, "step": 3804 }, { "epoch": 0.5, "learning_rate": 5.229069750579722e-05, "loss": 16.8986, "step": 3805 }, { "epoch": 0.5, "learning_rate": 5.2269403405280105e-05, "loss": 18.1894, "step": 3806 }, { "epoch": 0.5, "learning_rate": 5.2248108892291104e-05, "loss": 16.766, "step": 3807 }, { "epoch": 0.5, "learning_rate": 5.2226813970700606e-05, "loss": 0.0, "step": 3808 }, { "epoch": 0.5, "learning_rate": 5.2205518644379e-05, "loss": 0.0002, "step": 3809 }, { "epoch": 0.5, "learning_rate": 5.218422291719682e-05, "loss": 0.0, "step": 3810 }, { "epoch": 0.5, "learning_rate": 5.2162926793024635e-05, "loss": 15.9478, "step": 3811 }, { "epoch": 0.5, "learning_rate": 5.2141630275733054e-05, "loss": 0.0, "step": 3812 }, { "epoch": 0.5, "learning_rate": 5.212033336919283e-05, "loss": 17.6458, "step": 3813 }, { "epoch": 0.5, "learning_rate": 5.209903607727473e-05, "loss": 18.2098, "step": 3814 }, { "epoch": 0.5, "learning_rate": 5.207773840384962e-05, "loss": 0.0001, "step": 3815 }, { "epoch": 0.5, "learning_rate": 5.2056440352788404e-05, "loss": 0.0, "step": 3816 }, { "epoch": 0.5, "learning_rate": 5.2035141927962105e-05, "loss": 17.0666, "step": 3817 }, { "epoch": 0.5, "learning_rate": 5.201384313324177e-05, "loss": 18.1852, "step": 3818 }, { "epoch": 0.5, "learning_rate": 5.199254397249853e-05, "loss": 17.7695, "step": 3819 }, { "epoch": 0.5, "learning_rate": 5.197124444960357e-05, "loss": 0.0001, "step": 3820 }, { "epoch": 0.5, "learning_rate": 5.194994456842817e-05, "loss": 0.0, "step": 3821 }, { "epoch": 0.5, "learning_rate": 5.1928644332843646e-05, "loss": 16.7232, "step": 3822 }, { "epoch": 0.5, "learning_rate": 5.190734374672139e-05, "loss": 0.0003, "step": 3823 }, { "epoch": 0.5, "learning_rate": 5.188604281393286e-05, "loss": 18.8329, "step": 3824 }, { "epoch": 0.5, "learning_rate": 5.186474153834957e-05, "loss": 15.2369, "step": 3825 }, { "epoch": 0.5, "learning_rate": 5.184343992384311e-05, "loss": 16.177, "step": 3826 }, { "epoch": 0.5, "learning_rate": 5.18221379742851e-05, "loss": 17.1498, "step": 3827 }, { "epoch": 0.5, "learning_rate": 5.180083569354725e-05, "loss": 16.8045, "step": 3828 }, { "epoch": 0.5, "learning_rate": 5.177953308550134e-05, "loss": 15.4568, "step": 3829 }, { "epoch": 0.5, "learning_rate": 5.175823015401918e-05, "loss": 15.7345, "step": 3830 }, { "epoch": 0.5, "learning_rate": 5.173692690297266e-05, "loss": 17.6519, "step": 3831 }, { "epoch": 0.5, "learning_rate": 5.17156233362337e-05, "loss": 0.0, "step": 3832 }, { "epoch": 0.5, "learning_rate": 5.16943194576743e-05, "loss": 18.48, "step": 3833 }, { "epoch": 0.5, "learning_rate": 5.1673015271166525e-05, "loss": 19.1595, "step": 3834 }, { "epoch": 0.5, "learning_rate": 5.165171078058249e-05, "loss": 17.3086, "step": 3835 }, { "epoch": 0.5, "learning_rate": 5.163040598979434e-05, "loss": 0.0007, "step": 3836 }, { "epoch": 0.51, "learning_rate": 5.160910090267429e-05, "loss": 0.0008, "step": 3837 }, { "epoch": 0.51, "learning_rate": 5.158779552309463e-05, "loss": 0.0, "step": 3838 }, { "epoch": 0.51, "learning_rate": 5.156648985492769e-05, "loss": 0.0, "step": 3839 }, { "epoch": 0.51, "learning_rate": 5.1545183902045836e-05, "loss": 17.2247, "step": 3840 }, { "epoch": 0.51, "learning_rate": 5.15238776683215e-05, "loss": 18.7231, "step": 3841 }, { "epoch": 0.51, "learning_rate": 5.1502571157627164e-05, "loss": 18.3566, "step": 3842 }, { "epoch": 0.51, "learning_rate": 5.148126437383536e-05, "loss": 0.0, "step": 3843 }, { "epoch": 0.51, "learning_rate": 5.1459957320818676e-05, "loss": 17.5324, "step": 3844 }, { "epoch": 0.51, "learning_rate": 5.143865000244972e-05, "loss": 0.0001, "step": 3845 }, { "epoch": 0.51, "learning_rate": 5.141734242260119e-05, "loss": 0.0001, "step": 3846 }, { "epoch": 0.51, "learning_rate": 5.139603458514582e-05, "loss": 18.7369, "step": 3847 }, { "epoch": 0.51, "learning_rate": 5.137472649395636e-05, "loss": 16.9472, "step": 3848 }, { "epoch": 0.51, "learning_rate": 5.135341815290563e-05, "loss": 16.9694, "step": 3849 }, { "epoch": 0.51, "learning_rate": 5.1332109565866514e-05, "loss": 18.6297, "step": 3850 }, { "epoch": 0.51, "learning_rate": 5.1310800736711903e-05, "loss": 0.0006, "step": 3851 }, { "epoch": 0.51, "learning_rate": 5.128949166931477e-05, "loss": 0.0001, "step": 3852 }, { "epoch": 0.51, "learning_rate": 5.1268182367548076e-05, "loss": 0.0001, "step": 3853 }, { "epoch": 0.51, "learning_rate": 5.12468728352849e-05, "loss": 18.5621, "step": 3854 }, { "epoch": 0.51, "learning_rate": 5.12255630763983e-05, "loss": 16.7804, "step": 3855 }, { "epoch": 0.51, "learning_rate": 5.12042530947614e-05, "loss": 17.4162, "step": 3856 }, { "epoch": 0.51, "learning_rate": 5.1182942894247366e-05, "loss": 18.0446, "step": 3857 }, { "epoch": 0.51, "learning_rate": 5.1161632478729384e-05, "loss": 0.0, "step": 3858 }, { "epoch": 0.51, "learning_rate": 5.114032185208073e-05, "loss": 17.2307, "step": 3859 }, { "epoch": 0.51, "learning_rate": 5.111901101817466e-05, "loss": 19.4561, "step": 3860 }, { "epoch": 0.51, "learning_rate": 5.109769998088448e-05, "loss": 15.6054, "step": 3861 }, { "epoch": 0.51, "learning_rate": 5.107638874408357e-05, "loss": 15.7438, "step": 3862 }, { "epoch": 0.51, "learning_rate": 5.1055077311645304e-05, "loss": 17.8653, "step": 3863 }, { "epoch": 0.51, "learning_rate": 5.103376568744313e-05, "loss": 19.3662, "step": 3864 }, { "epoch": 0.51, "learning_rate": 5.1012453875350476e-05, "loss": 0.0, "step": 3865 }, { "epoch": 0.51, "learning_rate": 5.0991141879240846e-05, "loss": 19.1703, "step": 3866 }, { "epoch": 0.51, "learning_rate": 5.096982970298778e-05, "loss": 16.6848, "step": 3867 }, { "epoch": 0.51, "learning_rate": 5.0948517350464844e-05, "loss": 16.4885, "step": 3868 }, { "epoch": 0.51, "learning_rate": 5.092720482554562e-05, "loss": 0.0005, "step": 3869 }, { "epoch": 0.51, "learning_rate": 5.090589213210373e-05, "loss": 18.1077, "step": 3870 }, { "epoch": 0.51, "learning_rate": 5.0884579274012834e-05, "loss": 15.7721, "step": 3871 }, { "epoch": 0.51, "learning_rate": 5.0863266255146624e-05, "loss": 18.6717, "step": 3872 }, { "epoch": 0.51, "learning_rate": 5.08419530793788e-05, "loss": 18.3838, "step": 3873 }, { "epoch": 0.51, "learning_rate": 5.082063975058311e-05, "loss": 0.0, "step": 3874 }, { "epoch": 0.51, "learning_rate": 5.079932627263333e-05, "loss": 15.8568, "step": 3875 }, { "epoch": 0.51, "learning_rate": 5.077801264940326e-05, "loss": 18.453, "step": 3876 }, { "epoch": 0.51, "learning_rate": 5.0756698884766716e-05, "loss": 17.328, "step": 3877 }, { "epoch": 0.51, "learning_rate": 5.0735384982597547e-05, "loss": 0.0, "step": 3878 }, { "epoch": 0.51, "learning_rate": 5.0714070946769655e-05, "loss": 16.572, "step": 3879 }, { "epoch": 0.51, "learning_rate": 5.0692756781156894e-05, "loss": 16.5594, "step": 3880 }, { "epoch": 0.51, "learning_rate": 5.0671442489633215e-05, "loss": 16.6956, "step": 3881 }, { "epoch": 0.51, "learning_rate": 5.0650128076072545e-05, "loss": 17.0585, "step": 3882 }, { "epoch": 0.51, "learning_rate": 5.0628813544348876e-05, "loss": 0.0, "step": 3883 }, { "epoch": 0.51, "learning_rate": 5.060749889833619e-05, "loss": 17.0021, "step": 3884 }, { "epoch": 0.51, "learning_rate": 5.058618414190849e-05, "loss": 16.5088, "step": 3885 }, { "epoch": 0.51, "learning_rate": 5.05648692789398e-05, "loss": 16.3281, "step": 3886 }, { "epoch": 0.51, "learning_rate": 5.0543554313304174e-05, "loss": 15.9957, "step": 3887 }, { "epoch": 0.51, "learning_rate": 5.0522239248875704e-05, "loss": 0.0, "step": 3888 }, { "epoch": 0.51, "learning_rate": 5.0500924089528444e-05, "loss": 17.1367, "step": 3889 }, { "epoch": 0.51, "learning_rate": 5.047960883913652e-05, "loss": 0.0001, "step": 3890 }, { "epoch": 0.51, "learning_rate": 5.045829350157403e-05, "loss": 17.7773, "step": 3891 }, { "epoch": 0.51, "learning_rate": 5.0436978080715137e-05, "loss": 0.0, "step": 3892 }, { "epoch": 0.51, "learning_rate": 5.0415662580433964e-05, "loss": 18.8861, "step": 3893 }, { "epoch": 0.51, "learning_rate": 5.039434700460469e-05, "loss": 18.0217, "step": 3894 }, { "epoch": 0.51, "learning_rate": 5.03730313571015e-05, "loss": 16.4378, "step": 3895 }, { "epoch": 0.51, "learning_rate": 5.035171564179858e-05, "loss": 17.2024, "step": 3896 }, { "epoch": 0.51, "learning_rate": 5.033039986257013e-05, "loss": 0.0, "step": 3897 }, { "epoch": 0.51, "learning_rate": 5.030908402329036e-05, "loss": 16.4741, "step": 3898 }, { "epoch": 0.51, "learning_rate": 5.028776812783354e-05, "loss": 16.6656, "step": 3899 }, { "epoch": 0.51, "learning_rate": 5.026645218007385e-05, "loss": 16.5561, "step": 3900 }, { "epoch": 0.51, "learning_rate": 5.0245136183885564e-05, "loss": 0.0, "step": 3901 }, { "epoch": 0.51, "learning_rate": 5.0223820143142944e-05, "loss": 17.1925, "step": 3902 }, { "epoch": 0.51, "learning_rate": 5.0202504061720225e-05, "loss": 16.3573, "step": 3903 }, { "epoch": 0.51, "learning_rate": 5.0181187943491715e-05, "loss": 0.0, "step": 3904 }, { "epoch": 0.51, "learning_rate": 5.015987179233167e-05, "loss": 16.4754, "step": 3905 }, { "epoch": 0.51, "learning_rate": 5.013855561211437e-05, "loss": 0.0, "step": 3906 }, { "epoch": 0.51, "learning_rate": 5.011723940671411e-05, "loss": 17.6935, "step": 3907 }, { "epoch": 0.51, "learning_rate": 5.00959231800052e-05, "loss": 15.7869, "step": 3908 }, { "epoch": 0.51, "learning_rate": 5.007460693586191e-05, "loss": 17.3234, "step": 3909 }, { "epoch": 0.51, "learning_rate": 5.005329067815856e-05, "loss": 16.1014, "step": 3910 }, { "epoch": 0.51, "learning_rate": 5.003197441076944e-05, "loss": 16.9744, "step": 3911 }, { "epoch": 0.51, "learning_rate": 5.001065813756887e-05, "loss": 17.72, "step": 3912 }, { "epoch": 0.52, "learning_rate": 4.9989341862431144e-05, "loss": 0.0001, "step": 3913 }, { "epoch": 0.52, "learning_rate": 4.996802558923057e-05, "loss": 18.2588, "step": 3914 }, { "epoch": 0.52, "learning_rate": 4.994670932184144e-05, "loss": 18.3205, "step": 3915 }, { "epoch": 0.52, "learning_rate": 4.9925393064138096e-05, "loss": 15.6304, "step": 3916 }, { "epoch": 0.52, "learning_rate": 4.990407681999482e-05, "loss": 16.946, "step": 3917 }, { "epoch": 0.52, "learning_rate": 4.98827605932859e-05, "loss": 17.4107, "step": 3918 }, { "epoch": 0.52, "learning_rate": 4.986144438788564e-05, "loss": 15.617, "step": 3919 }, { "epoch": 0.52, "learning_rate": 4.984012820766835e-05, "loss": 16.5495, "step": 3920 }, { "epoch": 0.52, "learning_rate": 4.98188120565083e-05, "loss": 16.3408, "step": 3921 }, { "epoch": 0.52, "learning_rate": 4.979749593827979e-05, "loss": 0.0, "step": 3922 }, { "epoch": 0.52, "learning_rate": 4.977617985685707e-05, "loss": 17.1311, "step": 3923 }, { "epoch": 0.52, "learning_rate": 4.975486381611444e-05, "loss": 0.0001, "step": 3924 }, { "epoch": 0.52, "learning_rate": 4.973354781992616e-05, "loss": 17.8961, "step": 3925 }, { "epoch": 0.52, "learning_rate": 4.971223187216648e-05, "loss": 0.0, "step": 3926 }, { "epoch": 0.52, "learning_rate": 4.969091597670964e-05, "loss": 16.3865, "step": 3927 }, { "epoch": 0.52, "learning_rate": 4.966960013742988e-05, "loss": 14.843, "step": 3928 }, { "epoch": 0.52, "learning_rate": 4.964828435820143e-05, "loss": 16.3695, "step": 3929 }, { "epoch": 0.52, "learning_rate": 4.962696864289851e-05, "loss": 17.4865, "step": 3930 }, { "epoch": 0.52, "learning_rate": 4.960565299539532e-05, "loss": 16.2978, "step": 3931 }, { "epoch": 0.52, "learning_rate": 4.958433741956605e-05, "loss": 0.0, "step": 3932 }, { "epoch": 0.52, "learning_rate": 4.956302191928488e-05, "loss": 0.0, "step": 3933 }, { "epoch": 0.52, "learning_rate": 4.954170649842598e-05, "loss": 18.4428, "step": 3934 }, { "epoch": 0.52, "learning_rate": 4.95203911608635e-05, "loss": 16.3168, "step": 3935 }, { "epoch": 0.52, "learning_rate": 4.949907591047156e-05, "loss": 18.2765, "step": 3936 }, { "epoch": 0.52, "learning_rate": 4.947776075112431e-05, "loss": 18.3331, "step": 3937 }, { "epoch": 0.52, "learning_rate": 4.945644568669583e-05, "loss": 16.9867, "step": 3938 }, { "epoch": 0.52, "learning_rate": 4.943513072106022e-05, "loss": 15.8877, "step": 3939 }, { "epoch": 0.52, "learning_rate": 4.9413815858091524e-05, "loss": 17.2455, "step": 3940 }, { "epoch": 0.52, "learning_rate": 4.939250110166382e-05, "loss": 18.4534, "step": 3941 }, { "epoch": 0.52, "learning_rate": 4.9371186455651136e-05, "loss": 17.7879, "step": 3942 }, { "epoch": 0.52, "learning_rate": 4.9349871923927466e-05, "loss": 0.0, "step": 3943 }, { "epoch": 0.52, "learning_rate": 4.93285575103668e-05, "loss": 15.7361, "step": 3944 }, { "epoch": 0.52, "learning_rate": 4.930724321884311e-05, "loss": 0.0, "step": 3945 }, { "epoch": 0.52, "learning_rate": 4.9285929053230364e-05, "loss": 0.0001, "step": 3946 }, { "epoch": 0.52, "learning_rate": 4.926461501740245e-05, "loss": 15.753, "step": 3947 }, { "epoch": 0.52, "learning_rate": 4.9243301115233275e-05, "loss": 15.1991, "step": 3948 }, { "epoch": 0.52, "learning_rate": 4.922198735059674e-05, "loss": 0.0, "step": 3949 }, { "epoch": 0.52, "learning_rate": 4.9200673727366664e-05, "loss": 17.9872, "step": 3950 }, { "epoch": 0.52, "learning_rate": 4.917936024941689e-05, "loss": 0.0002, "step": 3951 }, { "epoch": 0.52, "learning_rate": 4.915804692062121e-05, "loss": 15.3761, "step": 3952 }, { "epoch": 0.52, "learning_rate": 4.913673374485338e-05, "loss": 18.1664, "step": 3953 }, { "epoch": 0.52, "learning_rate": 4.9115420725987164e-05, "loss": 16.8435, "step": 3954 }, { "epoch": 0.52, "learning_rate": 4.909410786789628e-05, "loss": 0.0001, "step": 3955 }, { "epoch": 0.52, "learning_rate": 4.9072795174454386e-05, "loss": 0.0, "step": 3956 }, { "epoch": 0.52, "learning_rate": 4.905148264953516e-05, "loss": 16.1013, "step": 3957 }, { "epoch": 0.52, "learning_rate": 4.903017029701222e-05, "loss": 0.0, "step": 3958 }, { "epoch": 0.52, "learning_rate": 4.900885812075916e-05, "loss": 0.0001, "step": 3959 }, { "epoch": 0.52, "learning_rate": 4.898754612464954e-05, "loss": 16.3462, "step": 3960 }, { "epoch": 0.52, "learning_rate": 4.896623431255688e-05, "loss": 0.0002, "step": 3961 }, { "epoch": 0.52, "learning_rate": 4.8944922688354694e-05, "loss": 0.0001, "step": 3962 }, { "epoch": 0.52, "learning_rate": 4.892361125591643e-05, "loss": 18.7059, "step": 3963 }, { "epoch": 0.52, "learning_rate": 4.890230001911553e-05, "loss": 16.6149, "step": 3964 }, { "epoch": 0.52, "learning_rate": 4.888098898182535e-05, "loss": 18.5702, "step": 3965 }, { "epoch": 0.52, "learning_rate": 4.885967814791928e-05, "loss": 16.8512, "step": 3966 }, { "epoch": 0.52, "learning_rate": 4.8838367521270614e-05, "loss": 16.6478, "step": 3967 }, { "epoch": 0.52, "learning_rate": 4.8817057105752646e-05, "loss": 0.0002, "step": 3968 }, { "epoch": 0.52, "learning_rate": 4.8795746905238595e-05, "loss": 17.4917, "step": 3969 }, { "epoch": 0.52, "learning_rate": 4.87744369236017e-05, "loss": 0.0, "step": 3970 }, { "epoch": 0.52, "learning_rate": 4.8753127164715104e-05, "loss": 16.86, "step": 3971 }, { "epoch": 0.52, "learning_rate": 4.873181763245193e-05, "loss": 0.0, "step": 3972 }, { "epoch": 0.52, "learning_rate": 4.8710508330685236e-05, "loss": 0.0, "step": 3973 }, { "epoch": 0.52, "learning_rate": 4.8689199263288095e-05, "loss": 15.2612, "step": 3974 }, { "epoch": 0.52, "learning_rate": 4.866789043413349e-05, "loss": 0.0, "step": 3975 }, { "epoch": 0.52, "learning_rate": 4.864658184709437e-05, "loss": 18.051, "step": 3976 }, { "epoch": 0.52, "learning_rate": 4.8625273506043656e-05, "loss": 17.2577, "step": 3977 }, { "epoch": 0.52, "learning_rate": 4.860396541485419e-05, "loss": 17.3249, "step": 3978 }, { "epoch": 0.52, "learning_rate": 4.858265757739881e-05, "loss": 16.7721, "step": 3979 }, { "epoch": 0.52, "learning_rate": 4.856134999755029e-05, "loss": 0.0001, "step": 3980 }, { "epoch": 0.52, "learning_rate": 4.8540042679181355e-05, "loss": 17.4432, "step": 3981 }, { "epoch": 0.52, "learning_rate": 4.8518735626164664e-05, "loss": 18.4703, "step": 3982 }, { "epoch": 0.52, "learning_rate": 4.849742884237286e-05, "loss": 18.844, "step": 3983 }, { "epoch": 0.52, "learning_rate": 4.8476122331678514e-05, "loss": 16.6877, "step": 3984 }, { "epoch": 0.52, "learning_rate": 4.845481609795418e-05, "loss": 17.8325, "step": 3985 }, { "epoch": 0.52, "learning_rate": 4.843351014507233e-05, "loss": 18.1894, "step": 3986 }, { "epoch": 0.52, "learning_rate": 4.841220447690539e-05, "loss": 0.0, "step": 3987 }, { "epoch": 0.52, "learning_rate": 4.839089909732573e-05, "loss": 0.0, "step": 3988 }, { "epoch": 0.53, "learning_rate": 4.8369594010205685e-05, "loss": 0.0001, "step": 3989 }, { "epoch": 0.53, "learning_rate": 4.8348289219417545e-05, "loss": 0.0, "step": 3990 }, { "epoch": 0.53, "learning_rate": 4.8326984728833494e-05, "loss": 0.0, "step": 3991 }, { "epoch": 0.53, "learning_rate": 4.830568054232572e-05, "loss": 17.2264, "step": 3992 }, { "epoch": 0.53, "learning_rate": 4.8284376663766326e-05, "loss": 0.0, "step": 3993 }, { "epoch": 0.53, "learning_rate": 4.826307309702737e-05, "loss": 16.984, "step": 3994 }, { "epoch": 0.53, "learning_rate": 4.8241769845980835e-05, "loss": 16.5126, "step": 3995 }, { "epoch": 0.53, "learning_rate": 4.822046691449868e-05, "loss": 17.6065, "step": 3996 }, { "epoch": 0.53, "learning_rate": 4.819916430645276e-05, "loss": 20.5304, "step": 3997 }, { "epoch": 0.53, "learning_rate": 4.817786202571492e-05, "loss": 0.0002, "step": 3998 }, { "epoch": 0.53, "learning_rate": 4.815656007615692e-05, "loss": 16.6138, "step": 3999 }, { "epoch": 0.53, "learning_rate": 4.813525846165045e-05, "loss": 17.9583, "step": 4000 }, { "epoch": 0.53, "learning_rate": 4.8113957186067154e-05, "loss": 16.8694, "step": 4001 }, { "epoch": 0.53, "learning_rate": 4.809265625327862e-05, "loss": 0.0003, "step": 4002 }, { "epoch": 0.53, "learning_rate": 4.807135566715637e-05, "loss": 0.0, "step": 4003 }, { "epoch": 0.53, "learning_rate": 4.805005543157185e-05, "loss": 16.9909, "step": 4004 }, { "epoch": 0.53, "learning_rate": 4.802875555039644e-05, "loss": 17.5305, "step": 4005 }, { "epoch": 0.53, "learning_rate": 4.800745602750149e-05, "loss": 17.8154, "step": 4006 }, { "epoch": 0.53, "learning_rate": 4.7986156866758255e-05, "loss": 18.3176, "step": 4007 }, { "epoch": 0.53, "learning_rate": 4.796485807203791e-05, "loss": 0.0001, "step": 4008 }, { "epoch": 0.53, "learning_rate": 4.794355964721161e-05, "loss": 18.9845, "step": 4009 }, { "epoch": 0.53, "learning_rate": 4.79222615961504e-05, "loss": 17.4604, "step": 4010 }, { "epoch": 0.53, "learning_rate": 4.790096392272529e-05, "loss": 0.0, "step": 4011 }, { "epoch": 0.53, "learning_rate": 4.787966663080719e-05, "loss": 16.4962, "step": 4012 }, { "epoch": 0.53, "learning_rate": 4.785836972426696e-05, "loss": 0.0001, "step": 4013 }, { "epoch": 0.53, "learning_rate": 4.783707320697538e-05, "loss": 14.9738, "step": 4014 }, { "epoch": 0.53, "learning_rate": 4.781577708280319e-05, "loss": 15.7343, "step": 4015 }, { "epoch": 0.53, "learning_rate": 4.7794481355621016e-05, "loss": 17.415, "step": 4016 }, { "epoch": 0.53, "learning_rate": 4.777318602929941e-05, "loss": 17.8621, "step": 4017 }, { "epoch": 0.53, "learning_rate": 4.775189110770891e-05, "loss": 0.0, "step": 4018 }, { "epoch": 0.53, "learning_rate": 4.7730596594719914e-05, "loss": 17.7833, "step": 4019 }, { "epoch": 0.53, "learning_rate": 4.770930249420278e-05, "loss": 17.5334, "step": 4020 }, { "epoch": 0.53, "learning_rate": 4.768800881002779e-05, "loss": 17.1608, "step": 4021 }, { "epoch": 0.53, "learning_rate": 4.766671554606513e-05, "loss": 17.1331, "step": 4022 }, { "epoch": 0.53, "learning_rate": 4.764542270618494e-05, "loss": 0.0, "step": 4023 }, { "epoch": 0.53, "learning_rate": 4.762413029425726e-05, "loss": 18.9521, "step": 4024 }, { "epoch": 0.53, "learning_rate": 4.760283831415206e-05, "loss": 17.7176, "step": 4025 }, { "epoch": 0.53, "learning_rate": 4.7581546769739216e-05, "loss": 16.4792, "step": 4026 }, { "epoch": 0.53, "learning_rate": 4.7560255664888564e-05, "loss": 17.3551, "step": 4027 }, { "epoch": 0.53, "learning_rate": 4.7538965003469824e-05, "loss": 0.0, "step": 4028 }, { "epoch": 0.53, "learning_rate": 4.751767478935264e-05, "loss": 19.1098, "step": 4029 }, { "epoch": 0.53, "learning_rate": 4.7496385026406585e-05, "loss": 0.0, "step": 4030 }, { "epoch": 0.53, "learning_rate": 4.747509571850116e-05, "loss": 18.5016, "step": 4031 }, { "epoch": 0.53, "learning_rate": 4.7453806869505754e-05, "loss": 16.1817, "step": 4032 }, { "epoch": 0.53, "learning_rate": 4.74325184832897e-05, "loss": 18.974, "step": 4033 }, { "epoch": 0.53, "learning_rate": 4.741123056372222e-05, "loss": 15.3745, "step": 4034 }, { "epoch": 0.53, "learning_rate": 4.738994311467248e-05, "loss": 0.0, "step": 4035 }, { "epoch": 0.53, "learning_rate": 4.736865614000955e-05, "loss": 0.0006, "step": 4036 }, { "epoch": 0.53, "learning_rate": 4.7347369643602395e-05, "loss": 17.1693, "step": 4037 }, { "epoch": 0.53, "learning_rate": 4.7326083629319914e-05, "loss": 0.0, "step": 4038 }, { "epoch": 0.53, "learning_rate": 4.730479810103092e-05, "loss": 0.0, "step": 4039 }, { "epoch": 0.53, "learning_rate": 4.728351306260413e-05, "loss": 17.4134, "step": 4040 }, { "epoch": 0.53, "learning_rate": 4.726222851790817e-05, "loss": 0.0001, "step": 4041 }, { "epoch": 0.53, "learning_rate": 4.724094447081157e-05, "loss": 17.2985, "step": 4042 }, { "epoch": 0.53, "learning_rate": 4.7219660925182804e-05, "loss": 18.0429, "step": 4043 }, { "epoch": 0.53, "learning_rate": 4.7198377884890206e-05, "loss": 17.6212, "step": 4044 }, { "epoch": 0.53, "learning_rate": 4.717709535380205e-05, "loss": 17.2172, "step": 4045 }, { "epoch": 0.53, "learning_rate": 4.71558133357865e-05, "loss": 0.0001, "step": 4046 }, { "epoch": 0.53, "learning_rate": 4.7134531834711646e-05, "loss": 0.0001, "step": 4047 }, { "epoch": 0.53, "learning_rate": 4.711325085444548e-05, "loss": 18.4759, "step": 4048 }, { "epoch": 0.53, "learning_rate": 4.709197039885588e-05, "loss": 16.3589, "step": 4049 }, { "epoch": 0.53, "learning_rate": 4.707069047181065e-05, "loss": 17.2593, "step": 4050 }, { "epoch": 0.53, "learning_rate": 4.704941107717748e-05, "loss": 15.822, "step": 4051 }, { "epoch": 0.53, "learning_rate": 4.702813221882398e-05, "loss": 17.2321, "step": 4052 }, { "epoch": 0.53, "learning_rate": 4.700685390061767e-05, "loss": 16.6884, "step": 4053 }, { "epoch": 0.53, "learning_rate": 4.6985576126425936e-05, "loss": 17.8583, "step": 4054 }, { "epoch": 0.53, "learning_rate": 4.6964298900116083e-05, "loss": 17.5482, "step": 4055 }, { "epoch": 0.53, "learning_rate": 4.694302222555534e-05, "loss": 18.0519, "step": 4056 }, { "epoch": 0.53, "learning_rate": 4.69217461066108e-05, "loss": 0.0, "step": 4057 }, { "epoch": 0.53, "learning_rate": 4.690047054714949e-05, "loss": 0.0, "step": 4058 }, { "epoch": 0.53, "learning_rate": 4.687919555103828e-05, "loss": 0.0, "step": 4059 }, { "epoch": 0.53, "learning_rate": 4.6857921122144016e-05, "loss": 16.3336, "step": 4060 }, { "epoch": 0.53, "learning_rate": 4.6836647264333375e-05, "loss": 17.9508, "step": 4061 }, { "epoch": 0.53, "learning_rate": 4.681537398147296e-05, "loss": 17.0634, "step": 4062 }, { "epoch": 0.53, "learning_rate": 4.6794101277429246e-05, "loss": 0.0002, "step": 4063 }, { "epoch": 0.53, "learning_rate": 4.6772829156068646e-05, "loss": 18.2152, "step": 4064 }, { "epoch": 0.54, "learning_rate": 4.675155762125744e-05, "loss": 17.4789, "step": 4065 }, { "epoch": 0.54, "learning_rate": 4.6730286676861776e-05, "loss": 17.691, "step": 4066 }, { "epoch": 0.54, "learning_rate": 4.670901632674774e-05, "loss": 18.9751, "step": 4067 }, { "epoch": 0.54, "learning_rate": 4.6687746574781294e-05, "loss": 16.3108, "step": 4068 }, { "epoch": 0.54, "learning_rate": 4.666647742482828e-05, "loss": 17.8117, "step": 4069 }, { "epoch": 0.54, "learning_rate": 4.664520888075445e-05, "loss": 16.9038, "step": 4070 }, { "epoch": 0.54, "learning_rate": 4.662394094642542e-05, "loss": 17.1549, "step": 4071 }, { "epoch": 0.54, "learning_rate": 4.6602673625706714e-05, "loss": 0.0, "step": 4072 }, { "epoch": 0.54, "learning_rate": 4.658140692246375e-05, "loss": 17.1102, "step": 4073 }, { "epoch": 0.54, "learning_rate": 4.6560140840561817e-05, "loss": 14.9273, "step": 4074 }, { "epoch": 0.54, "learning_rate": 4.65388753838661e-05, "loss": 16.2422, "step": 4075 }, { "epoch": 0.54, "learning_rate": 4.651761055624166e-05, "loss": 17.556, "step": 4076 }, { "epoch": 0.54, "learning_rate": 4.6496346361553476e-05, "loss": 17.5372, "step": 4077 }, { "epoch": 0.54, "learning_rate": 4.647508280366637e-05, "loss": 17.9481, "step": 4078 }, { "epoch": 0.54, "learning_rate": 4.645381988644506e-05, "loss": 18.0348, "step": 4079 }, { "epoch": 0.54, "learning_rate": 4.643255761375417e-05, "loss": 0.0006, "step": 4080 }, { "epoch": 0.54, "learning_rate": 4.6411295989458183e-05, "loss": 17.1951, "step": 4081 }, { "epoch": 0.54, "learning_rate": 4.6390035017421486e-05, "loss": 17.6037, "step": 4082 }, { "epoch": 0.54, "learning_rate": 4.6368774701508314e-05, "loss": 0.0, "step": 4083 }, { "epoch": 0.54, "learning_rate": 4.6347515045582804e-05, "loss": 18.997, "step": 4084 }, { "epoch": 0.54, "learning_rate": 4.6326256053508986e-05, "loss": 0.001, "step": 4085 }, { "epoch": 0.54, "learning_rate": 4.630499772915075e-05, "loss": 0.0, "step": 4086 }, { "epoch": 0.54, "learning_rate": 4.628374007637186e-05, "loss": 0.0001, "step": 4087 }, { "epoch": 0.54, "learning_rate": 4.6262483099035966e-05, "loss": 0.0, "step": 4088 }, { "epoch": 0.54, "learning_rate": 4.624122680100662e-05, "loss": 0.0, "step": 4089 }, { "epoch": 0.54, "learning_rate": 4.621997118614721e-05, "loss": 16.7383, "step": 4090 }, { "epoch": 0.54, "learning_rate": 4.619871625832102e-05, "loss": 18.1163, "step": 4091 }, { "epoch": 0.54, "learning_rate": 4.617746202139119e-05, "loss": 17.1827, "step": 4092 }, { "epoch": 0.54, "learning_rate": 4.615620847922078e-05, "loss": 18.3984, "step": 4093 }, { "epoch": 0.54, "learning_rate": 4.6134955635672675e-05, "loss": 0.0001, "step": 4094 }, { "epoch": 0.54, "learning_rate": 4.6113703494609663e-05, "loss": 17.4727, "step": 4095 }, { "epoch": 0.54, "learning_rate": 4.609245205989438e-05, "loss": 16.0893, "step": 4096 }, { "epoch": 0.54, "learning_rate": 4.6071201335389346e-05, "loss": 0.0015, "step": 4097 }, { "epoch": 0.54, "learning_rate": 4.6049951324956974e-05, "loss": 17.5179, "step": 4098 }, { "epoch": 0.54, "learning_rate": 4.602870203245952e-05, "loss": 16.8992, "step": 4099 }, { "epoch": 0.54, "learning_rate": 4.6007453461759104e-05, "loss": 0.0001, "step": 4100 }, { "epoch": 0.54, "learning_rate": 4.598620561671772e-05, "loss": 17.7121, "step": 4101 }, { "epoch": 0.54, "learning_rate": 4.5964958501197265e-05, "loss": 18.3002, "step": 4102 }, { "epoch": 0.54, "learning_rate": 4.594371211905946e-05, "loss": 16.5854, "step": 4103 }, { "epoch": 0.54, "learning_rate": 4.5922466474165906e-05, "loss": 19.0297, "step": 4104 }, { "epoch": 0.54, "learning_rate": 4.590122157037806e-05, "loss": 16.0294, "step": 4105 }, { "epoch": 0.54, "learning_rate": 4.587997741155729e-05, "loss": 17.9169, "step": 4106 }, { "epoch": 0.54, "learning_rate": 4.585873400156477e-05, "loss": 0.0, "step": 4107 }, { "epoch": 0.54, "learning_rate": 4.5837491344261564e-05, "loss": 17.1493, "step": 4108 }, { "epoch": 0.54, "learning_rate": 4.581624944350859e-05, "loss": 18.1493, "step": 4109 }, { "epoch": 0.54, "learning_rate": 4.5795008303166664e-05, "loss": 0.0, "step": 4110 }, { "epoch": 0.54, "learning_rate": 4.577376792709642e-05, "loss": 0.0015, "step": 4111 }, { "epoch": 0.54, "learning_rate": 4.5752528319158365e-05, "loss": 0.0001, "step": 4112 }, { "epoch": 0.54, "learning_rate": 4.5731289483212866e-05, "loss": 17.2604, "step": 4113 }, { "epoch": 0.54, "learning_rate": 4.571005142312018e-05, "loss": 16.6358, "step": 4114 }, { "epoch": 0.54, "learning_rate": 4.568881414274038e-05, "loss": 0.0, "step": 4115 }, { "epoch": 0.54, "learning_rate": 4.566757764593341e-05, "loss": 16.4894, "step": 4116 }, { "epoch": 0.54, "learning_rate": 4.564634193655907e-05, "loss": 17.0519, "step": 4117 }, { "epoch": 0.54, "learning_rate": 4.562510701847707e-05, "loss": 17.6028, "step": 4118 }, { "epoch": 0.54, "learning_rate": 4.560387289554687e-05, "loss": 17.055, "step": 4119 }, { "epoch": 0.54, "learning_rate": 4.558263957162786e-05, "loss": 17.1905, "step": 4120 }, { "epoch": 0.54, "learning_rate": 4.556140705057929e-05, "loss": 16.8705, "step": 4121 }, { "epoch": 0.54, "learning_rate": 4.5540175336260235e-05, "loss": 17.8412, "step": 4122 }, { "epoch": 0.54, "learning_rate": 4.551894443252962e-05, "loss": 17.9716, "step": 4123 }, { "epoch": 0.54, "learning_rate": 4.549771434324624e-05, "loss": 19.2468, "step": 4124 }, { "epoch": 0.54, "learning_rate": 4.5476485072268746e-05, "loss": 19.7756, "step": 4125 }, { "epoch": 0.54, "learning_rate": 4.545525662345563e-05, "loss": 17.4549, "step": 4126 }, { "epoch": 0.54, "learning_rate": 4.543402900066523e-05, "loss": 18.2373, "step": 4127 }, { "epoch": 0.54, "learning_rate": 4.541280220775573e-05, "loss": 17.3266, "step": 4128 }, { "epoch": 0.54, "learning_rate": 4.5391576248585185e-05, "loss": 15.2554, "step": 4129 }, { "epoch": 0.54, "learning_rate": 4.5370351127011494e-05, "loss": 15.9262, "step": 4130 }, { "epoch": 0.54, "learning_rate": 4.534912684689238e-05, "loss": 18.4248, "step": 4131 }, { "epoch": 0.54, "learning_rate": 4.5327903412085424e-05, "loss": 15.48, "step": 4132 }, { "epoch": 0.54, "learning_rate": 4.5306680826448085e-05, "loss": 17.6873, "step": 4133 }, { "epoch": 0.54, "learning_rate": 4.5285459093837617e-05, "loss": 16.2203, "step": 4134 }, { "epoch": 0.54, "learning_rate": 4.526423821811116e-05, "loss": 16.357, "step": 4135 }, { "epoch": 0.54, "learning_rate": 4.524301820312564e-05, "loss": 16.2538, "step": 4136 }, { "epoch": 0.54, "learning_rate": 4.522179905273792e-05, "loss": 0.0004, "step": 4137 }, { "epoch": 0.54, "learning_rate": 4.520058077080463e-05, "loss": 17.6079, "step": 4138 }, { "epoch": 0.54, "learning_rate": 4.5179363361182284e-05, "loss": 18.4787, "step": 4139 }, { "epoch": 0.54, "learning_rate": 4.5158146827727186e-05, "loss": 17.8241, "step": 4140 }, { "epoch": 0.55, "learning_rate": 4.5136931174295524e-05, "loss": 18.3683, "step": 4141 }, { "epoch": 0.55, "learning_rate": 4.511571640474333e-05, "loss": 19.0033, "step": 4142 }, { "epoch": 0.55, "learning_rate": 4.5094502522926455e-05, "loss": 19.7348, "step": 4143 }, { "epoch": 0.55, "learning_rate": 4.507328953270059e-05, "loss": 0.0, "step": 4144 }, { "epoch": 0.55, "learning_rate": 4.505207743792127e-05, "loss": 15.9028, "step": 4145 }, { "epoch": 0.55, "learning_rate": 4.503086624244387e-05, "loss": 0.0001, "step": 4146 }, { "epoch": 0.55, "learning_rate": 4.50096559501236e-05, "loss": 17.1658, "step": 4147 }, { "epoch": 0.55, "learning_rate": 4.49884465648155e-05, "loss": 0.0024, "step": 4148 }, { "epoch": 0.55, "learning_rate": 4.4967238090374434e-05, "loss": 0.0005, "step": 4149 }, { "epoch": 0.55, "learning_rate": 4.4946030530655145e-05, "loss": 0.0006, "step": 4150 }, { "epoch": 0.55, "learning_rate": 4.492482388951216e-05, "loss": 0.0, "step": 4151 }, { "epoch": 0.55, "learning_rate": 4.490361817079987e-05, "loss": 17.0371, "step": 4152 }, { "epoch": 0.55, "learning_rate": 4.488241337837246e-05, "loss": 0.0001, "step": 4153 }, { "epoch": 0.55, "learning_rate": 4.486120951608401e-05, "loss": 15.4936, "step": 4154 }, { "epoch": 0.55, "learning_rate": 4.484000658778838e-05, "loss": 0.0, "step": 4155 }, { "epoch": 0.55, "learning_rate": 4.481880459733927e-05, "loss": 16.9588, "step": 4156 }, { "epoch": 0.55, "learning_rate": 4.479760354859021e-05, "loss": 17.1265, "step": 4157 }, { "epoch": 0.55, "learning_rate": 4.477640344539458e-05, "loss": 17.3277, "step": 4158 }, { "epoch": 0.55, "learning_rate": 4.475520429160556e-05, "loss": 17.792, "step": 4159 }, { "epoch": 0.55, "learning_rate": 4.473400609107617e-05, "loss": 0.0001, "step": 4160 }, { "epoch": 0.55, "learning_rate": 4.471280884765926e-05, "loss": 18.2211, "step": 4161 }, { "epoch": 0.55, "learning_rate": 4.4691612565207505e-05, "loss": 16.3351, "step": 4162 }, { "epoch": 0.55, "learning_rate": 4.467041724757339e-05, "loss": 16.3862, "step": 4163 }, { "epoch": 0.55, "learning_rate": 4.464922289860925e-05, "loss": 0.0, "step": 4164 }, { "epoch": 0.55, "learning_rate": 4.462802952216722e-05, "loss": 16.5229, "step": 4165 }, { "epoch": 0.55, "learning_rate": 4.4606837122099267e-05, "loss": 15.221, "step": 4166 }, { "epoch": 0.55, "learning_rate": 4.45856457022572e-05, "loss": 17.4288, "step": 4167 }, { "epoch": 0.55, "learning_rate": 4.4564455266492625e-05, "loss": 17.7763, "step": 4168 }, { "epoch": 0.55, "learning_rate": 4.454326581865696e-05, "loss": 17.5701, "step": 4169 }, { "epoch": 0.55, "learning_rate": 4.4522077362601465e-05, "loss": 0.0, "step": 4170 }, { "epoch": 0.55, "learning_rate": 4.450088990217724e-05, "loss": 0.0, "step": 4171 }, { "epoch": 0.55, "learning_rate": 4.447970344123516e-05, "loss": 17.1009, "step": 4172 }, { "epoch": 0.55, "learning_rate": 4.4458517983625935e-05, "loss": 0.0001, "step": 4173 }, { "epoch": 0.55, "learning_rate": 4.443733353320009e-05, "loss": 16.7694, "step": 4174 }, { "epoch": 0.55, "learning_rate": 4.4416150093807984e-05, "loss": 18.1247, "step": 4175 }, { "epoch": 0.55, "learning_rate": 4.439496766929978e-05, "loss": 16.9454, "step": 4176 }, { "epoch": 0.55, "learning_rate": 4.437378626352545e-05, "loss": 16.1563, "step": 4177 }, { "epoch": 0.55, "learning_rate": 4.435260588033477e-05, "loss": 17.2341, "step": 4178 }, { "epoch": 0.55, "learning_rate": 4.433142652357739e-05, "loss": 17.4334, "step": 4179 }, { "epoch": 0.55, "learning_rate": 4.43102481971027e-05, "loss": 17.968, "step": 4180 }, { "epoch": 0.55, "learning_rate": 4.4289070904759944e-05, "loss": 15.6201, "step": 4181 }, { "epoch": 0.55, "learning_rate": 4.426789465039815e-05, "loss": 16.0477, "step": 4182 }, { "epoch": 0.55, "learning_rate": 4.4246719437866204e-05, "loss": 0.0, "step": 4183 }, { "epoch": 0.55, "learning_rate": 4.4225545271012754e-05, "loss": 15.248, "step": 4184 }, { "epoch": 0.55, "learning_rate": 4.4204372153686285e-05, "loss": 17.4082, "step": 4185 }, { "epoch": 0.55, "learning_rate": 4.4183200089735064e-05, "loss": 16.9361, "step": 4186 }, { "epoch": 0.55, "learning_rate": 4.4162029083007224e-05, "loss": 17.9839, "step": 4187 }, { "epoch": 0.55, "learning_rate": 4.4140859137350645e-05, "loss": 16.5457, "step": 4188 }, { "epoch": 0.55, "learning_rate": 4.411969025661303e-05, "loss": 17.1498, "step": 4189 }, { "epoch": 0.55, "learning_rate": 4.4098522444641914e-05, "loss": 18.7668, "step": 4190 }, { "epoch": 0.55, "learning_rate": 4.40773557052846e-05, "loss": 16.6019, "step": 4191 }, { "epoch": 0.55, "learning_rate": 4.405619004238824e-05, "loss": 16.1931, "step": 4192 }, { "epoch": 0.55, "learning_rate": 4.403502545979975e-05, "loss": 17.9424, "step": 4193 }, { "epoch": 0.55, "learning_rate": 4.401386196136586e-05, "loss": 0.0, "step": 4194 }, { "epoch": 0.55, "learning_rate": 4.399269955093312e-05, "loss": 17.6963, "step": 4195 }, { "epoch": 0.55, "learning_rate": 4.3971538232347874e-05, "loss": 17.0259, "step": 4196 }, { "epoch": 0.55, "learning_rate": 4.3950378009456254e-05, "loss": 16.7673, "step": 4197 }, { "epoch": 0.55, "learning_rate": 4.392921888610421e-05, "loss": 17.8146, "step": 4198 }, { "epoch": 0.55, "learning_rate": 4.390806086613747e-05, "loss": 16.797, "step": 4199 }, { "epoch": 0.55, "learning_rate": 4.38869039534016e-05, "loss": 18.2956, "step": 4200 }, { "epoch": 0.55, "learning_rate": 4.386574815174193e-05, "loss": 17.5388, "step": 4201 }, { "epoch": 0.55, "learning_rate": 4.38445934650036e-05, "loss": 18.0297, "step": 4202 }, { "epoch": 0.55, "learning_rate": 4.382343989703154e-05, "loss": 16.5891, "step": 4203 }, { "epoch": 0.55, "learning_rate": 4.38022874516705e-05, "loss": 15.7923, "step": 4204 }, { "epoch": 0.55, "learning_rate": 4.3781136132765e-05, "loss": 17.6462, "step": 4205 }, { "epoch": 0.55, "learning_rate": 4.375998594415936e-05, "loss": 0.0, "step": 4206 }, { "epoch": 0.55, "learning_rate": 4.3738836889697706e-05, "loss": 0.0001, "step": 4207 }, { "epoch": 0.55, "learning_rate": 4.371768897322395e-05, "loss": 0.0, "step": 4208 }, { "epoch": 0.55, "learning_rate": 4.3696542198581805e-05, "loss": 0.0001, "step": 4209 }, { "epoch": 0.55, "learning_rate": 4.367539656961476e-05, "loss": 0.0001, "step": 4210 }, { "epoch": 0.55, "learning_rate": 4.36542520901661e-05, "loss": 17.4191, "step": 4211 }, { "epoch": 0.55, "learning_rate": 4.363310876407892e-05, "loss": 18.693, "step": 4212 }, { "epoch": 0.55, "learning_rate": 4.3611966595196095e-05, "loss": 16.9773, "step": 4213 }, { "epoch": 0.55, "learning_rate": 4.359082558736028e-05, "loss": 16.4605, "step": 4214 }, { "epoch": 0.55, "learning_rate": 4.3569685744413926e-05, "loss": 16.0559, "step": 4215 }, { "epoch": 0.55, "learning_rate": 4.354854707019925e-05, "loss": 0.0001, "step": 4216 }, { "epoch": 0.56, "learning_rate": 4.352740956855833e-05, "loss": 0.0001, "step": 4217 }, { "epoch": 0.56, "learning_rate": 4.3506273243332935e-05, "loss": 18.6695, "step": 4218 }, { "epoch": 0.56, "learning_rate": 4.348513809836468e-05, "loss": 19.1916, "step": 4219 }, { "epoch": 0.56, "learning_rate": 4.346400413749495e-05, "loss": 16.0721, "step": 4220 }, { "epoch": 0.56, "learning_rate": 4.3442871364564916e-05, "loss": 17.1231, "step": 4221 }, { "epoch": 0.56, "learning_rate": 4.3421739783415526e-05, "loss": 17.2227, "step": 4222 }, { "epoch": 0.56, "learning_rate": 4.340060939788753e-05, "loss": 0.0001, "step": 4223 }, { "epoch": 0.56, "learning_rate": 4.3379480211821424e-05, "loss": 16.6818, "step": 4224 }, { "epoch": 0.56, "learning_rate": 4.3358352229057547e-05, "loss": 0.0, "step": 4225 }, { "epoch": 0.56, "learning_rate": 4.333722545343595e-05, "loss": 16.2479, "step": 4226 }, { "epoch": 0.56, "learning_rate": 4.331609988879652e-05, "loss": 17.3594, "step": 4227 }, { "epoch": 0.56, "learning_rate": 4.329497553897887e-05, "loss": 0.0002, "step": 4228 }, { "epoch": 0.56, "learning_rate": 4.327385240782246e-05, "loss": 16.3254, "step": 4229 }, { "epoch": 0.56, "learning_rate": 4.325273049916647e-05, "loss": 17.3516, "step": 4230 }, { "epoch": 0.56, "learning_rate": 4.323160981684989e-05, "loss": 17.1366, "step": 4231 }, { "epoch": 0.56, "learning_rate": 4.321049036471145e-05, "loss": 17.093, "step": 4232 }, { "epoch": 0.56, "learning_rate": 4.3189372146589724e-05, "loss": 16.947, "step": 4233 }, { "epoch": 0.56, "learning_rate": 4.316825516632299e-05, "loss": 17.8709, "step": 4234 }, { "epoch": 0.56, "learning_rate": 4.314713942774935e-05, "loss": 18.3349, "step": 4235 }, { "epoch": 0.56, "learning_rate": 4.312602493470664e-05, "loss": 0.0, "step": 4236 }, { "epoch": 0.56, "learning_rate": 4.3104911691032516e-05, "loss": 17.4868, "step": 4237 }, { "epoch": 0.56, "learning_rate": 4.3083799700564376e-05, "loss": 0.0, "step": 4238 }, { "epoch": 0.56, "learning_rate": 4.3062688967139384e-05, "loss": 18.4653, "step": 4239 }, { "epoch": 0.56, "learning_rate": 4.3041579494594505e-05, "loss": 17.9589, "step": 4240 }, { "epoch": 0.56, "learning_rate": 4.302047128676644e-05, "loss": 0.0, "step": 4241 }, { "epoch": 0.56, "learning_rate": 4.29993643474917e-05, "loss": 0.0, "step": 4242 }, { "epoch": 0.56, "learning_rate": 4.297825868060652e-05, "loss": 16.5113, "step": 4243 }, { "epoch": 0.56, "learning_rate": 4.295715428994695e-05, "loss": 17.4374, "step": 4244 }, { "epoch": 0.56, "learning_rate": 4.293605117934876e-05, "loss": 16.8147, "step": 4245 }, { "epoch": 0.56, "learning_rate": 4.2914949352647536e-05, "loss": 17.6022, "step": 4246 }, { "epoch": 0.56, "learning_rate": 4.28938488136786e-05, "loss": 0.0001, "step": 4247 }, { "epoch": 0.56, "learning_rate": 4.287274956627706e-05, "loss": 17.4324, "step": 4248 }, { "epoch": 0.56, "learning_rate": 4.285165161427774e-05, "loss": 0.0001, "step": 4249 }, { "epoch": 0.56, "learning_rate": 4.28305549615153e-05, "loss": 16.6789, "step": 4250 }, { "epoch": 0.56, "learning_rate": 4.280945961182412e-05, "loss": 16.4921, "step": 4251 }, { "epoch": 0.56, "learning_rate": 4.278836556903835e-05, "loss": 0.0001, "step": 4252 }, { "epoch": 0.56, "learning_rate": 4.27672728369919e-05, "loss": 16.2167, "step": 4253 }, { "epoch": 0.56, "learning_rate": 4.274618141951848e-05, "loss": 16.5676, "step": 4254 }, { "epoch": 0.56, "learning_rate": 4.2725091320451474e-05, "loss": 16.9026, "step": 4255 }, { "epoch": 0.56, "learning_rate": 4.270400254362409e-05, "loss": 17.3066, "step": 4256 }, { "epoch": 0.56, "learning_rate": 4.268291509286932e-05, "loss": 0.0007, "step": 4257 }, { "epoch": 0.56, "learning_rate": 4.266182897201986e-05, "loss": 16.0402, "step": 4258 }, { "epoch": 0.56, "learning_rate": 4.2640744184908174e-05, "loss": 0.0, "step": 4259 }, { "epoch": 0.56, "learning_rate": 4.2619660735366505e-05, "loss": 0.0004, "step": 4260 }, { "epoch": 0.56, "learning_rate": 4.2598578627226856e-05, "loss": 18.1515, "step": 4261 }, { "epoch": 0.56, "learning_rate": 4.257749786432096e-05, "loss": 0.0, "step": 4262 }, { "epoch": 0.56, "learning_rate": 4.255641845048032e-05, "loss": 17.7109, "step": 4263 }, { "epoch": 0.56, "learning_rate": 4.253534038953617e-05, "loss": 17.7003, "step": 4264 }, { "epoch": 0.56, "learning_rate": 4.251426368531956e-05, "loss": 17.1999, "step": 4265 }, { "epoch": 0.56, "learning_rate": 4.2493188341661224e-05, "loss": 16.5834, "step": 4266 }, { "epoch": 0.56, "learning_rate": 4.24721143623917e-05, "loss": 17.661, "step": 4267 }, { "epoch": 0.56, "learning_rate": 4.2451041751341226e-05, "loss": 0.0, "step": 4268 }, { "epoch": 0.56, "learning_rate": 4.242997051233985e-05, "loss": 0.0001, "step": 4269 }, { "epoch": 0.56, "learning_rate": 4.240890064921733e-05, "loss": 19.7688, "step": 4270 }, { "epoch": 0.56, "learning_rate": 4.2387832165803184e-05, "loss": 18.3465, "step": 4271 }, { "epoch": 0.56, "learning_rate": 4.236676506592668e-05, "loss": 15.9129, "step": 4272 }, { "epoch": 0.56, "learning_rate": 4.234569935341685e-05, "loss": 17.2257, "step": 4273 }, { "epoch": 0.56, "learning_rate": 4.232463503210245e-05, "loss": 0.0, "step": 4274 }, { "epoch": 0.56, "learning_rate": 4.230357210581199e-05, "loss": 0.0001, "step": 4275 }, { "epoch": 0.56, "learning_rate": 4.228251057837372e-05, "loss": 17.0726, "step": 4276 }, { "epoch": 0.56, "learning_rate": 4.226145045361567e-05, "loss": 17.5104, "step": 4277 }, { "epoch": 0.56, "learning_rate": 4.224039173536558e-05, "loss": 16.1813, "step": 4278 }, { "epoch": 0.56, "learning_rate": 4.221933442745093e-05, "loss": 17.5444, "step": 4279 }, { "epoch": 0.56, "learning_rate": 4.2198278533698965e-05, "loss": 17.6521, "step": 4280 }, { "epoch": 0.56, "learning_rate": 4.217722405793668e-05, "loss": 16.6258, "step": 4281 }, { "epoch": 0.56, "learning_rate": 4.215617100399079e-05, "loss": 17.2033, "step": 4282 }, { "epoch": 0.56, "learning_rate": 4.2135119375687757e-05, "loss": 16.3651, "step": 4283 }, { "epoch": 0.56, "learning_rate": 4.211406917685379e-05, "loss": 16.4192, "step": 4284 }, { "epoch": 0.56, "learning_rate": 4.2093020411314815e-05, "loss": 15.9796, "step": 4285 }, { "epoch": 0.56, "learning_rate": 4.207197308289655e-05, "loss": 0.0001, "step": 4286 }, { "epoch": 0.56, "learning_rate": 4.205092719542441e-05, "loss": 0.0, "step": 4287 }, { "epoch": 0.56, "learning_rate": 4.202988275272354e-05, "loss": 17.8437, "step": 4288 }, { "epoch": 0.56, "learning_rate": 4.200883975861885e-05, "loss": 17.8679, "step": 4289 }, { "epoch": 0.56, "learning_rate": 4.198779821693499e-05, "loss": 16.4892, "step": 4290 }, { "epoch": 0.56, "learning_rate": 4.1966758131496306e-05, "loss": 18.3155, "step": 4291 }, { "epoch": 0.56, "learning_rate": 4.194571950612693e-05, "loss": 16.4394, "step": 4292 }, { "epoch": 0.57, "learning_rate": 4.192468234465069e-05, "loss": 16.5103, "step": 4293 }, { "epoch": 0.57, "learning_rate": 4.190364665089117e-05, "loss": 17.1328, "step": 4294 }, { "epoch": 0.57, "learning_rate": 4.188261242867168e-05, "loss": 16.9023, "step": 4295 }, { "epoch": 0.57, "learning_rate": 4.1861579681815256e-05, "loss": 0.0, "step": 4296 }, { "epoch": 0.57, "learning_rate": 4.184054841414466e-05, "loss": 15.8785, "step": 4297 }, { "epoch": 0.57, "learning_rate": 4.181951862948243e-05, "loss": 17.0139, "step": 4298 }, { "epoch": 0.57, "learning_rate": 4.1798490331650786e-05, "loss": 16.1993, "step": 4299 }, { "epoch": 0.57, "learning_rate": 4.1777463524471684e-05, "loss": 16.6535, "step": 4300 }, { "epoch": 0.57, "learning_rate": 4.175643821176681e-05, "loss": 0.0, "step": 4301 }, { "epoch": 0.57, "learning_rate": 4.173541439735762e-05, "loss": 16.3803, "step": 4302 }, { "epoch": 0.57, "learning_rate": 4.171439208506525e-05, "loss": 17.3091, "step": 4303 }, { "epoch": 0.57, "learning_rate": 4.169337127871057e-05, "loss": 18.5852, "step": 4304 }, { "epoch": 0.57, "learning_rate": 4.1672351982114174e-05, "loss": 17.1114, "step": 4305 }, { "epoch": 0.57, "learning_rate": 4.1651334199096414e-05, "loss": 17.1974, "step": 4306 }, { "epoch": 0.57, "learning_rate": 4.1630317933477335e-05, "loss": 15.7248, "step": 4307 }, { "epoch": 0.57, "learning_rate": 4.1609303189076724e-05, "loss": 18.2174, "step": 4308 }, { "epoch": 0.57, "learning_rate": 4.158828996971407e-05, "loss": 0.0, "step": 4309 }, { "epoch": 0.57, "learning_rate": 4.1567278279208586e-05, "loss": 16.7121, "step": 4310 }, { "epoch": 0.57, "learning_rate": 4.154626812137925e-05, "loss": 18.9562, "step": 4311 }, { "epoch": 0.57, "learning_rate": 4.1525259500044716e-05, "loss": 16.736, "step": 4312 }, { "epoch": 0.57, "learning_rate": 4.150425241902337e-05, "loss": 0.0, "step": 4313 }, { "epoch": 0.57, "learning_rate": 4.148324688213332e-05, "loss": 0.0001, "step": 4314 }, { "epoch": 0.57, "learning_rate": 4.1462242893192407e-05, "loss": 17.614, "step": 4315 }, { "epoch": 0.57, "learning_rate": 4.1441240456018166e-05, "loss": 0.0, "step": 4316 }, { "epoch": 0.57, "learning_rate": 4.142023957442787e-05, "loss": 0.0001, "step": 4317 }, { "epoch": 0.57, "learning_rate": 4.139924025223848e-05, "loss": 0.0, "step": 4318 }, { "epoch": 0.57, "learning_rate": 4.137824249326672e-05, "loss": 0.0002, "step": 4319 }, { "epoch": 0.57, "learning_rate": 4.1357246301329007e-05, "loss": 16.1916, "step": 4320 }, { "epoch": 0.57, "learning_rate": 4.133625168024146e-05, "loss": 17.3653, "step": 4321 }, { "epoch": 0.57, "learning_rate": 4.13152586338199e-05, "loss": 17.5166, "step": 4322 }, { "epoch": 0.57, "learning_rate": 4.129426716587993e-05, "loss": 0.0001, "step": 4323 }, { "epoch": 0.57, "learning_rate": 4.12732772802368e-05, "loss": 17.94, "step": 4324 }, { "epoch": 0.57, "learning_rate": 4.125228898070549e-05, "loss": 16.5923, "step": 4325 }, { "epoch": 0.57, "learning_rate": 4.12313022711007e-05, "loss": 17.0801, "step": 4326 }, { "epoch": 0.57, "learning_rate": 4.121031715523684e-05, "loss": 17.6983, "step": 4327 }, { "epoch": 0.57, "learning_rate": 4.1189333636928016e-05, "loss": 0.0, "step": 4328 }, { "epoch": 0.57, "learning_rate": 4.116835171998807e-05, "loss": 0.0, "step": 4329 }, { "epoch": 0.57, "learning_rate": 4.114737140823052e-05, "loss": 17.6576, "step": 4330 }, { "epoch": 0.57, "learning_rate": 4.1126392705468634e-05, "loss": 17.2711, "step": 4331 }, { "epoch": 0.57, "learning_rate": 4.110541561551534e-05, "loss": 16.1647, "step": 4332 }, { "epoch": 0.57, "learning_rate": 4.108444014218331e-05, "loss": 0.0001, "step": 4333 }, { "epoch": 0.57, "learning_rate": 4.10634662892849e-05, "loss": 16.735, "step": 4334 }, { "epoch": 0.57, "learning_rate": 4.104249406063217e-05, "loss": 16.5106, "step": 4335 }, { "epoch": 0.57, "learning_rate": 4.102152346003692e-05, "loss": 15.5493, "step": 4336 }, { "epoch": 0.57, "learning_rate": 4.1000554491310615e-05, "loss": 17.2693, "step": 4337 }, { "epoch": 0.57, "learning_rate": 4.0979587158264434e-05, "loss": 19.5852, "step": 4338 }, { "epoch": 0.57, "learning_rate": 4.095862146470927e-05, "loss": 16.5454, "step": 4339 }, { "epoch": 0.57, "learning_rate": 4.0937657414455696e-05, "loss": 15.8226, "step": 4340 }, { "epoch": 0.57, "learning_rate": 4.091669501131403e-05, "loss": 18.53, "step": 4341 }, { "epoch": 0.57, "learning_rate": 4.089573425909423e-05, "loss": 18.3391, "step": 4342 }, { "epoch": 0.57, "learning_rate": 4.087477516160598e-05, "loss": 15.6953, "step": 4343 }, { "epoch": 0.57, "learning_rate": 4.08538177226587e-05, "loss": 0.0, "step": 4344 }, { "epoch": 0.57, "learning_rate": 4.083286194606146e-05, "loss": 16.9497, "step": 4345 }, { "epoch": 0.57, "learning_rate": 4.081190783562304e-05, "loss": 18.9198, "step": 4346 }, { "epoch": 0.57, "learning_rate": 4.079095539515192e-05, "loss": 16.4838, "step": 4347 }, { "epoch": 0.57, "learning_rate": 4.077000462845629e-05, "loss": 15.3853, "step": 4348 }, { "epoch": 0.57, "learning_rate": 4.074905553934402e-05, "loss": 17.232, "step": 4349 }, { "epoch": 0.57, "learning_rate": 4.0728108131622675e-05, "loss": 18.0378, "step": 4350 }, { "epoch": 0.57, "learning_rate": 4.070716240909951e-05, "loss": 16.6723, "step": 4351 }, { "epoch": 0.57, "learning_rate": 4.0686218375581496e-05, "loss": 18.2128, "step": 4352 }, { "epoch": 0.57, "learning_rate": 4.066527603487529e-05, "loss": 17.9981, "step": 4353 }, { "epoch": 0.57, "learning_rate": 4.064433539078721e-05, "loss": 18.2417, "step": 4354 }, { "epoch": 0.57, "learning_rate": 4.06233964471233e-05, "loss": 16.7045, "step": 4355 }, { "epoch": 0.57, "learning_rate": 4.06024592076893e-05, "loss": 17.2655, "step": 4356 }, { "epoch": 0.57, "learning_rate": 4.058152367629061e-05, "loss": 16.665, "step": 4357 }, { "epoch": 0.57, "learning_rate": 4.056058985673233e-05, "loss": 0.0003, "step": 4358 }, { "epoch": 0.57, "learning_rate": 4.053965775281927e-05, "loss": 15.9764, "step": 4359 }, { "epoch": 0.57, "learning_rate": 4.0518727368355895e-05, "loss": 0.0001, "step": 4360 }, { "epoch": 0.57, "learning_rate": 4.049779870714639e-05, "loss": 17.4551, "step": 4361 }, { "epoch": 0.57, "learning_rate": 4.04768717729946e-05, "loss": 16.592, "step": 4362 }, { "epoch": 0.57, "learning_rate": 4.045594656970408e-05, "loss": 17.0075, "step": 4363 }, { "epoch": 0.57, "learning_rate": 4.043502310107803e-05, "loss": 0.0004, "step": 4364 }, { "epoch": 0.57, "learning_rate": 4.0414101370919386e-05, "loss": 17.6986, "step": 4365 }, { "epoch": 0.57, "learning_rate": 4.039318138303075e-05, "loss": 17.2722, "step": 4366 }, { "epoch": 0.57, "learning_rate": 4.0372263141214384e-05, "loss": 15.5482, "step": 4367 }, { "epoch": 0.57, "learning_rate": 4.035134664927225e-05, "loss": 16.9189, "step": 4368 }, { "epoch": 0.58, "learning_rate": 4.0330431911006e-05, "loss": 16.0146, "step": 4369 }, { "epoch": 0.58, "learning_rate": 4.030951893021696e-05, "loss": 0.0001, "step": 4370 }, { "epoch": 0.58, "learning_rate": 4.0288607710706144e-05, "loss": 0.0, "step": 4371 }, { "epoch": 0.58, "learning_rate": 4.026769825627421e-05, "loss": 16.5419, "step": 4372 }, { "epoch": 0.58, "learning_rate": 4.024679057072156e-05, "loss": 0.0, "step": 4373 }, { "epoch": 0.58, "learning_rate": 4.022588465784821e-05, "loss": 18.0179, "step": 4374 }, { "epoch": 0.58, "learning_rate": 4.0204980521453897e-05, "loss": 0.0, "step": 4375 }, { "epoch": 0.58, "learning_rate": 4.0184078165338e-05, "loss": 17.482, "step": 4376 }, { "epoch": 0.58, "learning_rate": 4.016317759329962e-05, "loss": 0.0, "step": 4377 }, { "epoch": 0.58, "learning_rate": 4.0142278809137496e-05, "loss": 16.2451, "step": 4378 }, { "epoch": 0.58, "learning_rate": 4.012138181665005e-05, "loss": 17.0948, "step": 4379 }, { "epoch": 0.58, "learning_rate": 4.0100486619635374e-05, "loss": 16.7893, "step": 4380 }, { "epoch": 0.58, "learning_rate": 4.007959322189127e-05, "loss": 16.4249, "step": 4381 }, { "epoch": 0.58, "learning_rate": 4.005870162721516e-05, "loss": 0.0001, "step": 4382 }, { "epoch": 0.58, "learning_rate": 4.0037811839404164e-05, "loss": 0.0, "step": 4383 }, { "epoch": 0.58, "learning_rate": 4.001692386225509e-05, "loss": 17.8901, "step": 4384 }, { "epoch": 0.58, "learning_rate": 3.999603769956438e-05, "loss": 16.8755, "step": 4385 }, { "epoch": 0.58, "learning_rate": 3.997515335512817e-05, "loss": 16.6037, "step": 4386 }, { "epoch": 0.58, "learning_rate": 3.9954270832742284e-05, "loss": 0.0002, "step": 4387 }, { "epoch": 0.58, "learning_rate": 3.993339013620217e-05, "loss": 0.0, "step": 4388 }, { "epoch": 0.58, "learning_rate": 3.9912511269302956e-05, "loss": 0.0, "step": 4389 }, { "epoch": 0.58, "learning_rate": 3.989163423583947e-05, "loss": 16.9465, "step": 4390 }, { "epoch": 0.58, "learning_rate": 3.9870759039606196e-05, "loss": 17.438, "step": 4391 }, { "epoch": 0.58, "learning_rate": 3.984988568439724e-05, "loss": 15.1882, "step": 4392 }, { "epoch": 0.58, "learning_rate": 3.982901417400642e-05, "loss": 17.496, "step": 4393 }, { "epoch": 0.58, "learning_rate": 3.98081445122272e-05, "loss": 17.21, "step": 4394 }, { "epoch": 0.58, "learning_rate": 3.9787276702852705e-05, "loss": 16.3471, "step": 4395 }, { "epoch": 0.58, "learning_rate": 3.976641074967575e-05, "loss": 16.7295, "step": 4396 }, { "epoch": 0.58, "learning_rate": 3.97455466564888e-05, "loss": 0.0004, "step": 4397 }, { "epoch": 0.58, "learning_rate": 3.972468442708395e-05, "loss": 16.492, "step": 4398 }, { "epoch": 0.58, "learning_rate": 3.9703824065253e-05, "loss": 17.6749, "step": 4399 }, { "epoch": 0.58, "learning_rate": 3.968296557478738e-05, "loss": 17.6752, "step": 4400 }, { "epoch": 0.58, "learning_rate": 3.96621089594782e-05, "loss": 0.0, "step": 4401 }, { "epoch": 0.58, "learning_rate": 3.964125422311623e-05, "loss": 18.0533, "step": 4402 }, { "epoch": 0.58, "learning_rate": 3.9620401369491866e-05, "loss": 16.6364, "step": 4403 }, { "epoch": 0.58, "learning_rate": 3.9599550402395195e-05, "loss": 0.0, "step": 4404 }, { "epoch": 0.58, "learning_rate": 3.9578701325615976e-05, "loss": 0.0, "step": 4405 }, { "epoch": 0.58, "learning_rate": 3.955785414294357e-05, "loss": 17.1086, "step": 4406 }, { "epoch": 0.58, "learning_rate": 3.953700885816704e-05, "loss": 0.0001, "step": 4407 }, { "epoch": 0.58, "learning_rate": 3.951616547507507e-05, "loss": 0.0, "step": 4408 }, { "epoch": 0.58, "learning_rate": 3.949532399745604e-05, "loss": 0.0, "step": 4409 }, { "epoch": 0.58, "learning_rate": 3.947448442909795e-05, "loss": 16.6976, "step": 4410 }, { "epoch": 0.58, "learning_rate": 3.945364677378847e-05, "loss": 15.1805, "step": 4411 }, { "epoch": 0.58, "learning_rate": 3.943281103531489e-05, "loss": 16.7523, "step": 4412 }, { "epoch": 0.58, "learning_rate": 3.94119772174642e-05, "loss": 16.6418, "step": 4413 }, { "epoch": 0.58, "learning_rate": 3.939114532402303e-05, "loss": 16.6402, "step": 4414 }, { "epoch": 0.58, "learning_rate": 3.9370315358777616e-05, "loss": 17.1094, "step": 4415 }, { "epoch": 0.58, "learning_rate": 3.934948732551389e-05, "loss": 17.4147, "step": 4416 }, { "epoch": 0.58, "learning_rate": 3.932866122801742e-05, "loss": 17.4447, "step": 4417 }, { "epoch": 0.58, "learning_rate": 3.930783707007343e-05, "loss": 17.6233, "step": 4418 }, { "epoch": 0.58, "learning_rate": 3.928701485546677e-05, "loss": 17.0981, "step": 4419 }, { "epoch": 0.58, "learning_rate": 3.9266194587981944e-05, "loss": 18.9462, "step": 4420 }, { "epoch": 0.58, "learning_rate": 3.9245376271403115e-05, "loss": 0.0, "step": 4421 }, { "epoch": 0.58, "learning_rate": 3.9224559909514094e-05, "loss": 15.645, "step": 4422 }, { "epoch": 0.58, "learning_rate": 3.920374550609831e-05, "loss": 16.4299, "step": 4423 }, { "epoch": 0.58, "learning_rate": 3.918293306493886e-05, "loss": 17.8968, "step": 4424 }, { "epoch": 0.58, "learning_rate": 3.916212258981846e-05, "loss": 0.0, "step": 4425 }, { "epoch": 0.58, "learning_rate": 3.914131408451952e-05, "loss": 0.0002, "step": 4426 }, { "epoch": 0.58, "learning_rate": 3.912050755282403e-05, "loss": 18.7656, "step": 4427 }, { "epoch": 0.58, "learning_rate": 3.9099702998513656e-05, "loss": 17.0865, "step": 4428 }, { "epoch": 0.58, "learning_rate": 3.907890042536968e-05, "loss": 0.0, "step": 4429 }, { "epoch": 0.58, "learning_rate": 3.905809983717308e-05, "loss": 16.9021, "step": 4430 }, { "epoch": 0.58, "learning_rate": 3.9037301237704404e-05, "loss": 0.0001, "step": 4431 }, { "epoch": 0.58, "learning_rate": 3.901650463074388e-05, "loss": 17.6555, "step": 4432 }, { "epoch": 0.58, "learning_rate": 3.899571002007135e-05, "loss": 18.7998, "step": 4433 }, { "epoch": 0.58, "learning_rate": 3.897491740946633e-05, "loss": 16.0356, "step": 4434 }, { "epoch": 0.58, "learning_rate": 3.8954126802707935e-05, "loss": 15.368, "step": 4435 }, { "epoch": 0.58, "learning_rate": 3.893333820357492e-05, "loss": 17.8835, "step": 4436 }, { "epoch": 0.58, "learning_rate": 3.891255161584569e-05, "loss": 17.6016, "step": 4437 }, { "epoch": 0.58, "learning_rate": 3.889176704329829e-05, "loss": 16.2968, "step": 4438 }, { "epoch": 0.58, "learning_rate": 3.8870984489710374e-05, "loss": 18.1094, "step": 4439 }, { "epoch": 0.58, "learning_rate": 3.885020395885926e-05, "loss": 0.0001, "step": 4440 }, { "epoch": 0.58, "learning_rate": 3.8829425454521836e-05, "loss": 14.9383, "step": 4441 }, { "epoch": 0.58, "learning_rate": 3.8808648980474726e-05, "loss": 18.5206, "step": 4442 }, { "epoch": 0.58, "learning_rate": 3.878787454049409e-05, "loss": 0.0002, "step": 4443 }, { "epoch": 0.58, "learning_rate": 3.8767102138355773e-05, "loss": 16.4539, "step": 4444 }, { "epoch": 0.59, "learning_rate": 3.87463317778352e-05, "loss": 18.3012, "step": 4445 }, { "epoch": 0.59, "learning_rate": 3.872556346270749e-05, "loss": 18.6921, "step": 4446 }, { "epoch": 0.59, "learning_rate": 3.870479719674733e-05, "loss": 15.966, "step": 4447 }, { "epoch": 0.59, "learning_rate": 3.868403298372909e-05, "loss": 0.0, "step": 4448 }, { "epoch": 0.59, "learning_rate": 3.86632708274267e-05, "loss": 16.996, "step": 4449 }, { "epoch": 0.59, "learning_rate": 3.864251073161377e-05, "loss": 0.0, "step": 4450 }, { "epoch": 0.59, "learning_rate": 3.862175270006352e-05, "loss": 16.2163, "step": 4451 }, { "epoch": 0.59, "learning_rate": 3.86009967365488e-05, "loss": 0.0, "step": 4452 }, { "epoch": 0.59, "learning_rate": 3.8580242844842075e-05, "loss": 14.6248, "step": 4453 }, { "epoch": 0.59, "learning_rate": 3.855949102871541e-05, "loss": 17.5138, "step": 4454 }, { "epoch": 0.59, "learning_rate": 3.853874129194056e-05, "loss": 18.4065, "step": 4455 }, { "epoch": 0.59, "learning_rate": 3.851799363828884e-05, "loss": 0.0, "step": 4456 }, { "epoch": 0.59, "learning_rate": 3.849724807153121e-05, "loss": 15.0886, "step": 4457 }, { "epoch": 0.59, "learning_rate": 3.847650459543824e-05, "loss": 17.175, "step": 4458 }, { "epoch": 0.59, "learning_rate": 3.8455763213780136e-05, "loss": 16.4792, "step": 4459 }, { "epoch": 0.59, "learning_rate": 3.843502393032672e-05, "loss": 0.0001, "step": 4460 }, { "epoch": 0.59, "learning_rate": 3.841428674884742e-05, "loss": 16.5692, "step": 4461 }, { "epoch": 0.59, "learning_rate": 3.8393551673111276e-05, "loss": 0.0, "step": 4462 }, { "epoch": 0.59, "learning_rate": 3.837281870688699e-05, "loss": 19.5586, "step": 4463 }, { "epoch": 0.59, "learning_rate": 3.8352087853942845e-05, "loss": 14.9507, "step": 4464 }, { "epoch": 0.59, "learning_rate": 3.8331359118046724e-05, "loss": 16.1822, "step": 4465 }, { "epoch": 0.59, "learning_rate": 3.831063250296615e-05, "loss": 17.2215, "step": 4466 }, { "epoch": 0.59, "learning_rate": 3.8289908012468273e-05, "loss": 17.0996, "step": 4467 }, { "epoch": 0.59, "learning_rate": 3.8269185650319833e-05, "loss": 15.8554, "step": 4468 }, { "epoch": 0.59, "learning_rate": 3.824846542028718e-05, "loss": 17.828, "step": 4469 }, { "epoch": 0.59, "learning_rate": 3.8227747326136296e-05, "loss": 0.0002, "step": 4470 }, { "epoch": 0.59, "learning_rate": 3.820703137163277e-05, "loss": 19.134, "step": 4471 }, { "epoch": 0.59, "learning_rate": 3.81863175605418e-05, "loss": 18.2512, "step": 4472 }, { "epoch": 0.59, "learning_rate": 3.816560589662817e-05, "loss": 16.9374, "step": 4473 }, { "epoch": 0.59, "learning_rate": 3.814489638365632e-05, "loss": 18.5429, "step": 4474 }, { "epoch": 0.59, "learning_rate": 3.812418902539025e-05, "loss": 15.4721, "step": 4475 }, { "epoch": 0.59, "learning_rate": 3.8103483825593634e-05, "loss": 17.4428, "step": 4476 }, { "epoch": 0.59, "learning_rate": 3.808278078802968e-05, "loss": 17.5553, "step": 4477 }, { "epoch": 0.59, "learning_rate": 3.806207991646125e-05, "loss": 18.9163, "step": 4478 }, { "epoch": 0.59, "learning_rate": 3.804138121465077e-05, "loss": 16.1366, "step": 4479 }, { "epoch": 0.59, "learning_rate": 3.802068468636035e-05, "loss": 0.0, "step": 4480 }, { "epoch": 0.59, "learning_rate": 3.7999990335351614e-05, "loss": 18.0115, "step": 4481 }, { "epoch": 0.59, "learning_rate": 3.797929816538586e-05, "loss": 17.0943, "step": 4482 }, { "epoch": 0.59, "learning_rate": 3.7958608180223927e-05, "loss": 0.0001, "step": 4483 }, { "epoch": 0.59, "learning_rate": 3.793792038362633e-05, "loss": 0.0, "step": 4484 }, { "epoch": 0.59, "learning_rate": 3.7917234779353114e-05, "loss": 17.1633, "step": 4485 }, { "epoch": 0.59, "learning_rate": 3.789655137116399e-05, "loss": 17.6745, "step": 4486 }, { "epoch": 0.59, "learning_rate": 3.78758701628182e-05, "loss": 18.9344, "step": 4487 }, { "epoch": 0.59, "learning_rate": 3.7855191158074656e-05, "loss": 0.0002, "step": 4488 }, { "epoch": 0.59, "learning_rate": 3.783451436069183e-05, "loss": 17.1793, "step": 4489 }, { "epoch": 0.59, "learning_rate": 3.78138397744278e-05, "loss": 0.0001, "step": 4490 }, { "epoch": 0.59, "learning_rate": 3.779316740304023e-05, "loss": 16.7534, "step": 4491 }, { "epoch": 0.59, "learning_rate": 3.777249725028642e-05, "loss": 0.0001, "step": 4492 }, { "epoch": 0.59, "learning_rate": 3.775182931992322e-05, "loss": 16.9031, "step": 4493 }, { "epoch": 0.59, "learning_rate": 3.773116361570711e-05, "loss": 17.4589, "step": 4494 }, { "epoch": 0.59, "learning_rate": 3.771050014139414e-05, "loss": 0.0003, "step": 4495 }, { "epoch": 0.59, "learning_rate": 3.768983890073998e-05, "loss": 17.7367, "step": 4496 }, { "epoch": 0.59, "learning_rate": 3.7669179897499886e-05, "loss": 0.0, "step": 4497 }, { "epoch": 0.59, "learning_rate": 3.764852313542868e-05, "loss": 0.0001, "step": 4498 }, { "epoch": 0.59, "learning_rate": 3.762786861828083e-05, "loss": 16.1267, "step": 4499 }, { "epoch": 0.59, "learning_rate": 3.760721634981032e-05, "loss": 0.0001, "step": 4500 }, { "epoch": 0.59, "learning_rate": 3.7586566333770816e-05, "loss": 15.6826, "step": 4501 }, { "epoch": 0.59, "learning_rate": 3.7565918573915517e-05, "loss": 15.5362, "step": 4502 }, { "epoch": 0.59, "learning_rate": 3.754527307399721e-05, "loss": 0.0, "step": 4503 }, { "epoch": 0.59, "learning_rate": 3.752462983776829e-05, "loss": 16.4055, "step": 4504 }, { "epoch": 0.59, "learning_rate": 3.7503988868980756e-05, "loss": 17.3845, "step": 4505 }, { "epoch": 0.59, "learning_rate": 3.748335017138616e-05, "loss": 0.0, "step": 4506 }, { "epoch": 0.59, "learning_rate": 3.7462713748735646e-05, "loss": 18.1665, "step": 4507 }, { "epoch": 0.59, "learning_rate": 3.744207960477997e-05, "loss": 0.0, "step": 4508 }, { "epoch": 0.59, "learning_rate": 3.742144774326944e-05, "loss": 0.0001, "step": 4509 }, { "epoch": 0.59, "learning_rate": 3.740081816795401e-05, "loss": 18.3151, "step": 4510 }, { "epoch": 0.59, "learning_rate": 3.738019088258313e-05, "loss": 17.3465, "step": 4511 }, { "epoch": 0.59, "learning_rate": 3.735956589090589e-05, "loss": 17.7125, "step": 4512 }, { "epoch": 0.59, "learning_rate": 3.733894319667097e-05, "loss": 15.4887, "step": 4513 }, { "epoch": 0.59, "learning_rate": 3.731832280362661e-05, "loss": 16.0672, "step": 4514 }, { "epoch": 0.59, "learning_rate": 3.729770471552062e-05, "loss": 17.9042, "step": 4515 }, { "epoch": 0.59, "learning_rate": 3.727708893610042e-05, "loss": 0.0, "step": 4516 }, { "epoch": 0.59, "learning_rate": 3.725647546911302e-05, "loss": 16.1742, "step": 4517 }, { "epoch": 0.59, "learning_rate": 3.723586431830496e-05, "loss": 17.6729, "step": 4518 }, { "epoch": 0.59, "learning_rate": 3.72152554874224e-05, "loss": 18.5792, "step": 4519 }, { "epoch": 0.59, "learning_rate": 3.719464898021104e-05, "loss": 16.7569, "step": 4520 }, { "epoch": 0.6, "learning_rate": 3.717404480041622e-05, "loss": 0.0, "step": 4521 }, { "epoch": 0.6, "learning_rate": 3.715344295178281e-05, "loss": 0.0001, "step": 4522 }, { "epoch": 0.6, "learning_rate": 3.713284343805527e-05, "loss": 16.8096, "step": 4523 }, { "epoch": 0.6, "learning_rate": 3.7112246262977614e-05, "loss": 16.2188, "step": 4524 }, { "epoch": 0.6, "learning_rate": 3.709165143029345e-05, "loss": 17.1975, "step": 4525 }, { "epoch": 0.6, "learning_rate": 3.707105894374598e-05, "loss": 0.0, "step": 4526 }, { "epoch": 0.6, "learning_rate": 3.705046880707796e-05, "loss": 0.0002, "step": 4527 }, { "epoch": 0.6, "learning_rate": 3.70298810240317e-05, "loss": 17.7032, "step": 4528 }, { "epoch": 0.6, "learning_rate": 3.70092955983491e-05, "loss": 15.3339, "step": 4529 }, { "epoch": 0.6, "learning_rate": 3.698871253377164e-05, "loss": 16.3062, "step": 4530 }, { "epoch": 0.6, "learning_rate": 3.696813183404036e-05, "loss": 17.8179, "step": 4531 }, { "epoch": 0.6, "learning_rate": 3.6947553502895885e-05, "loss": 17.4011, "step": 4532 }, { "epoch": 0.6, "learning_rate": 3.692697754407838e-05, "loss": 0.0004, "step": 4533 }, { "epoch": 0.6, "learning_rate": 3.69064039613276e-05, "loss": 16.8959, "step": 4534 }, { "epoch": 0.6, "learning_rate": 3.6885832758382865e-05, "loss": 17.6129, "step": 4535 }, { "epoch": 0.6, "learning_rate": 3.686526393898307e-05, "loss": 0.0, "step": 4536 }, { "epoch": 0.6, "learning_rate": 3.6844697506866665e-05, "loss": 17.4581, "step": 4537 }, { "epoch": 0.6, "learning_rate": 3.6824133465771656e-05, "loss": 17.4542, "step": 4538 }, { "epoch": 0.6, "learning_rate": 3.680357181943565e-05, "loss": 17.384, "step": 4539 }, { "epoch": 0.6, "learning_rate": 3.678301257159578e-05, "loss": 17.5485, "step": 4540 }, { "epoch": 0.6, "learning_rate": 3.676245572598878e-05, "loss": 0.0003, "step": 4541 }, { "epoch": 0.6, "learning_rate": 3.6741901286350896e-05, "loss": 17.5472, "step": 4542 }, { "epoch": 0.6, "learning_rate": 3.6721349256418e-05, "loss": 15.4218, "step": 4543 }, { "epoch": 0.6, "learning_rate": 3.670079963992547e-05, "loss": 16.5579, "step": 4544 }, { "epoch": 0.6, "learning_rate": 3.6680252440608274e-05, "loss": 0.0, "step": 4545 }, { "epoch": 0.6, "learning_rate": 3.665970766220095e-05, "loss": 16.5344, "step": 4546 }, { "epoch": 0.6, "learning_rate": 3.663916530843757e-05, "loss": 15.7555, "step": 4547 }, { "epoch": 0.6, "learning_rate": 3.6618625383051776e-05, "loss": 18.5698, "step": 4548 }, { "epoch": 0.6, "learning_rate": 3.659808788977678e-05, "loss": 16.5169, "step": 4549 }, { "epoch": 0.6, "learning_rate": 3.6577552832345344e-05, "loss": 17.2167, "step": 4550 }, { "epoch": 0.6, "learning_rate": 3.6557020214489766e-05, "loss": 17.3675, "step": 4551 }, { "epoch": 0.6, "learning_rate": 3.653649003994193e-05, "loss": 0.0001, "step": 4552 }, { "epoch": 0.6, "learning_rate": 3.651596231243327e-05, "loss": 17.4048, "step": 4553 }, { "epoch": 0.6, "learning_rate": 3.649543703569477e-05, "loss": 16.6676, "step": 4554 }, { "epoch": 0.6, "learning_rate": 3.6474914213456975e-05, "loss": 16.7938, "step": 4555 }, { "epoch": 0.6, "learning_rate": 3.6454393849449954e-05, "loss": 18.8185, "step": 4556 }, { "epoch": 0.6, "learning_rate": 3.6433875947403385e-05, "loss": 17.3986, "step": 4557 }, { "epoch": 0.6, "learning_rate": 3.641336051104644e-05, "loss": 16.4557, "step": 4558 }, { "epoch": 0.6, "learning_rate": 3.63928475441079e-05, "loss": 0.0007, "step": 4559 }, { "epoch": 0.6, "learning_rate": 3.637233705031603e-05, "loss": 17.6057, "step": 4560 }, { "epoch": 0.6, "learning_rate": 3.635182903339871e-05, "loss": 0.0001, "step": 4561 }, { "epoch": 0.6, "learning_rate": 3.6331323497083344e-05, "loss": 0.0, "step": 4562 }, { "epoch": 0.6, "learning_rate": 3.6310820445096874e-05, "loss": 17.0901, "step": 4563 }, { "epoch": 0.6, "learning_rate": 3.629031988116578e-05, "loss": 17.5521, "step": 4564 }, { "epoch": 0.6, "learning_rate": 3.6269821809016144e-05, "loss": 17.6412, "step": 4565 }, { "epoch": 0.6, "learning_rate": 3.624932623237355e-05, "loss": 15.5806, "step": 4566 }, { "epoch": 0.6, "learning_rate": 3.6228833154963144e-05, "loss": 16.7817, "step": 4567 }, { "epoch": 0.6, "learning_rate": 3.6208342580509604e-05, "loss": 17.7735, "step": 4568 }, { "epoch": 0.6, "learning_rate": 3.618785451273715e-05, "loss": 0.0006, "step": 4569 }, { "epoch": 0.6, "learning_rate": 3.616736895536959e-05, "loss": 0.0, "step": 4570 }, { "epoch": 0.6, "learning_rate": 3.614688591213021e-05, "loss": 15.2862, "step": 4571 }, { "epoch": 0.6, "learning_rate": 3.6126405386741915e-05, "loss": 0.0001, "step": 4572 }, { "epoch": 0.6, "learning_rate": 3.6105927382927065e-05, "loss": 0.0002, "step": 4573 }, { "epoch": 0.6, "learning_rate": 3.608545190440764e-05, "loss": 16.6772, "step": 4574 }, { "epoch": 0.6, "learning_rate": 3.6064978954905115e-05, "loss": 17.2703, "step": 4575 }, { "epoch": 0.6, "learning_rate": 3.604450853814053e-05, "loss": 0.0, "step": 4576 }, { "epoch": 0.6, "learning_rate": 3.6024040657834425e-05, "loss": 0.0, "step": 4577 }, { "epoch": 0.6, "learning_rate": 3.600357531770695e-05, "loss": 18.3101, "step": 4578 }, { "epoch": 0.6, "learning_rate": 3.598311252147772e-05, "loss": 0.0009, "step": 4579 }, { "epoch": 0.6, "learning_rate": 3.596265227286593e-05, "loss": 18.2496, "step": 4580 }, { "epoch": 0.6, "learning_rate": 3.594219457559028e-05, "loss": 18.0201, "step": 4581 }, { "epoch": 0.6, "learning_rate": 3.592173943336907e-05, "loss": 16.9492, "step": 4582 }, { "epoch": 0.6, "learning_rate": 3.590128684992006e-05, "loss": 0.0003, "step": 4583 }, { "epoch": 0.6, "learning_rate": 3.588083682896059e-05, "loss": 0.0001, "step": 4584 }, { "epoch": 0.6, "learning_rate": 3.586038937420749e-05, "loss": 17.9878, "step": 4585 }, { "epoch": 0.6, "learning_rate": 3.5839944489377205e-05, "loss": 15.8319, "step": 4586 }, { "epoch": 0.6, "learning_rate": 3.581950217818563e-05, "loss": 16.9819, "step": 4587 }, { "epoch": 0.6, "learning_rate": 3.579906244434824e-05, "loss": 16.7399, "step": 4588 }, { "epoch": 0.6, "learning_rate": 3.577862529158002e-05, "loss": 15.8185, "step": 4589 }, { "epoch": 0.6, "learning_rate": 3.5758190723595485e-05, "loss": 0.0002, "step": 4590 }, { "epoch": 0.6, "learning_rate": 3.573775874410871e-05, "loss": 17.1137, "step": 4591 }, { "epoch": 0.6, "learning_rate": 3.571732935683325e-05, "loss": 0.0002, "step": 4592 }, { "epoch": 0.6, "learning_rate": 3.5696902565482244e-05, "loss": 17.5906, "step": 4593 }, { "epoch": 0.6, "learning_rate": 3.56764783737683e-05, "loss": 0.0003, "step": 4594 }, { "epoch": 0.6, "learning_rate": 3.565605678540362e-05, "loss": 18.1527, "step": 4595 }, { "epoch": 0.6, "learning_rate": 3.563563780409987e-05, "loss": 0.0014, "step": 4596 }, { "epoch": 0.61, "learning_rate": 3.561522143356829e-05, "loss": 0.0, "step": 4597 }, { "epoch": 0.61, "learning_rate": 3.5594807677519604e-05, "loss": 18.3842, "step": 4598 }, { "epoch": 0.61, "learning_rate": 3.557439653966411e-05, "loss": 0.0, "step": 4599 }, { "epoch": 0.61, "learning_rate": 3.5553988023711575e-05, "loss": 17.7161, "step": 4600 }, { "epoch": 0.61, "learning_rate": 3.553358213337133e-05, "loss": 0.0, "step": 4601 }, { "epoch": 0.61, "learning_rate": 3.5513178872352216e-05, "loss": 17.3354, "step": 4602 }, { "epoch": 0.61, "learning_rate": 3.5492778244362595e-05, "loss": 17.2193, "step": 4603 }, { "epoch": 0.61, "learning_rate": 3.5472380253110346e-05, "loss": 0.0002, "step": 4604 }, { "epoch": 0.61, "learning_rate": 3.545198490230289e-05, "loss": 16.0706, "step": 4605 }, { "epoch": 0.61, "learning_rate": 3.543159219564712e-05, "loss": 15.8766, "step": 4606 }, { "epoch": 0.61, "learning_rate": 3.541120213684952e-05, "loss": 15.5798, "step": 4607 }, { "epoch": 0.61, "learning_rate": 3.5390814729616025e-05, "loss": 0.0001, "step": 4608 }, { "epoch": 0.61, "learning_rate": 3.537042997765213e-05, "loss": 0.0002, "step": 4609 }, { "epoch": 0.61, "learning_rate": 3.535004788466282e-05, "loss": 19.2571, "step": 4610 }, { "epoch": 0.61, "learning_rate": 3.532966845435263e-05, "loss": 16.0541, "step": 4611 }, { "epoch": 0.61, "learning_rate": 3.5309291690425574e-05, "loss": 15.0864, "step": 4612 }, { "epoch": 0.61, "learning_rate": 3.5288917596585213e-05, "loss": 17.6287, "step": 4613 }, { "epoch": 0.61, "learning_rate": 3.5268546176534584e-05, "loss": 16.2061, "step": 4614 }, { "epoch": 0.61, "learning_rate": 3.524817743397629e-05, "loss": 0.0, "step": 4615 }, { "epoch": 0.61, "learning_rate": 3.522781137261242e-05, "loss": 14.5301, "step": 4616 }, { "epoch": 0.61, "learning_rate": 3.520744799614455e-05, "loss": 17.9439, "step": 4617 }, { "epoch": 0.61, "learning_rate": 3.5187087308273824e-05, "loss": 17.4689, "step": 4618 }, { "epoch": 0.61, "learning_rate": 3.516672931270083e-05, "loss": 0.0001, "step": 4619 }, { "epoch": 0.61, "learning_rate": 3.514637401312574e-05, "loss": 16.3473, "step": 4620 }, { "epoch": 0.61, "learning_rate": 3.512602141324819e-05, "loss": 16.2368, "step": 4621 }, { "epoch": 0.61, "learning_rate": 3.510567151676732e-05, "loss": 17.1394, "step": 4622 }, { "epoch": 0.61, "learning_rate": 3.50853243273818e-05, "loss": 0.0009, "step": 4623 }, { "epoch": 0.61, "learning_rate": 3.506497984878981e-05, "loss": 18.6713, "step": 4624 }, { "epoch": 0.61, "learning_rate": 3.504463808468903e-05, "loss": 0.0, "step": 4625 }, { "epoch": 0.61, "learning_rate": 3.502429903877663e-05, "loss": 15.6916, "step": 4626 }, { "epoch": 0.61, "learning_rate": 3.5003962714749306e-05, "loss": 16.5426, "step": 4627 }, { "epoch": 0.61, "learning_rate": 3.4983629116303276e-05, "loss": 16.3753, "step": 4628 }, { "epoch": 0.61, "learning_rate": 3.4963298247134214e-05, "loss": 16.4477, "step": 4629 }, { "epoch": 0.61, "learning_rate": 3.494297011093735e-05, "loss": 0.0001, "step": 4630 }, { "epoch": 0.61, "learning_rate": 3.492264471140735e-05, "loss": 18.4938, "step": 4631 }, { "epoch": 0.61, "learning_rate": 3.490232205223848e-05, "loss": 17.5577, "step": 4632 }, { "epoch": 0.61, "learning_rate": 3.488200213712442e-05, "loss": 17.0902, "step": 4633 }, { "epoch": 0.61, "learning_rate": 3.486168496975839e-05, "loss": 0.0, "step": 4634 }, { "epoch": 0.61, "learning_rate": 3.4841370553833095e-05, "loss": 0.0003, "step": 4635 }, { "epoch": 0.61, "learning_rate": 3.482105889304077e-05, "loss": 15.4755, "step": 4636 }, { "epoch": 0.61, "learning_rate": 3.480074999107313e-05, "loss": 0.0002, "step": 4637 }, { "epoch": 0.61, "learning_rate": 3.4780443851621366e-05, "loss": 0.0001, "step": 4638 }, { "epoch": 0.61, "learning_rate": 3.4760140478376205e-05, "loss": 17.5757, "step": 4639 }, { "epoch": 0.61, "learning_rate": 3.473983987502785e-05, "loss": 18.0724, "step": 4640 }, { "epoch": 0.61, "learning_rate": 3.471954204526601e-05, "loss": 19.2048, "step": 4641 }, { "epoch": 0.61, "learning_rate": 3.4699246992779874e-05, "loss": 17.0649, "step": 4642 }, { "epoch": 0.61, "learning_rate": 3.467895472125815e-05, "loss": 15.3742, "step": 4643 }, { "epoch": 0.61, "learning_rate": 3.465866523438901e-05, "loss": 17.8457, "step": 4644 }, { "epoch": 0.61, "learning_rate": 3.463837853586016e-05, "loss": 15.132, "step": 4645 }, { "epoch": 0.61, "learning_rate": 3.461809462935877e-05, "loss": 18.1409, "step": 4646 }, { "epoch": 0.61, "learning_rate": 3.459781351857151e-05, "loss": 19.2757, "step": 4647 }, { "epoch": 0.61, "learning_rate": 3.457753520718452e-05, "loss": 0.0, "step": 4648 }, { "epoch": 0.61, "learning_rate": 3.4557259698883486e-05, "loss": 0.0, "step": 4649 }, { "epoch": 0.61, "learning_rate": 3.453698699735354e-05, "loss": 17.1599, "step": 4650 }, { "epoch": 0.61, "learning_rate": 3.4516717106279305e-05, "loss": 0.0, "step": 4651 }, { "epoch": 0.61, "learning_rate": 3.449645002934491e-05, "loss": 0.0001, "step": 4652 }, { "epoch": 0.61, "learning_rate": 3.4476185770233974e-05, "loss": 16.6524, "step": 4653 }, { "epoch": 0.61, "learning_rate": 3.4455924332629576e-05, "loss": 0.0002, "step": 4654 }, { "epoch": 0.61, "learning_rate": 3.4435665720214325e-05, "loss": 0.0003, "step": 4655 }, { "epoch": 0.61, "learning_rate": 3.441540993667027e-05, "loss": 0.0, "step": 4656 }, { "epoch": 0.61, "learning_rate": 3.439515698567899e-05, "loss": 17.8977, "step": 4657 }, { "epoch": 0.61, "learning_rate": 3.437490687092152e-05, "loss": 16.2556, "step": 4658 }, { "epoch": 0.61, "learning_rate": 3.435465959607838e-05, "loss": 0.0002, "step": 4659 }, { "epoch": 0.61, "learning_rate": 3.4334415164829585e-05, "loss": 0.0001, "step": 4660 }, { "epoch": 0.61, "learning_rate": 3.431417358085465e-05, "loss": 16.8545, "step": 4661 }, { "epoch": 0.61, "learning_rate": 3.429393484783253e-05, "loss": 17.8062, "step": 4662 }, { "epoch": 0.61, "learning_rate": 3.4273698969441695e-05, "loss": 0.0002, "step": 4663 }, { "epoch": 0.61, "learning_rate": 3.425346594936009e-05, "loss": 17.7416, "step": 4664 }, { "epoch": 0.61, "learning_rate": 3.423323579126511e-05, "loss": 17.8523, "step": 4665 }, { "epoch": 0.61, "learning_rate": 3.4213008498833675e-05, "loss": 15.4167, "step": 4666 }, { "epoch": 0.61, "learning_rate": 3.419278407574216e-05, "loss": 19.2145, "step": 4667 }, { "epoch": 0.61, "learning_rate": 3.417256252566643e-05, "loss": 17.0125, "step": 4668 }, { "epoch": 0.61, "learning_rate": 3.415234385228182e-05, "loss": 17.3019, "step": 4669 }, { "epoch": 0.61, "learning_rate": 3.4132128059263136e-05, "loss": 0.0003, "step": 4670 }, { "epoch": 0.61, "learning_rate": 3.411191515028467e-05, "loss": 16.9919, "step": 4671 }, { "epoch": 0.61, "learning_rate": 3.409170512902019e-05, "loss": 16.4015, "step": 4672 }, { "epoch": 0.62, "learning_rate": 3.407149799914294e-05, "loss": 17.1134, "step": 4673 }, { "epoch": 0.62, "learning_rate": 3.405129376432564e-05, "loss": 17.1042, "step": 4674 }, { "epoch": 0.62, "learning_rate": 3.403109242824045e-05, "loss": 18.0272, "step": 4675 }, { "epoch": 0.62, "learning_rate": 3.401089399455907e-05, "loss": 18.2366, "step": 4676 }, { "epoch": 0.62, "learning_rate": 3.3990698466952615e-05, "loss": 17.1193, "step": 4677 }, { "epoch": 0.62, "learning_rate": 3.3970505849091694e-05, "loss": 15.2921, "step": 4678 }, { "epoch": 0.62, "learning_rate": 3.3950316144646376e-05, "loss": 17.9865, "step": 4679 }, { "epoch": 0.62, "learning_rate": 3.393012935728623e-05, "loss": 16.9119, "step": 4680 }, { "epoch": 0.62, "learning_rate": 3.390994549068026e-05, "loss": 18.7905, "step": 4681 }, { "epoch": 0.62, "learning_rate": 3.3889764548496954e-05, "loss": 0.0, "step": 4682 }, { "epoch": 0.62, "learning_rate": 3.386958653440426e-05, "loss": 16.8915, "step": 4683 }, { "epoch": 0.62, "learning_rate": 3.384941145206962e-05, "loss": 18.0648, "step": 4684 }, { "epoch": 0.62, "learning_rate": 3.3829239305159914e-05, "loss": 17.6897, "step": 4685 }, { "epoch": 0.62, "learning_rate": 3.38090700973415e-05, "loss": 16.9197, "step": 4686 }, { "epoch": 0.62, "learning_rate": 3.378890383228021e-05, "loss": 19.0245, "step": 4687 }, { "epoch": 0.62, "learning_rate": 3.37687405136413e-05, "loss": 16.1996, "step": 4688 }, { "epoch": 0.62, "learning_rate": 3.374858014508955e-05, "loss": 18.0418, "step": 4689 }, { "epoch": 0.62, "learning_rate": 3.3728422730289175e-05, "loss": 0.0, "step": 4690 }, { "epoch": 0.62, "learning_rate": 3.370826827290386e-05, "loss": 17.955, "step": 4691 }, { "epoch": 0.62, "learning_rate": 3.368811677659672e-05, "loss": 16.5775, "step": 4692 }, { "epoch": 0.62, "learning_rate": 3.3667968245030385e-05, "loss": 18.3681, "step": 4693 }, { "epoch": 0.62, "learning_rate": 3.3647822681866917e-05, "loss": 16.7946, "step": 4694 }, { "epoch": 0.62, "learning_rate": 3.362768009076782e-05, "loss": 0.0002, "step": 4695 }, { "epoch": 0.62, "learning_rate": 3.360754047539409e-05, "loss": 18.0223, "step": 4696 }, { "epoch": 0.62, "learning_rate": 3.358740383940619e-05, "loss": 15.8767, "step": 4697 }, { "epoch": 0.62, "learning_rate": 3.3567270186464014e-05, "loss": 0.0001, "step": 4698 }, { "epoch": 0.62, "learning_rate": 3.354713952022691e-05, "loss": 15.1911, "step": 4699 }, { "epoch": 0.62, "learning_rate": 3.352701184435369e-05, "loss": 17.9138, "step": 4700 }, { "epoch": 0.62, "learning_rate": 3.3506887162502655e-05, "loss": 17.5901, "step": 4701 }, { "epoch": 0.62, "learning_rate": 3.3486765478331516e-05, "loss": 17.4154, "step": 4702 }, { "epoch": 0.62, "learning_rate": 3.346664679549746e-05, "loss": 16.1616, "step": 4703 }, { "epoch": 0.62, "learning_rate": 3.344653111765712e-05, "loss": 18.6278, "step": 4704 }, { "epoch": 0.62, "learning_rate": 3.342641844846661e-05, "loss": 16.1862, "step": 4705 }, { "epoch": 0.62, "learning_rate": 3.3406308791581475e-05, "loss": 16.5531, "step": 4706 }, { "epoch": 0.62, "learning_rate": 3.3386202150656696e-05, "loss": 0.0, "step": 4707 }, { "epoch": 0.62, "learning_rate": 3.336609852934672e-05, "loss": 18.1847, "step": 4708 }, { "epoch": 0.62, "learning_rate": 3.334599793130548e-05, "loss": 16.4766, "step": 4709 }, { "epoch": 0.62, "learning_rate": 3.332590036018631e-05, "loss": 0.0002, "step": 4710 }, { "epoch": 0.62, "learning_rate": 3.330580581964201e-05, "loss": 16.8844, "step": 4711 }, { "epoch": 0.62, "learning_rate": 3.3285714313324836e-05, "loss": 18.1837, "step": 4712 }, { "epoch": 0.62, "learning_rate": 3.3265625844886475e-05, "loss": 16.6392, "step": 4713 }, { "epoch": 0.62, "learning_rate": 3.3245540417978106e-05, "loss": 0.0001, "step": 4714 }, { "epoch": 0.62, "learning_rate": 3.32254580362503e-05, "loss": 14.8648, "step": 4715 }, { "epoch": 0.62, "learning_rate": 3.32053787033531e-05, "loss": 0.0001, "step": 4716 }, { "epoch": 0.62, "learning_rate": 3.318530242293599e-05, "loss": 0.0, "step": 4717 }, { "epoch": 0.62, "learning_rate": 3.316522919864793e-05, "loss": 15.9968, "step": 4718 }, { "epoch": 0.62, "learning_rate": 3.3145159034137275e-05, "loss": 16.1145, "step": 4719 }, { "epoch": 0.62, "learning_rate": 3.312509193305184e-05, "loss": 16.8384, "step": 4720 }, { "epoch": 0.62, "learning_rate": 3.3105027899038896e-05, "loss": 18.173, "step": 4721 }, { "epoch": 0.62, "learning_rate": 3.3084966935745174e-05, "loss": 18.4287, "step": 4722 }, { "epoch": 0.62, "learning_rate": 3.306490904681679e-05, "loss": 0.0, "step": 4723 }, { "epoch": 0.62, "learning_rate": 3.3044854235899354e-05, "loss": 0.0, "step": 4724 }, { "epoch": 0.62, "learning_rate": 3.302480250663788e-05, "loss": 0.0, "step": 4725 }, { "epoch": 0.62, "learning_rate": 3.300475386267685e-05, "loss": 17.7078, "step": 4726 }, { "epoch": 0.62, "learning_rate": 3.298470830766018e-05, "loss": 17.0152, "step": 4727 }, { "epoch": 0.62, "learning_rate": 3.296466584523121e-05, "loss": 17.276, "step": 4728 }, { "epoch": 0.62, "learning_rate": 3.2944626479032715e-05, "loss": 18.1555, "step": 4729 }, { "epoch": 0.62, "learning_rate": 3.292459021270694e-05, "loss": 0.0001, "step": 4730 }, { "epoch": 0.62, "learning_rate": 3.290455704989553e-05, "loss": 15.985, "step": 4731 }, { "epoch": 0.62, "learning_rate": 3.2884526994239594e-05, "loss": 17.7943, "step": 4732 }, { "epoch": 0.62, "learning_rate": 3.286450004937965e-05, "loss": 0.0001, "step": 4733 }, { "epoch": 0.62, "learning_rate": 3.284447621895568e-05, "loss": 16.5099, "step": 4734 }, { "epoch": 0.62, "learning_rate": 3.2824455506607066e-05, "loss": 17.4996, "step": 4735 }, { "epoch": 0.62, "learning_rate": 3.2804437915972656e-05, "loss": 16.7305, "step": 4736 }, { "epoch": 0.62, "learning_rate": 3.278442345069071e-05, "loss": 16.2203, "step": 4737 }, { "epoch": 0.62, "learning_rate": 3.2764412114398915e-05, "loss": 17.0645, "step": 4738 }, { "epoch": 0.62, "learning_rate": 3.274440391073442e-05, "loss": 16.5799, "step": 4739 }, { "epoch": 0.62, "learning_rate": 3.272439884333377e-05, "loss": 18.152, "step": 4740 }, { "epoch": 0.62, "learning_rate": 3.270439691583296e-05, "loss": 16.3611, "step": 4741 }, { "epoch": 0.62, "learning_rate": 3.268439813186741e-05, "loss": 16.8592, "step": 4742 }, { "epoch": 0.62, "learning_rate": 3.2664402495071964e-05, "loss": 15.6579, "step": 4743 }, { "epoch": 0.62, "learning_rate": 3.264441000908091e-05, "loss": 0.0001, "step": 4744 }, { "epoch": 0.62, "learning_rate": 3.262442067752793e-05, "loss": 15.0015, "step": 4745 }, { "epoch": 0.62, "learning_rate": 3.260443450404617e-05, "loss": 18.4021, "step": 4746 }, { "epoch": 0.62, "learning_rate": 3.258445149226818e-05, "loss": 18.6844, "step": 4747 }, { "epoch": 0.62, "learning_rate": 3.256447164582594e-05, "loss": 16.1116, "step": 4748 }, { "epoch": 0.63, "learning_rate": 3.254449496835086e-05, "loss": 17.8114, "step": 4749 }, { "epoch": 0.63, "learning_rate": 3.2524521463473756e-05, "loss": 16.9426, "step": 4750 }, { "epoch": 0.63, "learning_rate": 3.2504551134824904e-05, "loss": 0.0, "step": 4751 }, { "epoch": 0.63, "learning_rate": 3.2484583986033965e-05, "loss": 16.9244, "step": 4752 }, { "epoch": 0.63, "learning_rate": 3.246462002073004e-05, "loss": 15.9695, "step": 4753 }, { "epoch": 0.63, "learning_rate": 3.244465924254164e-05, "loss": 16.8496, "step": 4754 }, { "epoch": 0.63, "learning_rate": 3.242470165509672e-05, "loss": 17.5481, "step": 4755 }, { "epoch": 0.63, "learning_rate": 3.2404747262022636e-05, "loss": 15.8091, "step": 4756 }, { "epoch": 0.63, "learning_rate": 3.238479606694617e-05, "loss": 0.0, "step": 4757 }, { "epoch": 0.63, "learning_rate": 3.2364848073493495e-05, "loss": 0.0, "step": 4758 }, { "epoch": 0.63, "learning_rate": 3.234490328529026e-05, "loss": 16.2465, "step": 4759 }, { "epoch": 0.63, "learning_rate": 3.2324961705961487e-05, "loss": 16.7145, "step": 4760 }, { "epoch": 0.63, "learning_rate": 3.230502333913162e-05, "loss": 15.7469, "step": 4761 }, { "epoch": 0.63, "learning_rate": 3.228508818842455e-05, "loss": 16.6744, "step": 4762 }, { "epoch": 0.63, "learning_rate": 3.2265156257463515e-05, "loss": 0.0001, "step": 4763 }, { "epoch": 0.63, "learning_rate": 3.2245227549871246e-05, "loss": 17.4834, "step": 4764 }, { "epoch": 0.63, "learning_rate": 3.222530206926985e-05, "loss": 18.0697, "step": 4765 }, { "epoch": 0.63, "learning_rate": 3.220537981928083e-05, "loss": 17.1713, "step": 4766 }, { "epoch": 0.63, "learning_rate": 3.218546080352515e-05, "loss": 17.4141, "step": 4767 }, { "epoch": 0.63, "learning_rate": 3.216554502562316e-05, "loss": 16.988, "step": 4768 }, { "epoch": 0.63, "learning_rate": 3.21456324891946e-05, "loss": 0.0, "step": 4769 }, { "epoch": 0.63, "learning_rate": 3.212572319785865e-05, "loss": 16.8066, "step": 4770 }, { "epoch": 0.63, "learning_rate": 3.210581715523389e-05, "loss": 15.4798, "step": 4771 }, { "epoch": 0.63, "learning_rate": 3.208591436493833e-05, "loss": 18.2547, "step": 4772 }, { "epoch": 0.63, "learning_rate": 3.2066014830589354e-05, "loss": 15.2592, "step": 4773 }, { "epoch": 0.63, "learning_rate": 3.204611855580377e-05, "loss": 17.569, "step": 4774 }, { "epoch": 0.63, "learning_rate": 3.202622554419779e-05, "loss": 18.4066, "step": 4775 }, { "epoch": 0.63, "learning_rate": 3.200633579938705e-05, "loss": 15.6608, "step": 4776 }, { "epoch": 0.63, "learning_rate": 3.1986449324986576e-05, "loss": 14.8817, "step": 4777 }, { "epoch": 0.63, "learning_rate": 3.1966566124610805e-05, "loss": 0.0, "step": 4778 }, { "epoch": 0.63, "learning_rate": 3.194668620187356e-05, "loss": 16.0485, "step": 4779 }, { "epoch": 0.63, "learning_rate": 3.192680956038811e-05, "loss": 17.5977, "step": 4780 }, { "epoch": 0.63, "learning_rate": 3.1906936203767094e-05, "loss": 16.6509, "step": 4781 }, { "epoch": 0.63, "learning_rate": 3.1887066135622554e-05, "loss": 0.0, "step": 4782 }, { "epoch": 0.63, "learning_rate": 3.186719935956594e-05, "loss": 16.5229, "step": 4783 }, { "epoch": 0.63, "learning_rate": 3.184733587920813e-05, "loss": 16.7147, "step": 4784 }, { "epoch": 0.63, "learning_rate": 3.1827475698159356e-05, "loss": 0.0002, "step": 4785 }, { "epoch": 0.63, "learning_rate": 3.180761882002929e-05, "loss": 16.6027, "step": 4786 }, { "epoch": 0.63, "learning_rate": 3.178776524842699e-05, "loss": 16.2909, "step": 4787 }, { "epoch": 0.63, "learning_rate": 3.176791498696088e-05, "loss": 18.2539, "step": 4788 }, { "epoch": 0.63, "learning_rate": 3.1748068039238856e-05, "loss": 17.6259, "step": 4789 }, { "epoch": 0.63, "learning_rate": 3.172822440886814e-05, "loss": 17.7231, "step": 4790 }, { "epoch": 0.63, "learning_rate": 3.170838409945539e-05, "loss": 0.0, "step": 4791 }, { "epoch": 0.63, "learning_rate": 3.1688547114606636e-05, "loss": 15.9042, "step": 4792 }, { "epoch": 0.63, "learning_rate": 3.166871345792735e-05, "loss": 17.3372, "step": 4793 }, { "epoch": 0.63, "learning_rate": 3.1648883133022345e-05, "loss": 0.0, "step": 4794 }, { "epoch": 0.63, "learning_rate": 3.162905614349585e-05, "loss": 0.0, "step": 4795 }, { "epoch": 0.63, "learning_rate": 3.160923249295148e-05, "loss": 16.4508, "step": 4796 }, { "epoch": 0.63, "learning_rate": 3.1589412184992286e-05, "loss": 0.0002, "step": 4797 }, { "epoch": 0.63, "learning_rate": 3.1569595223220635e-05, "loss": 15.9579, "step": 4798 }, { "epoch": 0.63, "learning_rate": 3.1549781611238364e-05, "loss": 17.0333, "step": 4799 }, { "epoch": 0.63, "learning_rate": 3.152997135264666e-05, "loss": 18.7905, "step": 4800 }, { "epoch": 0.63, "learning_rate": 3.151016445104608e-05, "loss": 0.0001, "step": 4801 }, { "epoch": 0.63, "learning_rate": 3.149036091003659e-05, "loss": 17.723, "step": 4802 }, { "epoch": 0.63, "learning_rate": 3.147056073321759e-05, "loss": 17.0039, "step": 4803 }, { "epoch": 0.63, "learning_rate": 3.145076392418782e-05, "loss": 16.6292, "step": 4804 }, { "epoch": 0.63, "learning_rate": 3.1430970486545404e-05, "loss": 18.4378, "step": 4805 }, { "epoch": 0.63, "learning_rate": 3.141118042388787e-05, "loss": 16.8573, "step": 4806 }, { "epoch": 0.63, "learning_rate": 3.139139373981212e-05, "loss": 0.0005, "step": 4807 }, { "epoch": 0.63, "learning_rate": 3.137161043791449e-05, "loss": 17.7245, "step": 4808 }, { "epoch": 0.63, "learning_rate": 3.1351830521790634e-05, "loss": 17.9544, "step": 4809 }, { "epoch": 0.63, "learning_rate": 3.133205399503561e-05, "loss": 0.0005, "step": 4810 }, { "epoch": 0.63, "learning_rate": 3.131228086124389e-05, "loss": 16.269, "step": 4811 }, { "epoch": 0.63, "learning_rate": 3.129251112400931e-05, "loss": 0.0, "step": 4812 }, { "epoch": 0.63, "learning_rate": 3.127274478692508e-05, "loss": 0.0001, "step": 4813 }, { "epoch": 0.63, "learning_rate": 3.1252981853583793e-05, "loss": 17.9716, "step": 4814 }, { "epoch": 0.63, "learning_rate": 3.1233222327577434e-05, "loss": 16.9504, "step": 4815 }, { "epoch": 0.63, "learning_rate": 3.121346621249738e-05, "loss": 16.613, "step": 4816 }, { "epoch": 0.63, "learning_rate": 3.1193713511934355e-05, "loss": 18.3335, "step": 4817 }, { "epoch": 0.63, "learning_rate": 3.117396422947848e-05, "loss": 0.0, "step": 4818 }, { "epoch": 0.63, "learning_rate": 3.115421836871926e-05, "loss": 17.9806, "step": 4819 }, { "epoch": 0.63, "learning_rate": 3.1134475933245577e-05, "loss": 0.0001, "step": 4820 }, { "epoch": 0.63, "learning_rate": 3.111473692664569e-05, "loss": 17.3883, "step": 4821 }, { "epoch": 0.63, "learning_rate": 3.109500135250721e-05, "loss": 16.7849, "step": 4822 }, { "epoch": 0.63, "learning_rate": 3.107526921441717e-05, "loss": 16.3508, "step": 4823 }, { "epoch": 0.63, "learning_rate": 3.105554051596194e-05, "loss": 16.0012, "step": 4824 }, { "epoch": 0.64, "learning_rate": 3.1035815260727285e-05, "loss": 18.1894, "step": 4825 }, { "epoch": 0.64, "learning_rate": 3.101609345229833e-05, "loss": 17.2048, "step": 4826 }, { "epoch": 0.64, "learning_rate": 3.099637509425959e-05, "loss": 19.7881, "step": 4827 }, { "epoch": 0.64, "learning_rate": 3.097666019019494e-05, "loss": 17.8243, "step": 4828 }, { "epoch": 0.64, "learning_rate": 3.095694874368764e-05, "loss": 16.6506, "step": 4829 }, { "epoch": 0.64, "learning_rate": 3.0937240758320295e-05, "loss": 0.0, "step": 4830 }, { "epoch": 0.64, "learning_rate": 3.091753623767491e-05, "loss": 16.0947, "step": 4831 }, { "epoch": 0.64, "learning_rate": 3.089783518533285e-05, "loss": 17.3557, "step": 4832 }, { "epoch": 0.64, "learning_rate": 3.087813760487486e-05, "loss": 16.6368, "step": 4833 }, { "epoch": 0.64, "learning_rate": 3.085844349988102e-05, "loss": 15.0077, "step": 4834 }, { "epoch": 0.64, "learning_rate": 3.083875287393081e-05, "loss": 0.0001, "step": 4835 }, { "epoch": 0.64, "learning_rate": 3.081906573060306e-05, "loss": 18.4198, "step": 4836 }, { "epoch": 0.64, "learning_rate": 3.0799382073476e-05, "loss": 0.0, "step": 4837 }, { "epoch": 0.64, "learning_rate": 3.077970190612719e-05, "loss": 15.7891, "step": 4838 }, { "epoch": 0.64, "learning_rate": 3.076002523213357e-05, "loss": 17.5327, "step": 4839 }, { "epoch": 0.64, "learning_rate": 3.074035205507142e-05, "loss": 16.2148, "step": 4840 }, { "epoch": 0.64, "learning_rate": 3.0720682378516455e-05, "loss": 17.7032, "step": 4841 }, { "epoch": 0.64, "learning_rate": 3.0701016206043665e-05, "loss": 15.0765, "step": 4842 }, { "epoch": 0.64, "learning_rate": 3.0681353541227455e-05, "loss": 17.7109, "step": 4843 }, { "epoch": 0.64, "learning_rate": 3.066169438764158e-05, "loss": 17.4517, "step": 4844 }, { "epoch": 0.64, "learning_rate": 3.064203874885917e-05, "loss": 17.2206, "step": 4845 }, { "epoch": 0.64, "learning_rate": 3.06223866284527e-05, "loss": 16.1556, "step": 4846 }, { "epoch": 0.64, "learning_rate": 3.0602738029994014e-05, "loss": 16.2377, "step": 4847 }, { "epoch": 0.64, "learning_rate": 3.0583092957054294e-05, "loss": 0.0, "step": 4848 }, { "epoch": 0.64, "learning_rate": 3.0563451413204124e-05, "loss": 15.7661, "step": 4849 }, { "epoch": 0.64, "learning_rate": 3.054381340201341e-05, "loss": 0.0, "step": 4850 }, { "epoch": 0.64, "learning_rate": 3.0524178927051426e-05, "loss": 15.6461, "step": 4851 }, { "epoch": 0.64, "learning_rate": 3.05045479918868e-05, "loss": 0.0, "step": 4852 }, { "epoch": 0.64, "learning_rate": 3.048492060008754e-05, "loss": 0.0001, "step": 4853 }, { "epoch": 0.64, "learning_rate": 3.0465296755220983e-05, "loss": 17.0417, "step": 4854 }, { "epoch": 0.64, "learning_rate": 3.044567646085382e-05, "loss": 18.259, "step": 4855 }, { "epoch": 0.64, "learning_rate": 3.0426059720552125e-05, "loss": 19.0686, "step": 4856 }, { "epoch": 0.64, "learning_rate": 3.0406446537881283e-05, "loss": 17.2039, "step": 4857 }, { "epoch": 0.64, "learning_rate": 3.038683691640609e-05, "loss": 16.1167, "step": 4858 }, { "epoch": 0.64, "learning_rate": 3.0367230859690643e-05, "loss": 17.5735, "step": 4859 }, { "epoch": 0.64, "learning_rate": 3.0347628371298407e-05, "loss": 16.8623, "step": 4860 }, { "epoch": 0.64, "learning_rate": 3.0328029454792194e-05, "loss": 17.6333, "step": 4861 }, { "epoch": 0.64, "learning_rate": 3.0308434113734206e-05, "loss": 18.7404, "step": 4862 }, { "epoch": 0.64, "learning_rate": 3.028884235168594e-05, "loss": 16.7111, "step": 4863 }, { "epoch": 0.64, "learning_rate": 3.0269254172208272e-05, "loss": 17.7172, "step": 4864 }, { "epoch": 0.64, "learning_rate": 3.0249669578861406e-05, "loss": 16.6376, "step": 4865 }, { "epoch": 0.64, "learning_rate": 3.023008857520494e-05, "loss": 16.3318, "step": 4866 }, { "epoch": 0.64, "learning_rate": 3.0210511164797763e-05, "loss": 17.8083, "step": 4867 }, { "epoch": 0.64, "learning_rate": 3.0190937351198146e-05, "loss": 18.6195, "step": 4868 }, { "epoch": 0.64, "learning_rate": 3.017136713796369e-05, "loss": 19.769, "step": 4869 }, { "epoch": 0.64, "learning_rate": 3.0151800528651363e-05, "loss": 16.3316, "step": 4870 }, { "epoch": 0.64, "learning_rate": 3.013223752681745e-05, "loss": 17.7638, "step": 4871 }, { "epoch": 0.64, "learning_rate": 3.01126781360176e-05, "loss": 0.0002, "step": 4872 }, { "epoch": 0.64, "learning_rate": 3.009312235980679e-05, "loss": 0.0, "step": 4873 }, { "epoch": 0.64, "learning_rate": 3.007357020173937e-05, "loss": 17.2313, "step": 4874 }, { "epoch": 0.64, "learning_rate": 3.0054021665368996e-05, "loss": 0.0001, "step": 4875 }, { "epoch": 0.64, "learning_rate": 3.0034476754248687e-05, "loss": 17.3144, "step": 4876 }, { "epoch": 0.64, "learning_rate": 3.001493547193078e-05, "loss": 17.3145, "step": 4877 }, { "epoch": 0.64, "learning_rate": 2.9995397821967002e-05, "loss": 16.4123, "step": 4878 }, { "epoch": 0.64, "learning_rate": 2.9975863807908368e-05, "loss": 16.0446, "step": 4879 }, { "epoch": 0.64, "learning_rate": 2.9956333433305257e-05, "loss": 17.9864, "step": 4880 }, { "epoch": 0.64, "learning_rate": 2.9936806701707377e-05, "loss": 16.7579, "step": 4881 }, { "epoch": 0.64, "learning_rate": 2.9917283616663776e-05, "loss": 17.7405, "step": 4882 }, { "epoch": 0.64, "learning_rate": 2.989776418172286e-05, "loss": 15.0586, "step": 4883 }, { "epoch": 0.64, "learning_rate": 2.987824840043234e-05, "loss": 16.1756, "step": 4884 }, { "epoch": 0.64, "learning_rate": 2.9858736276339272e-05, "loss": 16.133, "step": 4885 }, { "epoch": 0.64, "learning_rate": 2.983922781299005e-05, "loss": 17.5202, "step": 4886 }, { "epoch": 0.64, "learning_rate": 2.9819723013930424e-05, "loss": 0.0, "step": 4887 }, { "epoch": 0.64, "learning_rate": 2.9800221882705435e-05, "loss": 16.5687, "step": 4888 }, { "epoch": 0.64, "learning_rate": 2.97807244228595e-05, "loss": 0.0, "step": 4889 }, { "epoch": 0.64, "learning_rate": 2.9761230637936333e-05, "loss": 0.0001, "step": 4890 }, { "epoch": 0.64, "learning_rate": 2.9741740531479008e-05, "loss": 17.1336, "step": 4891 }, { "epoch": 0.64, "learning_rate": 2.9722254107029913e-05, "loss": 18.3173, "step": 4892 }, { "epoch": 0.64, "learning_rate": 2.970277136813078e-05, "loss": 16.0728, "step": 4893 }, { "epoch": 0.64, "learning_rate": 2.968329231832264e-05, "loss": 17.3882, "step": 4894 }, { "epoch": 0.64, "learning_rate": 2.9663816961145906e-05, "loss": 17.1552, "step": 4895 }, { "epoch": 0.64, "learning_rate": 2.9644345300140287e-05, "loss": 17.728, "step": 4896 }, { "epoch": 0.64, "learning_rate": 2.9624877338844813e-05, "loss": 15.4077, "step": 4897 }, { "epoch": 0.64, "learning_rate": 2.9605413080797844e-05, "loss": 0.0, "step": 4898 }, { "epoch": 0.64, "learning_rate": 2.9585952529537097e-05, "loss": 16.0091, "step": 4899 }, { "epoch": 0.64, "learning_rate": 2.9566495688599594e-05, "loss": 17.3563, "step": 4900 }, { "epoch": 0.65, "learning_rate": 2.954704256152167e-05, "loss": 0.0, "step": 4901 }, { "epoch": 0.65, "learning_rate": 2.9527593151838993e-05, "loss": 18.0464, "step": 4902 }, { "epoch": 0.65, "learning_rate": 2.9508147463086578e-05, "loss": 16.3052, "step": 4903 }, { "epoch": 0.65, "learning_rate": 2.9488705498798734e-05, "loss": 16.5439, "step": 4904 }, { "epoch": 0.65, "learning_rate": 2.9469267262509116e-05, "loss": 16.6009, "step": 4905 }, { "epoch": 0.65, "learning_rate": 2.9449832757750672e-05, "loss": 14.7158, "step": 4906 }, { "epoch": 0.65, "learning_rate": 2.9430401988055694e-05, "loss": 17.43, "step": 4907 }, { "epoch": 0.65, "learning_rate": 2.9410974956955807e-05, "loss": 19.0409, "step": 4908 }, { "epoch": 0.65, "learning_rate": 2.939155166798193e-05, "loss": 15.9787, "step": 4909 }, { "epoch": 0.65, "learning_rate": 2.9372132124664314e-05, "loss": 16.8209, "step": 4910 }, { "epoch": 0.65, "learning_rate": 2.9352716330532515e-05, "loss": 17.6547, "step": 4911 }, { "epoch": 0.65, "learning_rate": 2.9333304289115442e-05, "loss": 16.1398, "step": 4912 }, { "epoch": 0.65, "learning_rate": 2.9313896003941288e-05, "loss": 16.168, "step": 4913 }, { "epoch": 0.65, "learning_rate": 2.9294491478537583e-05, "loss": 17.4787, "step": 4914 }, { "epoch": 0.65, "learning_rate": 2.9275090716431146e-05, "loss": 0.0002, "step": 4915 }, { "epoch": 0.65, "learning_rate": 2.925569372114816e-05, "loss": 15.4668, "step": 4916 }, { "epoch": 0.65, "learning_rate": 2.923630049621408e-05, "loss": 18.448, "step": 4917 }, { "epoch": 0.65, "learning_rate": 2.9216911045153695e-05, "loss": 19.576, "step": 4918 }, { "epoch": 0.65, "learning_rate": 2.9197525371491098e-05, "loss": 16.0676, "step": 4919 }, { "epoch": 0.65, "learning_rate": 2.917814347874972e-05, "loss": 16.8226, "step": 4920 }, { "epoch": 0.65, "learning_rate": 2.915876537045224e-05, "loss": 17.7652, "step": 4921 }, { "epoch": 0.65, "learning_rate": 2.9139391050120755e-05, "loss": 15.2772, "step": 4922 }, { "epoch": 0.65, "learning_rate": 2.9120020521276582e-05, "loss": 17.3138, "step": 4923 }, { "epoch": 0.65, "learning_rate": 2.910065378744039e-05, "loss": 18.4241, "step": 4924 }, { "epoch": 0.65, "learning_rate": 2.9081290852132155e-05, "loss": 16.0895, "step": 4925 }, { "epoch": 0.65, "learning_rate": 2.9061931718871138e-05, "loss": 15.7892, "step": 4926 }, { "epoch": 0.65, "learning_rate": 2.9042576391175946e-05, "loss": 18.1927, "step": 4927 }, { "epoch": 0.65, "learning_rate": 2.9023224872564464e-05, "loss": 16.765, "step": 4928 }, { "epoch": 0.65, "learning_rate": 2.9003877166553883e-05, "loss": 17.7045, "step": 4929 }, { "epoch": 0.65, "learning_rate": 2.898453327666075e-05, "loss": 16.8933, "step": 4930 }, { "epoch": 0.65, "learning_rate": 2.8965193206400875e-05, "loss": 0.0002, "step": 4931 }, { "epoch": 0.65, "learning_rate": 2.8945856959289373e-05, "loss": 16.4953, "step": 4932 }, { "epoch": 0.65, "learning_rate": 2.8926524538840677e-05, "loss": 18.7911, "step": 4933 }, { "epoch": 0.65, "learning_rate": 2.890719594856851e-05, "loss": 0.0005, "step": 4934 }, { "epoch": 0.65, "learning_rate": 2.8887871191985925e-05, "loss": 16.7346, "step": 4935 }, { "epoch": 0.65, "learning_rate": 2.8868550272605255e-05, "loss": 18.6097, "step": 4936 }, { "epoch": 0.65, "learning_rate": 2.8849233193938146e-05, "loss": 18.214, "step": 4937 }, { "epoch": 0.65, "learning_rate": 2.8829919959495543e-05, "loss": 0.0001, "step": 4938 }, { "epoch": 0.65, "learning_rate": 2.881061057278769e-05, "loss": 0.0, "step": 4939 }, { "epoch": 0.65, "learning_rate": 2.8791305037324134e-05, "loss": 18.4022, "step": 4940 }, { "epoch": 0.65, "learning_rate": 2.8772003356613725e-05, "loss": 16.3585, "step": 4941 }, { "epoch": 0.65, "learning_rate": 2.8752705534164608e-05, "loss": 0.0, "step": 4942 }, { "epoch": 0.65, "learning_rate": 2.8733411573484238e-05, "loss": 17.6909, "step": 4943 }, { "epoch": 0.65, "learning_rate": 2.871412147807932e-05, "loss": 18.0385, "step": 4944 }, { "epoch": 0.65, "learning_rate": 2.8694835251455948e-05, "loss": 15.6983, "step": 4945 }, { "epoch": 0.65, "learning_rate": 2.867555289711944e-05, "loss": 16.2641, "step": 4946 }, { "epoch": 0.65, "learning_rate": 2.8656274418574424e-05, "loss": 17.9253, "step": 4947 }, { "epoch": 0.65, "learning_rate": 2.863699981932484e-05, "loss": 16.0351, "step": 4948 }, { "epoch": 0.65, "learning_rate": 2.8617729102873898e-05, "loss": 18.0089, "step": 4949 }, { "epoch": 0.65, "learning_rate": 2.8598462272724124e-05, "loss": 16.4063, "step": 4950 }, { "epoch": 0.65, "learning_rate": 2.8579199332377338e-05, "loss": 0.0, "step": 4951 }, { "epoch": 0.65, "learning_rate": 2.855994028533464e-05, "loss": 16.7832, "step": 4952 }, { "epoch": 0.65, "learning_rate": 2.85406851350964e-05, "loss": 16.9597, "step": 4953 }, { "epoch": 0.65, "learning_rate": 2.8521433885162357e-05, "loss": 17.4916, "step": 4954 }, { "epoch": 0.65, "learning_rate": 2.8502186539031474e-05, "loss": 16.5552, "step": 4955 }, { "epoch": 0.65, "learning_rate": 2.8482943100202008e-05, "loss": 18.0601, "step": 4956 }, { "epoch": 0.65, "learning_rate": 2.846370357217153e-05, "loss": 0.0, "step": 4957 }, { "epoch": 0.65, "learning_rate": 2.8444467958436894e-05, "loss": 17.8471, "step": 4958 }, { "epoch": 0.65, "learning_rate": 2.842523626249424e-05, "loss": 17.5565, "step": 4959 }, { "epoch": 0.65, "learning_rate": 2.8406008487838987e-05, "loss": 17.8205, "step": 4960 }, { "epoch": 0.65, "learning_rate": 2.8386784637965826e-05, "loss": 17.1406, "step": 4961 }, { "epoch": 0.65, "learning_rate": 2.8367564716368805e-05, "loss": 0.0001, "step": 4962 }, { "epoch": 0.65, "learning_rate": 2.8348348726541185e-05, "loss": 15.8471, "step": 4963 }, { "epoch": 0.65, "learning_rate": 2.8329136671975555e-05, "loss": 17.8995, "step": 4964 }, { "epoch": 0.65, "learning_rate": 2.8309928556163745e-05, "loss": 17.2892, "step": 4965 }, { "epoch": 0.65, "learning_rate": 2.8290724382596924e-05, "loss": 18.3814, "step": 4966 }, { "epoch": 0.65, "learning_rate": 2.8271524154765494e-05, "loss": 16.1798, "step": 4967 }, { "epoch": 0.65, "learning_rate": 2.825232787615917e-05, "loss": 0.0, "step": 4968 }, { "epoch": 0.65, "learning_rate": 2.823313555026692e-05, "loss": 16.7732, "step": 4969 }, { "epoch": 0.65, "learning_rate": 2.8213947180577066e-05, "loss": 15.8341, "step": 4970 }, { "epoch": 0.65, "learning_rate": 2.8194762770577127e-05, "loss": 16.2008, "step": 4971 }, { "epoch": 0.65, "learning_rate": 2.8175582323753942e-05, "loss": 17.7393, "step": 4972 }, { "epoch": 0.65, "learning_rate": 2.8156405843593618e-05, "loss": 16.8665, "step": 4973 }, { "epoch": 0.65, "learning_rate": 2.8137233333581554e-05, "loss": 16.8887, "step": 4974 }, { "epoch": 0.65, "learning_rate": 2.8118064797202415e-05, "loss": 16.5082, "step": 4975 }, { "epoch": 0.65, "learning_rate": 2.809890023794014e-05, "loss": 16.3302, "step": 4976 }, { "epoch": 0.66, "learning_rate": 2.807973965927797e-05, "loss": 17.3438, "step": 4977 }, { "epoch": 0.66, "learning_rate": 2.8060583064698376e-05, "loss": 17.3624, "step": 4978 }, { "epoch": 0.66, "learning_rate": 2.804143045768317e-05, "loss": 0.0, "step": 4979 }, { "epoch": 0.66, "learning_rate": 2.8022281841713403e-05, "loss": 16.429, "step": 4980 }, { "epoch": 0.66, "learning_rate": 2.800313722026938e-05, "loss": 17.3494, "step": 4981 }, { "epoch": 0.66, "learning_rate": 2.7983996596830715e-05, "loss": 18.5078, "step": 4982 }, { "epoch": 0.66, "learning_rate": 2.796485997487628e-05, "loss": 0.0, "step": 4983 }, { "epoch": 0.66, "learning_rate": 2.794572735788421e-05, "loss": 17.9388, "step": 4984 }, { "epoch": 0.66, "learning_rate": 2.7926598749331945e-05, "loss": 16.5496, "step": 4985 }, { "epoch": 0.66, "learning_rate": 2.7907474152696133e-05, "loss": 17.2838, "step": 4986 }, { "epoch": 0.66, "learning_rate": 2.7888353571452797e-05, "loss": 17.477, "step": 4987 }, { "epoch": 0.66, "learning_rate": 2.7869237009077132e-05, "loss": 17.0408, "step": 4988 }, { "epoch": 0.66, "learning_rate": 2.785012446904365e-05, "loss": 17.5227, "step": 4989 }, { "epoch": 0.66, "learning_rate": 2.7831015954826102e-05, "loss": 17.1928, "step": 4990 }, { "epoch": 0.66, "learning_rate": 2.781191146989755e-05, "loss": 15.1029, "step": 4991 }, { "epoch": 0.66, "learning_rate": 2.7792811017730286e-05, "loss": 18.7881, "step": 4992 }, { "epoch": 0.66, "learning_rate": 2.7773714601795885e-05, "loss": 0.0001, "step": 4993 }, { "epoch": 0.66, "learning_rate": 2.7754622225565162e-05, "loss": 16.104, "step": 4994 }, { "epoch": 0.66, "learning_rate": 2.7735533892508282e-05, "loss": 18.213, "step": 4995 }, { "epoch": 0.66, "learning_rate": 2.7716449606094563e-05, "loss": 0.0001, "step": 4996 }, { "epoch": 0.66, "learning_rate": 2.769736936979267e-05, "loss": 16.7204, "step": 4997 }, { "epoch": 0.66, "learning_rate": 2.7678293187070482e-05, "loss": 0.0, "step": 4998 }, { "epoch": 0.66, "learning_rate": 2.765922106139516e-05, "loss": 16.6514, "step": 4999 }, { "epoch": 0.66, "learning_rate": 2.7640152996233144e-05, "loss": 17.1692, "step": 5000 }, { "epoch": 0.66, "learning_rate": 2.7621088995050104e-05, "loss": 15.9961, "step": 5001 }, { "epoch": 0.66, "learning_rate": 2.7602029061311003e-05, "loss": 17.9924, "step": 5002 }, { "epoch": 0.66, "learning_rate": 2.7582973198480016e-05, "loss": 17.384, "step": 5003 }, { "epoch": 0.66, "learning_rate": 2.756392141002066e-05, "loss": 15.9833, "step": 5004 }, { "epoch": 0.66, "learning_rate": 2.7544873699395634e-05, "loss": 0.0, "step": 5005 }, { "epoch": 0.66, "learning_rate": 2.7525830070066937e-05, "loss": 17.466, "step": 5006 }, { "epoch": 0.66, "learning_rate": 2.7506790525495807e-05, "loss": 16.7855, "step": 5007 }, { "epoch": 0.66, "learning_rate": 2.7487755069142752e-05, "loss": 17.857, "step": 5008 }, { "epoch": 0.66, "learning_rate": 2.746872370446752e-05, "loss": 15.8939, "step": 5009 }, { "epoch": 0.66, "learning_rate": 2.7449696434929133e-05, "loss": 17.8919, "step": 5010 }, { "epoch": 0.66, "learning_rate": 2.7430673263985852e-05, "loss": 18.6343, "step": 5011 }, { "epoch": 0.66, "learning_rate": 2.7411654195095226e-05, "loss": 16.4672, "step": 5012 }, { "epoch": 0.66, "learning_rate": 2.7392639231714024e-05, "loss": 17.8204, "step": 5013 }, { "epoch": 0.66, "learning_rate": 2.737362837729829e-05, "loss": 16.0593, "step": 5014 }, { "epoch": 0.66, "learning_rate": 2.73546216353033e-05, "loss": 18.9174, "step": 5015 }, { "epoch": 0.66, "learning_rate": 2.73356190091836e-05, "loss": 0.0, "step": 5016 }, { "epoch": 0.66, "learning_rate": 2.7316620502392975e-05, "loss": 0.0002, "step": 5017 }, { "epoch": 0.66, "learning_rate": 2.7297626118384485e-05, "loss": 15.2568, "step": 5018 }, { "epoch": 0.66, "learning_rate": 2.727863586061039e-05, "loss": 16.8341, "step": 5019 }, { "epoch": 0.66, "learning_rate": 2.725964973252228e-05, "loss": 16.2757, "step": 5020 }, { "epoch": 0.66, "learning_rate": 2.7240667737570925e-05, "loss": 16.6417, "step": 5021 }, { "epoch": 0.66, "learning_rate": 2.722168987920638e-05, "loss": 16.8944, "step": 5022 }, { "epoch": 0.66, "learning_rate": 2.7202716160877916e-05, "loss": 18.19, "step": 5023 }, { "epoch": 0.66, "learning_rate": 2.7183746586034088e-05, "loss": 17.1838, "step": 5024 }, { "epoch": 0.66, "learning_rate": 2.7164781158122677e-05, "loss": 17.0017, "step": 5025 }, { "epoch": 0.66, "learning_rate": 2.714581988059071e-05, "loss": 16.1423, "step": 5026 }, { "epoch": 0.66, "learning_rate": 2.7126862756884476e-05, "loss": 16.7326, "step": 5027 }, { "epoch": 0.66, "learning_rate": 2.710790979044946e-05, "loss": 16.4248, "step": 5028 }, { "epoch": 0.66, "learning_rate": 2.7088960984730477e-05, "loss": 17.6583, "step": 5029 }, { "epoch": 0.66, "learning_rate": 2.7070016343171523e-05, "loss": 16.4611, "step": 5030 }, { "epoch": 0.66, "learning_rate": 2.7051075869215847e-05, "loss": 16.6111, "step": 5031 }, { "epoch": 0.66, "learning_rate": 2.703213956630595e-05, "loss": 16.2055, "step": 5032 }, { "epoch": 0.66, "learning_rate": 2.701320743788355e-05, "loss": 15.925, "step": 5033 }, { "epoch": 0.66, "learning_rate": 2.6994279487389652e-05, "loss": 0.0002, "step": 5034 }, { "epoch": 0.66, "learning_rate": 2.6975355718264467e-05, "loss": 16.4583, "step": 5035 }, { "epoch": 0.66, "learning_rate": 2.6956436133947432e-05, "loss": 0.0001, "step": 5036 }, { "epoch": 0.66, "learning_rate": 2.6937520737877287e-05, "loss": 16.6892, "step": 5037 }, { "epoch": 0.66, "learning_rate": 2.6918609533491957e-05, "loss": 0.0, "step": 5038 }, { "epoch": 0.66, "learning_rate": 2.6899702524228604e-05, "loss": 18.0743, "step": 5039 }, { "epoch": 0.66, "learning_rate": 2.6880799713523654e-05, "loss": 16.5827, "step": 5040 }, { "epoch": 0.66, "learning_rate": 2.6861901104812752e-05, "loss": 0.0, "step": 5041 }, { "epoch": 0.66, "learning_rate": 2.6843006701530793e-05, "loss": 18.4274, "step": 5042 }, { "epoch": 0.66, "learning_rate": 2.682411650711189e-05, "loss": 17.3356, "step": 5043 }, { "epoch": 0.66, "learning_rate": 2.680523052498939e-05, "loss": 18.8894, "step": 5044 }, { "epoch": 0.66, "learning_rate": 2.6786348758595914e-05, "loss": 16.7842, "step": 5045 }, { "epoch": 0.66, "learning_rate": 2.676747121136327e-05, "loss": 0.0, "step": 5046 }, { "epoch": 0.66, "learning_rate": 2.6748597886722528e-05, "loss": 0.0, "step": 5047 }, { "epoch": 0.66, "learning_rate": 2.672972878810397e-05, "loss": 15.8262, "step": 5048 }, { "epoch": 0.66, "learning_rate": 2.6710863918937124e-05, "loss": 0.0002, "step": 5049 }, { "epoch": 0.66, "learning_rate": 2.669200328265074e-05, "loss": 16.6122, "step": 5050 }, { "epoch": 0.66, "learning_rate": 2.6673146882672807e-05, "loss": 0.0, "step": 5051 }, { "epoch": 0.66, "learning_rate": 2.665429472243054e-05, "loss": 16.9908, "step": 5052 }, { "epoch": 0.67, "learning_rate": 2.6635446805350363e-05, "loss": 0.0001, "step": 5053 }, { "epoch": 0.67, "learning_rate": 2.6616603134857986e-05, "loss": 17.1169, "step": 5054 }, { "epoch": 0.67, "learning_rate": 2.6597763714378304e-05, "loss": 18.3861, "step": 5055 }, { "epoch": 0.67, "learning_rate": 2.657892854733543e-05, "loss": 17.5216, "step": 5056 }, { "epoch": 0.67, "learning_rate": 2.656009763715273e-05, "loss": 15.9876, "step": 5057 }, { "epoch": 0.67, "learning_rate": 2.6541270987252788e-05, "loss": 16.6457, "step": 5058 }, { "epoch": 0.67, "learning_rate": 2.65224486010574e-05, "loss": 0.0, "step": 5059 }, { "epoch": 0.67, "learning_rate": 2.6503630481987605e-05, "loss": 18.0414, "step": 5060 }, { "epoch": 0.67, "learning_rate": 2.6484816633463654e-05, "loss": 17.6035, "step": 5061 }, { "epoch": 0.67, "learning_rate": 2.6466007058905053e-05, "loss": 0.0001, "step": 5062 }, { "epoch": 0.67, "learning_rate": 2.6447201761730495e-05, "loss": 16.4465, "step": 5063 }, { "epoch": 0.67, "learning_rate": 2.64284007453579e-05, "loss": 15.977, "step": 5064 }, { "epoch": 0.67, "learning_rate": 2.6409604013204427e-05, "loss": 16.931, "step": 5065 }, { "epoch": 0.67, "learning_rate": 2.6390811568686437e-05, "loss": 16.9654, "step": 5066 }, { "epoch": 0.67, "learning_rate": 2.6372023415219526e-05, "loss": 0.0, "step": 5067 }, { "epoch": 0.67, "learning_rate": 2.63532395562185e-05, "loss": 0.0, "step": 5068 }, { "epoch": 0.67, "learning_rate": 2.6334459995097384e-05, "loss": 16.8606, "step": 5069 }, { "epoch": 0.67, "learning_rate": 2.6315684735269453e-05, "loss": 0.0, "step": 5070 }, { "epoch": 0.67, "learning_rate": 2.6296913780147158e-05, "loss": 17.1086, "step": 5071 }, { "epoch": 0.67, "learning_rate": 2.6278147133142195e-05, "loss": 16.7104, "step": 5072 }, { "epoch": 0.67, "learning_rate": 2.625938479766547e-05, "loss": 0.0, "step": 5073 }, { "epoch": 0.67, "learning_rate": 2.6240626777127076e-05, "loss": 16.9561, "step": 5074 }, { "epoch": 0.67, "learning_rate": 2.6221873074936354e-05, "loss": 16.4005, "step": 5075 }, { "epoch": 0.67, "learning_rate": 2.6203123694501852e-05, "loss": 17.5902, "step": 5076 }, { "epoch": 0.67, "learning_rate": 2.6184378639231357e-05, "loss": 16.9214, "step": 5077 }, { "epoch": 0.67, "learning_rate": 2.6165637912531837e-05, "loss": 17.8973, "step": 5078 }, { "epoch": 0.67, "learning_rate": 2.6146901517809487e-05, "loss": 16.9281, "step": 5079 }, { "epoch": 0.67, "learning_rate": 2.61281694584697e-05, "loss": 15.0509, "step": 5080 }, { "epoch": 0.67, "learning_rate": 2.610944173791709e-05, "loss": 18.7954, "step": 5081 }, { "epoch": 0.67, "learning_rate": 2.6090718359555498e-05, "loss": 17.4373, "step": 5082 }, { "epoch": 0.67, "learning_rate": 2.6071999326787955e-05, "loss": 0.0, "step": 5083 }, { "epoch": 0.67, "learning_rate": 2.605328464301669e-05, "loss": 17.8434, "step": 5084 }, { "epoch": 0.67, "learning_rate": 2.60345743116432e-05, "loss": 17.6922, "step": 5085 }, { "epoch": 0.67, "learning_rate": 2.601586833606814e-05, "loss": 16.6326, "step": 5086 }, { "epoch": 0.67, "learning_rate": 2.599716671969137e-05, "loss": 0.0001, "step": 5087 }, { "epoch": 0.67, "learning_rate": 2.597846946591198e-05, "loss": 0.0, "step": 5088 }, { "epoch": 0.67, "learning_rate": 2.595977657812827e-05, "loss": 0.0, "step": 5089 }, { "epoch": 0.67, "learning_rate": 2.594108805973773e-05, "loss": 16.1715, "step": 5090 }, { "epoch": 0.67, "learning_rate": 2.592240391413705e-05, "loss": 16.1392, "step": 5091 }, { "epoch": 0.67, "learning_rate": 2.590372414472215e-05, "loss": 16.7722, "step": 5092 }, { "epoch": 0.67, "learning_rate": 2.588504875488813e-05, "loss": 0.0001, "step": 5093 }, { "epoch": 0.67, "learning_rate": 2.586637774802933e-05, "loss": 0.0001, "step": 5094 }, { "epoch": 0.67, "learning_rate": 2.5847711127539255e-05, "loss": 0.0, "step": 5095 }, { "epoch": 0.67, "learning_rate": 2.582904889681063e-05, "loss": 0.0, "step": 5096 }, { "epoch": 0.67, "learning_rate": 2.5810391059235384e-05, "loss": 18.4802, "step": 5097 }, { "epoch": 0.67, "learning_rate": 2.5791737618204632e-05, "loss": 18.1144, "step": 5098 }, { "epoch": 0.67, "learning_rate": 2.5773088577108707e-05, "loss": 18.0633, "step": 5099 }, { "epoch": 0.67, "learning_rate": 2.5754443939337136e-05, "loss": 17.6605, "step": 5100 }, { "epoch": 0.67, "learning_rate": 2.5735803708278627e-05, "loss": 16.5738, "step": 5101 }, { "epoch": 0.67, "learning_rate": 2.5717167887321136e-05, "loss": 17.1928, "step": 5102 }, { "epoch": 0.67, "learning_rate": 2.5698536479851788e-05, "loss": 19.0108, "step": 5103 }, { "epoch": 0.67, "learning_rate": 2.5679909489256882e-05, "loss": 17.992, "step": 5104 }, { "epoch": 0.67, "learning_rate": 2.5661286918921958e-05, "loss": 0.0003, "step": 5105 }, { "epoch": 0.67, "learning_rate": 2.5642668772231722e-05, "loss": 17.9831, "step": 5106 }, { "epoch": 0.67, "learning_rate": 2.5624055052570085e-05, "loss": 16.1746, "step": 5107 }, { "epoch": 0.67, "learning_rate": 2.5605445763320153e-05, "loss": 17.8325, "step": 5108 }, { "epoch": 0.67, "learning_rate": 2.5586840907864217e-05, "loss": 15.9873, "step": 5109 }, { "epoch": 0.67, "learning_rate": 2.5568240489583807e-05, "loss": 0.0002, "step": 5110 }, { "epoch": 0.67, "learning_rate": 2.5549644511859595e-05, "loss": 17.1182, "step": 5111 }, { "epoch": 0.67, "learning_rate": 2.5531052978071457e-05, "loss": 17.9633, "step": 5112 }, { "epoch": 0.67, "learning_rate": 2.5512465891598476e-05, "loss": 16.2837, "step": 5113 }, { "epoch": 0.67, "learning_rate": 2.549388325581892e-05, "loss": 16.8013, "step": 5114 }, { "epoch": 0.67, "learning_rate": 2.547530507411024e-05, "loss": 0.0001, "step": 5115 }, { "epoch": 0.67, "learning_rate": 2.5456731349849088e-05, "loss": 18.1704, "step": 5116 }, { "epoch": 0.67, "learning_rate": 2.5438162086411305e-05, "loss": 17.5224, "step": 5117 }, { "epoch": 0.67, "learning_rate": 2.5419597287171892e-05, "loss": 17.0563, "step": 5118 }, { "epoch": 0.67, "learning_rate": 2.5401036955505113e-05, "loss": 18.3021, "step": 5119 }, { "epoch": 0.67, "learning_rate": 2.538248109478435e-05, "loss": 15.7665, "step": 5120 }, { "epoch": 0.67, "learning_rate": 2.5363929708382184e-05, "loss": 0.0002, "step": 5121 }, { "epoch": 0.67, "learning_rate": 2.5345382799670402e-05, "loss": 17.1391, "step": 5122 }, { "epoch": 0.67, "learning_rate": 2.5326840372019973e-05, "loss": 17.9613, "step": 5123 }, { "epoch": 0.67, "learning_rate": 2.5308302428801033e-05, "loss": 15.7069, "step": 5124 }, { "epoch": 0.67, "learning_rate": 2.5289768973382933e-05, "loss": 15.4434, "step": 5125 }, { "epoch": 0.67, "learning_rate": 2.527124000913417e-05, "loss": 16.0922, "step": 5126 }, { "epoch": 0.67, "learning_rate": 2.5252715539422467e-05, "loss": 18.0687, "step": 5127 }, { "epoch": 0.68, "learning_rate": 2.523419556761471e-05, "loss": 16.8168, "step": 5128 }, { "epoch": 0.68, "learning_rate": 2.5215680097076967e-05, "loss": 16.8475, "step": 5129 }, { "epoch": 0.68, "learning_rate": 2.5197169131174475e-05, "loss": 17.9066, "step": 5130 }, { "epoch": 0.68, "learning_rate": 2.5178662673271676e-05, "loss": 0.0001, "step": 5131 }, { "epoch": 0.68, "learning_rate": 2.5160160726732186e-05, "loss": 16.4972, "step": 5132 }, { "epoch": 0.68, "learning_rate": 2.5141663294918782e-05, "loss": 18.5778, "step": 5133 }, { "epoch": 0.68, "learning_rate": 2.5123170381193427e-05, "loss": 0.0015, "step": 5134 }, { "epoch": 0.68, "learning_rate": 2.510468198891731e-05, "loss": 0.0, "step": 5135 }, { "epoch": 0.68, "learning_rate": 2.5086198121450743e-05, "loss": 19.6493, "step": 5136 }, { "epoch": 0.68, "learning_rate": 2.5067718782153232e-05, "loss": 16.372, "step": 5137 }, { "epoch": 0.68, "learning_rate": 2.504924397438345e-05, "loss": 16.7356, "step": 5138 }, { "epoch": 0.68, "learning_rate": 2.5030773701499267e-05, "loss": 0.0, "step": 5139 }, { "epoch": 0.68, "learning_rate": 2.5012307966857718e-05, "loss": 15.5212, "step": 5140 }, { "epoch": 0.68, "learning_rate": 2.4993846773815006e-05, "loss": 16.0506, "step": 5141 }, { "epoch": 0.68, "learning_rate": 2.4975390125726528e-05, "loss": 17.6613, "step": 5142 }, { "epoch": 0.68, "learning_rate": 2.4956938025946813e-05, "loss": 17.3359, "step": 5143 }, { "epoch": 0.68, "learning_rate": 2.4938490477829633e-05, "loss": 16.6531, "step": 5144 }, { "epoch": 0.68, "learning_rate": 2.4920047484727883e-05, "loss": 17.2559, "step": 5145 }, { "epoch": 0.68, "learning_rate": 2.4901609049993636e-05, "loss": 16.5756, "step": 5146 }, { "epoch": 0.68, "learning_rate": 2.4883175176978136e-05, "loss": 17.4821, "step": 5147 }, { "epoch": 0.68, "learning_rate": 2.48647458690318e-05, "loss": 0.0, "step": 5148 }, { "epoch": 0.68, "learning_rate": 2.4846321129504237e-05, "loss": 16.2585, "step": 5149 }, { "epoch": 0.68, "learning_rate": 2.4827900961744187e-05, "loss": 17.9916, "step": 5150 }, { "epoch": 0.68, "learning_rate": 2.4809485369099566e-05, "loss": 16.9508, "step": 5151 }, { "epoch": 0.68, "learning_rate": 2.4791074354917514e-05, "loss": 18.0965, "step": 5152 }, { "epoch": 0.68, "learning_rate": 2.4772667922544268e-05, "loss": 0.001, "step": 5153 }, { "epoch": 0.68, "learning_rate": 2.4754266075325266e-05, "loss": 0.0002, "step": 5154 }, { "epoch": 0.68, "learning_rate": 2.4735868816605112e-05, "loss": 17.3122, "step": 5155 }, { "epoch": 0.68, "learning_rate": 2.4717476149727557e-05, "loss": 17.4051, "step": 5156 }, { "epoch": 0.68, "learning_rate": 2.469908807803555e-05, "loss": 16.6275, "step": 5157 }, { "epoch": 0.68, "learning_rate": 2.4680704604871168e-05, "loss": 18.4137, "step": 5158 }, { "epoch": 0.68, "learning_rate": 2.466232573357566e-05, "loss": 17.1257, "step": 5159 }, { "epoch": 0.68, "learning_rate": 2.4643951467489483e-05, "loss": 16.1729, "step": 5160 }, { "epoch": 0.68, "learning_rate": 2.4625581809952215e-05, "loss": 0.0002, "step": 5161 }, { "epoch": 0.68, "learning_rate": 2.4607216764302593e-05, "loss": 15.7396, "step": 5162 }, { "epoch": 0.68, "learning_rate": 2.4588856333878528e-05, "loss": 15.6211, "step": 5163 }, { "epoch": 0.68, "learning_rate": 2.4570500522017093e-05, "loss": 15.9437, "step": 5164 }, { "epoch": 0.68, "learning_rate": 2.4552149332054526e-05, "loss": 0.0001, "step": 5165 }, { "epoch": 0.68, "learning_rate": 2.453380276732621e-05, "loss": 16.6515, "step": 5166 }, { "epoch": 0.68, "learning_rate": 2.45154608311667e-05, "loss": 0.0, "step": 5167 }, { "epoch": 0.68, "learning_rate": 2.449712352690968e-05, "loss": 0.0001, "step": 5168 }, { "epoch": 0.68, "learning_rate": 2.4478790857888074e-05, "loss": 17.1783, "step": 5169 }, { "epoch": 0.68, "learning_rate": 2.4460462827433872e-05, "loss": 15.2955, "step": 5170 }, { "epoch": 0.68, "learning_rate": 2.4442139438878257e-05, "loss": 0.0001, "step": 5171 }, { "epoch": 0.68, "learning_rate": 2.442382069555158e-05, "loss": 16.9051, "step": 5172 }, { "epoch": 0.68, "learning_rate": 2.4405506600783324e-05, "loss": 16.3793, "step": 5173 }, { "epoch": 0.68, "learning_rate": 2.438719715790214e-05, "loss": 0.0013, "step": 5174 }, { "epoch": 0.68, "learning_rate": 2.436889237023584e-05, "loss": 17.1578, "step": 5175 }, { "epoch": 0.68, "learning_rate": 2.435059224111136e-05, "loss": 17.6539, "step": 5176 }, { "epoch": 0.68, "learning_rate": 2.433229677385484e-05, "loss": 15.491, "step": 5177 }, { "epoch": 0.68, "learning_rate": 2.431400597179153e-05, "loss": 16.3919, "step": 5178 }, { "epoch": 0.68, "learning_rate": 2.4295719838245855e-05, "loss": 17.6689, "step": 5179 }, { "epoch": 0.68, "learning_rate": 2.427743837654137e-05, "loss": 17.8712, "step": 5180 }, { "epoch": 0.68, "learning_rate": 2.4259161590000806e-05, "loss": 17.0544, "step": 5181 }, { "epoch": 0.68, "learning_rate": 2.4240889481946023e-05, "loss": 16.0757, "step": 5182 }, { "epoch": 0.68, "learning_rate": 2.4222622055698035e-05, "loss": 16.2421, "step": 5183 }, { "epoch": 0.68, "learning_rate": 2.4204359314577e-05, "loss": 17.119, "step": 5184 }, { "epoch": 0.68, "learning_rate": 2.4186101261902267e-05, "loss": 17.8067, "step": 5185 }, { "epoch": 0.68, "learning_rate": 2.4167847900992284e-05, "loss": 16.3642, "step": 5186 }, { "epoch": 0.68, "learning_rate": 2.414959923516466e-05, "loss": 16.1656, "step": 5187 }, { "epoch": 0.68, "learning_rate": 2.4131355267736155e-05, "loss": 15.8847, "step": 5188 }, { "epoch": 0.68, "learning_rate": 2.411311600202267e-05, "loss": 16.3512, "step": 5189 }, { "epoch": 0.68, "learning_rate": 2.409488144133925e-05, "loss": 17.6846, "step": 5190 }, { "epoch": 0.68, "learning_rate": 2.40766515890001e-05, "loss": 16.7351, "step": 5191 }, { "epoch": 0.68, "learning_rate": 2.4058426448318548e-05, "loss": 17.1137, "step": 5192 }, { "epoch": 0.68, "learning_rate": 2.4040206022607066e-05, "loss": 17.5795, "step": 5193 }, { "epoch": 0.68, "learning_rate": 2.402199031517731e-05, "loss": 16.5169, "step": 5194 }, { "epoch": 0.68, "learning_rate": 2.400377932934003e-05, "loss": 17.7175, "step": 5195 }, { "epoch": 0.68, "learning_rate": 2.3985573068405136e-05, "loss": 17.7033, "step": 5196 }, { "epoch": 0.68, "learning_rate": 2.3967371535681675e-05, "loss": 16.7558, "step": 5197 }, { "epoch": 0.68, "learning_rate": 2.3949174734477847e-05, "loss": 0.0, "step": 5198 }, { "epoch": 0.68, "learning_rate": 2.393098266810097e-05, "loss": 0.0001, "step": 5199 }, { "epoch": 0.68, "learning_rate": 2.391279533985753e-05, "loss": 16.4458, "step": 5200 }, { "epoch": 0.68, "learning_rate": 2.3894612753053103e-05, "loss": 15.6618, "step": 5201 }, { "epoch": 0.68, "learning_rate": 2.3876434910992483e-05, "loss": 16.5218, "step": 5202 }, { "epoch": 0.68, "learning_rate": 2.385826181697954e-05, "loss": 17.3267, "step": 5203 }, { "epoch": 0.69, "learning_rate": 2.384009347431728e-05, "loss": 0.0, "step": 5204 }, { "epoch": 0.69, "learning_rate": 2.3821929886307875e-05, "loss": 16.7704, "step": 5205 }, { "epoch": 0.69, "learning_rate": 2.380377105625262e-05, "loss": 0.0001, "step": 5206 }, { "epoch": 0.69, "learning_rate": 2.3785616987451937e-05, "loss": 0.0001, "step": 5207 }, { "epoch": 0.69, "learning_rate": 2.3767467683205397e-05, "loss": 17.0956, "step": 5208 }, { "epoch": 0.69, "learning_rate": 2.3749323146811697e-05, "loss": 0.0, "step": 5209 }, { "epoch": 0.69, "learning_rate": 2.3731183381568665e-05, "loss": 16.8126, "step": 5210 }, { "epoch": 0.69, "learning_rate": 2.3713048390773273e-05, "loss": 18.2808, "step": 5211 }, { "epoch": 0.69, "learning_rate": 2.369491817772161e-05, "loss": 15.7085, "step": 5212 }, { "epoch": 0.69, "learning_rate": 2.367679274570891e-05, "loss": 16.9805, "step": 5213 }, { "epoch": 0.69, "learning_rate": 2.3658672098029523e-05, "loss": 0.0006, "step": 5214 }, { "epoch": 0.69, "learning_rate": 2.364055623797695e-05, "loss": 17.8737, "step": 5215 }, { "epoch": 0.69, "learning_rate": 2.3622445168843793e-05, "loss": 17.9793, "step": 5216 }, { "epoch": 0.69, "learning_rate": 2.360433889392183e-05, "loss": 0.0001, "step": 5217 }, { "epoch": 0.69, "learning_rate": 2.3586237416501922e-05, "loss": 16.0441, "step": 5218 }, { "epoch": 0.69, "learning_rate": 2.356814073987408e-05, "loss": 0.0, "step": 5219 }, { "epoch": 0.69, "learning_rate": 2.3550048867327428e-05, "loss": 16.122, "step": 5220 }, { "epoch": 0.69, "learning_rate": 2.3531961802150238e-05, "loss": 0.0006, "step": 5221 }, { "epoch": 0.69, "learning_rate": 2.3513879547629885e-05, "loss": 16.2274, "step": 5222 }, { "epoch": 0.69, "learning_rate": 2.3495802107052888e-05, "loss": 17.6745, "step": 5223 }, { "epoch": 0.69, "learning_rate": 2.347772948370486e-05, "loss": 16.687, "step": 5224 }, { "epoch": 0.69, "learning_rate": 2.3459661680870604e-05, "loss": 16.5189, "step": 5225 }, { "epoch": 0.69, "learning_rate": 2.3441598701833984e-05, "loss": 15.2784, "step": 5226 }, { "epoch": 0.69, "learning_rate": 2.342354054987801e-05, "loss": 17.246, "step": 5227 }, { "epoch": 0.69, "learning_rate": 2.3405487228284818e-05, "loss": 0.0, "step": 5228 }, { "epoch": 0.69, "learning_rate": 2.338743874033565e-05, "loss": 0.0, "step": 5229 }, { "epoch": 0.69, "learning_rate": 2.3369395089310886e-05, "loss": 18.8943, "step": 5230 }, { "epoch": 0.69, "learning_rate": 2.335135627849002e-05, "loss": 16.8454, "step": 5231 }, { "epoch": 0.69, "learning_rate": 2.3333322311151657e-05, "loss": 0.0, "step": 5232 }, { "epoch": 0.69, "learning_rate": 2.331529319057356e-05, "loss": 16.527, "step": 5233 }, { "epoch": 0.69, "learning_rate": 2.329726892003257e-05, "loss": 0.0, "step": 5234 }, { "epoch": 0.69, "learning_rate": 2.3279249502804656e-05, "loss": 17.8088, "step": 5235 }, { "epoch": 0.69, "learning_rate": 2.3261234942164918e-05, "loss": 18.4729, "step": 5236 }, { "epoch": 0.69, "learning_rate": 2.3243225241387557e-05, "loss": 17.7818, "step": 5237 }, { "epoch": 0.69, "learning_rate": 2.3225220403745895e-05, "loss": 0.0008, "step": 5238 }, { "epoch": 0.69, "learning_rate": 2.320722043251238e-05, "loss": 15.487, "step": 5239 }, { "epoch": 0.69, "learning_rate": 2.3189225330958564e-05, "loss": 17.5206, "step": 5240 }, { "epoch": 0.69, "learning_rate": 2.3171235102355104e-05, "loss": 16.6481, "step": 5241 }, { "epoch": 0.69, "learning_rate": 2.3153249749971823e-05, "loss": 16.7847, "step": 5242 }, { "epoch": 0.69, "learning_rate": 2.3135269277077593e-05, "loss": 0.0, "step": 5243 }, { "epoch": 0.69, "learning_rate": 2.3117293686940438e-05, "loss": 15.6827, "step": 5244 }, { "epoch": 0.69, "learning_rate": 2.3099322982827475e-05, "loss": 0.0, "step": 5245 }, { "epoch": 0.69, "learning_rate": 2.3081357168004948e-05, "loss": 18.3155, "step": 5246 }, { "epoch": 0.69, "learning_rate": 2.30633962457382e-05, "loss": 0.0001, "step": 5247 }, { "epoch": 0.69, "learning_rate": 2.3045440219291698e-05, "loss": 17.6286, "step": 5248 }, { "epoch": 0.69, "learning_rate": 2.3027489091928983e-05, "loss": 16.8818, "step": 5249 }, { "epoch": 0.69, "learning_rate": 2.3009542866912775e-05, "loss": 16.9773, "step": 5250 }, { "epoch": 0.69, "learning_rate": 2.299160154750485e-05, "loss": 0.0, "step": 5251 }, { "epoch": 0.69, "learning_rate": 2.2973665136966092e-05, "loss": 16.7481, "step": 5252 }, { "epoch": 0.69, "learning_rate": 2.2955733638556514e-05, "loss": 18.7988, "step": 5253 }, { "epoch": 0.69, "learning_rate": 2.2937807055535226e-05, "loss": 15.7116, "step": 5254 }, { "epoch": 0.69, "learning_rate": 2.2919885391160445e-05, "loss": 16.9768, "step": 5255 }, { "epoch": 0.69, "learning_rate": 2.2901968648689498e-05, "loss": 16.0084, "step": 5256 }, { "epoch": 0.69, "learning_rate": 2.2884056831378792e-05, "loss": 0.0008, "step": 5257 }, { "epoch": 0.69, "learning_rate": 2.2866149942483896e-05, "loss": 16.3341, "step": 5258 }, { "epoch": 0.69, "learning_rate": 2.2848247985259435e-05, "loss": 16.3208, "step": 5259 }, { "epoch": 0.69, "learning_rate": 2.283035096295915e-05, "loss": 15.9822, "step": 5260 }, { "epoch": 0.69, "learning_rate": 2.281245887883589e-05, "loss": 16.9511, "step": 5261 }, { "epoch": 0.69, "learning_rate": 2.2794571736141597e-05, "loss": 0.0, "step": 5262 }, { "epoch": 0.69, "learning_rate": 2.277668953812732e-05, "loss": 16.4151, "step": 5263 }, { "epoch": 0.69, "learning_rate": 2.2758812288043212e-05, "loss": 15.816, "step": 5264 }, { "epoch": 0.69, "learning_rate": 2.274093998913853e-05, "loss": 19.5187, "step": 5265 }, { "epoch": 0.69, "learning_rate": 2.2723072644661597e-05, "loss": 17.7338, "step": 5266 }, { "epoch": 0.69, "learning_rate": 2.270521025785991e-05, "loss": 0.0, "step": 5267 }, { "epoch": 0.69, "learning_rate": 2.2687352831979997e-05, "loss": 17.807, "step": 5268 }, { "epoch": 0.69, "learning_rate": 2.2669500370267504e-05, "loss": 17.2041, "step": 5269 }, { "epoch": 0.69, "learning_rate": 2.2651652875967183e-05, "loss": 17.5258, "step": 5270 }, { "epoch": 0.69, "learning_rate": 2.263381035232287e-05, "loss": 16.5483, "step": 5271 }, { "epoch": 0.69, "learning_rate": 2.2615972802577512e-05, "loss": 16.7049, "step": 5272 }, { "epoch": 0.69, "learning_rate": 2.259814022997314e-05, "loss": 17.0217, "step": 5273 }, { "epoch": 0.69, "learning_rate": 2.2580312637750873e-05, "loss": 0.0005, "step": 5274 }, { "epoch": 0.69, "learning_rate": 2.2562490029150966e-05, "loss": 17.8576, "step": 5275 }, { "epoch": 0.69, "learning_rate": 2.2544672407412732e-05, "loss": 18.1815, "step": 5276 }, { "epoch": 0.69, "learning_rate": 2.2526859775774566e-05, "loss": 0.0001, "step": 5277 }, { "epoch": 0.69, "learning_rate": 2.2509052137473996e-05, "loss": 16.933, "step": 5278 }, { "epoch": 0.69, "learning_rate": 2.2491249495747608e-05, "loss": 0.0, "step": 5279 }, { "epoch": 0.7, "learning_rate": 2.2473451853831102e-05, "loss": 16.4637, "step": 5280 }, { "epoch": 0.7, "learning_rate": 2.2455659214959253e-05, "loss": 15.7864, "step": 5281 }, { "epoch": 0.7, "learning_rate": 2.2437871582365917e-05, "loss": 0.0001, "step": 5282 }, { "epoch": 0.7, "learning_rate": 2.2420088959284098e-05, "loss": 17.0393, "step": 5283 }, { "epoch": 0.7, "learning_rate": 2.2402311348945825e-05, "loss": 0.0, "step": 5284 }, { "epoch": 0.7, "learning_rate": 2.2384538754582245e-05, "loss": 0.0, "step": 5285 }, { "epoch": 0.7, "learning_rate": 2.2366771179423586e-05, "loss": 0.0001, "step": 5286 }, { "epoch": 0.7, "learning_rate": 2.234900862669917e-05, "loss": 18.3126, "step": 5287 }, { "epoch": 0.7, "learning_rate": 2.233125109963739e-05, "loss": 17.4507, "step": 5288 }, { "epoch": 0.7, "learning_rate": 2.2313498601465753e-05, "loss": 18.8208, "step": 5289 }, { "epoch": 0.7, "learning_rate": 2.2295751135410824e-05, "loss": 16.2614, "step": 5290 }, { "epoch": 0.7, "learning_rate": 2.2278008704698257e-05, "loss": 16.2837, "step": 5291 }, { "epoch": 0.7, "learning_rate": 2.2260271312552828e-05, "loss": 0.0, "step": 5292 }, { "epoch": 0.7, "learning_rate": 2.2242538962198355e-05, "loss": 16.0803, "step": 5293 }, { "epoch": 0.7, "learning_rate": 2.222481165685775e-05, "loss": 16.9733, "step": 5294 }, { "epoch": 0.7, "learning_rate": 2.220708939975302e-05, "loss": 17.1648, "step": 5295 }, { "epoch": 0.7, "learning_rate": 2.218937219410524e-05, "loss": 16.4145, "step": 5296 }, { "epoch": 0.7, "learning_rate": 2.2171660043134574e-05, "loss": 17.2101, "step": 5297 }, { "epoch": 0.7, "learning_rate": 2.2153952950060265e-05, "loss": 18.2718, "step": 5298 }, { "epoch": 0.7, "learning_rate": 2.213625091810062e-05, "loss": 16.2667, "step": 5299 }, { "epoch": 0.7, "learning_rate": 2.2118553950473082e-05, "loss": 15.8993, "step": 5300 }, { "epoch": 0.7, "learning_rate": 2.2100862050394117e-05, "loss": 0.0, "step": 5301 }, { "epoch": 0.7, "learning_rate": 2.2083175221079283e-05, "loss": 17.066, "step": 5302 }, { "epoch": 0.7, "learning_rate": 2.2065493465743236e-05, "loss": 16.8006, "step": 5303 }, { "epoch": 0.7, "learning_rate": 2.2047816787599677e-05, "loss": 16.9855, "step": 5304 }, { "epoch": 0.7, "learning_rate": 2.2030145189861423e-05, "loss": 0.0, "step": 5305 }, { "epoch": 0.7, "learning_rate": 2.2012478675740333e-05, "loss": 16.2883, "step": 5306 }, { "epoch": 0.7, "learning_rate": 2.199481724844734e-05, "loss": 16.3949, "step": 5307 }, { "epoch": 0.7, "learning_rate": 2.197716091119252e-05, "loss": 0.0, "step": 5308 }, { "epoch": 0.7, "learning_rate": 2.195950966718493e-05, "loss": 16.5238, "step": 5309 }, { "epoch": 0.7, "learning_rate": 2.1941863519632764e-05, "loss": 17.4604, "step": 5310 }, { "epoch": 0.7, "learning_rate": 2.192422247174326e-05, "loss": 0.0001, "step": 5311 }, { "epoch": 0.7, "learning_rate": 2.1906586526722745e-05, "loss": 16.2084, "step": 5312 }, { "epoch": 0.7, "learning_rate": 2.1888955687776615e-05, "loss": 16.9549, "step": 5313 }, { "epoch": 0.7, "learning_rate": 2.1871329958109322e-05, "loss": 18.2347, "step": 5314 }, { "epoch": 0.7, "learning_rate": 2.185370934092441e-05, "loss": 16.6523, "step": 5315 }, { "epoch": 0.7, "learning_rate": 2.1836093839424477e-05, "loss": 0.0001, "step": 5316 }, { "epoch": 0.7, "learning_rate": 2.181848345681123e-05, "loss": 18.1072, "step": 5317 }, { "epoch": 0.7, "learning_rate": 2.1800878196285394e-05, "loss": 16.4973, "step": 5318 }, { "epoch": 0.7, "learning_rate": 2.178327806104679e-05, "loss": 0.0001, "step": 5319 }, { "epoch": 0.7, "learning_rate": 2.1765683054294305e-05, "loss": 17.8103, "step": 5320 }, { "epoch": 0.7, "learning_rate": 2.1748093179225894e-05, "loss": 17.6747, "step": 5321 }, { "epoch": 0.7, "learning_rate": 2.1730508439038566e-05, "loss": 0.0, "step": 5322 }, { "epoch": 0.7, "learning_rate": 2.171292883692842e-05, "loss": 16.2516, "step": 5323 }, { "epoch": 0.7, "learning_rate": 2.169535437609058e-05, "loss": 0.0009, "step": 5324 }, { "epoch": 0.7, "learning_rate": 2.1677785059719313e-05, "loss": 18.0801, "step": 5325 }, { "epoch": 0.7, "learning_rate": 2.1660220891007877e-05, "loss": 16.6792, "step": 5326 }, { "epoch": 0.7, "learning_rate": 2.1642661873148618e-05, "loss": 18.5547, "step": 5327 }, { "epoch": 0.7, "learning_rate": 2.1625108009332957e-05, "loss": 18.2822, "step": 5328 }, { "epoch": 0.7, "learning_rate": 2.160755930275136e-05, "loss": 17.6345, "step": 5329 }, { "epoch": 0.7, "learning_rate": 2.159001575659337e-05, "loss": 16.1032, "step": 5330 }, { "epoch": 0.7, "learning_rate": 2.157247737404758e-05, "loss": 17.8184, "step": 5331 }, { "epoch": 0.7, "learning_rate": 2.1554944158301638e-05, "loss": 0.0, "step": 5332 }, { "epoch": 0.7, "learning_rate": 2.15374161125423e-05, "loss": 17.156, "step": 5333 }, { "epoch": 0.7, "learning_rate": 2.151989323995534e-05, "loss": 16.2008, "step": 5334 }, { "epoch": 0.7, "learning_rate": 2.1502375543725588e-05, "loss": 17.8163, "step": 5335 }, { "epoch": 0.7, "learning_rate": 2.1484863027036957e-05, "loss": 0.0, "step": 5336 }, { "epoch": 0.7, "learning_rate": 2.1467355693072394e-05, "loss": 16.6891, "step": 5337 }, { "epoch": 0.7, "learning_rate": 2.1449853545013925e-05, "loss": 17.7783, "step": 5338 }, { "epoch": 0.7, "learning_rate": 2.1432356586042623e-05, "loss": 16.7089, "step": 5339 }, { "epoch": 0.7, "learning_rate": 2.141486481933862e-05, "loss": 18.6764, "step": 5340 }, { "epoch": 0.7, "learning_rate": 2.139737824808109e-05, "loss": 17.6557, "step": 5341 }, { "epoch": 0.7, "learning_rate": 2.137989687544831e-05, "loss": 0.0, "step": 5342 }, { "epoch": 0.7, "learning_rate": 2.136242070461757e-05, "loss": 16.6709, "step": 5343 }, { "epoch": 0.7, "learning_rate": 2.1344949738765206e-05, "loss": 14.9082, "step": 5344 }, { "epoch": 0.7, "learning_rate": 2.1327483981066643e-05, "loss": 17.9179, "step": 5345 }, { "epoch": 0.7, "learning_rate": 2.1310023434696357e-05, "loss": 0.0, "step": 5346 }, { "epoch": 0.7, "learning_rate": 2.1292568102827803e-05, "loss": 16.7309, "step": 5347 }, { "epoch": 0.7, "learning_rate": 2.1275117988633602e-05, "loss": 17.6263, "step": 5348 }, { "epoch": 0.7, "learning_rate": 2.1257673095285356e-05, "loss": 17.244, "step": 5349 }, { "epoch": 0.7, "learning_rate": 2.1240233425953737e-05, "loss": 0.0, "step": 5350 }, { "epoch": 0.7, "learning_rate": 2.122279898380846e-05, "loss": 0.0011, "step": 5351 }, { "epoch": 0.7, "learning_rate": 2.12053697720183e-05, "loss": 16.3736, "step": 5352 }, { "epoch": 0.7, "learning_rate": 2.1187945793751076e-05, "loss": 16.233, "step": 5353 }, { "epoch": 0.7, "learning_rate": 2.117052705217365e-05, "loss": 15.9644, "step": 5354 }, { "epoch": 0.7, "learning_rate": 2.115311355045194e-05, "loss": 0.0001, "step": 5355 }, { "epoch": 0.71, "learning_rate": 2.1135705291750895e-05, "loss": 17.5275, "step": 5356 }, { "epoch": 0.71, "learning_rate": 2.1118302279234558e-05, "loss": 16.5025, "step": 5357 }, { "epoch": 0.71, "learning_rate": 2.110090451606597e-05, "loss": 15.4781, "step": 5358 }, { "epoch": 0.71, "learning_rate": 2.1083512005407237e-05, "loss": 15.1921, "step": 5359 }, { "epoch": 0.71, "learning_rate": 2.1066124750419502e-05, "loss": 16.4944, "step": 5360 }, { "epoch": 0.71, "learning_rate": 2.1048742754262964e-05, "loss": 19.1662, "step": 5361 }, { "epoch": 0.71, "learning_rate": 2.1031366020096853e-05, "loss": 16.1711, "step": 5362 }, { "epoch": 0.71, "learning_rate": 2.101399455107946e-05, "loss": 15.395, "step": 5363 }, { "epoch": 0.71, "learning_rate": 2.0996628350368085e-05, "loss": 17.4221, "step": 5364 }, { "epoch": 0.71, "learning_rate": 2.097926742111913e-05, "loss": 16.1837, "step": 5365 }, { "epoch": 0.71, "learning_rate": 2.0961911766487985e-05, "loss": 16.5018, "step": 5366 }, { "epoch": 0.71, "learning_rate": 2.0944561389629107e-05, "loss": 17.3592, "step": 5367 }, { "epoch": 0.71, "learning_rate": 2.0927216293695978e-05, "loss": 0.0001, "step": 5368 }, { "epoch": 0.71, "learning_rate": 2.0909876481841135e-05, "loss": 17.2712, "step": 5369 }, { "epoch": 0.71, "learning_rate": 2.0892541957216144e-05, "loss": 18.8152, "step": 5370 }, { "epoch": 0.71, "learning_rate": 2.0875212722971615e-05, "loss": 0.0001, "step": 5371 }, { "epoch": 0.71, "learning_rate": 2.085788878225718e-05, "loss": 16.04, "step": 5372 }, { "epoch": 0.71, "learning_rate": 2.084057013822156e-05, "loss": 17.1573, "step": 5373 }, { "epoch": 0.71, "learning_rate": 2.0823256794012456e-05, "loss": 15.8291, "step": 5374 }, { "epoch": 0.71, "learning_rate": 2.0805948752776633e-05, "loss": 0.0001, "step": 5375 }, { "epoch": 0.71, "learning_rate": 2.0788646017659887e-05, "loss": 16.3105, "step": 5376 }, { "epoch": 0.71, "learning_rate": 2.077134859180705e-05, "loss": 17.274, "step": 5377 }, { "epoch": 0.71, "learning_rate": 2.0754056478361984e-05, "loss": 16.7147, "step": 5378 }, { "epoch": 0.71, "learning_rate": 2.0736769680467587e-05, "loss": 14.4657, "step": 5379 }, { "epoch": 0.71, "learning_rate": 2.0719488201265807e-05, "loss": 0.0001, "step": 5380 }, { "epoch": 0.71, "learning_rate": 2.0702212043897583e-05, "loss": 18.0249, "step": 5381 }, { "epoch": 0.71, "learning_rate": 2.0684941211502956e-05, "loss": 0.0, "step": 5382 }, { "epoch": 0.71, "learning_rate": 2.066767570722094e-05, "loss": 0.0001, "step": 5383 }, { "epoch": 0.71, "learning_rate": 2.0650415534189598e-05, "loss": 0.0001, "step": 5384 }, { "epoch": 0.71, "learning_rate": 2.0633160695546032e-05, "loss": 0.0, "step": 5385 }, { "epoch": 0.71, "learning_rate": 2.0615911194426364e-05, "loss": 15.8843, "step": 5386 }, { "epoch": 0.71, "learning_rate": 2.0598667033965753e-05, "loss": 16.5775, "step": 5387 }, { "epoch": 0.71, "learning_rate": 2.0581428217298377e-05, "loss": 16.5276, "step": 5388 }, { "epoch": 0.71, "learning_rate": 2.0564194747557438e-05, "loss": 0.0003, "step": 5389 }, { "epoch": 0.71, "learning_rate": 2.054696662787521e-05, "loss": 0.0, "step": 5390 }, { "epoch": 0.71, "learning_rate": 2.052974386138295e-05, "loss": 17.7868, "step": 5391 }, { "epoch": 0.71, "learning_rate": 2.051252645121095e-05, "loss": 0.0, "step": 5392 }, { "epoch": 0.71, "learning_rate": 2.049531440048854e-05, "loss": 0.0, "step": 5393 }, { "epoch": 0.71, "learning_rate": 2.0478107712344062e-05, "loss": 17.015, "step": 5394 }, { "epoch": 0.71, "learning_rate": 2.04609063899049e-05, "loss": 16.9003, "step": 5395 }, { "epoch": 0.71, "learning_rate": 2.044371043629744e-05, "loss": 17.9168, "step": 5396 }, { "epoch": 0.71, "learning_rate": 2.0426519854647103e-05, "loss": 18.3201, "step": 5397 }, { "epoch": 0.71, "learning_rate": 2.0409334648078367e-05, "loss": 18.3472, "step": 5398 }, { "epoch": 0.71, "learning_rate": 2.039215481971467e-05, "loss": 17.4796, "step": 5399 }, { "epoch": 0.71, "learning_rate": 2.0374980372678524e-05, "loss": 17.586, "step": 5400 }, { "epoch": 0.71, "learning_rate": 2.0357811310091434e-05, "loss": 16.7017, "step": 5401 }, { "epoch": 0.71, "learning_rate": 2.0340647635073945e-05, "loss": 16.6091, "step": 5402 }, { "epoch": 0.71, "learning_rate": 2.03234893507456e-05, "loss": 17.9299, "step": 5403 }, { "epoch": 0.71, "learning_rate": 2.030633646022499e-05, "loss": 17.0332, "step": 5404 }, { "epoch": 0.71, "learning_rate": 2.0289188966629696e-05, "loss": 16.2204, "step": 5405 }, { "epoch": 0.71, "learning_rate": 2.0272046873076335e-05, "loss": 0.0, "step": 5406 }, { "epoch": 0.71, "learning_rate": 2.025491018268056e-05, "loss": 16.1502, "step": 5407 }, { "epoch": 0.71, "learning_rate": 2.023777889855702e-05, "loss": 16.6502, "step": 5408 }, { "epoch": 0.71, "learning_rate": 2.022065302381937e-05, "loss": 0.0001, "step": 5409 }, { "epoch": 0.71, "learning_rate": 2.0203532561580308e-05, "loss": 16.6802, "step": 5410 }, { "epoch": 0.71, "learning_rate": 2.0186417514951532e-05, "loss": 17.3292, "step": 5411 }, { "epoch": 0.71, "learning_rate": 2.0169307887043764e-05, "loss": 0.0, "step": 5412 }, { "epoch": 0.71, "learning_rate": 2.015220368096673e-05, "loss": 16.7094, "step": 5413 }, { "epoch": 0.71, "learning_rate": 2.0135104899829172e-05, "loss": 16.7797, "step": 5414 }, { "epoch": 0.71, "learning_rate": 2.0118011546738872e-05, "loss": 14.058, "step": 5415 }, { "epoch": 0.71, "learning_rate": 2.0100923624802604e-05, "loss": 17.2459, "step": 5416 }, { "epoch": 0.71, "learning_rate": 2.0083841137126148e-05, "loss": 16.2143, "step": 5417 }, { "epoch": 0.71, "learning_rate": 2.006676408681431e-05, "loss": 14.9783, "step": 5418 }, { "epoch": 0.71, "learning_rate": 2.0049692476970894e-05, "loss": 17.6661, "step": 5419 }, { "epoch": 0.71, "learning_rate": 2.0032626310698725e-05, "loss": 14.9946, "step": 5420 }, { "epoch": 0.71, "learning_rate": 2.001556559109965e-05, "loss": 18.1782, "step": 5421 }, { "epoch": 0.71, "learning_rate": 1.9998510321274483e-05, "loss": 16.9799, "step": 5422 }, { "epoch": 0.71, "learning_rate": 1.9981460504323118e-05, "loss": 16.1042, "step": 5423 }, { "epoch": 0.71, "learning_rate": 1.9964416143344394e-05, "loss": 15.9048, "step": 5424 }, { "epoch": 0.71, "learning_rate": 1.9947377241436193e-05, "loss": 16.5036, "step": 5425 }, { "epoch": 0.71, "learning_rate": 1.9930343801695382e-05, "loss": 16.8368, "step": 5426 }, { "epoch": 0.71, "learning_rate": 1.991331582721786e-05, "loss": 0.0, "step": 5427 }, { "epoch": 0.71, "learning_rate": 1.9896293321098507e-05, "loss": 17.6891, "step": 5428 }, { "epoch": 0.71, "learning_rate": 1.987927628643123e-05, "loss": 16.9762, "step": 5429 }, { "epoch": 0.71, "learning_rate": 1.986226472630894e-05, "loss": 17.8668, "step": 5430 }, { "epoch": 0.71, "learning_rate": 1.9845258643823512e-05, "loss": 18.2629, "step": 5431 }, { "epoch": 0.72, "learning_rate": 1.98282580420659e-05, "loss": 0.0, "step": 5432 }, { "epoch": 0.72, "learning_rate": 1.981126292412601e-05, "loss": 17.4733, "step": 5433 }, { "epoch": 0.72, "learning_rate": 1.9794273293092764e-05, "loss": 15.8401, "step": 5434 }, { "epoch": 0.72, "learning_rate": 1.9777289152054084e-05, "loss": 0.0002, "step": 5435 }, { "epoch": 0.72, "learning_rate": 1.976031050409689e-05, "loss": 17.5962, "step": 5436 }, { "epoch": 0.72, "learning_rate": 1.9743337352307118e-05, "loss": 19.1346, "step": 5437 }, { "epoch": 0.72, "learning_rate": 1.9726369699769686e-05, "loss": 16.6499, "step": 5438 }, { "epoch": 0.72, "learning_rate": 1.970940754956852e-05, "loss": 15.799, "step": 5439 }, { "epoch": 0.72, "learning_rate": 1.969245090478657e-05, "loss": 17.7999, "step": 5440 }, { "epoch": 0.72, "learning_rate": 1.9675499768505755e-05, "loss": 0.0, "step": 5441 }, { "epoch": 0.72, "learning_rate": 1.9658554143807e-05, "loss": 16.4588, "step": 5442 }, { "epoch": 0.72, "learning_rate": 1.9641614033770235e-05, "loss": 0.0, "step": 5443 }, { "epoch": 0.72, "learning_rate": 1.9624679441474376e-05, "loss": 15.1459, "step": 5444 }, { "epoch": 0.72, "learning_rate": 1.9607750369997346e-05, "loss": 17.3658, "step": 5445 }, { "epoch": 0.72, "learning_rate": 1.9590826822416062e-05, "loss": 0.0, "step": 5446 }, { "epoch": 0.72, "learning_rate": 1.9573908801806424e-05, "loss": 14.784, "step": 5447 }, { "epoch": 0.72, "learning_rate": 1.955699631124337e-05, "loss": 17.9766, "step": 5448 }, { "epoch": 0.72, "learning_rate": 1.9540089353800783e-05, "loss": 0.0, "step": 5449 }, { "epoch": 0.72, "learning_rate": 1.9523187932551566e-05, "loss": 16.3556, "step": 5450 }, { "epoch": 0.72, "learning_rate": 1.950629205056761e-05, "loss": 0.0, "step": 5451 }, { "epoch": 0.72, "learning_rate": 1.9489401710919797e-05, "loss": 17.9299, "step": 5452 }, { "epoch": 0.72, "learning_rate": 1.9472516916678006e-05, "loss": 0.0, "step": 5453 }, { "epoch": 0.72, "learning_rate": 1.945563767091111e-05, "loss": 17.6052, "step": 5454 }, { "epoch": 0.72, "learning_rate": 1.943876397668696e-05, "loss": 16.0956, "step": 5455 }, { "epoch": 0.72, "learning_rate": 1.9421895837072396e-05, "loss": 15.34, "step": 5456 }, { "epoch": 0.72, "learning_rate": 1.9405033255133298e-05, "loss": 18.2729, "step": 5457 }, { "epoch": 0.72, "learning_rate": 1.9388176233934474e-05, "loss": 17.9764, "step": 5458 }, { "epoch": 0.72, "learning_rate": 1.9371324776539756e-05, "loss": 15.9682, "step": 5459 }, { "epoch": 0.72, "learning_rate": 1.9354478886011945e-05, "loss": 17.2378, "step": 5460 }, { "epoch": 0.72, "learning_rate": 1.9337638565412842e-05, "loss": 17.3894, "step": 5461 }, { "epoch": 0.72, "learning_rate": 1.9320803817803234e-05, "loss": 0.0, "step": 5462 }, { "epoch": 0.72, "learning_rate": 1.9303974646242888e-05, "loss": 18.1152, "step": 5463 }, { "epoch": 0.72, "learning_rate": 1.928715105379056e-05, "loss": 16.5132, "step": 5464 }, { "epoch": 0.72, "learning_rate": 1.927033304350402e-05, "loss": 0.0, "step": 5465 }, { "epoch": 0.72, "learning_rate": 1.9253520618439984e-05, "loss": 17.3993, "step": 5466 }, { "epoch": 0.72, "learning_rate": 1.923671378165417e-05, "loss": 16.5884, "step": 5467 }, { "epoch": 0.72, "learning_rate": 1.9219912536201273e-05, "loss": 16.8505, "step": 5468 }, { "epoch": 0.72, "learning_rate": 1.920311688513498e-05, "loss": 16.865, "step": 5469 }, { "epoch": 0.72, "learning_rate": 1.9186326831507966e-05, "loss": 0.0, "step": 5470 }, { "epoch": 0.72, "learning_rate": 1.916954237837187e-05, "loss": 17.3097, "step": 5471 }, { "epoch": 0.72, "learning_rate": 1.9152763528777308e-05, "loss": 0.0, "step": 5472 }, { "epoch": 0.72, "learning_rate": 1.913599028577393e-05, "loss": 17.966, "step": 5473 }, { "epoch": 0.72, "learning_rate": 1.9119222652410317e-05, "loss": 17.7139, "step": 5474 }, { "epoch": 0.72, "learning_rate": 1.9102460631734038e-05, "loss": 17.5487, "step": 5475 }, { "epoch": 0.72, "learning_rate": 1.9085704226791652e-05, "loss": 16.9833, "step": 5476 }, { "epoch": 0.72, "learning_rate": 1.906895344062869e-05, "loss": 16.6095, "step": 5477 }, { "epoch": 0.72, "learning_rate": 1.9052208276289668e-05, "loss": 0.0, "step": 5478 }, { "epoch": 0.72, "learning_rate": 1.903546873681808e-05, "loss": 15.0482, "step": 5479 }, { "epoch": 0.72, "learning_rate": 1.9018734825256385e-05, "loss": 0.0, "step": 5480 }, { "epoch": 0.72, "learning_rate": 1.9002006544646015e-05, "loss": 16.8394, "step": 5481 }, { "epoch": 0.72, "learning_rate": 1.898528389802745e-05, "loss": 0.0, "step": 5482 }, { "epoch": 0.72, "learning_rate": 1.8968566888440038e-05, "loss": 0.0, "step": 5483 }, { "epoch": 0.72, "learning_rate": 1.8951855518922156e-05, "loss": 17.5436, "step": 5484 }, { "epoch": 0.72, "learning_rate": 1.893514979251117e-05, "loss": 17.142, "step": 5485 }, { "epoch": 0.72, "learning_rate": 1.891844971224339e-05, "loss": 0.0002, "step": 5486 }, { "epoch": 0.72, "learning_rate": 1.8901755281154104e-05, "loss": 16.6007, "step": 5487 }, { "epoch": 0.72, "learning_rate": 1.888506650227761e-05, "loss": 0.0, "step": 5488 }, { "epoch": 0.72, "learning_rate": 1.8868383378647143e-05, "loss": 15.5535, "step": 5489 }, { "epoch": 0.72, "learning_rate": 1.885170591329491e-05, "loss": 17.2596, "step": 5490 }, { "epoch": 0.72, "learning_rate": 1.8835034109252097e-05, "loss": 17.5865, "step": 5491 }, { "epoch": 0.72, "learning_rate": 1.8818367969548862e-05, "loss": 15.9627, "step": 5492 }, { "epoch": 0.72, "learning_rate": 1.8801707497214343e-05, "loss": 17.1529, "step": 5493 }, { "epoch": 0.72, "learning_rate": 1.8785052695276623e-05, "loss": 17.2414, "step": 5494 }, { "epoch": 0.72, "learning_rate": 1.8768403566762765e-05, "loss": 15.4786, "step": 5495 }, { "epoch": 0.72, "learning_rate": 1.8751760114698835e-05, "loss": 16.1154, "step": 5496 }, { "epoch": 0.72, "learning_rate": 1.8735122342109817e-05, "loss": 16.305, "step": 5497 }, { "epoch": 0.72, "learning_rate": 1.8718490252019688e-05, "loss": 16.9945, "step": 5498 }, { "epoch": 0.72, "learning_rate": 1.8701863847451385e-05, "loss": 17.3582, "step": 5499 }, { "epoch": 0.72, "learning_rate": 1.8685243131426817e-05, "loss": 0.0002, "step": 5500 }, { "epoch": 0.72, "learning_rate": 1.866862810696685e-05, "loss": 17.3478, "step": 5501 }, { "epoch": 0.72, "learning_rate": 1.865201877709133e-05, "loss": 0.0001, "step": 5502 }, { "epoch": 0.72, "learning_rate": 1.863541514481905e-05, "loss": 15.8876, "step": 5503 }, { "epoch": 0.72, "learning_rate": 1.8618817213167773e-05, "loss": 15.8859, "step": 5504 }, { "epoch": 0.72, "learning_rate": 1.8602224985154255e-05, "loss": 0.0, "step": 5505 }, { "epoch": 0.72, "learning_rate": 1.8585638463794175e-05, "loss": 15.8487, "step": 5506 }, { "epoch": 0.72, "learning_rate": 1.856905765210219e-05, "loss": 0.0, "step": 5507 }, { "epoch": 0.73, "learning_rate": 1.8552482553091914e-05, "loss": 18.1954, "step": 5508 }, { "epoch": 0.73, "learning_rate": 1.8535913169775942e-05, "loss": 16.5406, "step": 5509 }, { "epoch": 0.73, "learning_rate": 1.8519349505165806e-05, "loss": 16.2475, "step": 5510 }, { "epoch": 0.73, "learning_rate": 1.850279156227201e-05, "loss": 16.0714, "step": 5511 }, { "epoch": 0.73, "learning_rate": 1.8486239344104007e-05, "loss": 19.0365, "step": 5512 }, { "epoch": 0.73, "learning_rate": 1.8469692853670238e-05, "loss": 16.5709, "step": 5513 }, { "epoch": 0.73, "learning_rate": 1.845315209397809e-05, "loss": 17.8911, "step": 5514 }, { "epoch": 0.73, "learning_rate": 1.8436617068033878e-05, "loss": 18.1755, "step": 5515 }, { "epoch": 0.73, "learning_rate": 1.842008777884292e-05, "loss": 15.8935, "step": 5516 }, { "epoch": 0.73, "learning_rate": 1.8403564229409457e-05, "loss": 0.0001, "step": 5517 }, { "epoch": 0.73, "learning_rate": 1.838704642273671e-05, "loss": 17.023, "step": 5518 }, { "epoch": 0.73, "learning_rate": 1.8370534361826847e-05, "loss": 0.0, "step": 5519 }, { "epoch": 0.73, "learning_rate": 1.8354028049680967e-05, "loss": 17.3324, "step": 5520 }, { "epoch": 0.73, "learning_rate": 1.833752748929919e-05, "loss": 19.3563, "step": 5521 }, { "epoch": 0.73, "learning_rate": 1.8321032683680526e-05, "loss": 17.1914, "step": 5522 }, { "epoch": 0.73, "learning_rate": 1.8304543635822973e-05, "loss": 0.0, "step": 5523 }, { "epoch": 0.73, "learning_rate": 1.8288060348723464e-05, "loss": 0.0, "step": 5524 }, { "epoch": 0.73, "learning_rate": 1.8271582825377897e-05, "loss": 15.8708, "step": 5525 }, { "epoch": 0.73, "learning_rate": 1.8255111068781112e-05, "loss": 0.0, "step": 5526 }, { "epoch": 0.73, "learning_rate": 1.8238645081926915e-05, "loss": 0.0004, "step": 5527 }, { "epoch": 0.73, "learning_rate": 1.8222184867808053e-05, "loss": 0.001, "step": 5528 }, { "epoch": 0.73, "learning_rate": 1.82057304294162e-05, "loss": 0.0001, "step": 5529 }, { "epoch": 0.73, "learning_rate": 1.8189281769742055e-05, "loss": 16.4213, "step": 5530 }, { "epoch": 0.73, "learning_rate": 1.81728388917752e-05, "loss": 0.0, "step": 5531 }, { "epoch": 0.73, "learning_rate": 1.815640179850418e-05, "loss": 16.5491, "step": 5532 }, { "epoch": 0.73, "learning_rate": 1.8139970492916485e-05, "loss": 0.0, "step": 5533 }, { "epoch": 0.73, "learning_rate": 1.812354497799858e-05, "loss": 16.5456, "step": 5534 }, { "epoch": 0.73, "learning_rate": 1.8107125256735842e-05, "loss": 16.0126, "step": 5535 }, { "epoch": 0.73, "learning_rate": 1.809071133211262e-05, "loss": 18.7951, "step": 5536 }, { "epoch": 0.73, "learning_rate": 1.8074303207112185e-05, "loss": 18.0012, "step": 5537 }, { "epoch": 0.73, "learning_rate": 1.8057900884716798e-05, "loss": 16.7314, "step": 5538 }, { "epoch": 0.73, "learning_rate": 1.8041504367907623e-05, "loss": 16.2946, "step": 5539 }, { "epoch": 0.73, "learning_rate": 1.8025113659664787e-05, "loss": 16.1845, "step": 5540 }, { "epoch": 0.73, "learning_rate": 1.800872876296736e-05, "loss": 18.0982, "step": 5541 }, { "epoch": 0.73, "learning_rate": 1.799234968079334e-05, "loss": 15.8993, "step": 5542 }, { "epoch": 0.73, "learning_rate": 1.7975976416119687e-05, "loss": 16.3518, "step": 5543 }, { "epoch": 0.73, "learning_rate": 1.795960897192231e-05, "loss": 0.0003, "step": 5544 }, { "epoch": 0.73, "learning_rate": 1.794324735117601e-05, "loss": 0.0, "step": 5545 }, { "epoch": 0.73, "learning_rate": 1.7926891556854626e-05, "loss": 16.5995, "step": 5546 }, { "epoch": 0.73, "learning_rate": 1.791054159193084e-05, "loss": 0.0001, "step": 5547 }, { "epoch": 0.73, "learning_rate": 1.7894197459376332e-05, "loss": 16.1595, "step": 5548 }, { "epoch": 0.73, "learning_rate": 1.787785916216169e-05, "loss": 17.5211, "step": 5549 }, { "epoch": 0.73, "learning_rate": 1.7861526703256466e-05, "loss": 0.0, "step": 5550 }, { "epoch": 0.73, "learning_rate": 1.784520008562914e-05, "loss": 16.7336, "step": 5551 }, { "epoch": 0.73, "learning_rate": 1.782887931224712e-05, "loss": 16.5704, "step": 5552 }, { "epoch": 0.73, "learning_rate": 1.7812564386076775e-05, "loss": 17.2818, "step": 5553 }, { "epoch": 0.73, "learning_rate": 1.7796255310083378e-05, "loss": 16.4482, "step": 5554 }, { "epoch": 0.73, "learning_rate": 1.7779952087231188e-05, "loss": 16.265, "step": 5555 }, { "epoch": 0.73, "learning_rate": 1.7763654720483364e-05, "loss": 19.1287, "step": 5556 }, { "epoch": 0.73, "learning_rate": 1.7747363212802003e-05, "loss": 18.1579, "step": 5557 }, { "epoch": 0.73, "learning_rate": 1.7731077567148143e-05, "loss": 16.8992, "step": 5558 }, { "epoch": 0.73, "learning_rate": 1.771479778648175e-05, "loss": 17.6879, "step": 5559 }, { "epoch": 0.73, "learning_rate": 1.7698523873761746e-05, "loss": 16.0234, "step": 5560 }, { "epoch": 0.73, "learning_rate": 1.7682255831945954e-05, "loss": 16.2071, "step": 5561 }, { "epoch": 0.73, "learning_rate": 1.7665993663991144e-05, "loss": 0.0002, "step": 5562 }, { "epoch": 0.73, "learning_rate": 1.7649737372853044e-05, "loss": 16.4682, "step": 5563 }, { "epoch": 0.73, "learning_rate": 1.7633486961486273e-05, "loss": 17.9397, "step": 5564 }, { "epoch": 0.73, "learning_rate": 1.7617242432844407e-05, "loss": 15.8648, "step": 5565 }, { "epoch": 0.73, "learning_rate": 1.760100378987994e-05, "loss": 15.3212, "step": 5566 }, { "epoch": 0.73, "learning_rate": 1.75847710355443e-05, "loss": 15.7228, "step": 5567 }, { "epoch": 0.73, "learning_rate": 1.756854417278785e-05, "loss": 0.0, "step": 5568 }, { "epoch": 0.73, "learning_rate": 1.7552323204559874e-05, "loss": 15.9436, "step": 5569 }, { "epoch": 0.73, "learning_rate": 1.753610813380857e-05, "loss": 15.9771, "step": 5570 }, { "epoch": 0.73, "learning_rate": 1.751989896348113e-05, "loss": 17.0379, "step": 5571 }, { "epoch": 0.73, "learning_rate": 1.750369569652359e-05, "loss": 0.0, "step": 5572 }, { "epoch": 0.73, "learning_rate": 1.748749833588097e-05, "loss": 0.0001, "step": 5573 }, { "epoch": 0.73, "learning_rate": 1.7471306884497174e-05, "loss": 15.8955, "step": 5574 }, { "epoch": 0.73, "learning_rate": 1.7455121345315074e-05, "loss": 17.4276, "step": 5575 }, { "epoch": 0.73, "learning_rate": 1.743894172127643e-05, "loss": 16.7992, "step": 5576 }, { "epoch": 0.73, "learning_rate": 1.742276801532196e-05, "loss": 17.3989, "step": 5577 }, { "epoch": 0.73, "learning_rate": 1.7406600230391278e-05, "loss": 0.0012, "step": 5578 }, { "epoch": 0.73, "learning_rate": 1.739043836942293e-05, "loss": 0.0, "step": 5579 }, { "epoch": 0.73, "learning_rate": 1.737428243535441e-05, "loss": 16.1637, "step": 5580 }, { "epoch": 0.73, "learning_rate": 1.7358132431122104e-05, "loss": 17.0976, "step": 5581 }, { "epoch": 0.73, "learning_rate": 1.734198835966133e-05, "loss": 18.7804, "step": 5582 }, { "epoch": 0.73, "learning_rate": 1.7325850223906332e-05, "loss": 17.0886, "step": 5583 }, { "epoch": 0.74, "learning_rate": 1.7309718026790267e-05, "loss": 17.5823, "step": 5584 }, { "epoch": 0.74, "learning_rate": 1.7293591771245222e-05, "loss": 0.0, "step": 5585 }, { "epoch": 0.74, "learning_rate": 1.7277471460202188e-05, "loss": 18.2158, "step": 5586 }, { "epoch": 0.74, "learning_rate": 1.7261357096591087e-05, "loss": 15.803, "step": 5587 }, { "epoch": 0.74, "learning_rate": 1.724524868334078e-05, "loss": 0.0001, "step": 5588 }, { "epoch": 0.74, "learning_rate": 1.7229146223379017e-05, "loss": 17.6586, "step": 5589 }, { "epoch": 0.74, "learning_rate": 1.7213049719632467e-05, "loss": 16.9025, "step": 5590 }, { "epoch": 0.74, "learning_rate": 1.719695917502674e-05, "loss": 0.0, "step": 5591 }, { "epoch": 0.74, "learning_rate": 1.718087459248633e-05, "loss": 16.9101, "step": 5592 }, { "epoch": 0.74, "learning_rate": 1.7164795974934677e-05, "loss": 17.1932, "step": 5593 }, { "epoch": 0.74, "learning_rate": 1.7148723325294123e-05, "loss": 17.6856, "step": 5594 }, { "epoch": 0.74, "learning_rate": 1.7132656646485905e-05, "loss": 16.1235, "step": 5595 }, { "epoch": 0.74, "learning_rate": 1.711659594143024e-05, "loss": 0.0, "step": 5596 }, { "epoch": 0.74, "learning_rate": 1.710054121304619e-05, "loss": 16.5993, "step": 5597 }, { "epoch": 0.74, "learning_rate": 1.7084492464251762e-05, "loss": 15.8864, "step": 5598 }, { "epoch": 0.74, "learning_rate": 1.7068449697963874e-05, "loss": 17.1521, "step": 5599 }, { "epoch": 0.74, "learning_rate": 1.7052412917098343e-05, "loss": 0.0, "step": 5600 }, { "epoch": 0.74, "learning_rate": 1.7036382124569923e-05, "loss": 15.6407, "step": 5601 }, { "epoch": 0.74, "learning_rate": 1.7020357323292252e-05, "loss": 0.0003, "step": 5602 }, { "epoch": 0.74, "learning_rate": 1.7004338516177904e-05, "loss": 16.6448, "step": 5603 }, { "epoch": 0.74, "learning_rate": 1.698832570613833e-05, "loss": 16.3893, "step": 5604 }, { "epoch": 0.74, "learning_rate": 1.697231889608395e-05, "loss": 17.7972, "step": 5605 }, { "epoch": 0.74, "learning_rate": 1.6956318088924034e-05, "loss": 0.0, "step": 5606 }, { "epoch": 0.74, "learning_rate": 1.694032328756679e-05, "loss": 15.7078, "step": 5607 }, { "epoch": 0.74, "learning_rate": 1.6924334494919318e-05, "loss": 16.7032, "step": 5608 }, { "epoch": 0.74, "learning_rate": 1.690835171388765e-05, "loss": 16.9119, "step": 5609 }, { "epoch": 0.74, "learning_rate": 1.6892374947376705e-05, "loss": 0.0, "step": 5610 }, { "epoch": 0.74, "learning_rate": 1.6876404198290312e-05, "loss": 17.4306, "step": 5611 }, { "epoch": 0.74, "learning_rate": 1.68604394695312e-05, "loss": 18.2551, "step": 5612 }, { "epoch": 0.74, "learning_rate": 1.6844480764001035e-05, "loss": 17.2788, "step": 5613 }, { "epoch": 0.74, "learning_rate": 1.6828528084600363e-05, "loss": 17.4591, "step": 5614 }, { "epoch": 0.74, "learning_rate": 1.6812581434228632e-05, "loss": 0.0001, "step": 5615 }, { "epoch": 0.74, "learning_rate": 1.6796640815784198e-05, "loss": 0.0, "step": 5616 }, { "epoch": 0.74, "learning_rate": 1.6780706232164328e-05, "loss": 18.3549, "step": 5617 }, { "epoch": 0.74, "learning_rate": 1.676477768626519e-05, "loss": 16.7704, "step": 5618 }, { "epoch": 0.74, "learning_rate": 1.674885518098184e-05, "loss": 16.199, "step": 5619 }, { "epoch": 0.74, "learning_rate": 1.6732938719208253e-05, "loss": 16.8854, "step": 5620 }, { "epoch": 0.74, "learning_rate": 1.6717028303837307e-05, "loss": 0.0, "step": 5621 }, { "epoch": 0.74, "learning_rate": 1.6701123937760772e-05, "loss": 17.4583, "step": 5622 }, { "epoch": 0.74, "learning_rate": 1.6685225623869316e-05, "loss": 17.5645, "step": 5623 }, { "epoch": 0.74, "learning_rate": 1.6669333365052518e-05, "loss": 0.0001, "step": 5624 }, { "epoch": 0.74, "learning_rate": 1.665344716419885e-05, "loss": 0.0004, "step": 5625 }, { "epoch": 0.74, "learning_rate": 1.6637567024195677e-05, "loss": 16.0886, "step": 5626 }, { "epoch": 0.74, "learning_rate": 1.6621692947929264e-05, "loss": 16.6742, "step": 5627 }, { "epoch": 0.74, "learning_rate": 1.6605824938284805e-05, "loss": 17.376, "step": 5628 }, { "epoch": 0.74, "learning_rate": 1.658996299814634e-05, "loss": 16.6681, "step": 5629 }, { "epoch": 0.74, "learning_rate": 1.657410713039685e-05, "loss": 17.5767, "step": 5630 }, { "epoch": 0.74, "learning_rate": 1.6558257337918176e-05, "loss": 16.6954, "step": 5631 }, { "epoch": 0.74, "learning_rate": 1.6542413623591085e-05, "loss": 0.0, "step": 5632 }, { "epoch": 0.74, "learning_rate": 1.6526575990295218e-05, "loss": 16.2173, "step": 5633 }, { "epoch": 0.74, "learning_rate": 1.6510744440909128e-05, "loss": 17.464, "step": 5634 }, { "epoch": 0.74, "learning_rate": 1.6494918978310226e-05, "loss": 17.1517, "step": 5635 }, { "epoch": 0.74, "learning_rate": 1.647909960537488e-05, "loss": 0.0, "step": 5636 }, { "epoch": 0.74, "learning_rate": 1.6463286324978312e-05, "loss": 16.8744, "step": 5637 }, { "epoch": 0.74, "learning_rate": 1.644747913999462e-05, "loss": 15.252, "step": 5638 }, { "epoch": 0.74, "learning_rate": 1.6431678053296828e-05, "loss": 17.2644, "step": 5639 }, { "epoch": 0.74, "learning_rate": 1.6415883067756832e-05, "loss": 0.0, "step": 5640 }, { "epoch": 0.74, "learning_rate": 1.640009418624543e-05, "loss": 17.2409, "step": 5641 }, { "epoch": 0.74, "learning_rate": 1.6384311411632298e-05, "loss": 16.2696, "step": 5642 }, { "epoch": 0.74, "learning_rate": 1.6368534746786012e-05, "loss": 18.0991, "step": 5643 }, { "epoch": 0.74, "learning_rate": 1.635276419457402e-05, "loss": 18.2949, "step": 5644 }, { "epoch": 0.74, "learning_rate": 1.633699975786271e-05, "loss": 18.3101, "step": 5645 }, { "epoch": 0.74, "learning_rate": 1.63212414395173e-05, "loss": 16.8639, "step": 5646 }, { "epoch": 0.74, "learning_rate": 1.6305489242401922e-05, "loss": 15.3777, "step": 5647 }, { "epoch": 0.74, "learning_rate": 1.6289743169379595e-05, "loss": 16.7598, "step": 5648 }, { "epoch": 0.74, "learning_rate": 1.6274003223312212e-05, "loss": 0.0001, "step": 5649 }, { "epoch": 0.74, "learning_rate": 1.6258269407060573e-05, "loss": 15.7137, "step": 5650 }, { "epoch": 0.74, "learning_rate": 1.6242541723484343e-05, "loss": 0.0, "step": 5651 }, { "epoch": 0.74, "learning_rate": 1.6226820175442075e-05, "loss": 0.0001, "step": 5652 }, { "epoch": 0.74, "learning_rate": 1.6211104765791245e-05, "loss": 0.0001, "step": 5653 }, { "epoch": 0.74, "learning_rate": 1.6195395497388166e-05, "loss": 15.3517, "step": 5654 }, { "epoch": 0.74, "learning_rate": 1.6179692373088047e-05, "loss": 17.4544, "step": 5655 }, { "epoch": 0.74, "learning_rate": 1.616399539574499e-05, "loss": 18.2216, "step": 5656 }, { "epoch": 0.74, "learning_rate": 1.6148304568211975e-05, "loss": 17.6728, "step": 5657 }, { "epoch": 0.74, "learning_rate": 1.6132619893340856e-05, "loss": 16.4297, "step": 5658 }, { "epoch": 0.74, "learning_rate": 1.6116941373982387e-05, "loss": 17.2106, "step": 5659 }, { "epoch": 0.75, "learning_rate": 1.610126901298617e-05, "loss": 15.6585, "step": 5660 }, { "epoch": 0.75, "learning_rate": 1.608560281320074e-05, "loss": 16.2296, "step": 5661 }, { "epoch": 0.75, "learning_rate": 1.606994277747348e-05, "loss": 17.5732, "step": 5662 }, { "epoch": 0.75, "learning_rate": 1.6054288908650637e-05, "loss": 17.625, "step": 5663 }, { "epoch": 0.75, "learning_rate": 1.6038641209577372e-05, "loss": 17.1147, "step": 5664 }, { "epoch": 0.75, "learning_rate": 1.6022999683097695e-05, "loss": 16.2856, "step": 5665 }, { "epoch": 0.75, "learning_rate": 1.6007364332054513e-05, "loss": 0.0, "step": 5666 }, { "epoch": 0.75, "learning_rate": 1.5991735159289605e-05, "loss": 0.0, "step": 5667 }, { "epoch": 0.75, "learning_rate": 1.597611216764363e-05, "loss": 17.6146, "step": 5668 }, { "epoch": 0.75, "learning_rate": 1.59604953599561e-05, "loss": 16.7561, "step": 5669 }, { "epoch": 0.75, "learning_rate": 1.5944884739065452e-05, "loss": 18.8609, "step": 5670 }, { "epoch": 0.75, "learning_rate": 1.5929280307808963e-05, "loss": 0.0003, "step": 5671 }, { "epoch": 0.75, "learning_rate": 1.5913682069022784e-05, "loss": 0.0, "step": 5672 }, { "epoch": 0.75, "learning_rate": 1.5898090025541955e-05, "loss": 16.2584, "step": 5673 }, { "epoch": 0.75, "learning_rate": 1.5882504180200374e-05, "loss": 16.0796, "step": 5674 }, { "epoch": 0.75, "learning_rate": 1.5866924535830835e-05, "loss": 19.1168, "step": 5675 }, { "epoch": 0.75, "learning_rate": 1.5851351095264984e-05, "loss": 16.165, "step": 5676 }, { "epoch": 0.75, "learning_rate": 1.5835783861333325e-05, "loss": 0.0001, "step": 5677 }, { "epoch": 0.75, "learning_rate": 1.58202228368653e-05, "loss": 0.0001, "step": 5678 }, { "epoch": 0.75, "learning_rate": 1.5804668024689158e-05, "loss": 18.709, "step": 5679 }, { "epoch": 0.75, "learning_rate": 1.5789119427632037e-05, "loss": 16.5693, "step": 5680 }, { "epoch": 0.75, "learning_rate": 1.5773577048519956e-05, "loss": 0.0001, "step": 5681 }, { "epoch": 0.75, "learning_rate": 1.5758040890177783e-05, "loss": 15.6555, "step": 5682 }, { "epoch": 0.75, "learning_rate": 1.574251095542928e-05, "loss": 19.6644, "step": 5683 }, { "epoch": 0.75, "learning_rate": 1.5726987247097063e-05, "loss": 16.0175, "step": 5684 }, { "epoch": 0.75, "learning_rate": 1.5711469768002597e-05, "loss": 16.8251, "step": 5685 }, { "epoch": 0.75, "learning_rate": 1.5695958520966274e-05, "loss": 16.5492, "step": 5686 }, { "epoch": 0.75, "learning_rate": 1.5680453508807302e-05, "loss": 16.1056, "step": 5687 }, { "epoch": 0.75, "learning_rate": 1.5664954734343773e-05, "loss": 16.3535, "step": 5688 }, { "epoch": 0.75, "learning_rate": 1.5649462200392633e-05, "loss": 16.2947, "step": 5689 }, { "epoch": 0.75, "learning_rate": 1.5633975909769704e-05, "loss": 17.0341, "step": 5690 }, { "epoch": 0.75, "learning_rate": 1.5618495865289685e-05, "loss": 18.483, "step": 5691 }, { "epoch": 0.75, "learning_rate": 1.5603022069766116e-05, "loss": 0.0016, "step": 5692 }, { "epoch": 0.75, "learning_rate": 1.558755452601141e-05, "loss": 0.0, "step": 5693 }, { "epoch": 0.75, "learning_rate": 1.5572093236836836e-05, "loss": 18.531, "step": 5694 }, { "epoch": 0.75, "learning_rate": 1.5556638205052575e-05, "loss": 15.919, "step": 5695 }, { "epoch": 0.75, "learning_rate": 1.5541189433467606e-05, "loss": 16.6377, "step": 5696 }, { "epoch": 0.75, "learning_rate": 1.55257469248898e-05, "loss": 0.0001, "step": 5697 }, { "epoch": 0.75, "learning_rate": 1.5510310682125885e-05, "loss": 16.3557, "step": 5698 }, { "epoch": 0.75, "learning_rate": 1.5494880707981445e-05, "loss": 17.1331, "step": 5699 }, { "epoch": 0.75, "learning_rate": 1.5479457005260944e-05, "loss": 16.9386, "step": 5700 }, { "epoch": 0.75, "learning_rate": 1.5464039576767685e-05, "loss": 0.0001, "step": 5701 }, { "epoch": 0.75, "learning_rate": 1.5448628425303825e-05, "loss": 14.9414, "step": 5702 }, { "epoch": 0.75, "learning_rate": 1.5433223553670424e-05, "loss": 16.1943, "step": 5703 }, { "epoch": 0.75, "learning_rate": 1.5417824964667354e-05, "loss": 15.2155, "step": 5704 }, { "epoch": 0.75, "learning_rate": 1.5402432661093363e-05, "loss": 0.0, "step": 5705 }, { "epoch": 0.75, "learning_rate": 1.538704664574605e-05, "loss": 17.5069, "step": 5706 }, { "epoch": 0.75, "learning_rate": 1.5371666921421885e-05, "loss": 18.2196, "step": 5707 }, { "epoch": 0.75, "learning_rate": 1.5356293490916178e-05, "loss": 18.0306, "step": 5708 }, { "epoch": 0.75, "learning_rate": 1.5340926357023112e-05, "loss": 16.4189, "step": 5709 }, { "epoch": 0.75, "learning_rate": 1.532556552253569e-05, "loss": 17.5375, "step": 5710 }, { "epoch": 0.75, "learning_rate": 1.531021099024583e-05, "loss": 17.0779, "step": 5711 }, { "epoch": 0.75, "learning_rate": 1.529486276294426e-05, "loss": 0.0, "step": 5712 }, { "epoch": 0.75, "learning_rate": 1.5279520843420568e-05, "loss": 17.6804, "step": 5713 }, { "epoch": 0.75, "learning_rate": 1.5264185234463207e-05, "loss": 16.361, "step": 5714 }, { "epoch": 0.75, "learning_rate": 1.5248855938859475e-05, "loss": 15.3921, "step": 5715 }, { "epoch": 0.75, "learning_rate": 1.5233532959395518e-05, "loss": 16.8025, "step": 5716 }, { "epoch": 0.75, "learning_rate": 1.5218216298856348e-05, "loss": 19.2851, "step": 5717 }, { "epoch": 0.75, "learning_rate": 1.520290596002581e-05, "loss": 16.0368, "step": 5718 }, { "epoch": 0.75, "learning_rate": 1.5187601945686603e-05, "loss": 0.0, "step": 5719 }, { "epoch": 0.75, "learning_rate": 1.5172304258620317e-05, "loss": 0.0, "step": 5720 }, { "epoch": 0.75, "learning_rate": 1.5157012901607343e-05, "loss": 17.3726, "step": 5721 }, { "epoch": 0.75, "learning_rate": 1.514172787742693e-05, "loss": 16.619, "step": 5722 }, { "epoch": 0.75, "learning_rate": 1.5126449188857194e-05, "loss": 0.0, "step": 5723 }, { "epoch": 0.75, "learning_rate": 1.5111176838675078e-05, "loss": 16.1946, "step": 5724 }, { "epoch": 0.75, "learning_rate": 1.5095910829656396e-05, "loss": 16.7111, "step": 5725 }, { "epoch": 0.75, "learning_rate": 1.5080651164575788e-05, "loss": 16.6332, "step": 5726 }, { "epoch": 0.75, "learning_rate": 1.5065397846206741e-05, "loss": 16.8999, "step": 5727 }, { "epoch": 0.75, "learning_rate": 1.5050150877321628e-05, "loss": 16.0188, "step": 5728 }, { "epoch": 0.75, "learning_rate": 1.503491026069162e-05, "loss": 0.0004, "step": 5729 }, { "epoch": 0.75, "learning_rate": 1.5019675999086752e-05, "loss": 0.0, "step": 5730 }, { "epoch": 0.75, "learning_rate": 1.5004448095275902e-05, "loss": 17.5621, "step": 5731 }, { "epoch": 0.75, "learning_rate": 1.4989226552026798e-05, "loss": 0.0, "step": 5732 }, { "epoch": 0.75, "learning_rate": 1.4974011372106006e-05, "loss": 18.119, "step": 5733 }, { "epoch": 0.75, "learning_rate": 1.495880255827894e-05, "loss": 0.0, "step": 5734 }, { "epoch": 0.75, "learning_rate": 1.494360011330983e-05, "loss": 17.21, "step": 5735 }, { "epoch": 0.76, "learning_rate": 1.4928404039961814e-05, "loss": 17.3093, "step": 5736 }, { "epoch": 0.76, "learning_rate": 1.4913214340996806e-05, "loss": 0.0001, "step": 5737 }, { "epoch": 0.76, "learning_rate": 1.489803101917559e-05, "loss": 16.7573, "step": 5738 }, { "epoch": 0.76, "learning_rate": 1.4882854077257786e-05, "loss": 17.9531, "step": 5739 }, { "epoch": 0.76, "learning_rate": 1.4867683518001857e-05, "loss": 0.0, "step": 5740 }, { "epoch": 0.76, "learning_rate": 1.4852519344165105e-05, "loss": 16.1667, "step": 5741 }, { "epoch": 0.76, "learning_rate": 1.483736155850367e-05, "loss": 15.7335, "step": 5742 }, { "epoch": 0.76, "learning_rate": 1.4822210163772532e-05, "loss": 0.0002, "step": 5743 }, { "epoch": 0.76, "learning_rate": 1.4807065162725487e-05, "loss": 16.6165, "step": 5744 }, { "epoch": 0.76, "learning_rate": 1.4791926558115233e-05, "loss": 15.1555, "step": 5745 }, { "epoch": 0.76, "learning_rate": 1.4776794352693241e-05, "loss": 16.4595, "step": 5746 }, { "epoch": 0.76, "learning_rate": 1.476166854920984e-05, "loss": 16.0269, "step": 5747 }, { "epoch": 0.76, "learning_rate": 1.4746549150414196e-05, "loss": 0.0, "step": 5748 }, { "epoch": 0.76, "learning_rate": 1.4731436159054323e-05, "loss": 17.2668, "step": 5749 }, { "epoch": 0.76, "learning_rate": 1.4716329577877047e-05, "loss": 18.016, "step": 5750 }, { "epoch": 0.76, "learning_rate": 1.4701229409628042e-05, "loss": 17.6703, "step": 5751 }, { "epoch": 0.76, "learning_rate": 1.4686135657051808e-05, "loss": 17.1828, "step": 5752 }, { "epoch": 0.76, "learning_rate": 1.4671048322891706e-05, "loss": 16.9122, "step": 5753 }, { "epoch": 0.76, "learning_rate": 1.4655967409889909e-05, "loss": 0.0, "step": 5754 }, { "epoch": 0.76, "learning_rate": 1.4640892920787425e-05, "loss": 18.8787, "step": 5755 }, { "epoch": 0.76, "learning_rate": 1.4625824858324078e-05, "loss": 17.006, "step": 5756 }, { "epoch": 0.76, "learning_rate": 1.4610763225238544e-05, "loss": 17.307, "step": 5757 }, { "epoch": 0.76, "learning_rate": 1.4595708024268334e-05, "loss": 18.327, "step": 5758 }, { "epoch": 0.76, "learning_rate": 1.4580659258149759e-05, "loss": 16.5682, "step": 5759 }, { "epoch": 0.76, "learning_rate": 1.4565616929618026e-05, "loss": 18.0001, "step": 5760 }, { "epoch": 0.76, "learning_rate": 1.4550581041407101e-05, "loss": 0.0003, "step": 5761 }, { "epoch": 0.76, "learning_rate": 1.4535551596249825e-05, "loss": 17.3317, "step": 5762 }, { "epoch": 0.76, "learning_rate": 1.4520528596877835e-05, "loss": 16.9174, "step": 5763 }, { "epoch": 0.76, "learning_rate": 1.4505512046021619e-05, "loss": 0.0001, "step": 5764 }, { "epoch": 0.76, "learning_rate": 1.449050194641049e-05, "loss": 0.0011, "step": 5765 }, { "epoch": 0.76, "learning_rate": 1.4475498300772584e-05, "loss": 17.1765, "step": 5766 }, { "epoch": 0.76, "learning_rate": 1.4460501111834845e-05, "loss": 16.6422, "step": 5767 }, { "epoch": 0.76, "learning_rate": 1.4445510382323101e-05, "loss": 18.2752, "step": 5768 }, { "epoch": 0.76, "learning_rate": 1.4430526114961945e-05, "loss": 0.0001, "step": 5769 }, { "epoch": 0.76, "learning_rate": 1.441554831247483e-05, "loss": 0.0001, "step": 5770 }, { "epoch": 0.76, "learning_rate": 1.4400576977584013e-05, "loss": 0.0001, "step": 5771 }, { "epoch": 0.76, "learning_rate": 1.4385612113010588e-05, "loss": 0.0, "step": 5772 }, { "epoch": 0.76, "learning_rate": 1.4370653721474474e-05, "loss": 0.0, "step": 5773 }, { "epoch": 0.76, "learning_rate": 1.435570180569441e-05, "loss": 19.9776, "step": 5774 }, { "epoch": 0.76, "learning_rate": 1.4340756368387937e-05, "loss": 17.7991, "step": 5775 }, { "epoch": 0.76, "learning_rate": 1.4325817412271475e-05, "loss": 17.5166, "step": 5776 }, { "epoch": 0.76, "learning_rate": 1.4310884940060214e-05, "loss": 16.6007, "step": 5777 }, { "epoch": 0.76, "learning_rate": 1.4295958954468186e-05, "loss": 0.0, "step": 5778 }, { "epoch": 0.76, "learning_rate": 1.4281039458208234e-05, "loss": 15.6816, "step": 5779 }, { "epoch": 0.76, "learning_rate": 1.426612645399203e-05, "loss": 16.0939, "step": 5780 }, { "epoch": 0.76, "learning_rate": 1.4251219944530058e-05, "loss": 17.2412, "step": 5781 }, { "epoch": 0.76, "learning_rate": 1.4236319932531638e-05, "loss": 17.1838, "step": 5782 }, { "epoch": 0.76, "learning_rate": 1.4221426420704897e-05, "loss": 17.2664, "step": 5783 }, { "epoch": 0.76, "learning_rate": 1.4206539411756758e-05, "loss": 17.0486, "step": 5784 }, { "epoch": 0.76, "learning_rate": 1.4191658908393024e-05, "loss": 16.9127, "step": 5785 }, { "epoch": 0.76, "learning_rate": 1.4176784913318258e-05, "loss": 15.1614, "step": 5786 }, { "epoch": 0.76, "learning_rate": 1.4161917429235861e-05, "loss": 16.1608, "step": 5787 }, { "epoch": 0.76, "learning_rate": 1.4147056458848051e-05, "loss": 16.9219, "step": 5788 }, { "epoch": 0.76, "learning_rate": 1.413220200485586e-05, "loss": 0.0, "step": 5789 }, { "epoch": 0.76, "learning_rate": 1.4117354069959138e-05, "loss": 17.6046, "step": 5790 }, { "epoch": 0.76, "learning_rate": 1.4102512656856543e-05, "loss": 0.0, "step": 5791 }, { "epoch": 0.76, "learning_rate": 1.4087677768245538e-05, "loss": 16.1268, "step": 5792 }, { "epoch": 0.76, "learning_rate": 1.4072849406822453e-05, "loss": 17.548, "step": 5793 }, { "epoch": 0.76, "learning_rate": 1.4058027575282367e-05, "loss": 0.0001, "step": 5794 }, { "epoch": 0.76, "learning_rate": 1.4043212276319206e-05, "loss": 17.4094, "step": 5795 }, { "epoch": 0.76, "learning_rate": 1.4028403512625704e-05, "loss": 16.8994, "step": 5796 }, { "epoch": 0.76, "learning_rate": 1.4013601286893397e-05, "loss": 16.6494, "step": 5797 }, { "epoch": 0.76, "learning_rate": 1.3998805601812643e-05, "loss": 16.1796, "step": 5798 }, { "epoch": 0.76, "learning_rate": 1.3984016460072614e-05, "loss": 16.8013, "step": 5799 }, { "epoch": 0.76, "learning_rate": 1.3969233864361265e-05, "loss": 16.6251, "step": 5800 }, { "epoch": 0.76, "learning_rate": 1.3954457817365419e-05, "loss": 17.4662, "step": 5801 }, { "epoch": 0.76, "learning_rate": 1.393968832177065e-05, "loss": 16.5555, "step": 5802 }, { "epoch": 0.76, "learning_rate": 1.3924925380261377e-05, "loss": 17.9514, "step": 5803 }, { "epoch": 0.76, "learning_rate": 1.3910168995520806e-05, "loss": 16.5937, "step": 5804 }, { "epoch": 0.76, "learning_rate": 1.3895419170230967e-05, "loss": 16.6238, "step": 5805 }, { "epoch": 0.76, "learning_rate": 1.3880675907072683e-05, "loss": 16.2859, "step": 5806 }, { "epoch": 0.76, "learning_rate": 1.3865939208725599e-05, "loss": 17.96, "step": 5807 }, { "epoch": 0.76, "learning_rate": 1.385120907786816e-05, "loss": 0.0, "step": 5808 }, { "epoch": 0.76, "learning_rate": 1.38364855171776e-05, "loss": 16.9397, "step": 5809 }, { "epoch": 0.76, "learning_rate": 1.382176852933001e-05, "loss": 0.0001, "step": 5810 }, { "epoch": 0.76, "learning_rate": 1.3807058117000233e-05, "loss": 16.8417, "step": 5811 }, { "epoch": 0.77, "learning_rate": 1.3792354282861946e-05, "loss": 17.113, "step": 5812 }, { "epoch": 0.77, "learning_rate": 1.3777657029587609e-05, "loss": 17.0837, "step": 5813 }, { "epoch": 0.77, "learning_rate": 1.376296635984851e-05, "loss": 0.0001, "step": 5814 }, { "epoch": 0.77, "learning_rate": 1.3748282276314728e-05, "loss": 17.5594, "step": 5815 }, { "epoch": 0.77, "learning_rate": 1.3733604781655135e-05, "loss": 0.0001, "step": 5816 }, { "epoch": 0.77, "learning_rate": 1.371893387853741e-05, "loss": 19.4732, "step": 5817 }, { "epoch": 0.77, "learning_rate": 1.370426956962807e-05, "loss": 16.0206, "step": 5818 }, { "epoch": 0.77, "learning_rate": 1.3689611857592382e-05, "loss": 17.7655, "step": 5819 }, { "epoch": 0.77, "learning_rate": 1.3674960745094439e-05, "loss": 18.1314, "step": 5820 }, { "epoch": 0.77, "learning_rate": 1.3660316234797132e-05, "loss": 18.4411, "step": 5821 }, { "epoch": 0.77, "learning_rate": 1.3645678329362155e-05, "loss": 16.3895, "step": 5822 }, { "epoch": 0.77, "learning_rate": 1.363104703144999e-05, "loss": 16.86, "step": 5823 }, { "epoch": 0.77, "learning_rate": 1.3616422343719931e-05, "loss": 0.0, "step": 5824 }, { "epoch": 0.77, "learning_rate": 1.3601804268830048e-05, "loss": 16.118, "step": 5825 }, { "epoch": 0.77, "learning_rate": 1.3587192809437255e-05, "loss": 15.9741, "step": 5826 }, { "epoch": 0.77, "learning_rate": 1.357258796819722e-05, "loss": 16.1542, "step": 5827 }, { "epoch": 0.77, "learning_rate": 1.3557989747764428e-05, "loss": 0.0, "step": 5828 }, { "epoch": 0.77, "learning_rate": 1.3543398150792147e-05, "loss": 17.8445, "step": 5829 }, { "epoch": 0.77, "learning_rate": 1.3528813179932458e-05, "loss": 17.8816, "step": 5830 }, { "epoch": 0.77, "learning_rate": 1.3514234837836226e-05, "loss": 0.0001, "step": 5831 }, { "epoch": 0.77, "learning_rate": 1.3499663127153118e-05, "loss": 15.0357, "step": 5832 }, { "epoch": 0.77, "learning_rate": 1.3485098050531585e-05, "loss": 15.9256, "step": 5833 }, { "epoch": 0.77, "learning_rate": 1.3470539610618871e-05, "loss": 0.0004, "step": 5834 }, { "epoch": 0.77, "learning_rate": 1.3455987810061054e-05, "loss": 16.5113, "step": 5835 }, { "epoch": 0.77, "learning_rate": 1.344144265150295e-05, "loss": 15.7971, "step": 5836 }, { "epoch": 0.77, "learning_rate": 1.3426904137588203e-05, "loss": 16.4692, "step": 5837 }, { "epoch": 0.77, "learning_rate": 1.3412372270959228e-05, "loss": 0.0, "step": 5838 }, { "epoch": 0.77, "learning_rate": 1.3397847054257245e-05, "loss": 16.2682, "step": 5839 }, { "epoch": 0.77, "learning_rate": 1.3383328490122265e-05, "loss": 16.1479, "step": 5840 }, { "epoch": 0.77, "learning_rate": 1.3368816581193083e-05, "loss": 16.4916, "step": 5841 }, { "epoch": 0.77, "learning_rate": 1.3354311330107278e-05, "loss": 14.4254, "step": 5842 }, { "epoch": 0.77, "learning_rate": 1.3339812739501256e-05, "loss": 16.6557, "step": 5843 }, { "epoch": 0.77, "learning_rate": 1.3325320812010167e-05, "loss": 0.0004, "step": 5844 }, { "epoch": 0.77, "learning_rate": 1.3310835550267975e-05, "loss": 18.4601, "step": 5845 }, { "epoch": 0.77, "learning_rate": 1.3296356956907418e-05, "loss": 0.0001, "step": 5846 }, { "epoch": 0.77, "learning_rate": 1.3281885034560043e-05, "loss": 17.8729, "step": 5847 }, { "epoch": 0.77, "learning_rate": 1.3267419785856162e-05, "loss": 18.4152, "step": 5848 }, { "epoch": 0.77, "learning_rate": 1.325296121342488e-05, "loss": 16.2282, "step": 5849 }, { "epoch": 0.77, "learning_rate": 1.3238509319894088e-05, "loss": 16.9433, "step": 5850 }, { "epoch": 0.77, "learning_rate": 1.3224064107890489e-05, "loss": 15.856, "step": 5851 }, { "epoch": 0.77, "learning_rate": 1.3209625580039543e-05, "loss": 17.1508, "step": 5852 }, { "epoch": 0.77, "learning_rate": 1.3195193738965488e-05, "loss": 16.0444, "step": 5853 }, { "epoch": 0.77, "learning_rate": 1.3180768587291375e-05, "loss": 18.2165, "step": 5854 }, { "epoch": 0.77, "learning_rate": 1.3166350127639022e-05, "loss": 0.0, "step": 5855 }, { "epoch": 0.77, "learning_rate": 1.3151938362629029e-05, "loss": 16.7622, "step": 5856 }, { "epoch": 0.77, "learning_rate": 1.3137533294880788e-05, "loss": 17.4286, "step": 5857 }, { "epoch": 0.77, "learning_rate": 1.3123134927012464e-05, "loss": 16.3539, "step": 5858 }, { "epoch": 0.77, "learning_rate": 1.3108743261641e-05, "loss": 15.3213, "step": 5859 }, { "epoch": 0.77, "learning_rate": 1.3094358301382154e-05, "loss": 14.7203, "step": 5860 }, { "epoch": 0.77, "learning_rate": 1.3079980048850437e-05, "loss": 15.3142, "step": 5861 }, { "epoch": 0.77, "learning_rate": 1.3065608506659138e-05, "loss": 14.9945, "step": 5862 }, { "epoch": 0.77, "learning_rate": 1.3051243677420338e-05, "loss": 15.3906, "step": 5863 }, { "epoch": 0.77, "learning_rate": 1.3036885563744893e-05, "loss": 17.2465, "step": 5864 }, { "epoch": 0.77, "learning_rate": 1.302253416824244e-05, "loss": 17.2467, "step": 5865 }, { "epoch": 0.77, "learning_rate": 1.3008189493521394e-05, "loss": 0.0, "step": 5866 }, { "epoch": 0.77, "learning_rate": 1.2993851542188929e-05, "loss": 18.238, "step": 5867 }, { "epoch": 0.77, "learning_rate": 1.2979520316851058e-05, "loss": 0.0, "step": 5868 }, { "epoch": 0.77, "learning_rate": 1.2965195820112503e-05, "loss": 16.9718, "step": 5869 }, { "epoch": 0.77, "learning_rate": 1.2950878054576797e-05, "loss": 17.039, "step": 5870 }, { "epoch": 0.77, "learning_rate": 1.2936567022846246e-05, "loss": 16.5422, "step": 5871 }, { "epoch": 0.77, "learning_rate": 1.2922262727521922e-05, "loss": 16.7741, "step": 5872 }, { "epoch": 0.77, "learning_rate": 1.2907965171203685e-05, "loss": 17.9701, "step": 5873 }, { "epoch": 0.77, "learning_rate": 1.2893674356490166e-05, "loss": 16.616, "step": 5874 }, { "epoch": 0.77, "learning_rate": 1.2879390285978743e-05, "loss": 16.1925, "step": 5875 }, { "epoch": 0.77, "learning_rate": 1.2865112962265641e-05, "loss": 17.2078, "step": 5876 }, { "epoch": 0.77, "learning_rate": 1.2850842387945788e-05, "loss": 17.5399, "step": 5877 }, { "epoch": 0.77, "learning_rate": 1.2836578565612917e-05, "loss": 17.5584, "step": 5878 }, { "epoch": 0.77, "learning_rate": 1.2822321497859519e-05, "loss": 16.8854, "step": 5879 }, { "epoch": 0.77, "learning_rate": 1.2808071187276866e-05, "loss": 0.0001, "step": 5880 }, { "epoch": 0.77, "learning_rate": 1.2793827636455003e-05, "loss": 0.0, "step": 5881 }, { "epoch": 0.77, "learning_rate": 1.2779590847982742e-05, "loss": 17.5675, "step": 5882 }, { "epoch": 0.77, "learning_rate": 1.2765360824447675e-05, "loss": 15.904, "step": 5883 }, { "epoch": 0.77, "learning_rate": 1.2751137568436139e-05, "loss": 17.2377, "step": 5884 }, { "epoch": 0.77, "learning_rate": 1.2736921082533287e-05, "loss": 0.0001, "step": 5885 }, { "epoch": 0.77, "learning_rate": 1.2722711369322998e-05, "loss": 16.8527, "step": 5886 }, { "epoch": 0.77, "learning_rate": 1.2708508431387945e-05, "loss": 0.0002, "step": 5887 }, { "epoch": 0.78, "learning_rate": 1.269431227130955e-05, "loss": 16.2736, "step": 5888 }, { "epoch": 0.78, "learning_rate": 1.2680122891668017e-05, "loss": 17.7184, "step": 5889 }, { "epoch": 0.78, "learning_rate": 1.2665940295042317e-05, "loss": 17.1931, "step": 5890 }, { "epoch": 0.78, "learning_rate": 1.2651764484010187e-05, "loss": 18.1039, "step": 5891 }, { "epoch": 0.78, "learning_rate": 1.2637595461148127e-05, "loss": 16.2872, "step": 5892 }, { "epoch": 0.78, "learning_rate": 1.2623433229031406e-05, "loss": 14.9511, "step": 5893 }, { "epoch": 0.78, "learning_rate": 1.2609277790234059e-05, "loss": 17.9568, "step": 5894 }, { "epoch": 0.78, "learning_rate": 1.2595129147328877e-05, "loss": 16.6327, "step": 5895 }, { "epoch": 0.78, "learning_rate": 1.2580987302887437e-05, "loss": 16.8114, "step": 5896 }, { "epoch": 0.78, "learning_rate": 1.2566852259480067e-05, "loss": 16.4578, "step": 5897 }, { "epoch": 0.78, "learning_rate": 1.2552724019675838e-05, "loss": 16.5323, "step": 5898 }, { "epoch": 0.78, "learning_rate": 1.2538602586042636e-05, "loss": 16.1281, "step": 5899 }, { "epoch": 0.78, "learning_rate": 1.2524487961147069e-05, "loss": 16.8657, "step": 5900 }, { "epoch": 0.78, "learning_rate": 1.2510380147554512e-05, "loss": 0.0, "step": 5901 }, { "epoch": 0.78, "learning_rate": 1.2496279147829122e-05, "loss": 17.2402, "step": 5902 }, { "epoch": 0.78, "learning_rate": 1.2482184964533795e-05, "loss": 17.9794, "step": 5903 }, { "epoch": 0.78, "learning_rate": 1.2468097600230195e-05, "loss": 16.0351, "step": 5904 }, { "epoch": 0.78, "learning_rate": 1.245401705747875e-05, "loss": 16.8237, "step": 5905 }, { "epoch": 0.78, "learning_rate": 1.243994333883865e-05, "loss": 16.324, "step": 5906 }, { "epoch": 0.78, "learning_rate": 1.2425876446867829e-05, "loss": 17.405, "step": 5907 }, { "epoch": 0.78, "learning_rate": 1.2411816384123015e-05, "loss": 16.6018, "step": 5908 }, { "epoch": 0.78, "learning_rate": 1.239776315315967e-05, "loss": 17.5501, "step": 5909 }, { "epoch": 0.78, "learning_rate": 1.2383716756532005e-05, "loss": 17.1229, "step": 5910 }, { "epoch": 0.78, "learning_rate": 1.2369677196793001e-05, "loss": 18.3275, "step": 5911 }, { "epoch": 0.78, "learning_rate": 1.2355644476494405e-05, "loss": 16.4693, "step": 5912 }, { "epoch": 0.78, "learning_rate": 1.2341618598186705e-05, "loss": 17.1214, "step": 5913 }, { "epoch": 0.78, "learning_rate": 1.2327599564419157e-05, "loss": 15.2917, "step": 5914 }, { "epoch": 0.78, "learning_rate": 1.231358737773975e-05, "loss": 0.0, "step": 5915 }, { "epoch": 0.78, "learning_rate": 1.2299582040695279e-05, "loss": 0.0015, "step": 5916 }, { "epoch": 0.78, "learning_rate": 1.2285583555831243e-05, "loss": 15.9946, "step": 5917 }, { "epoch": 0.78, "learning_rate": 1.2271591925691922e-05, "loss": 0.0001, "step": 5918 }, { "epoch": 0.78, "learning_rate": 1.2257607152820338e-05, "loss": 15.4764, "step": 5919 }, { "epoch": 0.78, "learning_rate": 1.2243629239758275e-05, "loss": 0.0001, "step": 5920 }, { "epoch": 0.78, "learning_rate": 1.222965818904626e-05, "loss": 17.3496, "step": 5921 }, { "epoch": 0.78, "learning_rate": 1.2215694003223588e-05, "loss": 17.3508, "step": 5922 }, { "epoch": 0.78, "learning_rate": 1.220173668482828e-05, "loss": 0.0, "step": 5923 }, { "epoch": 0.78, "learning_rate": 1.2187786236397153e-05, "loss": 0.0, "step": 5924 }, { "epoch": 0.78, "learning_rate": 1.2173842660465735e-05, "loss": 0.0, "step": 5925 }, { "epoch": 0.78, "learning_rate": 1.2159905959568319e-05, "loss": 17.3717, "step": 5926 }, { "epoch": 0.78, "learning_rate": 1.2145976136237946e-05, "loss": 17.2769, "step": 5927 }, { "epoch": 0.78, "learning_rate": 1.2132053193006422e-05, "loss": 16.8019, "step": 5928 }, { "epoch": 0.78, "learning_rate": 1.2118137132404273e-05, "loss": 17.1719, "step": 5929 }, { "epoch": 0.78, "learning_rate": 1.2104227956960796e-05, "loss": 17.6442, "step": 5930 }, { "epoch": 0.78, "learning_rate": 1.2090325669204038e-05, "loss": 17.5855, "step": 5931 }, { "epoch": 0.78, "learning_rate": 1.2076430271660767e-05, "loss": 17.3153, "step": 5932 }, { "epoch": 0.78, "learning_rate": 1.2062541766856545e-05, "loss": 18.4693, "step": 5933 }, { "epoch": 0.78, "learning_rate": 1.2048660157315649e-05, "loss": 17.4391, "step": 5934 }, { "epoch": 0.78, "learning_rate": 1.2034785445561108e-05, "loss": 15.737, "step": 5935 }, { "epoch": 0.78, "learning_rate": 1.2020917634114692e-05, "loss": 16.7766, "step": 5936 }, { "epoch": 0.78, "learning_rate": 1.2007056725496935e-05, "loss": 0.0001, "step": 5937 }, { "epoch": 0.78, "learning_rate": 1.1993202722227087e-05, "loss": 17.6063, "step": 5938 }, { "epoch": 0.78, "learning_rate": 1.197935562682318e-05, "loss": 17.8092, "step": 5939 }, { "epoch": 0.78, "learning_rate": 1.1965515441801945e-05, "loss": 0.0002, "step": 5940 }, { "epoch": 0.78, "learning_rate": 1.1951682169678912e-05, "loss": 18.1121, "step": 5941 }, { "epoch": 0.78, "learning_rate": 1.1937855812968313e-05, "loss": 15.797, "step": 5942 }, { "epoch": 0.78, "learning_rate": 1.192403637418314e-05, "loss": 17.1793, "step": 5943 }, { "epoch": 0.78, "learning_rate": 1.1910223855835118e-05, "loss": 15.6376, "step": 5944 }, { "epoch": 0.78, "learning_rate": 1.1896418260434723e-05, "loss": 0.0001, "step": 5945 }, { "epoch": 0.78, "learning_rate": 1.1882619590491168e-05, "loss": 0.0, "step": 5946 }, { "epoch": 0.78, "learning_rate": 1.186882784851241e-05, "loss": 0.0001, "step": 5947 }, { "epoch": 0.78, "learning_rate": 1.1855043037005131e-05, "loss": 0.0003, "step": 5948 }, { "epoch": 0.78, "learning_rate": 1.184126515847479e-05, "loss": 16.399, "step": 5949 }, { "epoch": 0.78, "learning_rate": 1.1827494215425561e-05, "loss": 0.0, "step": 5950 }, { "epoch": 0.78, "learning_rate": 1.1813730210360352e-05, "loss": 16.0218, "step": 5951 }, { "epoch": 0.78, "learning_rate": 1.1799973145780818e-05, "loss": 0.0, "step": 5952 }, { "epoch": 0.78, "learning_rate": 1.178622302418736e-05, "loss": 0.0002, "step": 5953 }, { "epoch": 0.78, "learning_rate": 1.1772479848079105e-05, "loss": 17.4774, "step": 5954 }, { "epoch": 0.78, "learning_rate": 1.1758743619953916e-05, "loss": 0.0008, "step": 5955 }, { "epoch": 0.78, "learning_rate": 1.1745014342308408e-05, "loss": 0.0, "step": 5956 }, { "epoch": 0.78, "learning_rate": 1.1731292017637913e-05, "loss": 0.0003, "step": 5957 }, { "epoch": 0.78, "learning_rate": 1.1717576648436529e-05, "loss": 17.4758, "step": 5958 }, { "epoch": 0.78, "learning_rate": 1.1703868237197063e-05, "loss": 0.0001, "step": 5959 }, { "epoch": 0.78, "learning_rate": 1.1690166786411066e-05, "loss": 16.4266, "step": 5960 }, { "epoch": 0.78, "learning_rate": 1.1676472298568825e-05, "loss": 0.0, "step": 5961 }, { "epoch": 0.78, "learning_rate": 1.1662784776159358e-05, "loss": 16.2357, "step": 5962 }, { "epoch": 0.78, "learning_rate": 1.164910422167042e-05, "loss": 0.0, "step": 5963 }, { "epoch": 0.79, "learning_rate": 1.1635430637588495e-05, "loss": 17.6334, "step": 5964 }, { "epoch": 0.79, "learning_rate": 1.1621764026398796e-05, "loss": 17.4359, "step": 5965 }, { "epoch": 0.79, "learning_rate": 1.16081043905853e-05, "loss": 18.9527, "step": 5966 }, { "epoch": 0.79, "learning_rate": 1.1594451732630678e-05, "loss": 16.6911, "step": 5967 }, { "epoch": 0.79, "learning_rate": 1.1580806055016353e-05, "loss": 17.2705, "step": 5968 }, { "epoch": 0.79, "learning_rate": 1.156716736022247e-05, "loss": 17.883, "step": 5969 }, { "epoch": 0.79, "learning_rate": 1.1553535650727903e-05, "loss": 15.934, "step": 5970 }, { "epoch": 0.79, "learning_rate": 1.1539910929010278e-05, "loss": 16.7785, "step": 5971 }, { "epoch": 0.79, "learning_rate": 1.1526293197545918e-05, "loss": 16.2939, "step": 5972 }, { "epoch": 0.79, "learning_rate": 1.1512682458809887e-05, "loss": 0.0005, "step": 5973 }, { "epoch": 0.79, "learning_rate": 1.1499078715276013e-05, "loss": 0.0, "step": 5974 }, { "epoch": 0.79, "learning_rate": 1.1485481969416806e-05, "loss": 15.4481, "step": 5975 }, { "epoch": 0.79, "learning_rate": 1.1471892223703518e-05, "loss": 17.2671, "step": 5976 }, { "epoch": 0.79, "learning_rate": 1.1458309480606138e-05, "loss": 17.8365, "step": 5977 }, { "epoch": 0.79, "learning_rate": 1.1444733742593373e-05, "loss": 17.2475, "step": 5978 }, { "epoch": 0.79, "learning_rate": 1.143116501213266e-05, "loss": 17.3787, "step": 5979 }, { "epoch": 0.79, "learning_rate": 1.1417603291690165e-05, "loss": 0.0, "step": 5980 }, { "epoch": 0.79, "learning_rate": 1.140404858373077e-05, "loss": 16.0856, "step": 5981 }, { "epoch": 0.79, "learning_rate": 1.1390500890718086e-05, "loss": 17.4845, "step": 5982 }, { "epoch": 0.79, "learning_rate": 1.1376960215114475e-05, "loss": 0.0002, "step": 5983 }, { "epoch": 0.79, "learning_rate": 1.1363426559380985e-05, "loss": 0.0, "step": 5984 }, { "epoch": 0.79, "learning_rate": 1.1349899925977414e-05, "loss": 0.0002, "step": 5985 }, { "epoch": 0.79, "learning_rate": 1.133638031736226e-05, "loss": 17.3941, "step": 5986 }, { "epoch": 0.79, "learning_rate": 1.132286773599277e-05, "loss": 0.0002, "step": 5987 }, { "epoch": 0.79, "learning_rate": 1.1309362184324896e-05, "loss": 16.4704, "step": 5988 }, { "epoch": 0.79, "learning_rate": 1.1295863664813323e-05, "loss": 19.451, "step": 5989 }, { "epoch": 0.79, "learning_rate": 1.1282372179911432e-05, "loss": 16.4881, "step": 5990 }, { "epoch": 0.79, "learning_rate": 1.1268887732071382e-05, "loss": 17.1335, "step": 5991 }, { "epoch": 0.79, "learning_rate": 1.1255410323743998e-05, "loss": 0.0, "step": 5992 }, { "epoch": 0.79, "learning_rate": 1.1241939957378855e-05, "loss": 0.0, "step": 5993 }, { "epoch": 0.79, "learning_rate": 1.1228476635424224e-05, "loss": 16.9685, "step": 5994 }, { "epoch": 0.79, "learning_rate": 1.1215020360327123e-05, "loss": 15.7636, "step": 5995 }, { "epoch": 0.79, "learning_rate": 1.1201571134533268e-05, "loss": 16.672, "step": 5996 }, { "epoch": 0.79, "learning_rate": 1.1188128960487104e-05, "loss": 16.668, "step": 5997 }, { "epoch": 0.79, "learning_rate": 1.1174693840631778e-05, "loss": 16.2486, "step": 5998 }, { "epoch": 0.79, "learning_rate": 1.1161265777409202e-05, "loss": 0.0001, "step": 5999 }, { "epoch": 0.79, "learning_rate": 1.1147844773259953e-05, "loss": 16.158, "step": 6000 }, { "epoch": 0.79, "learning_rate": 1.1134430830623344e-05, "loss": 15.7076, "step": 6001 }, { "epoch": 0.79, "learning_rate": 1.1121023951937409e-05, "loss": 17.5524, "step": 6002 }, { "epoch": 0.79, "learning_rate": 1.1107624139638895e-05, "loss": 17.2504, "step": 6003 }, { "epoch": 0.79, "learning_rate": 1.1094231396163263e-05, "loss": 17.2427, "step": 6004 }, { "epoch": 0.79, "learning_rate": 1.1080845723944683e-05, "loss": 16.5158, "step": 6005 }, { "epoch": 0.79, "learning_rate": 1.1067467125416059e-05, "loss": 16.7463, "step": 6006 }, { "epoch": 0.79, "learning_rate": 1.1054095603008973e-05, "loss": 0.0, "step": 6007 }, { "epoch": 0.79, "learning_rate": 1.1040731159153778e-05, "loss": 17.4927, "step": 6008 }, { "epoch": 0.79, "learning_rate": 1.1027373796279489e-05, "loss": 16.9721, "step": 6009 }, { "epoch": 0.79, "learning_rate": 1.1014023516813859e-05, "loss": 17.1543, "step": 6010 }, { "epoch": 0.79, "learning_rate": 1.1000680323183349e-05, "loss": 16.0637, "step": 6011 }, { "epoch": 0.79, "learning_rate": 1.0987344217813117e-05, "loss": 0.0001, "step": 6012 }, { "epoch": 0.79, "learning_rate": 1.0974015203127064e-05, "loss": 17.5357, "step": 6013 }, { "epoch": 0.79, "learning_rate": 1.0960693281547767e-05, "loss": 18.5605, "step": 6014 }, { "epoch": 0.79, "learning_rate": 1.0947378455496527e-05, "loss": 0.0, "step": 6015 }, { "epoch": 0.79, "learning_rate": 1.0934070727393386e-05, "loss": 16.4328, "step": 6016 }, { "epoch": 0.79, "learning_rate": 1.0920770099657052e-05, "loss": 17.2542, "step": 6017 }, { "epoch": 0.79, "learning_rate": 1.0907476574704966e-05, "loss": 16.1493, "step": 6018 }, { "epoch": 0.79, "learning_rate": 1.0894190154953266e-05, "loss": 17.9861, "step": 6019 }, { "epoch": 0.79, "learning_rate": 1.0880910842816805e-05, "loss": 16.3503, "step": 6020 }, { "epoch": 0.79, "learning_rate": 1.086763864070915e-05, "loss": 0.0003, "step": 6021 }, { "epoch": 0.79, "learning_rate": 1.0854373551042558e-05, "loss": 0.0, "step": 6022 }, { "epoch": 0.79, "learning_rate": 1.0841115576228001e-05, "loss": 0.0002, "step": 6023 }, { "epoch": 0.79, "learning_rate": 1.0827864718675179e-05, "loss": 0.0, "step": 6024 }, { "epoch": 0.79, "learning_rate": 1.0814620980792478e-05, "loss": 16.7123, "step": 6025 }, { "epoch": 0.79, "learning_rate": 1.0801384364986989e-05, "loss": 16.7744, "step": 6026 }, { "epoch": 0.79, "learning_rate": 1.0788154873664514e-05, "loss": 0.0006, "step": 6027 }, { "epoch": 0.79, "learning_rate": 1.0774932509229562e-05, "loss": 17.2655, "step": 6028 }, { "epoch": 0.79, "learning_rate": 1.076171727408533e-05, "loss": 17.5067, "step": 6029 }, { "epoch": 0.79, "learning_rate": 1.0748509170633725e-05, "loss": 0.0, "step": 6030 }, { "epoch": 0.79, "learning_rate": 1.073530820127539e-05, "loss": 16.4731, "step": 6031 }, { "epoch": 0.79, "learning_rate": 1.0722114368409635e-05, "loss": 16.707, "step": 6032 }, { "epoch": 0.79, "learning_rate": 1.0708927674434488e-05, "loss": 16.1913, "step": 6033 }, { "epoch": 0.79, "learning_rate": 1.0695748121746674e-05, "loss": 16.8729, "step": 6034 }, { "epoch": 0.79, "learning_rate": 1.0682575712741621e-05, "loss": 17.0672, "step": 6035 }, { "epoch": 0.79, "learning_rate": 1.066941044981346e-05, "loss": 17.4699, "step": 6036 }, { "epoch": 0.79, "learning_rate": 1.0656252335355022e-05, "loss": 18.4318, "step": 6037 }, { "epoch": 0.79, "learning_rate": 1.064310137175783e-05, "loss": 15.6075, "step": 6038 }, { "epoch": 0.79, "learning_rate": 1.0629957561412135e-05, "loss": 16.1144, "step": 6039 }, { "epoch": 0.8, "learning_rate": 1.0616820906706865e-05, "loss": 17.2378, "step": 6040 }, { "epoch": 0.8, "learning_rate": 1.0603691410029648e-05, "loss": 16.9287, "step": 6041 }, { "epoch": 0.8, "learning_rate": 1.0590569073766815e-05, "loss": 17.5608, "step": 6042 }, { "epoch": 0.8, "learning_rate": 1.0577453900303403e-05, "loss": 19.1717, "step": 6043 }, { "epoch": 0.8, "learning_rate": 1.0564345892023126e-05, "loss": 0.0, "step": 6044 }, { "epoch": 0.8, "learning_rate": 1.0551245051308417e-05, "loss": 18.4613, "step": 6045 }, { "epoch": 0.8, "learning_rate": 1.0538151380540402e-05, "loss": 14.7521, "step": 6046 }, { "epoch": 0.8, "learning_rate": 1.0525064882098883e-05, "loss": 14.995, "step": 6047 }, { "epoch": 0.8, "learning_rate": 1.0511985558362402e-05, "loss": 0.0, "step": 6048 }, { "epoch": 0.8, "learning_rate": 1.0498913411708166e-05, "loss": 18.0823, "step": 6049 }, { "epoch": 0.8, "learning_rate": 1.0485848444512069e-05, "loss": 0.0, "step": 6050 }, { "epoch": 0.8, "learning_rate": 1.0472790659148724e-05, "loss": 16.0247, "step": 6051 }, { "epoch": 0.8, "learning_rate": 1.0459740057991423e-05, "loss": 19.0311, "step": 6052 }, { "epoch": 0.8, "learning_rate": 1.0446696643412163e-05, "loss": 16.2693, "step": 6053 }, { "epoch": 0.8, "learning_rate": 1.043366041778162e-05, "loss": 18.3563, "step": 6054 }, { "epoch": 0.8, "learning_rate": 1.042063138346917e-05, "loss": 16.5843, "step": 6055 }, { "epoch": 0.8, "learning_rate": 1.0407609542842906e-05, "loss": 0.0, "step": 6056 }, { "epoch": 0.8, "learning_rate": 1.0394594898269578e-05, "loss": 17.487, "step": 6057 }, { "epoch": 0.8, "learning_rate": 1.0381587452114644e-05, "loss": 0.0002, "step": 6058 }, { "epoch": 0.8, "learning_rate": 1.0368587206742252e-05, "loss": 17.2702, "step": 6059 }, { "epoch": 0.8, "learning_rate": 1.0355594164515242e-05, "loss": 16.5053, "step": 6060 }, { "epoch": 0.8, "learning_rate": 1.0342608327795144e-05, "loss": 15.6576, "step": 6061 }, { "epoch": 0.8, "learning_rate": 1.0329629698942179e-05, "loss": 17.9795, "step": 6062 }, { "epoch": 0.8, "learning_rate": 1.0316658280315245e-05, "loss": 0.0, "step": 6063 }, { "epoch": 0.8, "learning_rate": 1.0303694074271958e-05, "loss": 18.7537, "step": 6064 }, { "epoch": 0.8, "learning_rate": 1.0290737083168606e-05, "loss": 0.0001, "step": 6065 }, { "epoch": 0.8, "learning_rate": 1.0277787309360166e-05, "loss": 17.8454, "step": 6066 }, { "epoch": 0.8, "learning_rate": 1.0264844755200297e-05, "loss": 0.0005, "step": 6067 }, { "epoch": 0.8, "learning_rate": 1.0251909423041356e-05, "loss": 17.5075, "step": 6068 }, { "epoch": 0.8, "learning_rate": 1.0238981315234387e-05, "loss": 17.3735, "step": 6069 }, { "epoch": 0.8, "learning_rate": 1.0226060434129115e-05, "loss": 15.2459, "step": 6070 }, { "epoch": 0.8, "learning_rate": 1.0213146782073951e-05, "loss": 15.7159, "step": 6071 }, { "epoch": 0.8, "learning_rate": 1.0200240361415986e-05, "loss": 17.8754, "step": 6072 }, { "epoch": 0.8, "learning_rate": 1.0187341174501036e-05, "loss": 16.6092, "step": 6073 }, { "epoch": 0.8, "learning_rate": 1.0174449223673554e-05, "loss": 16.7046, "step": 6074 }, { "epoch": 0.8, "learning_rate": 1.01615645112767e-05, "loss": 16.8055, "step": 6075 }, { "epoch": 0.8, "learning_rate": 1.0148687039652315e-05, "loss": 0.0, "step": 6076 }, { "epoch": 0.8, "learning_rate": 1.013581681114092e-05, "loss": 17.1044, "step": 6077 }, { "epoch": 0.8, "learning_rate": 1.0122953828081721e-05, "loss": 0.0012, "step": 6078 }, { "epoch": 0.8, "learning_rate": 1.011009809281262e-05, "loss": 18.035, "step": 6079 }, { "epoch": 0.8, "learning_rate": 1.0097249607670167e-05, "loss": 18.6431, "step": 6080 }, { "epoch": 0.8, "learning_rate": 1.0084408374989651e-05, "loss": 16.7071, "step": 6081 }, { "epoch": 0.8, "learning_rate": 1.0071574397104994e-05, "loss": 0.0, "step": 6082 }, { "epoch": 0.8, "learning_rate": 1.0058747676348818e-05, "loss": 16.6474, "step": 6083 }, { "epoch": 0.8, "learning_rate": 1.0045928215052419e-05, "loss": 17.309, "step": 6084 }, { "epoch": 0.8, "learning_rate": 1.003311601554578e-05, "loss": 18.2739, "step": 6085 }, { "epoch": 0.8, "learning_rate": 1.0020311080157568e-05, "loss": 18.8952, "step": 6086 }, { "epoch": 0.8, "learning_rate": 1.0007513411215118e-05, "loss": 16.5712, "step": 6087 }, { "epoch": 0.8, "learning_rate": 9.994723011044438e-06, "loss": 17.3033, "step": 6088 }, { "epoch": 0.8, "learning_rate": 9.981939881970254e-06, "loss": 0.0002, "step": 6089 }, { "epoch": 0.8, "learning_rate": 9.96916402631593e-06, "loss": 16.2395, "step": 6090 }, { "epoch": 0.8, "learning_rate": 9.956395446403522e-06, "loss": 0.0, "step": 6091 }, { "epoch": 0.8, "learning_rate": 9.943634144553766e-06, "loss": 0.0, "step": 6092 }, { "epoch": 0.8, "learning_rate": 9.930880123086066e-06, "loss": 0.0, "step": 6093 }, { "epoch": 0.8, "learning_rate": 9.918133384318512e-06, "loss": 0.0001, "step": 6094 }, { "epoch": 0.8, "learning_rate": 9.905393930567874e-06, "loss": 0.0, "step": 6095 }, { "epoch": 0.8, "learning_rate": 9.892661764149585e-06, "loss": 15.7399, "step": 6096 }, { "epoch": 0.8, "learning_rate": 9.879936887377745e-06, "loss": 0.0, "step": 6097 }, { "epoch": 0.8, "learning_rate": 9.867219302565173e-06, "loss": 16.6315, "step": 6098 }, { "epoch": 0.8, "learning_rate": 9.854509012023323e-06, "loss": 18.139, "step": 6099 }, { "epoch": 0.8, "learning_rate": 9.841806018062328e-06, "loss": 15.2942, "step": 6100 }, { "epoch": 0.8, "learning_rate": 9.829110322991008e-06, "loss": 0.0, "step": 6101 }, { "epoch": 0.8, "learning_rate": 9.816421929116842e-06, "loss": 17.1277, "step": 6102 }, { "epoch": 0.8, "learning_rate": 9.803740838746e-06, "loss": 16.5792, "step": 6103 }, { "epoch": 0.8, "learning_rate": 9.7910670541833e-06, "loss": 16.7406, "step": 6104 }, { "epoch": 0.8, "learning_rate": 9.778400577732238e-06, "loss": 16.0, "step": 6105 }, { "epoch": 0.8, "learning_rate": 9.765741411695023e-06, "loss": 0.0001, "step": 6106 }, { "epoch": 0.8, "learning_rate": 9.75308955837248e-06, "loss": 16.0915, "step": 6107 }, { "epoch": 0.8, "learning_rate": 9.740445020064132e-06, "loss": 18.2391, "step": 6108 }, { "epoch": 0.8, "learning_rate": 9.727807799068161e-06, "loss": 16.8439, "step": 6109 }, { "epoch": 0.8, "learning_rate": 9.715177897681432e-06, "loss": 17.2782, "step": 6110 }, { "epoch": 0.8, "learning_rate": 9.70255531819947e-06, "loss": 16.2501, "step": 6111 }, { "epoch": 0.8, "learning_rate": 9.68994006291647e-06, "loss": 17.0378, "step": 6112 }, { "epoch": 0.8, "learning_rate": 9.67733213412529e-06, "loss": 18.9716, "step": 6113 }, { "epoch": 0.8, "learning_rate": 9.664731534117488e-06, "loss": 0.0, "step": 6114 }, { "epoch": 0.8, "learning_rate": 9.652138265183247e-06, "loss": 0.0001, "step": 6115 }, { "epoch": 0.81, "learning_rate": 9.63955232961145e-06, "loss": 15.9899, "step": 6116 }, { "epoch": 0.81, "learning_rate": 9.626973729689625e-06, "loss": 0.0002, "step": 6117 }, { "epoch": 0.81, "learning_rate": 9.614402467703976e-06, "loss": 17.0418, "step": 6118 }, { "epoch": 0.81, "learning_rate": 9.60183854593938e-06, "loss": 17.6135, "step": 6119 }, { "epoch": 0.81, "learning_rate": 9.58928196667936e-06, "loss": 17.8216, "step": 6120 }, { "epoch": 0.81, "learning_rate": 9.576732732206129e-06, "loss": 17.1231, "step": 6121 }, { "epoch": 0.81, "learning_rate": 9.564190844800536e-06, "loss": 0.0, "step": 6122 }, { "epoch": 0.81, "learning_rate": 9.55165630674214e-06, "loss": 0.0002, "step": 6123 }, { "epoch": 0.81, "learning_rate": 9.53912912030912e-06, "loss": 0.0, "step": 6124 }, { "epoch": 0.81, "learning_rate": 9.526609287778337e-06, "loss": 0.0001, "step": 6125 }, { "epoch": 0.81, "learning_rate": 9.514096811425315e-06, "loss": 0.0, "step": 6126 }, { "epoch": 0.81, "learning_rate": 9.501591693524237e-06, "loss": 15.8878, "step": 6127 }, { "epoch": 0.81, "learning_rate": 9.489093936347953e-06, "loss": 0.0, "step": 6128 }, { "epoch": 0.81, "learning_rate": 9.476603542167973e-06, "loss": 18.1988, "step": 6129 }, { "epoch": 0.81, "learning_rate": 9.46412051325446e-06, "loss": 16.4434, "step": 6130 }, { "epoch": 0.81, "learning_rate": 9.451644851876263e-06, "loss": 0.0, "step": 6131 }, { "epoch": 0.81, "learning_rate": 9.439176560300872e-06, "loss": 16.2055, "step": 6132 }, { "epoch": 0.81, "learning_rate": 9.426715640794436e-06, "loss": 17.6581, "step": 6133 }, { "epoch": 0.81, "learning_rate": 9.414262095621773e-06, "loss": 17.5359, "step": 6134 }, { "epoch": 0.81, "learning_rate": 9.40181592704636e-06, "loss": 0.0001, "step": 6135 }, { "epoch": 0.81, "learning_rate": 9.38937713733033e-06, "loss": 15.5981, "step": 6136 }, { "epoch": 0.81, "learning_rate": 9.37694572873447e-06, "loss": 0.0004, "step": 6137 }, { "epoch": 0.81, "learning_rate": 9.364521703518226e-06, "loss": 15.8637, "step": 6138 }, { "epoch": 0.81, "learning_rate": 9.35210506393973e-06, "loss": 15.0889, "step": 6139 }, { "epoch": 0.81, "learning_rate": 9.339695812255734e-06, "loss": 17.5899, "step": 6140 }, { "epoch": 0.81, "learning_rate": 9.327293950721666e-06, "loss": 0.0, "step": 6141 }, { "epoch": 0.81, "learning_rate": 9.314899481591599e-06, "loss": 17.723, "step": 6142 }, { "epoch": 0.81, "learning_rate": 9.30251240711828e-06, "loss": 18.3785, "step": 6143 }, { "epoch": 0.81, "learning_rate": 9.290132729553103e-06, "loss": 16.7334, "step": 6144 }, { "epoch": 0.81, "learning_rate": 9.277760451146106e-06, "loss": 16.1024, "step": 6145 }, { "epoch": 0.81, "learning_rate": 9.265395574146002e-06, "loss": 17.7752, "step": 6146 }, { "epoch": 0.81, "learning_rate": 9.253038100800133e-06, "loss": 17.4426, "step": 6147 }, { "epoch": 0.81, "learning_rate": 9.240688033354538e-06, "loss": 14.8044, "step": 6148 }, { "epoch": 0.81, "learning_rate": 9.228345374053875e-06, "loss": 16.0956, "step": 6149 }, { "epoch": 0.81, "learning_rate": 9.216010125141461e-06, "loss": 0.0, "step": 6150 }, { "epoch": 0.81, "learning_rate": 9.203682288859272e-06, "loss": 17.558, "step": 6151 }, { "epoch": 0.81, "learning_rate": 9.191361867447934e-06, "loss": 16.8103, "step": 6152 }, { "epoch": 0.81, "learning_rate": 9.179048863146722e-06, "loss": 18.1157, "step": 6153 }, { "epoch": 0.81, "learning_rate": 9.166743278193573e-06, "loss": 15.4835, "step": 6154 }, { "epoch": 0.81, "learning_rate": 9.154445114825055e-06, "loss": 15.2136, "step": 6155 }, { "epoch": 0.81, "learning_rate": 9.142154375276429e-06, "loss": 20.1, "step": 6156 }, { "epoch": 0.81, "learning_rate": 9.12987106178156e-06, "loss": 16.6356, "step": 6157 }, { "epoch": 0.81, "learning_rate": 9.117595176572985e-06, "loss": 0.0, "step": 6158 }, { "epoch": 0.81, "learning_rate": 9.10532672188189e-06, "loss": 17.7265, "step": 6159 }, { "epoch": 0.81, "learning_rate": 9.093065699938109e-06, "loss": 16.4699, "step": 6160 }, { "epoch": 0.81, "learning_rate": 9.080812112970116e-06, "loss": 16.8101, "step": 6161 }, { "epoch": 0.81, "learning_rate": 9.068565963205062e-06, "loss": 0.0, "step": 6162 }, { "epoch": 0.81, "learning_rate": 9.056327252868695e-06, "loss": 15.9966, "step": 6163 }, { "epoch": 0.81, "learning_rate": 9.044095984185474e-06, "loss": 0.0, "step": 6164 }, { "epoch": 0.81, "learning_rate": 9.031872159378479e-06, "loss": 0.0001, "step": 6165 }, { "epoch": 0.81, "learning_rate": 9.019655780669406e-06, "loss": 18.7484, "step": 6166 }, { "epoch": 0.81, "learning_rate": 9.007446850278629e-06, "loss": 0.0002, "step": 6167 }, { "epoch": 0.81, "learning_rate": 8.995245370425165e-06, "loss": 0.0002, "step": 6168 }, { "epoch": 0.81, "learning_rate": 8.983051343326682e-06, "loss": 16.1632, "step": 6169 }, { "epoch": 0.81, "learning_rate": 8.970864771199472e-06, "loss": 15.7162, "step": 6170 }, { "epoch": 0.81, "learning_rate": 8.958685656258503e-06, "loss": 15.9869, "step": 6171 }, { "epoch": 0.81, "learning_rate": 8.946514000717366e-06, "loss": 18.5708, "step": 6172 }, { "epoch": 0.81, "learning_rate": 8.9343498067883e-06, "loss": 15.5929, "step": 6173 }, { "epoch": 0.81, "learning_rate": 8.922193076682189e-06, "loss": 17.9439, "step": 6174 }, { "epoch": 0.81, "learning_rate": 8.91004381260856e-06, "loss": 17.6651, "step": 6175 }, { "epoch": 0.81, "learning_rate": 8.897902016775578e-06, "loss": 16.4348, "step": 6176 }, { "epoch": 0.81, "learning_rate": 8.885767691390067e-06, "loss": 0.0007, "step": 6177 }, { "epoch": 0.81, "learning_rate": 8.87364083865746e-06, "loss": 16.8076, "step": 6178 }, { "epoch": 0.81, "learning_rate": 8.861521460781885e-06, "loss": 17.3135, "step": 6179 }, { "epoch": 0.81, "learning_rate": 8.849409559966072e-06, "loss": 16.3445, "step": 6180 }, { "epoch": 0.81, "learning_rate": 8.837305138411389e-06, "loss": 17.2042, "step": 6181 }, { "epoch": 0.81, "learning_rate": 8.825208198317864e-06, "loss": 16.6587, "step": 6182 }, { "epoch": 0.81, "learning_rate": 8.813118741884152e-06, "loss": 17.2065, "step": 6183 }, { "epoch": 0.81, "learning_rate": 8.801036771307557e-06, "loss": 0.0, "step": 6184 }, { "epoch": 0.81, "learning_rate": 8.788962288784024e-06, "loss": 0.0001, "step": 6185 }, { "epoch": 0.81, "learning_rate": 8.776895296508108e-06, "loss": 17.5024, "step": 6186 }, { "epoch": 0.81, "learning_rate": 8.76483579667306e-06, "loss": 16.5962, "step": 6187 }, { "epoch": 0.81, "learning_rate": 8.75278379147072e-06, "loss": 17.5305, "step": 6188 }, { "epoch": 0.81, "learning_rate": 8.740739283091582e-06, "loss": 16.7588, "step": 6189 }, { "epoch": 0.81, "learning_rate": 8.72870227372477e-06, "loss": 18.0434, "step": 6190 }, { "epoch": 0.81, "learning_rate": 8.716672765558065e-06, "loss": 0.0001, "step": 6191 }, { "epoch": 0.82, "learning_rate": 8.70465076077786e-06, "loss": 18.6866, "step": 6192 }, { "epoch": 0.82, "learning_rate": 8.692636261569197e-06, "loss": 16.7347, "step": 6193 }, { "epoch": 0.82, "learning_rate": 8.680629270115765e-06, "loss": 16.9037, "step": 6194 }, { "epoch": 0.82, "learning_rate": 8.668629788599847e-06, "loss": 15.5125, "step": 6195 }, { "epoch": 0.82, "learning_rate": 8.656637819202418e-06, "loss": 0.0, "step": 6196 }, { "epoch": 0.82, "learning_rate": 8.644653364103055e-06, "loss": 17.0136, "step": 6197 }, { "epoch": 0.82, "learning_rate": 8.632676425479974e-06, "loss": 16.1857, "step": 6198 }, { "epoch": 0.82, "learning_rate": 8.620707005510015e-06, "loss": 0.0008, "step": 6199 }, { "epoch": 0.82, "learning_rate": 8.608745106368665e-06, "loss": 16.0905, "step": 6200 }, { "epoch": 0.82, "learning_rate": 8.596790730230048e-06, "loss": 0.0, "step": 6201 }, { "epoch": 0.82, "learning_rate": 8.584843879266902e-06, "loss": 15.0164, "step": 6202 }, { "epoch": 0.82, "learning_rate": 8.572904555650601e-06, "loss": 16.7482, "step": 6203 }, { "epoch": 0.82, "learning_rate": 8.560972761551183e-06, "loss": 0.0003, "step": 6204 }, { "epoch": 0.82, "learning_rate": 8.54904849913728e-06, "loss": 18.2776, "step": 6205 }, { "epoch": 0.82, "learning_rate": 8.537131770576169e-06, "loss": 0.0, "step": 6206 }, { "epoch": 0.82, "learning_rate": 8.525222578033753e-06, "loss": 17.9376, "step": 6207 }, { "epoch": 0.82, "learning_rate": 8.513320923674572e-06, "loss": 16.1464, "step": 6208 }, { "epoch": 0.82, "learning_rate": 8.501426809661788e-06, "loss": 15.1278, "step": 6209 }, { "epoch": 0.82, "learning_rate": 8.489540238157201e-06, "loss": 16.3693, "step": 6210 }, { "epoch": 0.82, "learning_rate": 8.477661211321219e-06, "loss": 16.3323, "step": 6211 }, { "epoch": 0.82, "learning_rate": 8.465789731312928e-06, "loss": 15.6625, "step": 6212 }, { "epoch": 0.82, "learning_rate": 8.453925800289997e-06, "loss": 16.1719, "step": 6213 }, { "epoch": 0.82, "learning_rate": 8.442069420408732e-06, "loss": 16.4471, "step": 6214 }, { "epoch": 0.82, "learning_rate": 8.430220593824068e-06, "loss": 0.0, "step": 6215 }, { "epoch": 0.82, "learning_rate": 8.41837932268958e-06, "loss": 16.5537, "step": 6216 }, { "epoch": 0.82, "learning_rate": 8.406545609157445e-06, "loss": 16.6078, "step": 6217 }, { "epoch": 0.82, "learning_rate": 8.394719455378497e-06, "loss": 0.0001, "step": 6218 }, { "epoch": 0.82, "learning_rate": 8.382900863502174e-06, "loss": 16.9228, "step": 6219 }, { "epoch": 0.82, "learning_rate": 8.371089835676526e-06, "loss": 17.2978, "step": 6220 }, { "epoch": 0.82, "learning_rate": 8.359286374048286e-06, "loss": 15.6809, "step": 6221 }, { "epoch": 0.82, "learning_rate": 8.347490480762748e-06, "loss": 16.6064, "step": 6222 }, { "epoch": 0.82, "learning_rate": 8.335702157963865e-06, "loss": 0.0, "step": 6223 }, { "epoch": 0.82, "learning_rate": 8.323921407794205e-06, "loss": 0.0, "step": 6224 }, { "epoch": 0.82, "learning_rate": 8.312148232394955e-06, "loss": 17.5867, "step": 6225 }, { "epoch": 0.82, "learning_rate": 8.300382633905934e-06, "loss": 15.6003, "step": 6226 }, { "epoch": 0.82, "learning_rate": 8.288624614465574e-06, "loss": 18.3825, "step": 6227 }, { "epoch": 0.82, "learning_rate": 8.27687417621093e-06, "loss": 14.7195, "step": 6228 }, { "epoch": 0.82, "learning_rate": 8.265131321277708e-06, "loss": 17.085, "step": 6229 }, { "epoch": 0.82, "learning_rate": 8.253396051800194e-06, "loss": 16.544, "step": 6230 }, { "epoch": 0.82, "learning_rate": 8.241668369911321e-06, "loss": 16.5155, "step": 6231 }, { "epoch": 0.82, "learning_rate": 8.229948277742632e-06, "loss": 17.7292, "step": 6232 }, { "epoch": 0.82, "learning_rate": 8.218235777424294e-06, "loss": 0.0, "step": 6233 }, { "epoch": 0.82, "learning_rate": 8.206530871085094e-06, "loss": 16.5116, "step": 6234 }, { "epoch": 0.82, "learning_rate": 8.19483356085244e-06, "loss": 16.7533, "step": 6235 }, { "epoch": 0.82, "learning_rate": 8.183143848852348e-06, "loss": 17.7075, "step": 6236 }, { "epoch": 0.82, "learning_rate": 8.171461737209479e-06, "loss": 14.487, "step": 6237 }, { "epoch": 0.82, "learning_rate": 8.159787228047094e-06, "loss": 17.1996, "step": 6238 }, { "epoch": 0.82, "learning_rate": 8.148120323487074e-06, "loss": 16.8152, "step": 6239 }, { "epoch": 0.82, "learning_rate": 8.136461025649917e-06, "loss": 0.0001, "step": 6240 }, { "epoch": 0.82, "learning_rate": 8.124809336654738e-06, "loss": 15.7698, "step": 6241 }, { "epoch": 0.82, "learning_rate": 8.113165258619276e-06, "loss": 0.0, "step": 6242 }, { "epoch": 0.82, "learning_rate": 8.10152879365988e-06, "loss": 0.0, "step": 6243 }, { "epoch": 0.82, "learning_rate": 8.089899943891516e-06, "loss": 0.0, "step": 6244 }, { "epoch": 0.82, "learning_rate": 8.078278711427762e-06, "loss": 17.6755, "step": 6245 }, { "epoch": 0.82, "learning_rate": 8.066665098380832e-06, "loss": 17.8487, "step": 6246 }, { "epoch": 0.82, "learning_rate": 8.055059106861529e-06, "loss": 17.1379, "step": 6247 }, { "epoch": 0.82, "learning_rate": 8.043460738979291e-06, "loss": 0.0, "step": 6248 }, { "epoch": 0.82, "learning_rate": 8.03186999684215e-06, "loss": 18.3711, "step": 6249 }, { "epoch": 0.82, "learning_rate": 8.02028688255677e-06, "loss": 0.0, "step": 6250 }, { "epoch": 0.82, "learning_rate": 8.008711398228419e-06, "loss": 0.0, "step": 6251 }, { "epoch": 0.82, "learning_rate": 7.997143545960983e-06, "loss": 17.046, "step": 6252 }, { "epoch": 0.82, "learning_rate": 7.985583327856949e-06, "loss": 15.329, "step": 6253 }, { "epoch": 0.82, "learning_rate": 7.974030746017442e-06, "loss": 14.8629, "step": 6254 }, { "epoch": 0.82, "learning_rate": 7.96248580254218e-06, "loss": 0.0, "step": 6255 }, { "epoch": 0.82, "learning_rate": 7.950948499529493e-06, "loss": 17.5465, "step": 6256 }, { "epoch": 0.82, "learning_rate": 7.939418839076323e-06, "loss": 17.0231, "step": 6257 }, { "epoch": 0.82, "learning_rate": 7.927896823278225e-06, "loss": 15.9495, "step": 6258 }, { "epoch": 0.82, "learning_rate": 7.91638245422937e-06, "loss": 18.2131, "step": 6259 }, { "epoch": 0.82, "learning_rate": 7.904875734022538e-06, "loss": 17.0302, "step": 6260 }, { "epoch": 0.82, "learning_rate": 7.893376664749091e-06, "loss": 16.5056, "step": 6261 }, { "epoch": 0.82, "learning_rate": 7.881885248499055e-06, "loss": 0.0003, "step": 6262 }, { "epoch": 0.82, "learning_rate": 7.870401487361017e-06, "loss": 15.7475, "step": 6263 }, { "epoch": 0.82, "learning_rate": 7.8589253834222e-06, "loss": 16.7442, "step": 6264 }, { "epoch": 0.82, "learning_rate": 7.847456938768417e-06, "loss": 0.0001, "step": 6265 }, { "epoch": 0.82, "learning_rate": 7.835996155484099e-06, "loss": 17.7583, "step": 6266 }, { "epoch": 0.82, "learning_rate": 7.824543035652281e-06, "loss": 16.7277, "step": 6267 }, { "epoch": 0.83, "learning_rate": 7.813097581354617e-06, "loss": 0.0, "step": 6268 }, { "epoch": 0.83, "learning_rate": 7.801659794671345e-06, "loss": 0.0, "step": 6269 }, { "epoch": 0.83, "learning_rate": 7.790229677681315e-06, "loss": 16.6316, "step": 6270 }, { "epoch": 0.83, "learning_rate": 7.778807232462016e-06, "loss": 18.0646, "step": 6271 }, { "epoch": 0.83, "learning_rate": 7.767392461089501e-06, "loss": 17.6026, "step": 6272 }, { "epoch": 0.83, "learning_rate": 7.755985365638447e-06, "loss": 15.4539, "step": 6273 }, { "epoch": 0.83, "learning_rate": 7.744585948182131e-06, "loss": 17.7843, "step": 6274 }, { "epoch": 0.83, "learning_rate": 7.733194210792439e-06, "loss": 15.8837, "step": 6275 }, { "epoch": 0.83, "learning_rate": 7.721810155539854e-06, "loss": 0.0004, "step": 6276 }, { "epoch": 0.83, "learning_rate": 7.710433784493464e-06, "loss": 0.0001, "step": 6277 }, { "epoch": 0.83, "learning_rate": 7.699065099720965e-06, "loss": 0.0005, "step": 6278 }, { "epoch": 0.83, "learning_rate": 7.687704103288668e-06, "loss": 18.6196, "step": 6279 }, { "epoch": 0.83, "learning_rate": 7.67635079726146e-06, "loss": 0.0003, "step": 6280 }, { "epoch": 0.83, "learning_rate": 7.66500518370285e-06, "loss": 17.4663, "step": 6281 }, { "epoch": 0.83, "learning_rate": 7.653667264674935e-06, "loss": 0.0001, "step": 6282 }, { "epoch": 0.83, "learning_rate": 7.642337042238429e-06, "loss": 14.6371, "step": 6283 }, { "epoch": 0.83, "learning_rate": 7.631014518452633e-06, "loss": 18.2902, "step": 6284 }, { "epoch": 0.83, "learning_rate": 7.619699695375454e-06, "loss": 15.8274, "step": 6285 }, { "epoch": 0.83, "learning_rate": 7.608392575063394e-06, "loss": 17.0149, "step": 6286 }, { "epoch": 0.83, "learning_rate": 7.597093159571578e-06, "loss": 16.0646, "step": 6287 }, { "epoch": 0.83, "learning_rate": 7.585801450953706e-06, "loss": 19.098, "step": 6288 }, { "epoch": 0.83, "learning_rate": 7.574517451262081e-06, "loss": 15.5647, "step": 6289 }, { "epoch": 0.83, "learning_rate": 7.563241162547613e-06, "loss": 16.2456, "step": 6290 }, { "epoch": 0.83, "learning_rate": 7.551972586859801e-06, "loss": 15.7163, "step": 6291 }, { "epoch": 0.83, "learning_rate": 7.540711726246752e-06, "loss": 18.3662, "step": 6292 }, { "epoch": 0.83, "learning_rate": 7.529458582755167e-06, "loss": 15.6192, "step": 6293 }, { "epoch": 0.83, "learning_rate": 7.518213158430338e-06, "loss": 0.0, "step": 6294 }, { "epoch": 0.83, "learning_rate": 7.506975455316157e-06, "loss": 18.5627, "step": 6295 }, { "epoch": 0.83, "learning_rate": 7.4957454754551235e-06, "loss": 0.0001, "step": 6296 }, { "epoch": 0.83, "learning_rate": 7.484523220888328e-06, "loss": 17.1495, "step": 6297 }, { "epoch": 0.83, "learning_rate": 7.473308693655445e-06, "loss": 17.5582, "step": 6298 }, { "epoch": 0.83, "learning_rate": 7.46210189579476e-06, "loss": 15.4428, "step": 6299 }, { "epoch": 0.83, "learning_rate": 7.4509028293431425e-06, "loss": 16.966, "step": 6300 }, { "epoch": 0.83, "learning_rate": 7.439711496336061e-06, "loss": 0.0, "step": 6301 }, { "epoch": 0.83, "learning_rate": 7.42852789880758e-06, "loss": 0.0002, "step": 6302 }, { "epoch": 0.83, "learning_rate": 7.417352038790354e-06, "loss": 0.0, "step": 6303 }, { "epoch": 0.83, "learning_rate": 7.40618391831564e-06, "loss": 0.0, "step": 6304 }, { "epoch": 0.83, "learning_rate": 7.395023539413276e-06, "loss": 16.3027, "step": 6305 }, { "epoch": 0.83, "learning_rate": 7.383870904111706e-06, "loss": 0.0001, "step": 6306 }, { "epoch": 0.83, "learning_rate": 7.372726014437953e-06, "loss": 17.0284, "step": 6307 }, { "epoch": 0.83, "learning_rate": 7.361588872417641e-06, "loss": 16.6216, "step": 6308 }, { "epoch": 0.83, "learning_rate": 7.350459480074984e-06, "loss": 16.8344, "step": 6309 }, { "epoch": 0.83, "learning_rate": 7.33933783943278e-06, "loss": 18.4024, "step": 6310 }, { "epoch": 0.83, "learning_rate": 7.328223952512442e-06, "loss": 18.0205, "step": 6311 }, { "epoch": 0.83, "learning_rate": 7.317117821333946e-06, "loss": 17.1367, "step": 6312 }, { "epoch": 0.83, "learning_rate": 7.306019447915874e-06, "loss": 17.4719, "step": 6313 }, { "epoch": 0.83, "learning_rate": 7.294928834275394e-06, "loss": 16.5461, "step": 6314 }, { "epoch": 0.83, "learning_rate": 7.283845982428255e-06, "loss": 18.9602, "step": 6315 }, { "epoch": 0.83, "learning_rate": 7.272770894388814e-06, "loss": 17.7293, "step": 6316 }, { "epoch": 0.83, "learning_rate": 7.261703572169998e-06, "loss": 17.5821, "step": 6317 }, { "epoch": 0.83, "learning_rate": 7.2506440177833255e-06, "loss": 16.4018, "step": 6318 }, { "epoch": 0.83, "learning_rate": 7.239592233238924e-06, "loss": 18.2579, "step": 6319 }, { "epoch": 0.83, "learning_rate": 7.228548220545489e-06, "loss": 17.1111, "step": 6320 }, { "epoch": 0.83, "learning_rate": 7.217511981710301e-06, "loss": 0.0, "step": 6321 }, { "epoch": 0.83, "learning_rate": 7.206483518739243e-06, "loss": 16.6794, "step": 6322 }, { "epoch": 0.83, "learning_rate": 7.195462833636768e-06, "loss": 0.0, "step": 6323 }, { "epoch": 0.83, "learning_rate": 7.184449928405929e-06, "loss": 15.5351, "step": 6324 }, { "epoch": 0.83, "learning_rate": 7.1734448050483575e-06, "loss": 0.0001, "step": 6325 }, { "epoch": 0.83, "learning_rate": 7.1624474655642535e-06, "loss": 18.6026, "step": 6326 }, { "epoch": 0.83, "learning_rate": 7.1514579119524584e-06, "loss": 17.227, "step": 6327 }, { "epoch": 0.83, "learning_rate": 7.14047614621034e-06, "loss": 0.0, "step": 6328 }, { "epoch": 0.83, "learning_rate": 7.129502170333873e-06, "loss": 17.2834, "step": 6329 }, { "epoch": 0.83, "learning_rate": 7.118535986317621e-06, "loss": 0.0, "step": 6330 }, { "epoch": 0.83, "learning_rate": 7.107577596154719e-06, "loss": 17.3652, "step": 6331 }, { "epoch": 0.83, "learning_rate": 7.096627001836892e-06, "loss": 0.0, "step": 6332 }, { "epoch": 0.83, "learning_rate": 7.085684205354454e-06, "loss": 15.1866, "step": 6333 }, { "epoch": 0.83, "learning_rate": 7.074749208696291e-06, "loss": 16.7049, "step": 6334 }, { "epoch": 0.83, "learning_rate": 7.063822013849863e-06, "loss": 0.0002, "step": 6335 }, { "epoch": 0.83, "learning_rate": 7.052902622801255e-06, "loss": 16.7094, "step": 6336 }, { "epoch": 0.83, "learning_rate": 7.041991037535084e-06, "loss": 17.3382, "step": 6337 }, { "epoch": 0.83, "learning_rate": 7.03108726003458e-06, "loss": 0.0002, "step": 6338 }, { "epoch": 0.83, "learning_rate": 7.020191292281525e-06, "loss": 17.303, "step": 6339 }, { "epoch": 0.83, "learning_rate": 7.0093031362563176e-06, "loss": 0.0001, "step": 6340 }, { "epoch": 0.83, "learning_rate": 6.998422793937903e-06, "loss": 17.514, "step": 6341 }, { "epoch": 0.83, "learning_rate": 6.987550267303827e-06, "loss": 15.9829, "step": 6342 }, { "epoch": 0.83, "learning_rate": 6.976685558330193e-06, "loss": 17.0596, "step": 6343 }, { "epoch": 0.84, "learning_rate": 6.96582866899173e-06, "loss": 17.2779, "step": 6344 }, { "epoch": 0.84, "learning_rate": 6.954979601261696e-06, "loss": 0.0003, "step": 6345 }, { "epoch": 0.84, "learning_rate": 6.944138357111951e-06, "loss": 16.7661, "step": 6346 }, { "epoch": 0.84, "learning_rate": 6.933304938512924e-06, "loss": 15.8933, "step": 6347 }, { "epoch": 0.84, "learning_rate": 6.92247934743363e-06, "loss": 17.4288, "step": 6348 }, { "epoch": 0.84, "learning_rate": 6.911661585841655e-06, "loss": 17.3467, "step": 6349 }, { "epoch": 0.84, "learning_rate": 6.900851655703161e-06, "loss": 0.0, "step": 6350 }, { "epoch": 0.84, "learning_rate": 6.890049558982892e-06, "loss": 17.0958, "step": 6351 }, { "epoch": 0.84, "learning_rate": 6.879255297644171e-06, "loss": 0.0, "step": 6352 }, { "epoch": 0.84, "learning_rate": 6.868468873648892e-06, "loss": 0.0004, "step": 6353 }, { "epoch": 0.84, "learning_rate": 6.857690288957525e-06, "loss": 0.0, "step": 6354 }, { "epoch": 0.84, "learning_rate": 6.846919545529107e-06, "loss": 16.3676, "step": 6355 }, { "epoch": 0.84, "learning_rate": 6.836156645321262e-06, "loss": 0.0001, "step": 6356 }, { "epoch": 0.84, "learning_rate": 6.825401590290187e-06, "loss": 16.7128, "step": 6357 }, { "epoch": 0.84, "learning_rate": 6.814654382390645e-06, "loss": 0.0, "step": 6358 }, { "epoch": 0.84, "learning_rate": 6.80391502357598e-06, "loss": 17.0911, "step": 6359 }, { "epoch": 0.84, "learning_rate": 6.793183515798102e-06, "loss": 18.1414, "step": 6360 }, { "epoch": 0.84, "learning_rate": 6.782459861007512e-06, "loss": 0.0, "step": 6361 }, { "epoch": 0.84, "learning_rate": 6.7717440611532625e-06, "loss": 17.2212, "step": 6362 }, { "epoch": 0.84, "learning_rate": 6.76103611818299e-06, "loss": 15.5893, "step": 6363 }, { "epoch": 0.84, "learning_rate": 6.7503360340429035e-06, "loss": 16.7863, "step": 6364 }, { "epoch": 0.84, "learning_rate": 6.7396438106777705e-06, "loss": 17.6184, "step": 6365 }, { "epoch": 0.84, "learning_rate": 6.728959450030942e-06, "loss": 17.5575, "step": 6366 }, { "epoch": 0.84, "learning_rate": 6.718282954044342e-06, "loss": 17.8889, "step": 6367 }, { "epoch": 0.84, "learning_rate": 6.707614324658445e-06, "loss": 17.4037, "step": 6368 }, { "epoch": 0.84, "learning_rate": 6.696953563812336e-06, "loss": 17.9767, "step": 6369 }, { "epoch": 0.84, "learning_rate": 6.686300673443629e-06, "loss": 14.1845, "step": 6370 }, { "epoch": 0.84, "learning_rate": 6.675655655488533e-06, "loss": 18.6473, "step": 6371 }, { "epoch": 0.84, "learning_rate": 6.665018511881804e-06, "loss": 17.2973, "step": 6372 }, { "epoch": 0.84, "learning_rate": 6.654389244556791e-06, "loss": 17.3098, "step": 6373 }, { "epoch": 0.84, "learning_rate": 6.643767855445393e-06, "loss": 18.4131, "step": 6374 }, { "epoch": 0.84, "learning_rate": 6.633154346478082e-06, "loss": 18.2645, "step": 6375 }, { "epoch": 0.84, "learning_rate": 6.622548719583899e-06, "loss": 0.0, "step": 6376 }, { "epoch": 0.84, "learning_rate": 6.611950976690462e-06, "loss": 15.3569, "step": 6377 }, { "epoch": 0.84, "learning_rate": 6.601361119723948e-06, "loss": 16.4462, "step": 6378 }, { "epoch": 0.84, "learning_rate": 6.590779150609089e-06, "loss": 16.5321, "step": 6379 }, { "epoch": 0.84, "learning_rate": 6.580205071269202e-06, "loss": 17.1559, "step": 6380 }, { "epoch": 0.84, "learning_rate": 6.569638883626161e-06, "loss": 17.3252, "step": 6381 }, { "epoch": 0.84, "learning_rate": 6.559080589600403e-06, "loss": 18.1606, "step": 6382 }, { "epoch": 0.84, "learning_rate": 6.548530191110941e-06, "loss": 19.069, "step": 6383 }, { "epoch": 0.84, "learning_rate": 6.537987690075337e-06, "loss": 17.7186, "step": 6384 }, { "epoch": 0.84, "learning_rate": 6.527453088409724e-06, "loss": 16.2203, "step": 6385 }, { "epoch": 0.84, "learning_rate": 6.516926388028821e-06, "loss": 17.4424, "step": 6386 }, { "epoch": 0.84, "learning_rate": 6.506407590845881e-06, "loss": 15.0178, "step": 6387 }, { "epoch": 0.84, "learning_rate": 6.49589669877273e-06, "loss": 17.3373, "step": 6388 }, { "epoch": 0.84, "learning_rate": 6.485393713719762e-06, "loss": 0.001, "step": 6389 }, { "epoch": 0.84, "learning_rate": 6.474898637595927e-06, "loss": 17.5788, "step": 6390 }, { "epoch": 0.84, "learning_rate": 6.46441147230874e-06, "loss": 0.0001, "step": 6391 }, { "epoch": 0.84, "learning_rate": 6.453932219764286e-06, "loss": 15.8771, "step": 6392 }, { "epoch": 0.84, "learning_rate": 6.443460881867192e-06, "loss": 17.2093, "step": 6393 }, { "epoch": 0.84, "learning_rate": 6.4329974605206785e-06, "loss": 15.1244, "step": 6394 }, { "epoch": 0.84, "learning_rate": 6.422541957626493e-06, "loss": 0.0, "step": 6395 }, { "epoch": 0.84, "learning_rate": 6.4120943750849705e-06, "loss": 19.9686, "step": 6396 }, { "epoch": 0.84, "learning_rate": 6.401654714794991e-06, "loss": 16.3857, "step": 6397 }, { "epoch": 0.84, "learning_rate": 6.391222978653993e-06, "loss": 0.0001, "step": 6398 }, { "epoch": 0.84, "learning_rate": 6.3807991685579824e-06, "loss": 14.9163, "step": 6399 }, { "epoch": 0.84, "learning_rate": 6.370383286401526e-06, "loss": 0.0, "step": 6400 }, { "epoch": 0.84, "learning_rate": 6.359975334077739e-06, "loss": 17.8047, "step": 6401 }, { "epoch": 0.84, "learning_rate": 6.349575313478312e-06, "loss": 15.3625, "step": 6402 }, { "epoch": 0.84, "learning_rate": 6.339183226493483e-06, "loss": 0.0001, "step": 6403 }, { "epoch": 0.84, "learning_rate": 6.328799075012049e-06, "loss": 16.3844, "step": 6404 }, { "epoch": 0.84, "learning_rate": 6.318422860921358e-06, "loss": 17.719, "step": 6405 }, { "epoch": 0.84, "learning_rate": 6.308054586107332e-06, "loss": 0.0, "step": 6406 }, { "epoch": 0.84, "learning_rate": 6.297694252454434e-06, "loss": 16.0783, "step": 6407 }, { "epoch": 0.84, "learning_rate": 6.287341861845691e-06, "loss": 16.4556, "step": 6408 }, { "epoch": 0.84, "learning_rate": 6.276997416162689e-06, "loss": 0.0, "step": 6409 }, { "epoch": 0.84, "learning_rate": 6.266660917285555e-06, "loss": 17.5356, "step": 6410 }, { "epoch": 0.84, "learning_rate": 6.256332367093004e-06, "loss": 0.0001, "step": 6411 }, { "epoch": 0.84, "learning_rate": 6.246011767462273e-06, "loss": 16.4683, "step": 6412 }, { "epoch": 0.84, "learning_rate": 6.235699120269167e-06, "loss": 17.4286, "step": 6413 }, { "epoch": 0.84, "learning_rate": 6.225394427388043e-06, "loss": 16.0218, "step": 6414 }, { "epoch": 0.84, "learning_rate": 6.215097690691818e-06, "loss": 16.5894, "step": 6415 }, { "epoch": 0.84, "learning_rate": 6.2048089120519625e-06, "loss": 17.0866, "step": 6416 }, { "epoch": 0.84, "learning_rate": 6.194528093338486e-06, "loss": 16.8041, "step": 6417 }, { "epoch": 0.84, "learning_rate": 6.184255236419967e-06, "loss": 16.2596, "step": 6418 }, { "epoch": 0.84, "learning_rate": 6.173990343163538e-06, "loss": 17.1934, "step": 6419 }, { "epoch": 0.85, "learning_rate": 6.16373341543488e-06, "loss": 16.1894, "step": 6420 }, { "epoch": 0.85, "learning_rate": 6.153484455098218e-06, "loss": 0.0, "step": 6421 }, { "epoch": 0.85, "learning_rate": 6.14324346401634e-06, "loss": 17.0582, "step": 6422 }, { "epoch": 0.85, "learning_rate": 6.1330104440505795e-06, "loss": 15.261, "step": 6423 }, { "epoch": 0.85, "learning_rate": 6.122785397060827e-06, "loss": 18.4969, "step": 6424 }, { "epoch": 0.85, "learning_rate": 6.11256832490551e-06, "loss": 16.1762, "step": 6425 }, { "epoch": 0.85, "learning_rate": 6.102359229441618e-06, "loss": 17.1822, "step": 6426 }, { "epoch": 0.85, "learning_rate": 6.092158112524698e-06, "loss": 16.4925, "step": 6427 }, { "epoch": 0.85, "learning_rate": 6.081964976008836e-06, "loss": 0.0, "step": 6428 }, { "epoch": 0.85, "learning_rate": 6.07177982174667e-06, "loss": 0.0022, "step": 6429 }, { "epoch": 0.85, "learning_rate": 6.061602651589382e-06, "loss": 16.2654, "step": 6430 }, { "epoch": 0.85, "learning_rate": 6.0514334673867075e-06, "loss": 0.0, "step": 6431 }, { "epoch": 0.85, "learning_rate": 6.041272270986936e-06, "loss": 15.5829, "step": 6432 }, { "epoch": 0.85, "learning_rate": 6.031119064236895e-06, "loss": 0.0, "step": 6433 }, { "epoch": 0.85, "learning_rate": 6.020973848981964e-06, "loss": 0.0, "step": 6434 }, { "epoch": 0.85, "learning_rate": 6.010836627066069e-06, "loss": 16.2077, "step": 6435 }, { "epoch": 0.85, "learning_rate": 6.000707400331695e-06, "loss": 16.2498, "step": 6436 }, { "epoch": 0.85, "learning_rate": 5.990586170619855e-06, "loss": 0.0001, "step": 6437 }, { "epoch": 0.85, "learning_rate": 5.9804729397701375e-06, "loss": 0.0001, "step": 6438 }, { "epoch": 0.85, "learning_rate": 5.970367709620622e-06, "loss": 0.0, "step": 6439 }, { "epoch": 0.85, "learning_rate": 5.96027048200799e-06, "loss": 17.0848, "step": 6440 }, { "epoch": 0.85, "learning_rate": 5.9501812587674345e-06, "loss": 16.9203, "step": 6441 }, { "epoch": 0.85, "learning_rate": 5.940100041732726e-06, "loss": 18.6873, "step": 6442 }, { "epoch": 0.85, "learning_rate": 5.930026832736146e-06, "loss": 0.0001, "step": 6443 }, { "epoch": 0.85, "learning_rate": 5.919961633608539e-06, "loss": 16.9848, "step": 6444 }, { "epoch": 0.85, "learning_rate": 5.909904446179293e-06, "loss": 16.133, "step": 6445 }, { "epoch": 0.85, "learning_rate": 5.899855272276328e-06, "loss": 16.7544, "step": 6446 }, { "epoch": 0.85, "learning_rate": 5.889814113726122e-06, "loss": 16.5829, "step": 6447 }, { "epoch": 0.85, "learning_rate": 5.879780972353688e-06, "loss": 17.1232, "step": 6448 }, { "epoch": 0.85, "learning_rate": 5.869755849982578e-06, "loss": 0.0, "step": 6449 }, { "epoch": 0.85, "learning_rate": 5.859738748434906e-06, "loss": 16.4801, "step": 6450 }, { "epoch": 0.85, "learning_rate": 5.8497296695313065e-06, "loss": 17.6507, "step": 6451 }, { "epoch": 0.85, "learning_rate": 5.839728615090967e-06, "loss": 16.4246, "step": 6452 }, { "epoch": 0.85, "learning_rate": 5.829735586931606e-06, "loss": 17.2847, "step": 6453 }, { "epoch": 0.85, "learning_rate": 5.819750586869499e-06, "loss": 17.105, "step": 6454 }, { "epoch": 0.85, "learning_rate": 5.809773616719455e-06, "loss": 18.7348, "step": 6455 }, { "epoch": 0.85, "learning_rate": 5.7998046782948125e-06, "loss": 14.68, "step": 6456 }, { "epoch": 0.85, "learning_rate": 5.7898437734074714e-06, "loss": 16.6533, "step": 6457 }, { "epoch": 0.85, "learning_rate": 5.77989090386784e-06, "loss": 15.6925, "step": 6458 }, { "epoch": 0.85, "learning_rate": 5.769946071484917e-06, "loss": 16.6889, "step": 6459 }, { "epoch": 0.85, "learning_rate": 5.760009278066192e-06, "loss": 0.0001, "step": 6460 }, { "epoch": 0.85, "learning_rate": 5.750080525417717e-06, "loss": 16.8649, "step": 6461 }, { "epoch": 0.85, "learning_rate": 5.740159815344071e-06, "loss": 18.6124, "step": 6462 }, { "epoch": 0.85, "learning_rate": 5.730247149648377e-06, "loss": 0.0001, "step": 6463 }, { "epoch": 0.85, "learning_rate": 5.720342530132306e-06, "loss": 17.0197, "step": 6464 }, { "epoch": 0.85, "learning_rate": 5.710445958596045e-06, "loss": 16.1795, "step": 6465 }, { "epoch": 0.85, "learning_rate": 5.7005574368383274e-06, "loss": 17.7254, "step": 6466 }, { "epoch": 0.85, "learning_rate": 5.690676966656439e-06, "loss": 16.7938, "step": 6467 }, { "epoch": 0.85, "learning_rate": 5.680804549846186e-06, "loss": 15.6334, "step": 6468 }, { "epoch": 0.85, "learning_rate": 5.670940188201912e-06, "loss": 17.8781, "step": 6469 }, { "epoch": 0.85, "learning_rate": 5.6610838835164995e-06, "loss": 16.9875, "step": 6470 }, { "epoch": 0.85, "learning_rate": 5.651235637581364e-06, "loss": 18.1123, "step": 6471 }, { "epoch": 0.85, "learning_rate": 5.641395452186454e-06, "loss": 16.1661, "step": 6472 }, { "epoch": 0.85, "learning_rate": 5.631563329120265e-06, "loss": 16.0193, "step": 6473 }, { "epoch": 0.85, "learning_rate": 5.621739270169807e-06, "loss": 0.0, "step": 6474 }, { "epoch": 0.85, "learning_rate": 5.611923277120651e-06, "loss": 14.8134, "step": 6475 }, { "epoch": 0.85, "learning_rate": 5.602115351756881e-06, "loss": 15.7845, "step": 6476 }, { "epoch": 0.85, "learning_rate": 5.592315495861128e-06, "loss": 16.8054, "step": 6477 }, { "epoch": 0.85, "learning_rate": 5.582523711214538e-06, "loss": 0.0, "step": 6478 }, { "epoch": 0.85, "learning_rate": 5.572739999596804e-06, "loss": 17.9371, "step": 6479 }, { "epoch": 0.85, "learning_rate": 5.562964362786155e-06, "loss": 15.9471, "step": 6480 }, { "epoch": 0.85, "learning_rate": 5.55319680255934e-06, "loss": 16.3347, "step": 6481 }, { "epoch": 0.85, "learning_rate": 5.543437320691652e-06, "loss": 16.7805, "step": 6482 }, { "epoch": 0.85, "learning_rate": 5.533685918956899e-06, "loss": 15.5431, "step": 6483 }, { "epoch": 0.85, "learning_rate": 5.523942599127452e-06, "loss": 16.6193, "step": 6484 }, { "epoch": 0.85, "learning_rate": 5.5142073629741776e-06, "loss": 19.1651, "step": 6485 }, { "epoch": 0.85, "learning_rate": 5.504480212266494e-06, "loss": 17.1632, "step": 6486 }, { "epoch": 0.85, "learning_rate": 5.494761148772343e-06, "loss": 17.1007, "step": 6487 }, { "epoch": 0.85, "learning_rate": 5.4850501742581995e-06, "loss": 16.025, "step": 6488 }, { "epoch": 0.85, "learning_rate": 5.475347290489058e-06, "loss": 0.0001, "step": 6489 }, { "epoch": 0.85, "learning_rate": 5.465652499228463e-06, "loss": 15.9449, "step": 6490 }, { "epoch": 0.85, "learning_rate": 5.455965802238461e-06, "loss": 18.6337, "step": 6491 }, { "epoch": 0.85, "learning_rate": 5.446287201279659e-06, "loss": 0.0, "step": 6492 }, { "epoch": 0.85, "learning_rate": 5.436616698111175e-06, "loss": 0.0, "step": 6493 }, { "epoch": 0.85, "learning_rate": 5.426954294490644e-06, "loss": 17.8089, "step": 6494 }, { "epoch": 0.85, "learning_rate": 5.417299992174246e-06, "loss": 17.0769, "step": 6495 }, { "epoch": 0.86, "learning_rate": 5.4076537929166896e-06, "loss": 16.7602, "step": 6496 }, { "epoch": 0.86, "learning_rate": 5.398015698471198e-06, "loss": 0.0001, "step": 6497 }, { "epoch": 0.86, "learning_rate": 5.388385710589527e-06, "loss": 15.6139, "step": 6498 }, { "epoch": 0.86, "learning_rate": 5.378763831021955e-06, "loss": 17.9756, "step": 6499 }, { "epoch": 0.86, "learning_rate": 5.369150061517309e-06, "loss": 0.0006, "step": 6500 }, { "epoch": 0.86, "learning_rate": 5.359544403822919e-06, "loss": 18.3933, "step": 6501 }, { "epoch": 0.86, "learning_rate": 5.349946859684635e-06, "loss": 15.6776, "step": 6502 }, { "epoch": 0.86, "learning_rate": 5.340357430846854e-06, "loss": 17.9656, "step": 6503 }, { "epoch": 0.86, "learning_rate": 5.330776119052488e-06, "loss": 15.1532, "step": 6504 }, { "epoch": 0.86, "learning_rate": 5.321202926042967e-06, "loss": 15.5812, "step": 6505 }, { "epoch": 0.86, "learning_rate": 5.311637853558254e-06, "loss": 16.0542, "step": 6506 }, { "epoch": 0.86, "learning_rate": 5.3020809033368365e-06, "loss": 17.4093, "step": 6507 }, { "epoch": 0.86, "learning_rate": 5.292532077115708e-06, "loss": 17.9168, "step": 6508 }, { "epoch": 0.86, "learning_rate": 5.282991376630425e-06, "loss": 17.8047, "step": 6509 }, { "epoch": 0.86, "learning_rate": 5.273458803615028e-06, "loss": 17.1917, "step": 6510 }, { "epoch": 0.86, "learning_rate": 5.263934359802103e-06, "loss": 15.7727, "step": 6511 }, { "epoch": 0.86, "learning_rate": 5.254418046922738e-06, "loss": 18.3271, "step": 6512 }, { "epoch": 0.86, "learning_rate": 5.244909866706571e-06, "loss": 16.331, "step": 6513 }, { "epoch": 0.86, "learning_rate": 5.235409820881731e-06, "loss": 15.839, "step": 6514 }, { "epoch": 0.86, "learning_rate": 5.225917911174893e-06, "loss": 0.0, "step": 6515 }, { "epoch": 0.86, "learning_rate": 5.216434139311233e-06, "loss": 17.5563, "step": 6516 }, { "epoch": 0.86, "learning_rate": 5.206958507014481e-06, "loss": 16.4858, "step": 6517 }, { "epoch": 0.86, "learning_rate": 5.19749101600685e-06, "loss": 0.0001, "step": 6518 }, { "epoch": 0.86, "learning_rate": 5.1880316680090914e-06, "loss": 17.7821, "step": 6519 }, { "epoch": 0.86, "learning_rate": 5.178580464740479e-06, "loss": 16.1171, "step": 6520 }, { "epoch": 0.86, "learning_rate": 5.1691374079187945e-06, "loss": 17.2344, "step": 6521 }, { "epoch": 0.86, "learning_rate": 5.159702499260344e-06, "loss": 0.0, "step": 6522 }, { "epoch": 0.86, "learning_rate": 5.1502757404799654e-06, "loss": 18.0249, "step": 6523 }, { "epoch": 0.86, "learning_rate": 5.1408571332909884e-06, "loss": 19.7565, "step": 6524 }, { "epoch": 0.86, "learning_rate": 5.131446679405294e-06, "loss": 0.0021, "step": 6525 }, { "epoch": 0.86, "learning_rate": 5.122044380533259e-06, "loss": 17.4166, "step": 6526 }, { "epoch": 0.86, "learning_rate": 5.112650238383782e-06, "loss": 16.414, "step": 6527 }, { "epoch": 0.86, "learning_rate": 5.1032642546642806e-06, "loss": 0.0, "step": 6528 }, { "epoch": 0.86, "learning_rate": 5.093886431080691e-06, "loss": 16.7263, "step": 6529 }, { "epoch": 0.86, "learning_rate": 5.084516769337461e-06, "loss": 16.2357, "step": 6530 }, { "epoch": 0.86, "learning_rate": 5.075155271137566e-06, "loss": 17.2521, "step": 6531 }, { "epoch": 0.86, "learning_rate": 5.06580193818248e-06, "loss": 16.5372, "step": 6532 }, { "epoch": 0.86, "learning_rate": 5.056456772172202e-06, "loss": 16.3223, "step": 6533 }, { "epoch": 0.86, "learning_rate": 5.047119774805264e-06, "loss": 16.2602, "step": 6534 }, { "epoch": 0.86, "learning_rate": 5.037790947778692e-06, "loss": 0.0002, "step": 6535 }, { "epoch": 0.86, "learning_rate": 5.028470292788029e-06, "loss": 17.8941, "step": 6536 }, { "epoch": 0.86, "learning_rate": 5.019157811527331e-06, "loss": 17.1626, "step": 6537 }, { "epoch": 0.86, "learning_rate": 5.009853505689183e-06, "loss": 17.193, "step": 6538 }, { "epoch": 0.86, "learning_rate": 5.000557376964665e-06, "loss": 16.8263, "step": 6539 }, { "epoch": 0.86, "learning_rate": 4.991269427043394e-06, "loss": 0.0001, "step": 6540 }, { "epoch": 0.86, "learning_rate": 4.981989657613462e-06, "loss": 0.0, "step": 6541 }, { "epoch": 0.86, "learning_rate": 4.972718070361521e-06, "loss": 0.0004, "step": 6542 }, { "epoch": 0.86, "learning_rate": 4.963454666972717e-06, "loss": 16.9861, "step": 6543 }, { "epoch": 0.86, "learning_rate": 4.954199449130686e-06, "loss": 0.0, "step": 6544 }, { "epoch": 0.86, "learning_rate": 4.944952418517612e-06, "loss": 0.0002, "step": 6545 }, { "epoch": 0.86, "learning_rate": 4.935713576814172e-06, "loss": 15.4576, "step": 6546 }, { "epoch": 0.86, "learning_rate": 4.926482925699549e-06, "loss": 17.0004, "step": 6547 }, { "epoch": 0.86, "learning_rate": 4.9172604668514475e-06, "loss": 15.9211, "step": 6548 }, { "epoch": 0.86, "learning_rate": 4.908046201946081e-06, "loss": 0.0, "step": 6549 }, { "epoch": 0.86, "learning_rate": 4.8988401326581775e-06, "loss": 17.2577, "step": 6550 }, { "epoch": 0.86, "learning_rate": 4.889642260660976e-06, "loss": 16.4381, "step": 6551 }, { "epoch": 0.86, "learning_rate": 4.880452587626217e-06, "loss": 0.0001, "step": 6552 }, { "epoch": 0.86, "learning_rate": 4.871271115224152e-06, "loss": 0.0002, "step": 6553 }, { "epoch": 0.86, "learning_rate": 4.8620978451235475e-06, "loss": 17.0199, "step": 6554 }, { "epoch": 0.86, "learning_rate": 4.8529327789916815e-06, "loss": 18.2231, "step": 6555 }, { "epoch": 0.86, "learning_rate": 4.843775918494325e-06, "loss": 16.2093, "step": 6556 }, { "epoch": 0.86, "learning_rate": 4.834627265295777e-06, "loss": 17.8527, "step": 6557 }, { "epoch": 0.86, "learning_rate": 4.8254868210588274e-06, "loss": 0.0016, "step": 6558 }, { "epoch": 0.86, "learning_rate": 4.816354587444799e-06, "loss": 16.2621, "step": 6559 }, { "epoch": 0.86, "learning_rate": 4.807230566113502e-06, "loss": 16.5096, "step": 6560 }, { "epoch": 0.86, "learning_rate": 4.798114758723249e-06, "loss": 16.8585, "step": 6561 }, { "epoch": 0.86, "learning_rate": 4.789007166930881e-06, "loss": 17.0417, "step": 6562 }, { "epoch": 0.86, "learning_rate": 4.779907792391725e-06, "loss": 0.0001, "step": 6563 }, { "epoch": 0.86, "learning_rate": 4.770816636759623e-06, "loss": 0.0019, "step": 6564 }, { "epoch": 0.86, "learning_rate": 4.7617337016869325e-06, "loss": 0.0, "step": 6565 }, { "epoch": 0.86, "learning_rate": 4.752658988824499e-06, "loss": 15.5523, "step": 6566 }, { "epoch": 0.86, "learning_rate": 4.743592499821686e-06, "loss": 16.949, "step": 6567 }, { "epoch": 0.86, "learning_rate": 4.734534236326371e-06, "loss": 17.6707, "step": 6568 }, { "epoch": 0.86, "learning_rate": 4.725484199984909e-06, "loss": 0.0, "step": 6569 }, { "epoch": 0.86, "learning_rate": 4.716442392442177e-06, "loss": 18.2392, "step": 6570 }, { "epoch": 0.86, "learning_rate": 4.707408815341557e-06, "loss": 15.867, "step": 6571 }, { "epoch": 0.87, "learning_rate": 4.6983834703249356e-06, "loss": 0.0, "step": 6572 }, { "epoch": 0.87, "learning_rate": 4.689366359032699e-06, "loss": 18.3284, "step": 6573 }, { "epoch": 0.87, "learning_rate": 4.680357483103736e-06, "loss": 0.0001, "step": 6574 }, { "epoch": 0.87, "learning_rate": 4.6713568441754455e-06, "loss": 16.5648, "step": 6575 }, { "epoch": 0.87, "learning_rate": 4.662364443883716e-06, "loss": 16.9089, "step": 6576 }, { "epoch": 0.87, "learning_rate": 4.653380283862951e-06, "loss": 16.5144, "step": 6577 }, { "epoch": 0.87, "learning_rate": 4.644404365746053e-06, "loss": 17.3703, "step": 6578 }, { "epoch": 0.87, "learning_rate": 4.63543669116443e-06, "loss": 0.0001, "step": 6579 }, { "epoch": 0.87, "learning_rate": 4.62647726174798e-06, "loss": 16.8087, "step": 6580 }, { "epoch": 0.87, "learning_rate": 4.6175260791251064e-06, "loss": 16.7658, "step": 6581 }, { "epoch": 0.87, "learning_rate": 4.608583144922735e-06, "loss": 0.0, "step": 6582 }, { "epoch": 0.87, "learning_rate": 4.599648460766265e-06, "loss": 0.0, "step": 6583 }, { "epoch": 0.87, "learning_rate": 4.590722028279604e-06, "loss": 17.538, "step": 6584 }, { "epoch": 0.87, "learning_rate": 4.581803849085165e-06, "loss": 0.0005, "step": 6585 }, { "epoch": 0.87, "learning_rate": 4.572893924803856e-06, "loss": 0.0001, "step": 6586 }, { "epoch": 0.87, "learning_rate": 4.563992257055083e-06, "loss": 0.0002, "step": 6587 }, { "epoch": 0.87, "learning_rate": 4.555098847456762e-06, "loss": 0.0001, "step": 6588 }, { "epoch": 0.87, "learning_rate": 4.546213697625285e-06, "loss": 0.0, "step": 6589 }, { "epoch": 0.87, "learning_rate": 4.5373368091755795e-06, "loss": 16.7262, "step": 6590 }, { "epoch": 0.87, "learning_rate": 4.528468183721046e-06, "loss": 17.6651, "step": 6591 }, { "epoch": 0.87, "learning_rate": 4.519607822873579e-06, "loss": 16.5314, "step": 6592 }, { "epoch": 0.87, "learning_rate": 4.510755728243582e-06, "loss": 0.0, "step": 6593 }, { "epoch": 0.87, "learning_rate": 4.501911901439959e-06, "loss": 0.0, "step": 6594 }, { "epoch": 0.87, "learning_rate": 4.493076344070096e-06, "loss": 17.8282, "step": 6595 }, { "epoch": 0.87, "learning_rate": 4.4842490577398974e-06, "loss": 16.1913, "step": 6596 }, { "epoch": 0.87, "learning_rate": 4.4754300440537465e-06, "loss": 16.623, "step": 6597 }, { "epoch": 0.87, "learning_rate": 4.466619304614522e-06, "loss": 17.6366, "step": 6598 }, { "epoch": 0.87, "learning_rate": 4.457816841023621e-06, "loss": 16.4808, "step": 6599 }, { "epoch": 0.87, "learning_rate": 4.449022654880913e-06, "loss": 18.5477, "step": 6600 }, { "epoch": 0.87, "learning_rate": 4.440236747784776e-06, "loss": 0.0, "step": 6601 }, { "epoch": 0.87, "learning_rate": 4.431459121332077e-06, "loss": 18.5235, "step": 6602 }, { "epoch": 0.87, "learning_rate": 4.422689777118172e-06, "loss": 0.0, "step": 6603 }, { "epoch": 0.87, "learning_rate": 4.413928716736931e-06, "loss": 17.8473, "step": 6604 }, { "epoch": 0.87, "learning_rate": 4.405175941780704e-06, "loss": 16.5368, "step": 6605 }, { "epoch": 0.87, "learning_rate": 4.3964314538403215e-06, "loss": 0.0, "step": 6606 }, { "epoch": 0.87, "learning_rate": 4.387695254505153e-06, "loss": 16.943, "step": 6607 }, { "epoch": 0.87, "learning_rate": 4.378967345363017e-06, "loss": 16.9144, "step": 6608 }, { "epoch": 0.87, "learning_rate": 4.370247728000243e-06, "loss": 15.384, "step": 6609 }, { "epoch": 0.87, "learning_rate": 4.361536404001648e-06, "loss": 16.8423, "step": 6610 }, { "epoch": 0.87, "learning_rate": 4.352833374950549e-06, "loss": 0.0002, "step": 6611 }, { "epoch": 0.87, "learning_rate": 4.344138642428747e-06, "loss": 15.9733, "step": 6612 }, { "epoch": 0.87, "learning_rate": 4.335452208016544e-06, "loss": 0.0008, "step": 6613 }, { "epoch": 0.87, "learning_rate": 4.326774073292722e-06, "loss": 16.6708, "step": 6614 }, { "epoch": 0.87, "learning_rate": 4.318104239834575e-06, "loss": 18.428, "step": 6615 }, { "epoch": 0.87, "learning_rate": 4.30944270921787e-06, "loss": 0.0001, "step": 6616 }, { "epoch": 0.87, "learning_rate": 4.300789483016865e-06, "loss": 16.1461, "step": 6617 }, { "epoch": 0.87, "learning_rate": 4.2921445628043124e-06, "loss": 16.1577, "step": 6618 }, { "epoch": 0.87, "learning_rate": 4.283507950151466e-06, "loss": 14.6416, "step": 6619 }, { "epoch": 0.87, "learning_rate": 4.274879646628049e-06, "loss": 16.7886, "step": 6620 }, { "epoch": 0.87, "learning_rate": 4.266259653802296e-06, "loss": 0.0, "step": 6621 }, { "epoch": 0.87, "learning_rate": 4.257647973240908e-06, "loss": 15.4655, "step": 6622 }, { "epoch": 0.87, "learning_rate": 4.249044606509089e-06, "loss": 0.0, "step": 6623 }, { "epoch": 0.87, "learning_rate": 4.2404495551705445e-06, "loss": 15.8435, "step": 6624 }, { "epoch": 0.87, "learning_rate": 4.231862820787441e-06, "loss": 15.6682, "step": 6625 }, { "epoch": 0.87, "learning_rate": 4.223284404920458e-06, "loss": 0.0003, "step": 6626 }, { "epoch": 0.87, "learning_rate": 4.214714309128742e-06, "loss": 15.4142, "step": 6627 }, { "epoch": 0.87, "learning_rate": 4.206152534969937e-06, "loss": 15.6852, "step": 6628 }, { "epoch": 0.87, "learning_rate": 4.197599084000181e-06, "loss": 17.1014, "step": 6629 }, { "epoch": 0.87, "learning_rate": 4.1890539577740916e-06, "loss": 16.3955, "step": 6630 }, { "epoch": 0.87, "learning_rate": 4.180517157844766e-06, "loss": 17.2199, "step": 6631 }, { "epoch": 0.87, "learning_rate": 4.1719886857638115e-06, "loss": 17.4704, "step": 6632 }, { "epoch": 0.87, "learning_rate": 4.163468543081301e-06, "loss": 16.2297, "step": 6633 }, { "epoch": 0.87, "learning_rate": 4.154956731345799e-06, "loss": 0.0001, "step": 6634 }, { "epoch": 0.87, "learning_rate": 4.14645325210436e-06, "loss": 16.8635, "step": 6635 }, { "epoch": 0.87, "learning_rate": 4.137958106902512e-06, "loss": 17.5974, "step": 6636 }, { "epoch": 0.87, "learning_rate": 4.129471297284282e-06, "loss": 15.8409, "step": 6637 }, { "epoch": 0.87, "learning_rate": 4.12099282479218e-06, "loss": 18.5486, "step": 6638 }, { "epoch": 0.87, "learning_rate": 4.112522690967185e-06, "loss": 16.0978, "step": 6639 }, { "epoch": 0.87, "learning_rate": 4.104060897348794e-06, "loss": 0.0001, "step": 6640 }, { "epoch": 0.87, "learning_rate": 4.0956074454749475e-06, "loss": 16.2368, "step": 6641 }, { "epoch": 0.87, "learning_rate": 4.087162336882105e-06, "loss": 0.0, "step": 6642 }, { "epoch": 0.87, "learning_rate": 4.07872557310518e-06, "loss": 16.3342, "step": 6643 }, { "epoch": 0.87, "learning_rate": 4.070297155677594e-06, "loss": 0.0, "step": 6644 }, { "epoch": 0.87, "learning_rate": 4.061877086131233e-06, "loss": 18.3436, "step": 6645 }, { "epoch": 0.87, "learning_rate": 4.053465365996478e-06, "loss": 17.7344, "step": 6646 }, { "epoch": 0.87, "learning_rate": 4.045061996802185e-06, "loss": 0.0, "step": 6647 }, { "epoch": 0.88, "learning_rate": 4.036666980075693e-06, "loss": 18.857, "step": 6648 }, { "epoch": 0.88, "learning_rate": 4.028280317342836e-06, "loss": 0.0, "step": 6649 }, { "epoch": 0.88, "learning_rate": 4.019902010127913e-06, "loss": 0.0, "step": 6650 }, { "epoch": 0.88, "learning_rate": 4.011532059953704e-06, "loss": 0.0001, "step": 6651 }, { "epoch": 0.88, "learning_rate": 4.003170468341488e-06, "loss": 18.3294, "step": 6652 }, { "epoch": 0.88, "learning_rate": 3.994817236810999e-06, "loss": 16.3335, "step": 6653 }, { "epoch": 0.88, "learning_rate": 3.986472366880478e-06, "loss": 17.8951, "step": 6654 }, { "epoch": 0.88, "learning_rate": 3.978135860066629e-06, "loss": 18.1206, "step": 6655 }, { "epoch": 0.88, "learning_rate": 3.969807717884627e-06, "loss": 18.127, "step": 6656 }, { "epoch": 0.88, "learning_rate": 3.961487941848169e-06, "loss": 17.6524, "step": 6657 }, { "epoch": 0.88, "learning_rate": 3.9531765334693825e-06, "loss": 0.0002, "step": 6658 }, { "epoch": 0.88, "learning_rate": 3.9448734942589e-06, "loss": 17.1674, "step": 6659 }, { "epoch": 0.88, "learning_rate": 3.936578825725829e-06, "loss": 17.8202, "step": 6660 }, { "epoch": 0.88, "learning_rate": 3.928292529377753e-06, "loss": 16.7062, "step": 6661 }, { "epoch": 0.88, "learning_rate": 3.920014606720729e-06, "loss": 17.4731, "step": 6662 }, { "epoch": 0.88, "learning_rate": 3.911745059259303e-06, "loss": 0.0, "step": 6663 }, { "epoch": 0.88, "learning_rate": 3.90348388849649e-06, "loss": 15.7337, "step": 6664 }, { "epoch": 0.88, "learning_rate": 3.895231095933793e-06, "loss": 0.0001, "step": 6665 }, { "epoch": 0.88, "learning_rate": 3.886986683071187e-06, "loss": 0.0001, "step": 6666 }, { "epoch": 0.88, "learning_rate": 3.878750651407115e-06, "loss": 16.4686, "step": 6667 }, { "epoch": 0.88, "learning_rate": 3.870523002438503e-06, "loss": 15.4886, "step": 6668 }, { "epoch": 0.88, "learning_rate": 3.862303737660761e-06, "loss": 15.0982, "step": 6669 }, { "epoch": 0.88, "learning_rate": 3.854092858567765e-06, "loss": 15.1867, "step": 6670 }, { "epoch": 0.88, "learning_rate": 3.845890366651872e-06, "loss": 17.0915, "step": 6671 }, { "epoch": 0.88, "learning_rate": 3.837696263403906e-06, "loss": 15.9242, "step": 6672 }, { "epoch": 0.88, "learning_rate": 3.829510550313176e-06, "loss": 17.0915, "step": 6673 }, { "epoch": 0.88, "learning_rate": 3.821333228867474e-06, "loss": 15.9589, "step": 6674 }, { "epoch": 0.88, "learning_rate": 3.8131643005530514e-06, "loss": 0.0, "step": 6675 }, { "epoch": 0.88, "learning_rate": 3.805003766854631e-06, "loss": 16.1581, "step": 6676 }, { "epoch": 0.88, "learning_rate": 3.796851629255427e-06, "loss": 17.6956, "step": 6677 }, { "epoch": 0.88, "learning_rate": 3.788707889237114e-06, "loss": 16.4577, "step": 6678 }, { "epoch": 0.88, "learning_rate": 3.780572548279848e-06, "loss": 17.9839, "step": 6679 }, { "epoch": 0.88, "learning_rate": 3.7724456078622516e-06, "loss": 16.269, "step": 6680 }, { "epoch": 0.88, "learning_rate": 3.764327069461415e-06, "loss": 16.5792, "step": 6681 }, { "epoch": 0.88, "learning_rate": 3.7562169345529308e-06, "loss": 15.6349, "step": 6682 }, { "epoch": 0.88, "learning_rate": 3.748115204610836e-06, "loss": 17.5335, "step": 6683 }, { "epoch": 0.88, "learning_rate": 3.7400218811076416e-06, "loss": 0.0002, "step": 6684 }, { "epoch": 0.88, "learning_rate": 3.731936965514338e-06, "loss": 19.2054, "step": 6685 }, { "epoch": 0.88, "learning_rate": 3.7238604593003933e-06, "loss": 18.733, "step": 6686 }, { "epoch": 0.88, "learning_rate": 3.71579236393374e-06, "loss": 16.9341, "step": 6687 }, { "epoch": 0.88, "learning_rate": 3.707732680880771e-06, "loss": 16.8529, "step": 6688 }, { "epoch": 0.88, "learning_rate": 3.699681411606365e-06, "loss": 0.0, "step": 6689 }, { "epoch": 0.88, "learning_rate": 3.6916385575738787e-06, "loss": 17.7098, "step": 6690 }, { "epoch": 0.88, "learning_rate": 3.683604120245121e-06, "loss": 17.1741, "step": 6691 }, { "epoch": 0.88, "learning_rate": 3.6755781010803793e-06, "loss": 0.0001, "step": 6692 }, { "epoch": 0.88, "learning_rate": 3.6675605015384085e-06, "loss": 18.0476, "step": 6693 }, { "epoch": 0.88, "learning_rate": 3.6595513230764377e-06, "loss": 16.3819, "step": 6694 }, { "epoch": 0.88, "learning_rate": 3.651550567150158e-06, "loss": 16.2806, "step": 6695 }, { "epoch": 0.88, "learning_rate": 3.64355823521374e-06, "loss": 0.0, "step": 6696 }, { "epoch": 0.88, "learning_rate": 3.635574328719815e-06, "loss": 17.4593, "step": 6697 }, { "epoch": 0.88, "learning_rate": 3.6275988491194733e-06, "loss": 18.0811, "step": 6698 }, { "epoch": 0.88, "learning_rate": 3.6196317978623105e-06, "loss": 16.8714, "step": 6699 }, { "epoch": 0.88, "learning_rate": 3.611673176396352e-06, "loss": 17.2327, "step": 6700 }, { "epoch": 0.88, "learning_rate": 3.603722986168101e-06, "loss": 16.31, "step": 6701 }, { "epoch": 0.88, "learning_rate": 3.5957812286225413e-06, "loss": 16.7587, "step": 6702 }, { "epoch": 0.88, "learning_rate": 3.587847905203101e-06, "loss": 0.0005, "step": 6703 }, { "epoch": 0.88, "learning_rate": 3.579923017351705e-06, "loss": 0.0, "step": 6704 }, { "epoch": 0.88, "learning_rate": 3.5720065665087176e-06, "loss": 0.0, "step": 6705 }, { "epoch": 0.88, "learning_rate": 3.5640985541129768e-06, "loss": 0.0001, "step": 6706 }, { "epoch": 0.88, "learning_rate": 3.556198981601805e-06, "loss": 17.4252, "step": 6707 }, { "epoch": 0.88, "learning_rate": 3.5483078504109756e-06, "loss": 16.048, "step": 6708 }, { "epoch": 0.88, "learning_rate": 3.54042516197472e-06, "loss": 16.5626, "step": 6709 }, { "epoch": 0.88, "learning_rate": 3.5325509177257522e-06, "loss": 0.0002, "step": 6710 }, { "epoch": 0.88, "learning_rate": 3.5246851190952334e-06, "loss": 17.8208, "step": 6711 }, { "epoch": 0.88, "learning_rate": 3.5168277675128035e-06, "loss": 0.0, "step": 6712 }, { "epoch": 0.88, "learning_rate": 3.508978864406559e-06, "loss": 0.0005, "step": 6713 }, { "epoch": 0.88, "learning_rate": 3.5011384112030752e-06, "loss": 0.0, "step": 6714 }, { "epoch": 0.88, "learning_rate": 3.493306409327374e-06, "loss": 16.9435, "step": 6715 }, { "epoch": 0.88, "learning_rate": 3.485482860202954e-06, "loss": 16.7933, "step": 6716 }, { "epoch": 0.88, "learning_rate": 3.4776677652517687e-06, "loss": 0.0, "step": 6717 }, { "epoch": 0.88, "learning_rate": 3.469861125894236e-06, "loss": 16.7898, "step": 6718 }, { "epoch": 0.88, "learning_rate": 3.4620629435492435e-06, "loss": 16.9385, "step": 6719 }, { "epoch": 0.88, "learning_rate": 3.4542732196341355e-06, "loss": 16.1695, "step": 6720 }, { "epoch": 0.88, "learning_rate": 3.446491955564718e-06, "loss": 16.6536, "step": 6721 }, { "epoch": 0.88, "learning_rate": 3.4387191527552707e-06, "loss": 15.223, "step": 6722 }, { "epoch": 0.88, "learning_rate": 3.4309548126185253e-06, "loss": 0.0001, "step": 6723 }, { "epoch": 0.89, "learning_rate": 3.423198936565669e-06, "loss": 0.0, "step": 6724 }, { "epoch": 0.89, "learning_rate": 3.415451526006369e-06, "loss": 17.338, "step": 6725 }, { "epoch": 0.89, "learning_rate": 3.4077125823487376e-06, "loss": 0.0, "step": 6726 }, { "epoch": 0.89, "learning_rate": 3.399982106999361e-06, "loss": 17.7679, "step": 6727 }, { "epoch": 0.89, "learning_rate": 3.3922601013632705e-06, "loss": 0.0, "step": 6728 }, { "epoch": 0.89, "learning_rate": 3.384546566843966e-06, "loss": 16.265, "step": 6729 }, { "epoch": 0.89, "learning_rate": 3.3768415048434197e-06, "loss": 15.8431, "step": 6730 }, { "epoch": 0.89, "learning_rate": 3.369144916762046e-06, "loss": 14.658, "step": 6731 }, { "epoch": 0.89, "learning_rate": 3.3614568039987302e-06, "loss": 17.0571, "step": 6732 }, { "epoch": 0.89, "learning_rate": 3.3537771679508113e-06, "loss": 15.7359, "step": 6733 }, { "epoch": 0.89, "learning_rate": 3.3461060100140883e-06, "loss": 17.4402, "step": 6734 }, { "epoch": 0.89, "learning_rate": 3.3384433315828246e-06, "loss": 17.3963, "step": 6735 }, { "epoch": 0.89, "learning_rate": 3.3307891340497332e-06, "loss": 17.1486, "step": 6736 }, { "epoch": 0.89, "learning_rate": 3.3231434188059905e-06, "loss": 17.3433, "step": 6737 }, { "epoch": 0.89, "learning_rate": 3.3155061872412294e-06, "loss": 17.1886, "step": 6738 }, { "epoch": 0.89, "learning_rate": 3.307877440743551e-06, "loss": 16.8067, "step": 6739 }, { "epoch": 0.89, "learning_rate": 3.3002571806995066e-06, "loss": 0.0, "step": 6740 }, { "epoch": 0.89, "learning_rate": 3.2926454084940993e-06, "loss": 15.9347, "step": 6741 }, { "epoch": 0.89, "learning_rate": 3.2850421255107953e-06, "loss": 0.0009, "step": 6742 }, { "epoch": 0.89, "learning_rate": 3.277447333131517e-06, "loss": 16.9017, "step": 6743 }, { "epoch": 0.89, "learning_rate": 3.2698610327366484e-06, "loss": 16.0501, "step": 6744 }, { "epoch": 0.89, "learning_rate": 3.262283225705021e-06, "loss": 15.8408, "step": 6745 }, { "epoch": 0.89, "learning_rate": 3.2547139134139203e-06, "loss": 15.5681, "step": 6746 }, { "epoch": 0.89, "learning_rate": 3.2471530972391085e-06, "loss": 17.9767, "step": 6747 }, { "epoch": 0.89, "learning_rate": 3.2396007785547853e-06, "loss": 19.1275, "step": 6748 }, { "epoch": 0.89, "learning_rate": 3.2320569587336147e-06, "loss": 0.0002, "step": 6749 }, { "epoch": 0.89, "learning_rate": 3.224521639146705e-06, "loss": 16.7922, "step": 6750 }, { "epoch": 0.89, "learning_rate": 3.216994821163627e-06, "loss": 18.9681, "step": 6751 }, { "epoch": 0.89, "learning_rate": 3.2094765061524033e-06, "loss": 17.8751, "step": 6752 }, { "epoch": 0.89, "learning_rate": 3.201966695479525e-06, "loss": 0.0, "step": 6753 }, { "epoch": 0.89, "learning_rate": 3.194465390509904e-06, "loss": 0.0, "step": 6754 }, { "epoch": 0.89, "learning_rate": 3.1869725926069515e-06, "loss": 0.0, "step": 6755 }, { "epoch": 0.89, "learning_rate": 3.1794883031324995e-06, "loss": 16.357, "step": 6756 }, { "epoch": 0.89, "learning_rate": 3.172012523446849e-06, "loss": 16.8132, "step": 6757 }, { "epoch": 0.89, "learning_rate": 3.1645452549087406e-06, "loss": 16.9876, "step": 6758 }, { "epoch": 0.89, "learning_rate": 3.157086498875378e-06, "loss": 0.0, "step": 6759 }, { "epoch": 0.89, "learning_rate": 3.149636256702421e-06, "loss": 0.0001, "step": 6760 }, { "epoch": 0.89, "learning_rate": 3.142194529743969e-06, "loss": 0.0, "step": 6761 }, { "epoch": 0.89, "learning_rate": 3.1347613193525804e-06, "loss": 15.9218, "step": 6762 }, { "epoch": 0.89, "learning_rate": 3.1273366268792738e-06, "loss": 16.0278, "step": 6763 }, { "epoch": 0.89, "learning_rate": 3.1199204536735085e-06, "loss": 16.0476, "step": 6764 }, { "epoch": 0.89, "learning_rate": 3.112512801083206e-06, "loss": 17.0464, "step": 6765 }, { "epoch": 0.89, "learning_rate": 3.105113670454729e-06, "loss": 0.0001, "step": 6766 }, { "epoch": 0.89, "learning_rate": 3.09772306313289e-06, "loss": 18.2414, "step": 6767 }, { "epoch": 0.89, "learning_rate": 3.0903409804609594e-06, "loss": 0.0, "step": 6768 }, { "epoch": 0.89, "learning_rate": 3.0829674237806582e-06, "loss": 17.7644, "step": 6769 }, { "epoch": 0.89, "learning_rate": 3.075602394432159e-06, "loss": 15.11, "step": 6770 }, { "epoch": 0.89, "learning_rate": 3.0682458937540627e-06, "loss": 16.5807, "step": 6771 }, { "epoch": 0.89, "learning_rate": 3.0608979230834666e-06, "loss": 17.204, "step": 6772 }, { "epoch": 0.89, "learning_rate": 3.053558483755875e-06, "loss": 18.4654, "step": 6773 }, { "epoch": 0.89, "learning_rate": 3.046227577105254e-06, "loss": 0.0, "step": 6774 }, { "epoch": 0.89, "learning_rate": 3.0389052044640263e-06, "loss": 17.0364, "step": 6775 }, { "epoch": 0.89, "learning_rate": 3.031591367163056e-06, "loss": 16.0954, "step": 6776 }, { "epoch": 0.89, "learning_rate": 3.0242860665316564e-06, "loss": 0.0, "step": 6777 }, { "epoch": 0.89, "learning_rate": 3.016989303897594e-06, "loss": 16.1553, "step": 6778 }, { "epoch": 0.89, "learning_rate": 3.009701080587074e-06, "loss": 16.0133, "step": 6779 }, { "epoch": 0.89, "learning_rate": 3.002421397924765e-06, "loss": 17.1286, "step": 6780 }, { "epoch": 0.89, "learning_rate": 2.995150257233775e-06, "loss": 0.0, "step": 6781 }, { "epoch": 0.89, "learning_rate": 2.987887659835653e-06, "loss": 17.4158, "step": 6782 }, { "epoch": 0.89, "learning_rate": 2.9806336070503984e-06, "loss": 15.2495, "step": 6783 }, { "epoch": 0.89, "learning_rate": 2.9733881001964725e-06, "loss": 15.4707, "step": 6784 }, { "epoch": 0.89, "learning_rate": 2.9661511405907562e-06, "loss": 0.0, "step": 6785 }, { "epoch": 0.89, "learning_rate": 2.9589227295485965e-06, "loss": 17.818, "step": 6786 }, { "epoch": 0.89, "learning_rate": 2.9517028683837875e-06, "loss": 16.6851, "step": 6787 }, { "epoch": 0.89, "learning_rate": 2.9444915584085575e-06, "loss": 15.8365, "step": 6788 }, { "epoch": 0.89, "learning_rate": 2.9372888009335907e-06, "loss": 16.0068, "step": 6789 }, { "epoch": 0.89, "learning_rate": 2.9300945972680126e-06, "loss": 16.1543, "step": 6790 }, { "epoch": 0.89, "learning_rate": 2.922908948719394e-06, "loss": 16.2124, "step": 6791 }, { "epoch": 0.89, "learning_rate": 2.915731856593751e-06, "loss": 17.9551, "step": 6792 }, { "epoch": 0.89, "learning_rate": 2.9085633221955453e-06, "loss": 17.4323, "step": 6793 }, { "epoch": 0.89, "learning_rate": 2.9014033468276803e-06, "loss": 0.0, "step": 6794 }, { "epoch": 0.89, "learning_rate": 2.8942519317915084e-06, "loss": 0.0001, "step": 6795 }, { "epoch": 0.89, "learning_rate": 2.887109078386813e-06, "loss": 15.425, "step": 6796 }, { "epoch": 0.89, "learning_rate": 2.8799747879118543e-06, "loss": 0.0001, "step": 6797 }, { "epoch": 0.89, "learning_rate": 2.8728490616632965e-06, "loss": 16.6462, "step": 6798 }, { "epoch": 0.89, "learning_rate": 2.8657319009362747e-06, "loss": 0.0, "step": 6799 }, { "epoch": 0.9, "learning_rate": 2.858623307024355e-06, "loss": 0.0, "step": 6800 }, { "epoch": 0.9, "learning_rate": 2.8515232812195427e-06, "loss": 16.5007, "step": 6801 }, { "epoch": 0.9, "learning_rate": 2.844431824812299e-06, "loss": 16.9113, "step": 6802 }, { "epoch": 0.9, "learning_rate": 2.8373489390915165e-06, "loss": 0.0001, "step": 6803 }, { "epoch": 0.9, "learning_rate": 2.8302746253445256e-06, "loss": 17.2226, "step": 6804 }, { "epoch": 0.9, "learning_rate": 2.8232088848571257e-06, "loss": 0.0002, "step": 6805 }, { "epoch": 0.9, "learning_rate": 2.816151718913529e-06, "loss": 15.855, "step": 6806 }, { "epoch": 0.9, "learning_rate": 2.809103128796403e-06, "loss": 0.0003, "step": 6807 }, { "epoch": 0.9, "learning_rate": 2.802063115786846e-06, "loss": 16.3902, "step": 6808 }, { "epoch": 0.9, "learning_rate": 2.795031681164417e-06, "loss": 0.0, "step": 6809 }, { "epoch": 0.9, "learning_rate": 2.7880088262070893e-06, "loss": 16.4728, "step": 6810 }, { "epoch": 0.9, "learning_rate": 2.780994552191302e-06, "loss": 16.831, "step": 6811 }, { "epoch": 0.9, "learning_rate": 2.7739888603919186e-06, "loss": 0.0001, "step": 6812 }, { "epoch": 0.9, "learning_rate": 2.7669917520822377e-06, "loss": 16.8038, "step": 6813 }, { "epoch": 0.9, "learning_rate": 2.7600032285340247e-06, "loss": 0.0001, "step": 6814 }, { "epoch": 0.9, "learning_rate": 2.7530232910174646e-06, "loss": 16.2935, "step": 6815 }, { "epoch": 0.9, "learning_rate": 2.746051940801181e-06, "loss": 0.0, "step": 6816 }, { "epoch": 0.9, "learning_rate": 2.739089179152243e-06, "loss": 16.7351, "step": 6817 }, { "epoch": 0.9, "learning_rate": 2.7321350073361506e-06, "loss": 0.0, "step": 6818 }, { "epoch": 0.9, "learning_rate": 2.7251894266168587e-06, "loss": 17.9841, "step": 6819 }, { "epoch": 0.9, "learning_rate": 2.718252438256735e-06, "loss": 0.0, "step": 6820 }, { "epoch": 0.9, "learning_rate": 2.7113240435166108e-06, "loss": 0.0, "step": 6821 }, { "epoch": 0.9, "learning_rate": 2.7044042436557506e-06, "loss": 18.857, "step": 6822 }, { "epoch": 0.9, "learning_rate": 2.6974930399318475e-06, "loss": 16.4316, "step": 6823 }, { "epoch": 0.9, "learning_rate": 2.6905904336010313e-06, "loss": 17.4558, "step": 6824 }, { "epoch": 0.9, "learning_rate": 2.6836964259178808e-06, "loss": 17.3397, "step": 6825 }, { "epoch": 0.9, "learning_rate": 2.6768110181354057e-06, "loss": 14.7984, "step": 6826 }, { "epoch": 0.9, "learning_rate": 2.6699342115050486e-06, "loss": 17.0879, "step": 6827 }, { "epoch": 0.9, "learning_rate": 2.6630660072766935e-06, "loss": 16.6697, "step": 6828 }, { "epoch": 0.9, "learning_rate": 2.6562064066986525e-06, "loss": 16.2197, "step": 6829 }, { "epoch": 0.9, "learning_rate": 2.6493554110176955e-06, "loss": 16.2262, "step": 6830 }, { "epoch": 0.9, "learning_rate": 2.6425130214790096e-06, "loss": 16.0374, "step": 6831 }, { "epoch": 0.9, "learning_rate": 2.635679239326222e-06, "loss": 16.6544, "step": 6832 }, { "epoch": 0.9, "learning_rate": 2.6288540658014004e-06, "loss": 17.2818, "step": 6833 }, { "epoch": 0.9, "learning_rate": 2.622037502145036e-06, "loss": 17.6326, "step": 6834 }, { "epoch": 0.9, "learning_rate": 2.6152295495960655e-06, "loss": 0.0, "step": 6835 }, { "epoch": 0.9, "learning_rate": 2.6084302093918545e-06, "loss": 16.6972, "step": 6836 }, { "epoch": 0.9, "learning_rate": 2.6016394827682145e-06, "loss": 0.0001, "step": 6837 }, { "epoch": 0.9, "learning_rate": 2.5948573709593695e-06, "loss": 0.0002, "step": 6838 }, { "epoch": 0.9, "learning_rate": 2.5880838751980108e-06, "loss": 16.2828, "step": 6839 }, { "epoch": 0.9, "learning_rate": 2.5813189967152317e-06, "loss": 0.0001, "step": 6840 }, { "epoch": 0.9, "learning_rate": 2.574562736740577e-06, "loss": 15.76, "step": 6841 }, { "epoch": 0.9, "learning_rate": 2.567815096502013e-06, "loss": 16.8346, "step": 6842 }, { "epoch": 0.9, "learning_rate": 2.5610760772259544e-06, "loss": 0.0001, "step": 6843 }, { "epoch": 0.9, "learning_rate": 2.554345680137238e-06, "loss": 16.4227, "step": 6844 }, { "epoch": 0.9, "learning_rate": 2.5476239064591402e-06, "loss": 16.3192, "step": 6845 }, { "epoch": 0.9, "learning_rate": 2.5409107574133504e-06, "loss": 0.0, "step": 6846 }, { "epoch": 0.9, "learning_rate": 2.5342062342200377e-06, "loss": 16.6289, "step": 6847 }, { "epoch": 0.9, "learning_rate": 2.5275103380977438e-06, "loss": 17.527, "step": 6848 }, { "epoch": 0.9, "learning_rate": 2.5208230702634837e-06, "loss": 0.0, "step": 6849 }, { "epoch": 0.9, "learning_rate": 2.5141444319326855e-06, "loss": 0.0001, "step": 6850 }, { "epoch": 0.9, "learning_rate": 2.507474424319217e-06, "loss": 15.1313, "step": 6851 }, { "epoch": 0.9, "learning_rate": 2.5008130486353697e-06, "loss": 16.3152, "step": 6852 }, { "epoch": 0.9, "learning_rate": 2.494160306091886e-06, "loss": 16.3441, "step": 6853 }, { "epoch": 0.9, "learning_rate": 2.4875161978979156e-06, "loss": 18.9514, "step": 6854 }, { "epoch": 0.9, "learning_rate": 2.480880725261042e-06, "loss": 0.0, "step": 6855 }, { "epoch": 0.9, "learning_rate": 2.4742538893872957e-06, "loss": 15.9901, "step": 6856 }, { "epoch": 0.9, "learning_rate": 2.467635691481124e-06, "loss": 17.3189, "step": 6857 }, { "epoch": 0.9, "learning_rate": 2.4610261327454086e-06, "loss": 17.2843, "step": 6858 }, { "epoch": 0.9, "learning_rate": 2.4544252143814506e-06, "loss": 0.0001, "step": 6859 }, { "epoch": 0.9, "learning_rate": 2.447832937589001e-06, "loss": 17.4122, "step": 6860 }, { "epoch": 0.9, "learning_rate": 2.441249303566212e-06, "loss": 15.2062, "step": 6861 }, { "epoch": 0.9, "learning_rate": 2.4346743135096996e-06, "loss": 0.0002, "step": 6862 }, { "epoch": 0.9, "learning_rate": 2.428107968614485e-06, "loss": 16.627, "step": 6863 }, { "epoch": 0.9, "learning_rate": 2.4215502700740244e-06, "loss": 16.5142, "step": 6864 }, { "epoch": 0.9, "learning_rate": 2.415001219080204e-06, "loss": 17.3776, "step": 6865 }, { "epoch": 0.9, "learning_rate": 2.408460816823327e-06, "loss": 15.9554, "step": 6866 }, { "epoch": 0.9, "learning_rate": 2.401929064492142e-06, "loss": 17.1223, "step": 6867 }, { "epoch": 0.9, "learning_rate": 2.3954059632738166e-06, "loss": 0.0001, "step": 6868 }, { "epoch": 0.9, "learning_rate": 2.3888915143539412e-06, "loss": 17.8857, "step": 6869 }, { "epoch": 0.9, "learning_rate": 2.3823857189165467e-06, "loss": 17.8921, "step": 6870 }, { "epoch": 0.9, "learning_rate": 2.375888578144081e-06, "loss": 15.8343, "step": 6871 }, { "epoch": 0.9, "learning_rate": 2.369400093217422e-06, "loss": 18.3485, "step": 6872 }, { "epoch": 0.9, "learning_rate": 2.362920265315877e-06, "loss": 18.1386, "step": 6873 }, { "epoch": 0.9, "learning_rate": 2.3564490956171693e-06, "loss": 0.0, "step": 6874 }, { "epoch": 0.9, "learning_rate": 2.3499865852974647e-06, "loss": 16.3241, "step": 6875 }, { "epoch": 0.91, "learning_rate": 2.3435327355313395e-06, "loss": 19.0851, "step": 6876 }, { "epoch": 0.91, "learning_rate": 2.3370875474918007e-06, "loss": 17.2958, "step": 6877 }, { "epoch": 0.91, "learning_rate": 2.3306510223502998e-06, "loss": 17.5684, "step": 6878 }, { "epoch": 0.91, "learning_rate": 2.3242231612766784e-06, "loss": 14.4695, "step": 6879 }, { "epoch": 0.91, "learning_rate": 2.3178039654392357e-06, "loss": 17.0937, "step": 6880 }, { "epoch": 0.91, "learning_rate": 2.3113934360046772e-06, "loss": 0.0, "step": 6881 }, { "epoch": 0.91, "learning_rate": 2.3049915741381376e-06, "loss": 16.7389, "step": 6882 }, { "epoch": 0.91, "learning_rate": 2.2985983810031754e-06, "loss": 17.1828, "step": 6883 }, { "epoch": 0.91, "learning_rate": 2.2922138577617825e-06, "loss": 17.1785, "step": 6884 }, { "epoch": 0.91, "learning_rate": 2.285838005574359e-06, "loss": 15.6231, "step": 6885 }, { "epoch": 0.91, "learning_rate": 2.2794708255997386e-06, "loss": 14.9371, "step": 6886 }, { "epoch": 0.91, "learning_rate": 2.2731123189951908e-06, "loss": 0.0, "step": 6887 }, { "epoch": 0.91, "learning_rate": 2.266762486916385e-06, "loss": 17.2592, "step": 6888 }, { "epoch": 0.91, "learning_rate": 2.2604213305174317e-06, "loss": 17.3503, "step": 6889 }, { "epoch": 0.91, "learning_rate": 2.254088850950847e-06, "loss": 16.3679, "step": 6890 }, { "epoch": 0.91, "learning_rate": 2.2477650493675895e-06, "loss": 16.7043, "step": 6891 }, { "epoch": 0.91, "learning_rate": 2.241449926917033e-06, "loss": 18.1808, "step": 6892 }, { "epoch": 0.91, "learning_rate": 2.2351434847469655e-06, "loss": 0.0003, "step": 6893 }, { "epoch": 0.91, "learning_rate": 2.2288457240036033e-06, "loss": 16.6838, "step": 6894 }, { "epoch": 0.91, "learning_rate": 2.2225566458315973e-06, "loss": 15.739, "step": 6895 }, { "epoch": 0.91, "learning_rate": 2.2162762513740055e-06, "loss": 16.5427, "step": 6896 }, { "epoch": 0.91, "learning_rate": 2.2100045417723093e-06, "loss": 15.545, "step": 6897 }, { "epoch": 0.91, "learning_rate": 2.203741518166408e-06, "loss": 16.8981, "step": 6898 }, { "epoch": 0.91, "learning_rate": 2.1974871816946353e-06, "loss": 14.6421, "step": 6899 }, { "epoch": 0.91, "learning_rate": 2.1912415334937318e-06, "loss": 17.749, "step": 6900 }, { "epoch": 0.91, "learning_rate": 2.1850045746988735e-06, "loss": 16.8387, "step": 6901 }, { "epoch": 0.91, "learning_rate": 2.178776306443636e-06, "loss": 16.5689, "step": 6902 }, { "epoch": 0.91, "learning_rate": 2.172556729860048e-06, "loss": 17.2058, "step": 6903 }, { "epoch": 0.91, "learning_rate": 2.166345846078521e-06, "loss": 17.6835, "step": 6904 }, { "epoch": 0.91, "learning_rate": 2.16014365622792e-06, "loss": 0.0, "step": 6905 }, { "epoch": 0.91, "learning_rate": 2.1539501614354973e-06, "loss": 18.2839, "step": 6906 }, { "epoch": 0.91, "learning_rate": 2.1477653628269535e-06, "loss": 16.0289, "step": 6907 }, { "epoch": 0.91, "learning_rate": 2.1415892615263944e-06, "loss": 0.0002, "step": 6908 }, { "epoch": 0.91, "learning_rate": 2.13542185865635e-06, "loss": 17.2354, "step": 6909 }, { "epoch": 0.91, "learning_rate": 2.1292631553377573e-06, "loss": 0.0, "step": 6910 }, { "epoch": 0.91, "learning_rate": 2.1231131526899874e-06, "loss": 15.9012, "step": 6911 }, { "epoch": 0.91, "learning_rate": 2.1169718518308346e-06, "loss": 17.1552, "step": 6912 }, { "epoch": 0.91, "learning_rate": 2.1108392538764845e-06, "loss": 0.0, "step": 6913 }, { "epoch": 0.91, "learning_rate": 2.1047153599415727e-06, "loss": 18.7768, "step": 6914 }, { "epoch": 0.91, "learning_rate": 2.0986001711391256e-06, "loss": 17.776, "step": 6915 }, { "epoch": 0.91, "learning_rate": 2.09249368858061e-06, "loss": 0.0, "step": 6916 }, { "epoch": 0.91, "learning_rate": 2.086395913375888e-06, "loss": 15.7315, "step": 6917 }, { "epoch": 0.91, "learning_rate": 2.0803068466332675e-06, "loss": 16.5782, "step": 6918 }, { "epoch": 0.91, "learning_rate": 2.0742264894594356e-06, "loss": 0.0, "step": 6919 }, { "epoch": 0.91, "learning_rate": 2.0681548429595353e-06, "loss": 16.6931, "step": 6920 }, { "epoch": 0.91, "learning_rate": 2.0620919082371125e-06, "loss": 16.787, "step": 6921 }, { "epoch": 0.91, "learning_rate": 2.0560376863941134e-06, "loss": 17.7017, "step": 6922 }, { "epoch": 0.91, "learning_rate": 2.049992178530918e-06, "loss": 15.7911, "step": 6923 }, { "epoch": 0.91, "learning_rate": 2.0439553857463212e-06, "loss": 0.0, "step": 6924 }, { "epoch": 0.91, "learning_rate": 2.037927309137527e-06, "loss": 16.1084, "step": 6925 }, { "epoch": 0.91, "learning_rate": 2.03190794980016e-06, "loss": 16.1475, "step": 6926 }, { "epoch": 0.91, "learning_rate": 2.0258973088282563e-06, "loss": 15.429, "step": 6927 }, { "epoch": 0.91, "learning_rate": 2.0198953873142744e-06, "loss": 16.6082, "step": 6928 }, { "epoch": 0.91, "learning_rate": 2.013902186349087e-06, "loss": 16.9839, "step": 6929 }, { "epoch": 0.91, "learning_rate": 2.007917707021978e-06, "loss": 17.854, "step": 6930 }, { "epoch": 0.91, "learning_rate": 2.001941950420644e-06, "loss": 15.6558, "step": 6931 }, { "epoch": 0.91, "learning_rate": 1.995974917631199e-06, "loss": 19.3416, "step": 6932 }, { "epoch": 0.91, "learning_rate": 1.9900166097381765e-06, "loss": 16.9121, "step": 6933 }, { "epoch": 0.91, "learning_rate": 1.98406702782451e-06, "loss": 17.5486, "step": 6934 }, { "epoch": 0.91, "learning_rate": 1.9781261729715674e-06, "loss": 17.5027, "step": 6935 }, { "epoch": 0.91, "learning_rate": 1.972194046259107e-06, "loss": 0.0, "step": 6936 }, { "epoch": 0.91, "learning_rate": 1.9662706487653226e-06, "loss": 15.6168, "step": 6937 }, { "epoch": 0.91, "learning_rate": 1.960355981566814e-06, "loss": 16.8171, "step": 6938 }, { "epoch": 0.91, "learning_rate": 1.9544500457385874e-06, "loss": 16.3817, "step": 6939 }, { "epoch": 0.91, "learning_rate": 1.948552842354068e-06, "loss": 16.7559, "step": 6940 }, { "epoch": 0.91, "learning_rate": 1.9426643724850924e-06, "loss": 15.98, "step": 6941 }, { "epoch": 0.91, "learning_rate": 1.9367846372019105e-06, "loss": 0.0001, "step": 6942 }, { "epoch": 0.91, "learning_rate": 1.930913637573184e-06, "loss": 0.0004, "step": 6943 }, { "epoch": 0.91, "learning_rate": 1.9250513746659814e-06, "loss": 16.1241, "step": 6944 }, { "epoch": 0.91, "learning_rate": 1.919197849545801e-06, "loss": 17.0474, "step": 6945 }, { "epoch": 0.91, "learning_rate": 1.913353063276535e-06, "loss": 17.8167, "step": 6946 }, { "epoch": 0.91, "learning_rate": 1.907517016920496e-06, "loss": 18.3829, "step": 6947 }, { "epoch": 0.91, "learning_rate": 1.9016897115384014e-06, "loss": 16.443, "step": 6948 }, { "epoch": 0.91, "learning_rate": 1.8958711481893875e-06, "loss": 17.5259, "step": 6949 }, { "epoch": 0.91, "learning_rate": 1.890061327930992e-06, "loss": 17.9737, "step": 6950 }, { "epoch": 0.91, "learning_rate": 1.8842602518191753e-06, "loss": 16.2431, "step": 6951 }, { "epoch": 0.92, "learning_rate": 1.8784679209083e-06, "loss": 16.9458, "step": 6952 }, { "epoch": 0.92, "learning_rate": 1.872684336251146e-06, "loss": 15.591, "step": 6953 }, { "epoch": 0.92, "learning_rate": 1.8669094988989e-06, "loss": 0.0003, "step": 6954 }, { "epoch": 0.92, "learning_rate": 1.8611434099011504e-06, "loss": 17.0199, "step": 6955 }, { "epoch": 0.92, "learning_rate": 1.8553860703059146e-06, "loss": 17.2161, "step": 6956 }, { "epoch": 0.92, "learning_rate": 1.8496374811595996e-06, "loss": 0.0, "step": 6957 }, { "epoch": 0.92, "learning_rate": 1.843897643507042e-06, "loss": 15.3358, "step": 6958 }, { "epoch": 0.92, "learning_rate": 1.8381665583914631e-06, "loss": 17.145, "step": 6959 }, { "epoch": 0.92, "learning_rate": 1.8324442268545183e-06, "loss": 16.5224, "step": 6960 }, { "epoch": 0.92, "learning_rate": 1.8267306499362479e-06, "loss": 15.088, "step": 6961 }, { "epoch": 0.92, "learning_rate": 1.8210258286751326e-06, "loss": 0.0, "step": 6962 }, { "epoch": 0.92, "learning_rate": 1.8153297641080314e-06, "loss": 15.8625, "step": 6963 }, { "epoch": 0.92, "learning_rate": 1.8096424572702276e-06, "loss": 16.1237, "step": 6964 }, { "epoch": 0.92, "learning_rate": 1.8039639091954053e-06, "loss": 0.0, "step": 6965 }, { "epoch": 0.92, "learning_rate": 1.7982941209156668e-06, "loss": 17.0206, "step": 6966 }, { "epoch": 0.92, "learning_rate": 1.7926330934615043e-06, "loss": 16.1229, "step": 6967 }, { "epoch": 0.92, "learning_rate": 1.7869808278618393e-06, "loss": 15.8703, "step": 6968 }, { "epoch": 0.92, "learning_rate": 1.7813373251439835e-06, "loss": 0.0001, "step": 6969 }, { "epoch": 0.92, "learning_rate": 1.7757025863336662e-06, "loss": 19.2377, "step": 6970 }, { "epoch": 0.92, "learning_rate": 1.7700766124550238e-06, "loss": 17.9243, "step": 6971 }, { "epoch": 0.92, "learning_rate": 1.764459404530594e-06, "loss": 0.0, "step": 6972 }, { "epoch": 0.92, "learning_rate": 1.7588509635813211e-06, "loss": 16.4683, "step": 6973 }, { "epoch": 0.92, "learning_rate": 1.7532512906265618e-06, "loss": 16.2851, "step": 6974 }, { "epoch": 0.92, "learning_rate": 1.747660386684069e-06, "loss": 0.0, "step": 6975 }, { "epoch": 0.92, "learning_rate": 1.7420782527700185e-06, "loss": 16.7277, "step": 6976 }, { "epoch": 0.92, "learning_rate": 1.7365048898989712e-06, "loss": 17.7544, "step": 6977 }, { "epoch": 0.92, "learning_rate": 1.7309402990839164e-06, "loss": 17.2635, "step": 6978 }, { "epoch": 0.92, "learning_rate": 1.7253844813362341e-06, "loss": 18.2045, "step": 6979 }, { "epoch": 0.92, "learning_rate": 1.7198374376657111e-06, "loss": 18.0499, "step": 6980 }, { "epoch": 0.92, "learning_rate": 1.714299169080541e-06, "loss": 16.3046, "step": 6981 }, { "epoch": 0.92, "learning_rate": 1.7087696765873296e-06, "loss": 16.2758, "step": 6982 }, { "epoch": 0.92, "learning_rate": 1.703248961191073e-06, "loss": 0.0, "step": 6983 }, { "epoch": 0.92, "learning_rate": 1.6977370238951796e-06, "loss": 16.9598, "step": 6984 }, { "epoch": 0.92, "learning_rate": 1.6922338657014703e-06, "loss": 16.3179, "step": 6985 }, { "epoch": 0.92, "learning_rate": 1.686739487610156e-06, "loss": 15.2505, "step": 6986 }, { "epoch": 0.92, "learning_rate": 1.681253890619866e-06, "loss": 15.2591, "step": 6987 }, { "epoch": 0.92, "learning_rate": 1.6757770757276192e-06, "loss": 16.9551, "step": 6988 }, { "epoch": 0.92, "learning_rate": 1.6703090439288472e-06, "loss": 0.0001, "step": 6989 }, { "epoch": 0.92, "learning_rate": 1.6648497962173882e-06, "loss": 15.8309, "step": 6990 }, { "epoch": 0.92, "learning_rate": 1.6593993335854763e-06, "loss": 15.1075, "step": 6991 }, { "epoch": 0.92, "learning_rate": 1.653957657023747e-06, "loss": 17.7857, "step": 6992 }, { "epoch": 0.92, "learning_rate": 1.6485247675212534e-06, "loss": 16.7304, "step": 6993 }, { "epoch": 0.92, "learning_rate": 1.6431006660654336e-06, "loss": 15.8962, "step": 6994 }, { "epoch": 0.92, "learning_rate": 1.6376853536421432e-06, "loss": 17.5837, "step": 6995 }, { "epoch": 0.92, "learning_rate": 1.6322788312356285e-06, "loss": 16.0978, "step": 6996 }, { "epoch": 0.92, "learning_rate": 1.6268810998285422e-06, "loss": 18.0, "step": 6997 }, { "epoch": 0.92, "learning_rate": 1.6214921604019495e-06, "loss": 15.2424, "step": 6998 }, { "epoch": 0.92, "learning_rate": 1.6161120139353003e-06, "loss": 15.5059, "step": 6999 }, { "epoch": 0.92, "learning_rate": 1.610740661406457e-06, "loss": 16.8541, "step": 7000 }, { "epoch": 0.92, "learning_rate": 1.605378103791677e-06, "loss": 16.938, "step": 7001 }, { "epoch": 0.92, "learning_rate": 1.600024342065637e-06, "loss": 0.0001, "step": 7002 }, { "epoch": 0.92, "learning_rate": 1.5946793772013857e-06, "loss": 15.6556, "step": 7003 }, { "epoch": 0.92, "learning_rate": 1.589343210170402e-06, "loss": 17.1296, "step": 7004 }, { "epoch": 0.92, "learning_rate": 1.5840158419425433e-06, "loss": 17.9567, "step": 7005 }, { "epoch": 0.92, "learning_rate": 1.5786972734860794e-06, "loss": 17.6446, "step": 7006 }, { "epoch": 0.92, "learning_rate": 1.5733875057676818e-06, "loss": 0.0, "step": 7007 }, { "epoch": 0.92, "learning_rate": 1.5680865397524169e-06, "loss": 18.0227, "step": 7008 }, { "epoch": 0.92, "learning_rate": 1.5627943764037478e-06, "loss": 18.1442, "step": 7009 }, { "epoch": 0.92, "learning_rate": 1.557511016683555e-06, "loss": 15.9698, "step": 7010 }, { "epoch": 0.92, "learning_rate": 1.552236461552098e-06, "loss": 16.6374, "step": 7011 }, { "epoch": 0.92, "learning_rate": 1.5469707119680487e-06, "loss": 16.7044, "step": 7012 }, { "epoch": 0.92, "learning_rate": 1.5417137688884753e-06, "loss": 17.1782, "step": 7013 }, { "epoch": 0.92, "learning_rate": 1.536465633268841e-06, "loss": 16.3101, "step": 7014 }, { "epoch": 0.92, "learning_rate": 1.5312263060630216e-06, "loss": 16.9518, "step": 7015 }, { "epoch": 0.92, "learning_rate": 1.5259957882232722e-06, "loss": 16.9302, "step": 7016 }, { "epoch": 0.92, "learning_rate": 1.5207740807002603e-06, "loss": 14.7581, "step": 7017 }, { "epoch": 0.92, "learning_rate": 1.5155611844430596e-06, "loss": 16.6655, "step": 7018 }, { "epoch": 0.92, "learning_rate": 1.510357100399118e-06, "loss": 18.3685, "step": 7019 }, { "epoch": 0.92, "learning_rate": 1.5051618295143067e-06, "loss": 17.018, "step": 7020 }, { "epoch": 0.92, "learning_rate": 1.4999753727328758e-06, "loss": 16.5066, "step": 7021 }, { "epoch": 0.92, "learning_rate": 1.4947977309974826e-06, "loss": 17.0274, "step": 7022 }, { "epoch": 0.92, "learning_rate": 1.4896289052491853e-06, "loss": 18.058, "step": 7023 }, { "epoch": 0.92, "learning_rate": 1.4844688964274323e-06, "loss": 0.0003, "step": 7024 }, { "epoch": 0.92, "learning_rate": 1.479317705470079e-06, "loss": 0.0009, "step": 7025 }, { "epoch": 0.92, "learning_rate": 1.4741753333133545e-06, "loss": 16.1085, "step": 7026 }, { "epoch": 0.92, "learning_rate": 1.4690417808919276e-06, "loss": 17.8255, "step": 7027 }, { "epoch": 0.93, "learning_rate": 1.4639170491388242e-06, "loss": 0.0009, "step": 7028 }, { "epoch": 0.93, "learning_rate": 1.4588011389854828e-06, "loss": 15.6861, "step": 7029 }, { "epoch": 0.93, "learning_rate": 1.4536940513617424e-06, "loss": 0.0001, "step": 7030 }, { "epoch": 0.93, "learning_rate": 1.4485957871958277e-06, "loss": 15.6207, "step": 7031 }, { "epoch": 0.93, "learning_rate": 1.4435063474143694e-06, "loss": 16.6581, "step": 7032 }, { "epoch": 0.93, "learning_rate": 1.438425732942389e-06, "loss": 18.3579, "step": 7033 }, { "epoch": 0.93, "learning_rate": 1.4333539447033028e-06, "loss": 18.0774, "step": 7034 }, { "epoch": 0.93, "learning_rate": 1.4282909836189352e-06, "loss": 15.8629, "step": 7035 }, { "epoch": 0.93, "learning_rate": 1.423236850609494e-06, "loss": 17.2287, "step": 7036 }, { "epoch": 0.93, "learning_rate": 1.418191546593578e-06, "loss": 0.0, "step": 7037 }, { "epoch": 0.93, "learning_rate": 1.4131550724881925e-06, "loss": 17.3605, "step": 7038 }, { "epoch": 0.93, "learning_rate": 1.4081274292087333e-06, "loss": 0.0, "step": 7039 }, { "epoch": 0.93, "learning_rate": 1.4031086176689968e-06, "loss": 15.3674, "step": 7040 }, { "epoch": 0.93, "learning_rate": 1.3980986387811646e-06, "loss": 0.0, "step": 7041 }, { "epoch": 0.93, "learning_rate": 1.3930974934558138e-06, "loss": 17.971, "step": 7042 }, { "epoch": 0.93, "learning_rate": 1.3881051826019288e-06, "loss": 0.0007, "step": 7043 }, { "epoch": 0.93, "learning_rate": 1.383121707126872e-06, "loss": 17.6751, "step": 7044 }, { "epoch": 0.93, "learning_rate": 1.3781470679364084e-06, "loss": 17.183, "step": 7045 }, { "epoch": 0.93, "learning_rate": 1.3731812659347031e-06, "loss": 17.2548, "step": 7046 }, { "epoch": 0.93, "learning_rate": 1.3682243020243013e-06, "loss": 0.0, "step": 7047 }, { "epoch": 0.93, "learning_rate": 1.363276177106143e-06, "loss": 0.0002, "step": 7048 }, { "epoch": 0.93, "learning_rate": 1.3583368920795758e-06, "loss": 0.0, "step": 7049 }, { "epoch": 0.93, "learning_rate": 1.3534064478423313e-06, "loss": 15.0575, "step": 7050 }, { "epoch": 0.93, "learning_rate": 1.3484848452905207e-06, "loss": 17.092, "step": 7051 }, { "epoch": 0.93, "learning_rate": 1.3435720853186839e-06, "loss": 17.1201, "step": 7052 }, { "epoch": 0.93, "learning_rate": 1.3386681688197178e-06, "loss": 18.1572, "step": 7053 }, { "epoch": 0.93, "learning_rate": 1.3337730966849315e-06, "loss": 16.7613, "step": 7054 }, { "epoch": 0.93, "learning_rate": 1.3288868698040191e-06, "loss": 15.77, "step": 7055 }, { "epoch": 0.93, "learning_rate": 1.32400948906507e-06, "loss": 0.0001, "step": 7056 }, { "epoch": 0.93, "learning_rate": 1.3191409553545587e-06, "loss": 0.0, "step": 7057 }, { "epoch": 0.93, "learning_rate": 1.3142812695573714e-06, "loss": 0.0, "step": 7058 }, { "epoch": 0.93, "learning_rate": 1.309430432556752e-06, "loss": 14.3809, "step": 7059 }, { "epoch": 0.93, "learning_rate": 1.3045884452343781e-06, "loss": 17.2088, "step": 7060 }, { "epoch": 0.93, "learning_rate": 1.2997553084702906e-06, "loss": 15.9495, "step": 7061 }, { "epoch": 0.93, "learning_rate": 1.2949310231429257e-06, "loss": 0.0004, "step": 7062 }, { "epoch": 0.93, "learning_rate": 1.290115590129115e-06, "loss": 16.2397, "step": 7063 }, { "epoch": 0.93, "learning_rate": 1.285309010304081e-06, "loss": 0.0001, "step": 7064 }, { "epoch": 0.93, "learning_rate": 1.2805112845414357e-06, "loss": 0.0001, "step": 7065 }, { "epoch": 0.93, "learning_rate": 1.2757224137131763e-06, "loss": 0.0006, "step": 7066 }, { "epoch": 0.93, "learning_rate": 1.2709423986897006e-06, "loss": 18.2431, "step": 7067 }, { "epoch": 0.93, "learning_rate": 1.2661712403398028e-06, "loss": 16.61, "step": 7068 }, { "epoch": 0.93, "learning_rate": 1.261408939530645e-06, "loss": 16.587, "step": 7069 }, { "epoch": 0.93, "learning_rate": 1.2566554971277955e-06, "loss": 16.175, "step": 7070 }, { "epoch": 0.93, "learning_rate": 1.251910913995208e-06, "loss": 17.0589, "step": 7071 }, { "epoch": 0.93, "learning_rate": 1.2471751909952257e-06, "loss": 0.0, "step": 7072 }, { "epoch": 0.93, "learning_rate": 1.242448328988588e-06, "loss": 16.6189, "step": 7073 }, { "epoch": 0.93, "learning_rate": 1.2377303288344134e-06, "loss": 17.0894, "step": 7074 }, { "epoch": 0.93, "learning_rate": 1.2330211913902156e-06, "loss": 0.0, "step": 7075 }, { "epoch": 0.93, "learning_rate": 1.2283209175118882e-06, "loss": 0.0005, "step": 7076 }, { "epoch": 0.93, "learning_rate": 1.2236295080537418e-06, "loss": 17.5533, "step": 7077 }, { "epoch": 0.93, "learning_rate": 1.2189469638684393e-06, "loss": 0.0, "step": 7078 }, { "epoch": 0.93, "learning_rate": 1.2142732858070605e-06, "loss": 0.0, "step": 7079 }, { "epoch": 0.93, "learning_rate": 1.209608474719054e-06, "loss": 15.4965, "step": 7080 }, { "epoch": 0.93, "learning_rate": 1.2049525314522692e-06, "loss": 16.8338, "step": 7081 }, { "epoch": 0.93, "learning_rate": 1.200305456852935e-06, "loss": 16.4909, "step": 7082 }, { "epoch": 0.93, "learning_rate": 1.1956672517656809e-06, "loss": 0.0, "step": 7083 }, { "epoch": 0.93, "learning_rate": 1.1910379170335051e-06, "loss": 16.0269, "step": 7084 }, { "epoch": 0.93, "learning_rate": 1.1864174534978179e-06, "loss": 16.2599, "step": 7085 }, { "epoch": 0.93, "learning_rate": 1.1818058619983974e-06, "loss": 18.524, "step": 7086 }, { "epoch": 0.93, "learning_rate": 1.1772031433734177e-06, "loss": 13.9582, "step": 7087 }, { "epoch": 0.93, "learning_rate": 1.1726092984594428e-06, "loss": 16.6378, "step": 7088 }, { "epoch": 0.93, "learning_rate": 1.1680243280914106e-06, "loss": 16.7823, "step": 7089 }, { "epoch": 0.93, "learning_rate": 1.1634482331026652e-06, "loss": 16.1892, "step": 7090 }, { "epoch": 0.93, "learning_rate": 1.1588810143249195e-06, "loss": 0.0001, "step": 7091 }, { "epoch": 0.93, "learning_rate": 1.1543226725882817e-06, "loss": 0.0001, "step": 7092 }, { "epoch": 0.93, "learning_rate": 1.1497732087212498e-06, "loss": 17.5941, "step": 7093 }, { "epoch": 0.93, "learning_rate": 1.145232623550707e-06, "loss": 17.5697, "step": 7094 }, { "epoch": 0.93, "learning_rate": 1.1407009179019158e-06, "loss": 0.0, "step": 7095 }, { "epoch": 0.93, "learning_rate": 1.1361780925985333e-06, "loss": 16.1251, "step": 7096 }, { "epoch": 0.93, "learning_rate": 1.1316641484625912e-06, "loss": 15.8234, "step": 7097 }, { "epoch": 0.93, "learning_rate": 1.1271590863145165e-06, "loss": 0.0, "step": 7098 }, { "epoch": 0.93, "learning_rate": 1.1226629069731265e-06, "loss": 0.0, "step": 7099 }, { "epoch": 0.93, "learning_rate": 1.118175611255612e-06, "loss": 0.0001, "step": 7100 }, { "epoch": 0.93, "learning_rate": 1.1136971999775482e-06, "loss": 0.0005, "step": 7101 }, { "epoch": 0.93, "learning_rate": 1.1092276739529172e-06, "loss": 17.2063, "step": 7102 }, { "epoch": 0.93, "learning_rate": 1.1047670339940585e-06, "loss": 15.8802, "step": 7103 }, { "epoch": 0.94, "learning_rate": 1.1003152809117124e-06, "loss": 17.3388, "step": 7104 }, { "epoch": 0.94, "learning_rate": 1.0958724155150036e-06, "loss": 17.0302, "step": 7105 }, { "epoch": 0.94, "learning_rate": 1.0914384386114307e-06, "loss": 18.4922, "step": 7106 }, { "epoch": 0.94, "learning_rate": 1.087013351006888e-06, "loss": 16.8436, "step": 7107 }, { "epoch": 0.94, "learning_rate": 1.0825971535056545e-06, "loss": 0.0, "step": 7108 }, { "epoch": 0.94, "learning_rate": 1.0781898469103767e-06, "loss": 0.0001, "step": 7109 }, { "epoch": 0.94, "learning_rate": 1.0737914320221144e-06, "loss": 17.3941, "step": 7110 }, { "epoch": 0.94, "learning_rate": 1.069401909640283e-06, "loss": 17.4266, "step": 7111 }, { "epoch": 0.94, "learning_rate": 1.0650212805626947e-06, "loss": 16.9792, "step": 7112 }, { "epoch": 0.94, "learning_rate": 1.0606495455855514e-06, "loss": 17.8811, "step": 7113 }, { "epoch": 0.94, "learning_rate": 1.0562867055034231e-06, "loss": 0.0001, "step": 7114 }, { "epoch": 0.94, "learning_rate": 1.0519327611092755e-06, "loss": 17.8139, "step": 7115 }, { "epoch": 0.94, "learning_rate": 1.0475877131944478e-06, "loss": 15.983, "step": 7116 }, { "epoch": 0.94, "learning_rate": 1.0432515625486695e-06, "loss": 17.3027, "step": 7117 }, { "epoch": 0.94, "learning_rate": 1.0389243099600544e-06, "loss": 0.0, "step": 7118 }, { "epoch": 0.94, "learning_rate": 1.0346059562150956e-06, "loss": 17.0815, "step": 7119 }, { "epoch": 0.94, "learning_rate": 1.0302965020986655e-06, "loss": 15.8116, "step": 7120 }, { "epoch": 0.94, "learning_rate": 1.0259959483940206e-06, "loss": 16.8216, "step": 7121 }, { "epoch": 0.94, "learning_rate": 1.0217042958828026e-06, "loss": 0.0, "step": 7122 }, { "epoch": 0.94, "learning_rate": 1.0174215453450375e-06, "loss": 17.7879, "step": 7123 }, { "epoch": 0.94, "learning_rate": 1.0131476975591192e-06, "loss": 16.8229, "step": 7124 }, { "epoch": 0.94, "learning_rate": 1.0088827533018487e-06, "loss": 0.0001, "step": 7125 }, { "epoch": 0.94, "learning_rate": 1.0046267133483888e-06, "loss": 17.1221, "step": 7126 }, { "epoch": 0.94, "learning_rate": 1.000379578472288e-06, "loss": 16.2267, "step": 7127 }, { "epoch": 0.94, "learning_rate": 9.961413494454786e-07, "loss": 18.2362, "step": 7128 }, { "epoch": 0.94, "learning_rate": 9.91912027038272e-07, "loss": 18.0384, "step": 7129 }, { "epoch": 0.94, "learning_rate": 9.876916120193646e-07, "loss": 0.0002, "step": 7130 }, { "epoch": 0.94, "learning_rate": 9.834801051558262e-07, "loss": 15.6744, "step": 7131 }, { "epoch": 0.94, "learning_rate": 9.792775072131166e-07, "loss": 17.0293, "step": 7132 }, { "epoch": 0.94, "learning_rate": 9.750838189550692e-07, "loss": 0.0001, "step": 7133 }, { "epoch": 0.94, "learning_rate": 9.708990411439078e-07, "loss": 0.0, "step": 7134 }, { "epoch": 0.94, "learning_rate": 9.667231745402293e-07, "loss": 16.0112, "step": 7135 }, { "epoch": 0.94, "learning_rate": 9.625562199030047e-07, "loss": 0.0, "step": 7136 }, { "epoch": 0.94, "learning_rate": 9.583981779896e-07, "loss": 16.7637, "step": 7137 }, { "epoch": 0.94, "learning_rate": 9.542490495557443e-07, "loss": 16.8031, "step": 7138 }, { "epoch": 0.94, "learning_rate": 9.501088353555676e-07, "loss": 17.0843, "step": 7139 }, { "epoch": 0.94, "learning_rate": 9.459775361415512e-07, "loss": 16.5813, "step": 7140 }, { "epoch": 0.94, "learning_rate": 9.418551526645946e-07, "loss": 16.1098, "step": 7141 }, { "epoch": 0.94, "learning_rate": 9.37741685673943e-07, "loss": 17.3862, "step": 7142 }, { "epoch": 0.94, "learning_rate": 9.336371359172313e-07, "loss": 15.9989, "step": 7143 }, { "epoch": 0.94, "learning_rate": 9.295415041404798e-07, "loss": 0.0, "step": 7144 }, { "epoch": 0.94, "learning_rate": 9.254547910880818e-07, "loss": 17.0504, "step": 7145 }, { "epoch": 0.94, "learning_rate": 9.213769975028152e-07, "loss": 0.0, "step": 7146 }, { "epoch": 0.94, "learning_rate": 9.173081241258263e-07, "loss": 0.0, "step": 7147 }, { "epoch": 0.94, "learning_rate": 9.132481716966568e-07, "loss": 0.0, "step": 7148 }, { "epoch": 0.94, "learning_rate": 9.091971409531997e-07, "loss": 0.0, "step": 7149 }, { "epoch": 0.94, "learning_rate": 9.051550326317603e-07, "loss": 17.6355, "step": 7150 }, { "epoch": 0.94, "learning_rate": 9.011218474670013e-07, "loss": 0.0, "step": 7151 }, { "epoch": 0.94, "learning_rate": 8.970975861919695e-07, "loss": 16.7752, "step": 7152 }, { "epoch": 0.94, "learning_rate": 8.930822495380798e-07, "loss": 15.9118, "step": 7153 }, { "epoch": 0.94, "learning_rate": 8.890758382351427e-07, "loss": 15.284, "step": 7154 }, { "epoch": 0.94, "learning_rate": 8.850783530113372e-07, "loss": 16.9383, "step": 7155 }, { "epoch": 0.94, "learning_rate": 8.810897945932095e-07, "loss": 0.0, "step": 7156 }, { "epoch": 0.94, "learning_rate": 8.771101637056967e-07, "loss": 17.3244, "step": 7157 }, { "epoch": 0.94, "learning_rate": 8.731394610721255e-07, "loss": 16.3138, "step": 7158 }, { "epoch": 0.94, "learning_rate": 8.691776874141689e-07, "loss": 18.3201, "step": 7159 }, { "epoch": 0.94, "learning_rate": 8.65224843451895e-07, "loss": 17.121, "step": 7160 }, { "epoch": 0.94, "learning_rate": 8.612809299037516e-07, "loss": 16.5288, "step": 7161 }, { "epoch": 0.94, "learning_rate": 8.573459474865542e-07, "loss": 17.0559, "step": 7162 }, { "epoch": 0.94, "learning_rate": 8.534198969155083e-07, "loss": 0.0002, "step": 7163 }, { "epoch": 0.94, "learning_rate": 8.495027789041709e-07, "loss": 0.0, "step": 7164 }, { "epoch": 0.94, "learning_rate": 8.455945941645061e-07, "loss": 16.5627, "step": 7165 }, { "epoch": 0.94, "learning_rate": 8.416953434068342e-07, "loss": 16.771, "step": 7166 }, { "epoch": 0.94, "learning_rate": 8.378050273398552e-07, "loss": 0.0, "step": 7167 }, { "epoch": 0.94, "learning_rate": 8.339236466706535e-07, "loss": 17.2882, "step": 7168 }, { "epoch": 0.94, "learning_rate": 8.300512021046813e-07, "loss": 0.0, "step": 7169 }, { "epoch": 0.94, "learning_rate": 8.261876943457647e-07, "loss": 0.0, "step": 7170 }, { "epoch": 0.94, "learning_rate": 8.22333124096114e-07, "loss": 17.5752, "step": 7171 }, { "epoch": 0.94, "learning_rate": 8.184874920563079e-07, "loss": 17.0146, "step": 7172 }, { "epoch": 0.94, "learning_rate": 8.146507989253038e-07, "loss": 0.0, "step": 7173 }, { "epoch": 0.94, "learning_rate": 8.108230454004273e-07, "loss": 16.7314, "step": 7174 }, { "epoch": 0.94, "learning_rate": 8.070042321773996e-07, "loss": 0.0, "step": 7175 }, { "epoch": 0.94, "learning_rate": 8.031943599502989e-07, "loss": 17.8315, "step": 7176 }, { "epoch": 0.94, "learning_rate": 7.993934294115768e-07, "loss": 17.1207, "step": 7177 }, { "epoch": 0.94, "learning_rate": 7.956014412520696e-07, "loss": 18.1295, "step": 7178 }, { "epoch": 0.94, "learning_rate": 7.918183961609815e-07, "loss": 16.1073, "step": 7179 }, { "epoch": 0.95, "learning_rate": 7.88044294825896e-07, "loss": 17.4095, "step": 7180 }, { "epoch": 0.95, "learning_rate": 7.842791379327696e-07, "loss": 0.0, "step": 7181 }, { "epoch": 0.95, "learning_rate": 7.805229261659274e-07, "loss": 0.0003, "step": 7182 }, { "epoch": 0.95, "learning_rate": 7.767756602080789e-07, "loss": 0.0, "step": 7183 }, { "epoch": 0.95, "learning_rate": 7.730373407403069e-07, "loss": 17.1403, "step": 7184 }, { "epoch": 0.95, "learning_rate": 7.693079684420512e-07, "loss": 18.6638, "step": 7185 }, { "epoch": 0.95, "learning_rate": 7.655875439911531e-07, "loss": 15.9217, "step": 7186 }, { "epoch": 0.95, "learning_rate": 7.618760680637993e-07, "loss": 17.5224, "step": 7187 }, { "epoch": 0.95, "learning_rate": 7.58173541334567e-07, "loss": 15.9416, "step": 7188 }, { "epoch": 0.95, "learning_rate": 7.544799644764067e-07, "loss": 18.2511, "step": 7189 }, { "epoch": 0.95, "learning_rate": 7.507953381606369e-07, "loss": 17.4598, "step": 7190 }, { "epoch": 0.95, "learning_rate": 7.471196630569499e-07, "loss": 17.6696, "step": 7191 }, { "epoch": 0.95, "learning_rate": 7.434529398334167e-07, "loss": 0.0, "step": 7192 }, { "epoch": 0.95, "learning_rate": 7.397951691564709e-07, "loss": 18.0533, "step": 7193 }, { "epoch": 0.95, "learning_rate": 7.36146351690925e-07, "loss": 17.2183, "step": 7194 }, { "epoch": 0.95, "learning_rate": 7.325064880999711e-07, "loss": 0.0, "step": 7195 }, { "epoch": 0.95, "learning_rate": 7.288755790451629e-07, "loss": 16.9444, "step": 7196 }, { "epoch": 0.95, "learning_rate": 7.252536251864283e-07, "loss": 18.673, "step": 7197 }, { "epoch": 0.95, "learning_rate": 7.216406271820686e-07, "loss": 0.0006, "step": 7198 }, { "epoch": 0.95, "learning_rate": 7.180365856887583e-07, "loss": 17.9799, "step": 7199 }, { "epoch": 0.95, "learning_rate": 7.144415013615568e-07, "loss": 16.1694, "step": 7200 }, { "epoch": 0.95, "learning_rate": 7.108553748538693e-07, "loss": 17.9986, "step": 7201 }, { "epoch": 0.95, "learning_rate": 7.07278206817491e-07, "loss": 0.0, "step": 7202 }, { "epoch": 0.95, "learning_rate": 7.037099979025852e-07, "loss": 16.0988, "step": 7203 }, { "epoch": 0.95, "learning_rate": 7.001507487576831e-07, "loss": 0.0002, "step": 7204 }, { "epoch": 0.95, "learning_rate": 6.96600460029695e-07, "loss": 16.1148, "step": 7205 }, { "epoch": 0.95, "learning_rate": 6.930591323638991e-07, "loss": 16.5009, "step": 7206 }, { "epoch": 0.95, "learning_rate": 6.895267664039362e-07, "loss": 0.0, "step": 7207 }, { "epoch": 0.95, "learning_rate": 6.860033627918316e-07, "loss": 15.1923, "step": 7208 }, { "epoch": 0.95, "learning_rate": 6.824889221679731e-07, "loss": 15.9052, "step": 7209 }, { "epoch": 0.95, "learning_rate": 6.789834451711274e-07, "loss": 0.0, "step": 7210 }, { "epoch": 0.95, "learning_rate": 6.754869324384239e-07, "loss": 0.0, "step": 7211 }, { "epoch": 0.95, "learning_rate": 6.719993846053596e-07, "loss": 18.1618, "step": 7212 }, { "epoch": 0.95, "learning_rate": 6.68520802305822e-07, "loss": 15.9185, "step": 7213 }, { "epoch": 0.95, "learning_rate": 6.650511861720443e-07, "loss": 18.5213, "step": 7214 }, { "epoch": 0.95, "learning_rate": 6.615905368346387e-07, "loss": 16.3565, "step": 7215 }, { "epoch": 0.95, "learning_rate": 6.58138854922602e-07, "loss": 0.0008, "step": 7216 }, { "epoch": 0.95, "learning_rate": 6.546961410632824e-07, "loss": 17.1431, "step": 7217 }, { "epoch": 0.95, "learning_rate": 6.512623958824071e-07, "loss": 17.0855, "step": 7218 }, { "epoch": 0.95, "learning_rate": 6.478376200040714e-07, "loss": 15.8375, "step": 7219 }, { "epoch": 0.95, "learning_rate": 6.44421814050733e-07, "loss": 0.0001, "step": 7220 }, { "epoch": 0.95, "learning_rate": 6.41014978643234e-07, "loss": 17.1558, "step": 7221 }, { "epoch": 0.95, "learning_rate": 6.376171144007792e-07, "loss": 0.0007, "step": 7222 }, { "epoch": 0.95, "learning_rate": 6.342282219409356e-07, "loss": 15.8254, "step": 7223 }, { "epoch": 0.95, "learning_rate": 6.308483018796441e-07, "loss": 17.1566, "step": 7224 }, { "epoch": 0.95, "learning_rate": 6.274773548312296e-07, "loss": 0.0016, "step": 7225 }, { "epoch": 0.95, "learning_rate": 6.241153814083634e-07, "loss": 0.0, "step": 7226 }, { "epoch": 0.95, "learning_rate": 6.207623822221009e-07, "loss": 16.9022, "step": 7227 }, { "epoch": 0.95, "learning_rate": 6.174183578818549e-07, "loss": 0.0006, "step": 7228 }, { "epoch": 0.95, "learning_rate": 6.140833089954223e-07, "loss": 0.0, "step": 7229 }, { "epoch": 0.95, "learning_rate": 6.107572361689518e-07, "loss": 0.0001, "step": 7230 }, { "epoch": 0.95, "learning_rate": 6.074401400069652e-07, "loss": 0.0, "step": 7231 }, { "epoch": 0.95, "learning_rate": 6.041320211123635e-07, "loss": 15.4907, "step": 7232 }, { "epoch": 0.95, "learning_rate": 6.008328800864105e-07, "loss": 0.0001, "step": 7233 }, { "epoch": 0.95, "learning_rate": 5.975427175287374e-07, "loss": 0.0001, "step": 7234 }, { "epoch": 0.95, "learning_rate": 5.942615340373325e-07, "loss": 0.0001, "step": 7235 }, { "epoch": 0.95, "learning_rate": 5.909893302085689e-07, "loss": 16.315, "step": 7236 }, { "epoch": 0.95, "learning_rate": 5.87726106637182e-07, "loss": 0.0, "step": 7237 }, { "epoch": 0.95, "learning_rate": 5.844718639162694e-07, "loss": 18.4238, "step": 7238 }, { "epoch": 0.95, "learning_rate": 5.812266026373026e-07, "loss": 18.6972, "step": 7239 }, { "epoch": 0.95, "learning_rate": 5.779903233901152e-07, "loss": 0.0, "step": 7240 }, { "epoch": 0.95, "learning_rate": 5.747630267629256e-07, "loss": 17.0668, "step": 7241 }, { "epoch": 0.95, "learning_rate": 5.715447133422924e-07, "loss": 17.2694, "step": 7242 }, { "epoch": 0.95, "learning_rate": 5.683353837131589e-07, "loss": 17.1558, "step": 7243 }, { "epoch": 0.95, "learning_rate": 5.651350384588361e-07, "loss": 17.4264, "step": 7244 }, { "epoch": 0.95, "learning_rate": 5.619436781609922e-07, "loss": 16.5184, "step": 7245 }, { "epoch": 0.95, "learning_rate": 5.587613033996685e-07, "loss": 15.6731, "step": 7246 }, { "epoch": 0.95, "learning_rate": 5.55587914753275e-07, "loss": 16.5937, "step": 7247 }, { "epoch": 0.95, "learning_rate": 5.524235127985889e-07, "loss": 0.0001, "step": 7248 }, { "epoch": 0.95, "learning_rate": 5.492680981107389e-07, "loss": 18.2132, "step": 7249 }, { "epoch": 0.95, "learning_rate": 5.461216712632444e-07, "loss": 17.2512, "step": 7250 }, { "epoch": 0.95, "learning_rate": 5.429842328279755e-07, "loss": 0.0, "step": 7251 }, { "epoch": 0.95, "learning_rate": 5.398557833751761e-07, "loss": 16.8729, "step": 7252 }, { "epoch": 0.95, "learning_rate": 5.367363234734413e-07, "loss": 0.0001, "step": 7253 }, { "epoch": 0.95, "learning_rate": 5.336258536897564e-07, "loss": 0.0, "step": 7254 }, { "epoch": 0.95, "learning_rate": 5.305243745894583e-07, "loss": 16.3216, "step": 7255 }, { "epoch": 0.96, "learning_rate": 5.27431886736246e-07, "loss": 15.5238, "step": 7256 }, { "epoch": 0.96, "learning_rate": 5.243483906921864e-07, "loss": 17.1597, "step": 7257 }, { "epoch": 0.96, "learning_rate": 5.212738870177259e-07, "loss": 0.0001, "step": 7258 }, { "epoch": 0.96, "learning_rate": 5.182083762716616e-07, "loss": 16.0112, "step": 7259 }, { "epoch": 0.96, "learning_rate": 5.151518590111593e-07, "loss": 17.6777, "step": 7260 }, { "epoch": 0.96, "learning_rate": 5.121043357917521e-07, "loss": 17.8769, "step": 7261 }, { "epoch": 0.96, "learning_rate": 5.09065807167336e-07, "loss": 17.4917, "step": 7262 }, { "epoch": 0.96, "learning_rate": 5.060362736901802e-07, "loss": 15.9451, "step": 7263 }, { "epoch": 0.96, "learning_rate": 5.030157359109e-07, "loss": 14.2236, "step": 7264 }, { "epoch": 0.96, "learning_rate": 5.000041943785061e-07, "loss": 17.0895, "step": 7265 }, { "epoch": 0.96, "learning_rate": 4.97001649640344e-07, "loss": 16.4557, "step": 7266 }, { "epoch": 0.96, "learning_rate": 4.940081022421439e-07, "loss": 17.4089, "step": 7267 }, { "epoch": 0.96, "learning_rate": 4.910235527279872e-07, "loss": 0.0, "step": 7268 }, { "epoch": 0.96, "learning_rate": 4.880480016403288e-07, "loss": 17.9809, "step": 7269 }, { "epoch": 0.96, "learning_rate": 4.850814495199807e-07, "loss": 16.661, "step": 7270 }, { "epoch": 0.96, "learning_rate": 4.821238969061337e-07, "loss": 15.2032, "step": 7271 }, { "epoch": 0.96, "learning_rate": 4.791753443363245e-07, "loss": 16.891, "step": 7272 }, { "epoch": 0.96, "learning_rate": 4.7623579234646333e-07, "loss": 0.0, "step": 7273 }, { "epoch": 0.96, "learning_rate": 4.733052414708339e-07, "loss": 0.0001, "step": 7274 }, { "epoch": 0.96, "learning_rate": 4.7038369224206016e-07, "loss": 0.0, "step": 7275 }, { "epoch": 0.96, "learning_rate": 4.6747114519115063e-07, "loss": 18.499, "step": 7276 }, { "epoch": 0.96, "learning_rate": 4.6456760084747643e-07, "loss": 15.7516, "step": 7277 }, { "epoch": 0.96, "learning_rate": 4.616730597387542e-07, "loss": 0.0, "step": 7278 }, { "epoch": 0.96, "learning_rate": 4.5878752239108533e-07, "loss": 15.9863, "step": 7279 }, { "epoch": 0.96, "learning_rate": 4.559109893289171e-07, "loss": 0.0, "step": 7280 }, { "epoch": 0.96, "learning_rate": 4.530434610750811e-07, "loss": 15.9809, "step": 7281 }, { "epoch": 0.96, "learning_rate": 4.50184938150755e-07, "loss": 0.0, "step": 7282 }, { "epoch": 0.96, "learning_rate": 4.4733542107548434e-07, "loss": 15.792, "step": 7283 }, { "epoch": 0.96, "learning_rate": 4.444949103671825e-07, "loss": 0.0001, "step": 7284 }, { "epoch": 0.96, "learning_rate": 4.4166340654211434e-07, "loss": 16.3389, "step": 7285 }, { "epoch": 0.96, "learning_rate": 4.388409101149238e-07, "loss": 15.2881, "step": 7286 }, { "epoch": 0.96, "learning_rate": 4.36027421598606e-07, "loss": 0.0, "step": 7287 }, { "epoch": 0.96, "learning_rate": 4.332229415045186e-07, "loss": 0.0, "step": 7288 }, { "epoch": 0.96, "learning_rate": 4.304274703423872e-07, "loss": 15.3466, "step": 7289 }, { "epoch": 0.96, "learning_rate": 4.276410086203053e-07, "loss": 16.434, "step": 7290 }, { "epoch": 0.96, "learning_rate": 4.2486355684471233e-07, "loss": 17.5953, "step": 7291 }, { "epoch": 0.96, "learning_rate": 4.220951155204267e-07, "loss": 17.0882, "step": 7292 }, { "epoch": 0.96, "learning_rate": 4.1933568515061803e-07, "loss": 15.8448, "step": 7293 }, { "epoch": 0.96, "learning_rate": 4.1658526623682415e-07, "loss": 0.0, "step": 7294 }, { "epoch": 0.96, "learning_rate": 4.138438592789451e-07, "loss": 17.5695, "step": 7295 }, { "epoch": 0.96, "learning_rate": 4.1111146477523234e-07, "loss": 18.5107, "step": 7296 }, { "epoch": 0.96, "learning_rate": 4.0838808322231634e-07, "loss": 0.0001, "step": 7297 }, { "epoch": 0.96, "learning_rate": 4.0567371511517905e-07, "loss": 0.0001, "step": 7298 }, { "epoch": 0.96, "learning_rate": 4.029683609471646e-07, "loss": 17.5957, "step": 7299 }, { "epoch": 0.96, "learning_rate": 4.002720212099853e-07, "loss": 0.0001, "step": 7300 }, { "epoch": 0.96, "learning_rate": 3.975846963936991e-07, "loss": 0.0002, "step": 7301 }, { "epoch": 0.96, "learning_rate": 3.9490638698674865e-07, "loss": 15.8504, "step": 7302 }, { "epoch": 0.96, "learning_rate": 3.922370934759223e-07, "loss": 15.3695, "step": 7303 }, { "epoch": 0.96, "learning_rate": 3.895768163463709e-07, "loss": 0.0, "step": 7304 }, { "epoch": 0.96, "learning_rate": 3.869255560816076e-07, "loss": 16.6999, "step": 7305 }, { "epoch": 0.96, "learning_rate": 3.842833131635137e-07, "loss": 16.9905, "step": 7306 }, { "epoch": 0.96, "learning_rate": 3.816500880723217e-07, "loss": 16.6609, "step": 7307 }, { "epoch": 0.96, "learning_rate": 3.790258812866321e-07, "loss": 16.3609, "step": 7308 }, { "epoch": 0.96, "learning_rate": 3.764106932834022e-07, "loss": 14.9255, "step": 7309 }, { "epoch": 0.96, "learning_rate": 3.7380452453794626e-07, "loss": 17.5315, "step": 7310 }, { "epoch": 0.96, "learning_rate": 3.7120737552395204e-07, "loss": 0.0, "step": 7311 }, { "epoch": 0.96, "learning_rate": 3.686192467134586e-07, "loss": 16.5492, "step": 7312 }, { "epoch": 0.96, "learning_rate": 3.660401385768675e-07, "loss": 0.0001, "step": 7313 }, { "epoch": 0.96, "learning_rate": 3.6347005158293145e-07, "loss": 16.8087, "step": 7314 }, { "epoch": 0.96, "learning_rate": 3.60908986198788e-07, "loss": 17.2372, "step": 7315 }, { "epoch": 0.96, "learning_rate": 3.583569428899092e-07, "loss": 15.5583, "step": 7316 }, { "epoch": 0.96, "learning_rate": 3.5581392212014066e-07, "loss": 17.248, "step": 7317 }, { "epoch": 0.96, "learning_rate": 3.532799243516849e-07, "loss": 0.0, "step": 7318 }, { "epoch": 0.96, "learning_rate": 3.5075495004510127e-07, "loss": 15.7363, "step": 7319 }, { "epoch": 0.96, "learning_rate": 3.4823899965932274e-07, "loss": 18.5908, "step": 7320 }, { "epoch": 0.96, "learning_rate": 3.4573207365161674e-07, "loss": 19.0909, "step": 7321 }, { "epoch": 0.96, "learning_rate": 3.4323417247763556e-07, "loss": 16.5526, "step": 7322 }, { "epoch": 0.96, "learning_rate": 3.4074529659138264e-07, "loss": 0.0, "step": 7323 }, { "epoch": 0.96, "learning_rate": 3.382654464452184e-07, "loss": 16.0746, "step": 7324 }, { "epoch": 0.96, "learning_rate": 3.3579462248986007e-07, "loss": 16.6836, "step": 7325 }, { "epoch": 0.96, "learning_rate": 3.333328251743928e-07, "loss": 18.4255, "step": 7326 }, { "epoch": 0.96, "learning_rate": 3.3088005494625316e-07, "loss": 0.0001, "step": 7327 }, { "epoch": 0.96, "learning_rate": 3.2843631225124015e-07, "loss": 0.0, "step": 7328 }, { "epoch": 0.96, "learning_rate": 3.2600159753352065e-07, "loss": 17.406, "step": 7329 }, { "epoch": 0.96, "learning_rate": 3.2357591123559627e-07, "loss": 18.3646, "step": 7330 }, { "epoch": 0.96, "learning_rate": 3.2115925379835877e-07, "loss": 15.9466, "step": 7331 }, { "epoch": 0.97, "learning_rate": 3.187516256610401e-07, "loss": 16.3448, "step": 7332 }, { "epoch": 0.97, "learning_rate": 3.163530272612347e-07, "loss": 17.8906, "step": 7333 }, { "epoch": 0.97, "learning_rate": 3.139634590348939e-07, "loss": 16.3685, "step": 7334 }, { "epoch": 0.97, "learning_rate": 3.115829214163313e-07, "loss": 17.3431, "step": 7335 }, { "epoch": 0.97, "learning_rate": 3.0921141483821745e-07, "loss": 15.827, "step": 7336 }, { "epoch": 0.97, "learning_rate": 3.068489397315799e-07, "loss": 16.5194, "step": 7337 }, { "epoch": 0.97, "learning_rate": 3.04495496525814e-07, "loss": 0.0, "step": 7338 }, { "epoch": 0.97, "learning_rate": 3.021510856486498e-07, "loss": 19.0037, "step": 7339 }, { "epoch": 0.97, "learning_rate": 2.998157075262131e-07, "loss": 16.7196, "step": 7340 }, { "epoch": 0.97, "learning_rate": 2.9748936258295334e-07, "loss": 16.9118, "step": 7341 }, { "epoch": 0.97, "learning_rate": 2.951720512416989e-07, "loss": 0.0, "step": 7342 }, { "epoch": 0.97, "learning_rate": 2.928637739236184e-07, "loss": 0.0001, "step": 7343 }, { "epoch": 0.97, "learning_rate": 2.905645310482652e-07, "loss": 15.1775, "step": 7344 }, { "epoch": 0.97, "learning_rate": 2.8827432303351607e-07, "loss": 17.028, "step": 7345 }, { "epoch": 0.97, "learning_rate": 2.8599315029564364e-07, "loss": 16.4961, "step": 7346 }, { "epoch": 0.97, "learning_rate": 2.837210132492385e-07, "loss": 0.0, "step": 7347 }, { "epoch": 0.97, "learning_rate": 2.814579123072869e-07, "loss": 0.0, "step": 7348 }, { "epoch": 0.97, "learning_rate": 2.792038478811043e-07, "loss": 16.2947, "step": 7349 }, { "epoch": 0.97, "learning_rate": 2.769588203803797e-07, "loss": 0.0016, "step": 7350 }, { "epoch": 0.97, "learning_rate": 2.7472283021315326e-07, "loss": 16.3564, "step": 7351 }, { "epoch": 0.97, "learning_rate": 2.7249587778582773e-07, "loss": 16.8351, "step": 7352 }, { "epoch": 0.97, "learning_rate": 2.702779635031516e-07, "loss": 17.1245, "step": 7353 }, { "epoch": 0.97, "learning_rate": 2.6806908776824127e-07, "loss": 16.1664, "step": 7354 }, { "epoch": 0.97, "learning_rate": 2.6586925098257e-07, "loss": 0.0002, "step": 7355 }, { "epoch": 0.97, "learning_rate": 2.6367845354596243e-07, "loss": 17.9918, "step": 7356 }, { "epoch": 0.97, "learning_rate": 2.614966958566056e-07, "loss": 0.0, "step": 7357 }, { "epoch": 0.97, "learning_rate": 2.5932397831104347e-07, "loss": 16.1722, "step": 7358 }, { "epoch": 0.97, "learning_rate": 2.571603013041712e-07, "loss": 17.8953, "step": 7359 }, { "epoch": 0.97, "learning_rate": 2.550056652292465e-07, "loss": 17.0866, "step": 7360 }, { "epoch": 0.97, "learning_rate": 2.5286007047788386e-07, "loss": 17.5774, "step": 7361 }, { "epoch": 0.97, "learning_rate": 2.507235174400491e-07, "loss": 16.9673, "step": 7362 }, { "epoch": 0.97, "learning_rate": 2.485960065040649e-07, "loss": 17.621, "step": 7363 }, { "epoch": 0.97, "learning_rate": 2.46477538056622e-07, "loss": 17.0037, "step": 7364 }, { "epoch": 0.97, "learning_rate": 2.443681124827513e-07, "loss": 0.0, "step": 7365 }, { "epoch": 0.97, "learning_rate": 2.422677301658516e-07, "loss": 0.0004, "step": 7366 }, { "epoch": 0.97, "learning_rate": 2.401763914876787e-07, "loss": 17.1603, "step": 7367 }, { "epoch": 0.97, "learning_rate": 2.3809409682833405e-07, "loss": 15.8214, "step": 7368 }, { "epoch": 0.97, "learning_rate": 2.360208465662872e-07, "loss": 15.278, "step": 7369 }, { "epoch": 0.97, "learning_rate": 2.3395664107835336e-07, "loss": 14.8296, "step": 7370 }, { "epoch": 0.97, "learning_rate": 2.3190148073971573e-07, "loss": 0.0001, "step": 7371 }, { "epoch": 0.97, "learning_rate": 2.298553659238978e-07, "loss": 0.0, "step": 7372 }, { "epoch": 0.97, "learning_rate": 2.278182970027909e-07, "loss": 18.3115, "step": 7373 }, { "epoch": 0.97, "learning_rate": 2.2579027434664334e-07, "loss": 16.3049, "step": 7374 }, { "epoch": 0.97, "learning_rate": 2.2377129832405474e-07, "loss": 0.0, "step": 7375 }, { "epoch": 0.97, "learning_rate": 2.2176136930198155e-07, "loss": 0.0, "step": 7376 }, { "epoch": 0.97, "learning_rate": 2.1976048764572598e-07, "loss": 0.0001, "step": 7377 }, { "epoch": 0.97, "learning_rate": 2.1776865371896938e-07, "loss": 17.311, "step": 7378 }, { "epoch": 0.97, "learning_rate": 2.1578586788372212e-07, "loss": 0.0, "step": 7379 }, { "epoch": 0.97, "learning_rate": 2.138121305003682e-07, "loss": 17.6517, "step": 7380 }, { "epoch": 0.97, "learning_rate": 2.1184744192763728e-07, "loss": 17.0254, "step": 7381 }, { "epoch": 0.97, "learning_rate": 2.098918025226271e-07, "loss": 0.0001, "step": 7382 }, { "epoch": 0.97, "learning_rate": 2.0794521264076994e-07, "loss": 0.0, "step": 7383 }, { "epoch": 0.97, "learning_rate": 2.0600767263587727e-07, "loss": 0.0, "step": 7384 }, { "epoch": 0.97, "learning_rate": 2.040791828600952e-07, "loss": 0.0, "step": 7385 }, { "epoch": 0.97, "learning_rate": 2.021597436639322e-07, "loss": 0.0001, "step": 7386 }, { "epoch": 0.97, "learning_rate": 2.0024935539625922e-07, "loss": 16.4129, "step": 7387 }, { "epoch": 0.97, "learning_rate": 1.9834801840429296e-07, "loss": 16.4623, "step": 7388 }, { "epoch": 0.97, "learning_rate": 1.96455733033607e-07, "loss": 15.6178, "step": 7389 }, { "epoch": 0.97, "learning_rate": 1.9457249962813728e-07, "loss": 0.0, "step": 7390 }, { "epoch": 0.97, "learning_rate": 1.9269831853015452e-07, "loss": 15.4674, "step": 7391 }, { "epoch": 0.97, "learning_rate": 1.9083319008031397e-07, "loss": 0.0002, "step": 7392 }, { "epoch": 0.97, "learning_rate": 1.889771146175945e-07, "loss": 16.9573, "step": 7393 }, { "epoch": 0.97, "learning_rate": 1.8713009247935397e-07, "loss": 16.688, "step": 7394 }, { "epoch": 0.97, "learning_rate": 1.8529212400128504e-07, "loss": 16.648, "step": 7395 }, { "epoch": 0.97, "learning_rate": 1.8346320951745933e-07, "loss": 15.1561, "step": 7396 }, { "epoch": 0.97, "learning_rate": 1.816433493602776e-07, "loss": 16.2034, "step": 7397 }, { "epoch": 0.97, "learning_rate": 1.7983254386050308e-07, "loss": 17.5069, "step": 7398 }, { "epoch": 0.97, "learning_rate": 1.7803079334726692e-07, "loss": 16.9662, "step": 7399 }, { "epoch": 0.97, "learning_rate": 1.7623809814803495e-07, "loss": 17.1713, "step": 7400 }, { "epoch": 0.97, "learning_rate": 1.7445445858864097e-07, "loss": 16.953, "step": 7401 }, { "epoch": 0.97, "learning_rate": 1.7267987499326456e-07, "loss": 16.1151, "step": 7402 }, { "epoch": 0.97, "learning_rate": 1.709143476844366e-07, "loss": 0.0, "step": 7403 }, { "epoch": 0.97, "learning_rate": 1.6915787698306153e-07, "loss": 17.5067, "step": 7404 }, { "epoch": 0.97, "learning_rate": 1.6741046320837283e-07, "loss": 0.0001, "step": 7405 }, { "epoch": 0.97, "learning_rate": 1.6567210667797207e-07, "loss": 0.0, "step": 7406 }, { "epoch": 0.97, "learning_rate": 1.63942807707812e-07, "loss": 17.9058, "step": 7407 }, { "epoch": 0.98, "learning_rate": 1.6222256661220238e-07, "loss": 17.2029, "step": 7408 }, { "epoch": 0.98, "learning_rate": 1.6051138370379305e-07, "loss": 0.0, "step": 7409 }, { "epoch": 0.98, "learning_rate": 1.5880925929360747e-07, "loss": 16.6289, "step": 7410 }, { "epoch": 0.98, "learning_rate": 1.5711619369100927e-07, "loss": 17.0872, "step": 7411 }, { "epoch": 0.98, "learning_rate": 1.5543218720371344e-07, "loss": 17.8234, "step": 7412 }, { "epoch": 0.98, "learning_rate": 1.537572401378029e-07, "loss": 0.0001, "step": 7413 }, { "epoch": 0.98, "learning_rate": 1.520913527977008e-07, "loss": 16.1951, "step": 7414 }, { "epoch": 0.98, "learning_rate": 1.504345254861872e-07, "loss": 16.1552, "step": 7415 }, { "epoch": 0.98, "learning_rate": 1.4878675850439892e-07, "loss": 18.3127, "step": 7416 }, { "epoch": 0.98, "learning_rate": 1.471480521518187e-07, "loss": 16.7601, "step": 7417 }, { "epoch": 0.98, "learning_rate": 1.4551840672628603e-07, "loss": 17.3462, "step": 7418 }, { "epoch": 0.98, "learning_rate": 1.4389782252400285e-07, "loss": 16.9189, "step": 7419 }, { "epoch": 0.98, "learning_rate": 1.4228629983951135e-07, "loss": 16.2321, "step": 7420 }, { "epoch": 0.98, "learning_rate": 1.406838389657106e-07, "loss": 0.0001, "step": 7421 }, { "epoch": 0.98, "learning_rate": 1.3909044019385087e-07, "loss": 15.6694, "step": 7422 }, { "epoch": 0.98, "learning_rate": 1.3750610381354501e-07, "loss": 16.0061, "step": 7423 }, { "epoch": 0.98, "learning_rate": 1.3593083011274598e-07, "loss": 17.2172, "step": 7424 }, { "epoch": 0.98, "learning_rate": 1.3436461937776922e-07, "loss": 16.9877, "step": 7425 }, { "epoch": 0.98, "learning_rate": 1.3280747189327035e-07, "loss": 17.1562, "step": 7426 }, { "epoch": 0.98, "learning_rate": 1.3125938794227854e-07, "loss": 15.7759, "step": 7427 }, { "epoch": 0.98, "learning_rate": 1.2972036780615205e-07, "loss": 17.4774, "step": 7428 }, { "epoch": 0.98, "learning_rate": 1.2819041176462264e-07, "loss": 15.7256, "step": 7429 }, { "epoch": 0.98, "learning_rate": 1.2666952009575682e-07, "loss": 0.0, "step": 7430 }, { "epoch": 0.98, "learning_rate": 1.25157693075989e-07, "loss": 15.5173, "step": 7431 }, { "epoch": 0.98, "learning_rate": 1.2365493098009384e-07, "loss": 16.2145, "step": 7432 }, { "epoch": 0.98, "learning_rate": 1.2216123408120283e-07, "loss": 0.0001, "step": 7433 }, { "epoch": 0.98, "learning_rate": 1.2067660265080437e-07, "loss": 16.2749, "step": 7434 }, { "epoch": 0.98, "learning_rate": 1.1920103695873819e-07, "loss": 17.5013, "step": 7435 }, { "epoch": 0.98, "learning_rate": 1.1773453727318417e-07, "loss": 15.6189, "step": 7436 }, { "epoch": 0.98, "learning_rate": 1.1627710386069023e-07, "loss": 16.8714, "step": 7437 }, { "epoch": 0.98, "learning_rate": 1.1482873698615005e-07, "loss": 17.254, "step": 7438 }, { "epoch": 0.98, "learning_rate": 1.1338943691280302e-07, "loss": 16.3581, "step": 7439 }, { "epoch": 0.98, "learning_rate": 1.1195920390225656e-07, "loss": 15.9017, "step": 7440 }, { "epoch": 0.98, "learning_rate": 1.1053803821445274e-07, "loss": 15.3573, "step": 7441 }, { "epoch": 0.98, "learning_rate": 1.0912594010769606e-07, "loss": 0.0, "step": 7442 }, { "epoch": 0.98, "learning_rate": 1.0772290983863675e-07, "loss": 0.0, "step": 7443 }, { "epoch": 0.98, "learning_rate": 1.0632894766228751e-07, "loss": 16.6098, "step": 7444 }, { "epoch": 0.98, "learning_rate": 1.0494405383200123e-07, "loss": 0.0003, "step": 7445 }, { "epoch": 0.98, "learning_rate": 1.0356822859948767e-07, "loss": 17.2146, "step": 7446 }, { "epoch": 0.98, "learning_rate": 1.0220147221480791e-07, "loss": 17.8621, "step": 7447 }, { "epoch": 0.98, "learning_rate": 1.0084378492637436e-07, "loss": 16.6457, "step": 7448 }, { "epoch": 0.98, "learning_rate": 9.949516698095073e-08, "loss": 17.818, "step": 7449 }, { "epoch": 0.98, "learning_rate": 9.815561862365763e-08, "loss": 15.8815, "step": 7450 }, { "epoch": 0.98, "learning_rate": 9.68251400979503e-08, "loss": 18.5995, "step": 7451 }, { "epoch": 0.98, "learning_rate": 9.550373164566306e-08, "loss": 0.0012, "step": 7452 }, { "epoch": 0.98, "learning_rate": 9.419139350695938e-08, "loss": 17.6861, "step": 7453 }, { "epoch": 0.98, "learning_rate": 9.288812592035956e-08, "loss": 0.0, "step": 7454 }, { "epoch": 0.98, "learning_rate": 9.159392912273523e-08, "loss": 15.7678, "step": 7455 }, { "epoch": 0.98, "learning_rate": 9.030880334932046e-08, "loss": 19.212, "step": 7456 }, { "epoch": 0.98, "learning_rate": 8.903274883368396e-08, "loss": 0.0, "step": 7457 }, { "epoch": 0.98, "learning_rate": 8.776576580775686e-08, "loss": 16.5359, "step": 7458 }, { "epoch": 0.98, "learning_rate": 8.650785450181053e-08, "loss": 16.4668, "step": 7459 }, { "epoch": 0.98, "learning_rate": 8.525901514447876e-08, "loss": 16.1854, "step": 7460 }, { "epoch": 0.98, "learning_rate": 8.401924796274662e-08, "loss": 0.0, "step": 7461 }, { "epoch": 0.98, "learning_rate": 8.2788553181945e-08, "loss": 16.1423, "step": 7462 }, { "epoch": 0.98, "learning_rate": 8.156693102575052e-08, "loss": 16.1321, "step": 7463 }, { "epoch": 0.98, "learning_rate": 8.035438171620225e-08, "loss": 0.0009, "step": 7464 }, { "epoch": 0.98, "learning_rate": 7.915090547367942e-08, "loss": 0.0, "step": 7465 }, { "epoch": 0.98, "learning_rate": 7.795650251692931e-08, "loss": 0.0, "step": 7466 }, { "epoch": 0.98, "learning_rate": 7.677117306302828e-08, "loss": 16.1447, "step": 7467 }, { "epoch": 0.98, "learning_rate": 7.559491732742064e-08, "loss": 18.547, "step": 7468 }, { "epoch": 0.98, "learning_rate": 7.442773552389092e-08, "loss": 15.5629, "step": 7469 }, { "epoch": 0.98, "learning_rate": 7.326962786458058e-08, "loss": 18.2603, "step": 7470 }, { "epoch": 0.98, "learning_rate": 7.212059455997677e-08, "loss": 16.05, "step": 7471 }, { "epoch": 0.98, "learning_rate": 7.098063581892355e-08, "loss": 0.0, "step": 7472 }, { "epoch": 0.98, "learning_rate": 6.984975184861076e-08, "loss": 18.7967, "step": 7473 }, { "epoch": 0.98, "learning_rate": 6.872794285457951e-08, "loss": 17.3189, "step": 7474 }, { "epoch": 0.98, "learning_rate": 6.761520904072228e-08, "loss": 17.9714, "step": 7475 }, { "epoch": 0.98, "learning_rate": 6.651155060928838e-08, "loss": 0.0, "step": 7476 }, { "epoch": 0.98, "learning_rate": 6.541696776086182e-08, "loss": 0.0002, "step": 7477 }, { "epoch": 0.98, "learning_rate": 6.433146069439455e-08, "loss": 16.4166, "step": 7478 }, { "epoch": 0.98, "learning_rate": 6.325502960717878e-08, "loss": 18.8901, "step": 7479 }, { "epoch": 0.98, "learning_rate": 6.218767469485798e-08, "loss": 0.0, "step": 7480 }, { "epoch": 0.98, "learning_rate": 6.112939615142699e-08, "loss": 16.2026, "step": 7481 }, { "epoch": 0.98, "learning_rate": 6.008019416923194e-08, "loss": 0.0001, "step": 7482 }, { "epoch": 0.98, "learning_rate": 5.9040068938975846e-08, "loss": 17.7439, "step": 7483 }, { "epoch": 0.99, "learning_rate": 5.800902064969638e-08, "loss": 0.0, "step": 7484 }, { "epoch": 0.99, "learning_rate": 5.698704948879363e-08, "loss": 17.301, "step": 7485 }, { "epoch": 0.99, "learning_rate": 5.597415564201347e-08, "loss": 18.5301, "step": 7486 }, { "epoch": 0.99, "learning_rate": 5.4970339293453076e-08, "loss": 17.018, "step": 7487 }, { "epoch": 0.99, "learning_rate": 5.3975600625560954e-08, "loss": 18.1836, "step": 7488 }, { "epoch": 0.99, "learning_rate": 5.298993981913136e-08, "loss": 0.0001, "step": 7489 }, { "epoch": 0.99, "learning_rate": 5.20133570533099e-08, "loss": 0.0, "step": 7490 }, { "epoch": 0.99, "learning_rate": 5.1045852505604566e-08, "loss": 16.7305, "step": 7491 }, { "epoch": 0.99, "learning_rate": 5.0087426351852484e-08, "loss": 16.5941, "step": 7492 }, { "epoch": 0.99, "learning_rate": 4.9138078766253206e-08, "loss": 0.0002, "step": 7493 }, { "epoch": 0.99, "learning_rate": 4.8197809921352036e-08, "loss": 17.9617, "step": 7494 }, { "epoch": 0.99, "learning_rate": 4.726661998805115e-08, "loss": 17.2422, "step": 7495 }, { "epoch": 0.99, "learning_rate": 4.634450913559296e-08, "loss": 17.6102, "step": 7496 }, { "epoch": 0.99, "learning_rate": 4.543147753157673e-08, "loss": 0.0003, "step": 7497 }, { "epoch": 0.99, "learning_rate": 4.452752534194748e-08, "loss": 17.5552, "step": 7498 }, { "epoch": 0.99, "learning_rate": 4.363265273100159e-08, "loss": 17.3191, "step": 7499 }, { "epoch": 0.99, "learning_rate": 4.2746859861392264e-08, "loss": 16.9889, "step": 7500 }, { "epoch": 0.99, "learning_rate": 4.1870146894101844e-08, "loss": 16.2985, "step": 7501 }, { "epoch": 0.99, "learning_rate": 4.10025139884862e-08, "loss": 17.6184, "step": 7502 }, { "epoch": 0.99, "learning_rate": 4.014396130223586e-08, "loss": 0.0003, "step": 7503 }, { "epoch": 0.99, "learning_rate": 3.9294488991403755e-08, "loss": 0.0, "step": 7504 }, { "epoch": 0.99, "learning_rate": 3.8454097210371964e-08, "loss": 16.6805, "step": 7505 }, { "epoch": 0.99, "learning_rate": 3.7622786111896067e-08, "loss": 0.0, "step": 7506 }, { "epoch": 0.99, "learning_rate": 3.680055584706077e-08, "loss": 16.8957, "step": 7507 }, { "epoch": 0.99, "learning_rate": 3.5987406565313184e-08, "loss": 15.9747, "step": 7508 }, { "epoch": 0.99, "learning_rate": 3.5183338414446207e-08, "loss": 0.0, "step": 7509 }, { "epoch": 0.99, "learning_rate": 3.438835154060405e-08, "loss": 17.1395, "step": 7510 }, { "epoch": 0.99, "learning_rate": 3.3602446088271125e-08, "loss": 0.0, "step": 7511 }, { "epoch": 0.99, "learning_rate": 3.282562220029983e-08, "loss": 15.7377, "step": 7512 }, { "epoch": 0.99, "learning_rate": 3.205788001786614e-08, "loss": 15.2567, "step": 7513 }, { "epoch": 0.99, "learning_rate": 3.129921968052507e-08, "loss": 16.6149, "step": 7514 }, { "epoch": 0.99, "learning_rate": 3.054964132614968e-08, "loss": 17.039, "step": 7515 }, { "epoch": 0.99, "learning_rate": 2.9809145090992087e-08, "loss": 15.0142, "step": 7516 }, { "epoch": 0.99, "learning_rate": 2.9077731109639072e-08, "loss": 16.2533, "step": 7517 }, { "epoch": 0.99, "learning_rate": 2.8355399515017644e-08, "loss": 16.6337, "step": 7518 }, { "epoch": 0.99, "learning_rate": 2.7642150438422776e-08, "loss": 18.4427, "step": 7519 }, { "epoch": 0.99, "learning_rate": 2.6937984009484106e-08, "loss": 17.269, "step": 7520 }, { "epoch": 0.99, "learning_rate": 2.62429003561937e-08, "loss": 13.8334, "step": 7521 }, { "epoch": 0.99, "learning_rate": 2.555689960487828e-08, "loss": 15.2162, "step": 7522 }, { "epoch": 0.99, "learning_rate": 2.4879981880221447e-08, "loss": 16.7023, "step": 7523 }, { "epoch": 0.99, "learning_rate": 2.4212147305258114e-08, "loss": 17.1326, "step": 7524 }, { "epoch": 0.99, "learning_rate": 2.3553396001368967e-08, "loss": 0.0001, "step": 7525 }, { "epoch": 0.99, "learning_rate": 2.2903728088286003e-08, "loss": 17.388, "step": 7526 }, { "epoch": 0.99, "learning_rate": 2.2263143684086997e-08, "loss": 17.4126, "step": 7527 }, { "epoch": 0.99, "learning_rate": 2.1631642905201032e-08, "loss": 16.7685, "step": 7528 }, { "epoch": 0.99, "learning_rate": 2.100922586640297e-08, "loss": 15.2517, "step": 7529 }, { "epoch": 0.99, "learning_rate": 2.0395892680818985e-08, "loss": 0.0, "step": 7530 }, { "epoch": 0.99, "learning_rate": 1.9791643459926567e-08, "loss": 16.0935, "step": 7531 }, { "epoch": 0.99, "learning_rate": 1.9196478313554533e-08, "loss": 0.0, "step": 7532 }, { "epoch": 0.99, "learning_rate": 1.861039734986636e-08, "loss": 16.8013, "step": 7533 }, { "epoch": 0.99, "learning_rate": 1.8033400675393498e-08, "loss": 16.193, "step": 7534 }, { "epoch": 0.99, "learning_rate": 1.746548839500206e-08, "loss": 0.0, "step": 7535 }, { "epoch": 0.99, "learning_rate": 1.6906660611909487e-08, "loss": 0.0, "step": 7536 }, { "epoch": 0.99, "learning_rate": 1.6356917427695627e-08, "loss": 0.0001, "step": 7537 }, { "epoch": 0.99, "learning_rate": 1.5816258942263906e-08, "loss": 0.0, "step": 7538 }, { "epoch": 0.99, "learning_rate": 1.5284685253891263e-08, "loss": 16.4222, "step": 7539 }, { "epoch": 0.99, "learning_rate": 1.4762196459189304e-08, "loss": 15.4737, "step": 7540 }, { "epoch": 0.99, "learning_rate": 1.4248792653120957e-08, "loss": 17.6394, "step": 7541 }, { "epoch": 0.99, "learning_rate": 1.3744473929000467e-08, "loss": 18.3112, "step": 7542 }, { "epoch": 0.99, "learning_rate": 1.3249240378487849e-08, "loss": 16.2451, "step": 7543 }, { "epoch": 0.99, "learning_rate": 1.2763092091599982e-08, "loss": 0.0, "step": 7544 }, { "epoch": 0.99, "learning_rate": 1.2286029156688417e-08, "loss": 18.3645, "step": 7545 }, { "epoch": 0.99, "learning_rate": 1.1818051660461571e-08, "loss": 17.1579, "step": 7546 }, { "epoch": 0.99, "learning_rate": 1.1359159687984732e-08, "loss": 17.4237, "step": 7547 }, { "epoch": 0.99, "learning_rate": 1.0909353322652305e-08, "loss": 17.0777, "step": 7548 }, { "epoch": 0.99, "learning_rate": 1.046863264622111e-08, "loss": 17.2902, "step": 7549 }, { "epoch": 0.99, "learning_rate": 1.003699773879374e-08, "loss": 16.3359, "step": 7550 }, { "epoch": 0.99, "learning_rate": 9.614448678829657e-09, "loss": 16.4019, "step": 7551 }, { "epoch": 0.99, "learning_rate": 9.200985543117436e-09, "loss": 16.7234, "step": 7552 }, { "epoch": 0.99, "learning_rate": 8.79660840680807e-09, "loss": 18.4965, "step": 7553 }, { "epoch": 0.99, "learning_rate": 8.401317343398329e-09, "loss": 17.5534, "step": 7554 }, { "epoch": 0.99, "learning_rate": 8.015112424736292e-09, "loss": 15.7622, "step": 7555 }, { "epoch": 0.99, "learning_rate": 7.637993721021364e-09, "loss": 18.8154, "step": 7556 }, { "epoch": 0.99, "learning_rate": 7.269961300782058e-09, "loss": 17.2296, "step": 7557 }, { "epoch": 0.99, "learning_rate": 6.911015230920415e-09, "loss": 0.0, "step": 7558 }, { "epoch": 0.99, "learning_rate": 6.5611555766731394e-09, "loss": 17.2797, "step": 7559 }, { "epoch": 1.0, "learning_rate": 6.220382401628255e-09, "loss": 0.0, "step": 7560 }, { "epoch": 1.0, "learning_rate": 5.888695767719554e-09, "loss": 17.198, "step": 7561 }, { "epoch": 1.0, "learning_rate": 5.566095735237697e-09, "loss": 17.5679, "step": 7562 }, { "epoch": 1.0, "learning_rate": 5.252582362813563e-09, "loss": 14.4269, "step": 7563 }, { "epoch": 1.0, "learning_rate": 4.948155707429347e-09, "loss": 0.0, "step": 7564 }, { "epoch": 1.0, "learning_rate": 4.652815824418566e-09, "loss": 16.1027, "step": 7565 }, { "epoch": 1.0, "learning_rate": 4.366562767454952e-09, "loss": 17.1912, "step": 7566 }, { "epoch": 1.0, "learning_rate": 4.089396588569105e-09, "loss": 18.06, "step": 7567 }, { "epoch": 1.0, "learning_rate": 3.821317338131847e-09, "loss": 16.9234, "step": 7568 }, { "epoch": 1.0, "learning_rate": 3.562325064876415e-09, "loss": 17.2125, "step": 7569 }, { "epoch": 1.0, "learning_rate": 3.3124198158707153e-09, "loss": 17.1422, "step": 7570 }, { "epoch": 1.0, "learning_rate": 3.071601636539523e-09, "loss": 16.7264, "step": 7571 }, { "epoch": 1.0, "learning_rate": 2.839870570642278e-09, "loss": 16.8146, "step": 7572 }, { "epoch": 1.0, "learning_rate": 2.617226660306393e-09, "loss": 0.0, "step": 7573 }, { "epoch": 1.0, "learning_rate": 2.4036699459994984e-09, "loss": 16.1105, "step": 7574 }, { "epoch": 1.0, "learning_rate": 2.1992004665294387e-09, "loss": 15.7621, "step": 7575 }, { "epoch": 1.0, "learning_rate": 2.00381825906093e-09, "loss": 0.0011, "step": 7576 }, { "epoch": 1.0, "learning_rate": 1.8175233591100072e-09, "loss": 18.2038, "step": 7577 }, { "epoch": 1.0, "learning_rate": 1.64031580052737e-09, "loss": 0.0, "step": 7578 }, { "epoch": 1.0, "learning_rate": 1.4721956155316908e-09, "loss": 16.8027, "step": 7579 }, { "epoch": 1.0, "learning_rate": 1.3131628346707558e-09, "loss": 15.7481, "step": 7580 }, { "epoch": 1.0, "learning_rate": 1.163217486854773e-09, "loss": 17.999, "step": 7581 }, { "epoch": 1.0, "learning_rate": 1.0223595993341662e-09, "loss": 17.6708, "step": 7582 }, { "epoch": 1.0, "learning_rate": 8.905891977106784e-10, "loss": 17.5682, "step": 7583 }, { "epoch": 1.0, "learning_rate": 7.679063059373715e-10, "loss": 0.0, "step": 7584 }, { "epoch": 1.0, "learning_rate": 6.543109463075236e-10, "loss": 17.0414, "step": 7585 }, { "epoch": 1.0, "learning_rate": 5.498031394657321e-10, "loss": 15.903, "step": 7586 }, { "epoch": 1.0, "learning_rate": 4.5438290441346396e-10, "loss": 15.2902, "step": 7587 }, { "epoch": 1.0, "learning_rate": 3.680502584924028e-10, "loss": 0.0, "step": 7588 }, { "epoch": 1.0, "learning_rate": 2.9080521739e-10, "loss": 16.0799, "step": 7589 }, { "epoch": 1.0, "learning_rate": 2.2264779515057676e-10, "loss": 15.1645, "step": 7590 }, { "epoch": 1.0, "learning_rate": 1.6357800415311985e-10, "loss": 0.0002, "step": 7591 }, { "epoch": 1.0, "learning_rate": 1.1359585514458815e-10, "loss": 17.1014, "step": 7592 }, { "epoch": 1.0, "learning_rate": 7.270135720105486e-11, "loss": 16.7727, "step": 7593 }, { "epoch": 1.0, "learning_rate": 4.089451776101427e-11, "loss": 16.9021, "step": 7594 }, { "epoch": 1.0, "learning_rate": 1.8175342603177216e-11, "loss": 0.0, "step": 7595 }, { "epoch": 1.0, "step": 7595, "total_flos": 1.8884288986081342e+20, "train_loss": 12.693984882221208, "train_runtime": 42292.4042, "train_samples_per_second": 68.972, "train_steps_per_second": 0.18 } ], "logging_steps": 1.0, "max_steps": 7597, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2000, "total_flos": 1.8884288986081342e+20, "train_batch_size": 2, "trial_name": null, "trial_params": null }