{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.998694942903752, "eval_steps": 500, "global_step": 3064, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.347826086956521e-08, "loss": 1.726, "step": 2 }, { "epoch": 0.01, "learning_rate": 8.695652173913042e-08, "loss": 1.7175, "step": 4 }, { "epoch": 0.01, "learning_rate": 1.3043478260869563e-07, "loss": 1.7023, "step": 6 }, { "epoch": 0.01, "learning_rate": 1.7391304347826085e-07, "loss": 1.6936, "step": 8 }, { "epoch": 0.01, "learning_rate": 2.1739130434782607e-07, "loss": 1.7156, "step": 10 }, { "epoch": 0.02, "learning_rate": 2.6086956521739126e-07, "loss": 1.7237, "step": 12 }, { "epoch": 0.02, "learning_rate": 3.043478260869565e-07, "loss": 1.7174, "step": 14 }, { "epoch": 0.02, "learning_rate": 3.478260869565217e-07, "loss": 1.7304, "step": 16 }, { "epoch": 0.02, "learning_rate": 3.9130434782608694e-07, "loss": 1.7269, "step": 18 }, { "epoch": 0.03, "learning_rate": 4.3478260869565214e-07, "loss": 1.7134, "step": 20 }, { "epoch": 0.03, "learning_rate": 4.782608695652174e-07, "loss": 1.6357, "step": 22 }, { "epoch": 0.03, "learning_rate": 5.217391304347825e-07, "loss": 1.708, "step": 24 }, { "epoch": 0.03, "learning_rate": 5.652173913043477e-07, "loss": 1.6731, "step": 26 }, { "epoch": 0.04, "learning_rate": 6.08695652173913e-07, "loss": 1.6733, "step": 28 }, { "epoch": 0.04, "learning_rate": 6.521739130434782e-07, "loss": 1.7246, "step": 30 }, { "epoch": 0.04, "learning_rate": 6.956521739130434e-07, "loss": 1.6945, "step": 32 }, { "epoch": 0.04, "learning_rate": 7.391304347826086e-07, "loss": 1.6814, "step": 34 }, { "epoch": 0.05, "learning_rate": 7.826086956521739e-07, "loss": 1.7222, "step": 36 }, { "epoch": 0.05, "learning_rate": 8.260869565217391e-07, "loss": 1.6781, "step": 38 }, { "epoch": 0.05, "learning_rate": 8.695652173913043e-07, "loss": 1.6882, "step": 40 }, { "epoch": 0.05, "learning_rate": 9.130434782608695e-07, "loss": 1.7144, "step": 42 }, { "epoch": 0.06, "learning_rate": 9.565217391304349e-07, "loss": 1.7113, "step": 44 }, { "epoch": 0.06, "learning_rate": 1e-06, "loss": 1.7489, "step": 46 }, { "epoch": 0.06, "learning_rate": 1.043478260869565e-06, "loss": 1.7169, "step": 48 }, { "epoch": 0.07, "learning_rate": 1.0869565217391303e-06, "loss": 1.7396, "step": 50 }, { "epoch": 0.07, "learning_rate": 1.1304347826086954e-06, "loss": 1.6864, "step": 52 }, { "epoch": 0.07, "learning_rate": 1.173913043478261e-06, "loss": 1.76, "step": 54 }, { "epoch": 0.07, "learning_rate": 1.217391304347826e-06, "loss": 1.7147, "step": 56 }, { "epoch": 0.08, "learning_rate": 1.2608695652173913e-06, "loss": 1.7296, "step": 58 }, { "epoch": 0.08, "learning_rate": 1.3043478260869564e-06, "loss": 1.7195, "step": 60 }, { "epoch": 0.08, "learning_rate": 1.3478260869565217e-06, "loss": 1.7456, "step": 62 }, { "epoch": 0.08, "learning_rate": 1.3913043478260868e-06, "loss": 1.8055, "step": 64 }, { "epoch": 0.09, "learning_rate": 1.434782608695652e-06, "loss": 1.6711, "step": 66 }, { "epoch": 0.09, "learning_rate": 1.4782608695652172e-06, "loss": 1.6789, "step": 68 }, { "epoch": 0.09, "learning_rate": 1.5217391304347827e-06, "loss": 1.6871, "step": 70 }, { "epoch": 0.09, "learning_rate": 1.5652173913043478e-06, "loss": 1.7404, "step": 72 }, { "epoch": 0.1, "learning_rate": 1.608695652173913e-06, "loss": 1.7063, "step": 74 }, { "epoch": 0.1, "learning_rate": 1.6521739130434782e-06, "loss": 1.7236, "step": 76 }, { "epoch": 0.1, "learning_rate": 1.6956521739130435e-06, "loss": 1.7363, "step": 78 }, { "epoch": 0.1, "learning_rate": 1.7391304347826085e-06, "loss": 1.6989, "step": 80 }, { "epoch": 0.11, "learning_rate": 1.7826086956521738e-06, "loss": 1.7187, "step": 82 }, { "epoch": 0.11, "learning_rate": 1.826086956521739e-06, "loss": 1.7102, "step": 84 }, { "epoch": 0.11, "learning_rate": 1.8695652173913044e-06, "loss": 1.7201, "step": 86 }, { "epoch": 0.11, "learning_rate": 1.9130434782608697e-06, "loss": 1.7161, "step": 88 }, { "epoch": 0.12, "learning_rate": 1.9565217391304346e-06, "loss": 1.6945, "step": 90 }, { "epoch": 0.12, "learning_rate": 2e-06, "loss": 1.6973, "step": 92 }, { "epoch": 0.12, "learning_rate": 1.9999977652344436e-06, "loss": 1.6626, "step": 94 }, { "epoch": 0.13, "learning_rate": 1.9999910609477626e-06, "loss": 1.7519, "step": 96 }, { "epoch": 0.13, "learning_rate": 1.9999798871699223e-06, "loss": 1.7536, "step": 98 }, { "epoch": 0.13, "learning_rate": 1.9999642439508647e-06, "loss": 1.7231, "step": 100 }, { "epoch": 0.13, "learning_rate": 1.9999441313605068e-06, "loss": 1.6999, "step": 102 }, { "epoch": 0.14, "learning_rate": 1.9999195494887424e-06, "loss": 1.7409, "step": 104 }, { "epoch": 0.14, "learning_rate": 1.999890498445442e-06, "loss": 1.6538, "step": 106 }, { "epoch": 0.14, "learning_rate": 1.999856978360449e-06, "loss": 1.7398, "step": 108 }, { "epoch": 0.14, "learning_rate": 1.9998189893835827e-06, "loss": 1.6626, "step": 110 }, { "epoch": 0.15, "learning_rate": 1.999776531684637e-06, "loss": 1.6812, "step": 112 }, { "epoch": 0.15, "learning_rate": 1.9997296054533767e-06, "loss": 1.6895, "step": 114 }, { "epoch": 0.15, "learning_rate": 1.99967821089954e-06, "loss": 1.7202, "step": 116 }, { "epoch": 0.15, "learning_rate": 1.9996223482528377e-06, "loss": 1.7228, "step": 118 }, { "epoch": 0.16, "learning_rate": 1.9995620177629485e-06, "loss": 1.7067, "step": 120 }, { "epoch": 0.16, "learning_rate": 1.999497219699522e-06, "loss": 1.6683, "step": 122 }, { "epoch": 0.16, "learning_rate": 1.999427954352175e-06, "loss": 1.7136, "step": 124 }, { "epoch": 0.16, "learning_rate": 1.9993542220304907e-06, "loss": 1.6608, "step": 126 }, { "epoch": 0.17, "learning_rate": 1.9992760230640188e-06, "loss": 1.7298, "step": 128 }, { "epoch": 0.17, "learning_rate": 1.9991933578022713e-06, "loss": 1.7399, "step": 130 }, { "epoch": 0.17, "learning_rate": 1.9991062266147236e-06, "loss": 1.7385, "step": 132 }, { "epoch": 0.17, "learning_rate": 1.999014629890811e-06, "loss": 1.7144, "step": 134 }, { "epoch": 0.18, "learning_rate": 1.998918568039928e-06, "loss": 1.6974, "step": 136 }, { "epoch": 0.18, "learning_rate": 1.998818041491426e-06, "loss": 1.6432, "step": 138 }, { "epoch": 0.18, "learning_rate": 1.998713050694612e-06, "loss": 1.7132, "step": 140 }, { "epoch": 0.19, "learning_rate": 1.9986035961187444e-06, "loss": 1.7044, "step": 142 }, { "epoch": 0.19, "learning_rate": 1.9984896782530355e-06, "loss": 1.6433, "step": 144 }, { "epoch": 0.19, "learning_rate": 1.9983712976066435e-06, "loss": 1.7169, "step": 146 }, { "epoch": 0.19, "learning_rate": 1.9982484547086748e-06, "loss": 1.7329, "step": 148 }, { "epoch": 0.2, "learning_rate": 1.9981211501081795e-06, "loss": 1.6872, "step": 150 }, { "epoch": 0.2, "learning_rate": 1.9979893843741495e-06, "loss": 1.655, "step": 152 }, { "epoch": 0.2, "learning_rate": 1.9978531580955158e-06, "loss": 1.6696, "step": 154 }, { "epoch": 0.2, "learning_rate": 1.9977124718811463e-06, "loss": 1.693, "step": 156 }, { "epoch": 0.21, "learning_rate": 1.9975673263598416e-06, "loss": 1.7035, "step": 158 }, { "epoch": 0.21, "learning_rate": 1.9974177221803353e-06, "loss": 1.6509, "step": 160 }, { "epoch": 0.21, "learning_rate": 1.997263660011287e-06, "loss": 1.6743, "step": 162 }, { "epoch": 0.21, "learning_rate": 1.9971051405412828e-06, "loss": 1.7133, "step": 164 }, { "epoch": 0.22, "learning_rate": 1.9969421644788302e-06, "loss": 1.7181, "step": 166 }, { "epoch": 0.22, "learning_rate": 1.996774732552356e-06, "loss": 1.6975, "step": 168 }, { "epoch": 0.22, "learning_rate": 1.996602845510202e-06, "loss": 1.7002, "step": 170 }, { "epoch": 0.22, "learning_rate": 1.996426504120623e-06, "loss": 1.6724, "step": 172 }, { "epoch": 0.23, "learning_rate": 1.9962457091717822e-06, "loss": 1.7257, "step": 174 }, { "epoch": 0.23, "learning_rate": 1.9960604614717485e-06, "loss": 1.7096, "step": 176 }, { "epoch": 0.23, "learning_rate": 1.995870761848492e-06, "loss": 1.7282, "step": 178 }, { "epoch": 0.23, "learning_rate": 1.995676611149881e-06, "loss": 1.6936, "step": 180 }, { "epoch": 0.24, "learning_rate": 1.9954780102436785e-06, "loss": 1.7012, "step": 182 }, { "epoch": 0.24, "learning_rate": 1.995274960017537e-06, "loss": 1.7187, "step": 184 }, { "epoch": 0.24, "learning_rate": 1.9950674613789958e-06, "loss": 1.654, "step": 186 }, { "epoch": 0.25, "learning_rate": 1.994855515255477e-06, "loss": 1.6977, "step": 188 }, { "epoch": 0.25, "learning_rate": 1.9946391225942798e-06, "loss": 1.7194, "step": 190 }, { "epoch": 0.25, "learning_rate": 1.994418284362578e-06, "loss": 1.7294, "step": 192 }, { "epoch": 0.25, "learning_rate": 1.994193001547416e-06, "loss": 1.7039, "step": 194 }, { "epoch": 0.26, "learning_rate": 1.993963275155701e-06, "loss": 1.7086, "step": 196 }, { "epoch": 0.26, "learning_rate": 1.9937291062142028e-06, "loss": 1.7105, "step": 198 }, { "epoch": 0.26, "learning_rate": 1.9934904957695468e-06, "loss": 1.6925, "step": 200 }, { "epoch": 0.26, "learning_rate": 1.9932474448882093e-06, "loss": 1.6923, "step": 202 }, { "epoch": 0.27, "learning_rate": 1.9929999546565148e-06, "loss": 1.7375, "step": 204 }, { "epoch": 0.27, "learning_rate": 1.9927480261806276e-06, "loss": 1.6615, "step": 206 }, { "epoch": 0.27, "learning_rate": 1.9924916605865506e-06, "loss": 1.6564, "step": 208 }, { "epoch": 0.27, "learning_rate": 1.992230859020117e-06, "loss": 1.695, "step": 210 }, { "epoch": 0.28, "learning_rate": 1.9919656226469885e-06, "loss": 1.6644, "step": 212 }, { "epoch": 0.28, "learning_rate": 1.9916959526526466e-06, "loss": 1.6498, "step": 214 }, { "epoch": 0.28, "learning_rate": 1.9914218502423895e-06, "loss": 1.7261, "step": 216 }, { "epoch": 0.28, "learning_rate": 1.9911433166413275e-06, "loss": 1.7343, "step": 218 }, { "epoch": 0.29, "learning_rate": 1.9908603530943742e-06, "loss": 1.7154, "step": 220 }, { "epoch": 0.29, "learning_rate": 1.9905729608662447e-06, "loss": 1.7613, "step": 222 }, { "epoch": 0.29, "learning_rate": 1.9902811412414467e-06, "loss": 1.6935, "step": 224 }, { "epoch": 0.29, "learning_rate": 1.9899848955242778e-06, "loss": 1.6729, "step": 226 }, { "epoch": 0.3, "learning_rate": 1.9896842250388174e-06, "loss": 1.6515, "step": 228 }, { "epoch": 0.3, "learning_rate": 1.9893791311289216e-06, "loss": 1.6641, "step": 230 }, { "epoch": 0.3, "learning_rate": 1.9890696151582166e-06, "loss": 1.7039, "step": 232 }, { "epoch": 0.31, "learning_rate": 1.9887556785100937e-06, "loss": 1.6975, "step": 234 }, { "epoch": 0.31, "learning_rate": 1.9884373225877028e-06, "loss": 1.7449, "step": 236 }, { "epoch": 0.31, "learning_rate": 1.988114548813946e-06, "loss": 1.6453, "step": 238 }, { "epoch": 0.31, "learning_rate": 1.98778735863147e-06, "loss": 1.6413, "step": 240 }, { "epoch": 0.32, "learning_rate": 1.9874557535026623e-06, "loss": 1.7127, "step": 242 }, { "epoch": 0.32, "learning_rate": 1.987119734909641e-06, "loss": 1.6847, "step": 244 }, { "epoch": 0.32, "learning_rate": 1.986779304354253e-06, "loss": 1.6642, "step": 246 }, { "epoch": 0.32, "learning_rate": 1.9864344633580628e-06, "loss": 1.6848, "step": 248 }, { "epoch": 0.33, "learning_rate": 1.9860852134623482e-06, "loss": 1.6777, "step": 250 }, { "epoch": 0.33, "learning_rate": 1.985731556228092e-06, "loss": 1.7287, "step": 252 }, { "epoch": 0.33, "learning_rate": 1.985373493235976e-06, "loss": 1.7026, "step": 254 }, { "epoch": 0.33, "learning_rate": 1.985011026086375e-06, "loss": 1.6671, "step": 256 }, { "epoch": 0.34, "learning_rate": 1.984644156399346e-06, "loss": 1.6576, "step": 258 }, { "epoch": 0.34, "learning_rate": 1.984272885814625e-06, "loss": 1.7098, "step": 260 }, { "epoch": 0.34, "learning_rate": 1.9838972159916176e-06, "loss": 1.677, "step": 262 }, { "epoch": 0.34, "learning_rate": 1.9835171486093916e-06, "loss": 1.6591, "step": 264 }, { "epoch": 0.35, "learning_rate": 1.98313268536667e-06, "loss": 1.7512, "step": 266 }, { "epoch": 0.35, "learning_rate": 1.9827438279818234e-06, "loss": 1.6488, "step": 268 }, { "epoch": 0.35, "learning_rate": 1.9823505781928613e-06, "loss": 1.6573, "step": 270 }, { "epoch": 0.35, "learning_rate": 1.9819529377574263e-06, "loss": 1.6737, "step": 272 }, { "epoch": 0.36, "learning_rate": 1.981550908452785e-06, "loss": 1.6792, "step": 274 }, { "epoch": 0.36, "learning_rate": 1.9811444920758196e-06, "loss": 1.6532, "step": 276 }, { "epoch": 0.36, "learning_rate": 1.9807336904430206e-06, "loss": 1.6684, "step": 278 }, { "epoch": 0.37, "learning_rate": 1.9803185053904792e-06, "loss": 1.6911, "step": 280 }, { "epoch": 0.37, "learning_rate": 1.9798989387738773e-06, "loss": 1.6826, "step": 282 }, { "epoch": 0.37, "learning_rate": 1.9794749924684814e-06, "loss": 1.6848, "step": 284 }, { "epoch": 0.37, "learning_rate": 1.979046668369132e-06, "loss": 1.6931, "step": 286 }, { "epoch": 0.38, "learning_rate": 1.9786139683902377e-06, "loss": 1.6803, "step": 288 }, { "epoch": 0.38, "learning_rate": 1.978176894465765e-06, "loss": 1.7187, "step": 290 }, { "epoch": 0.38, "learning_rate": 1.977735448549228e-06, "loss": 1.6719, "step": 292 }, { "epoch": 0.38, "learning_rate": 1.977289632613684e-06, "loss": 1.6764, "step": 294 }, { "epoch": 0.39, "learning_rate": 1.9768394486517205e-06, "loss": 1.7275, "step": 296 }, { "epoch": 0.39, "learning_rate": 1.9763848986754494e-06, "loss": 1.6961, "step": 298 }, { "epoch": 0.39, "learning_rate": 1.9759259847164957e-06, "loss": 1.7397, "step": 300 }, { "epoch": 0.39, "learning_rate": 1.975462708825989e-06, "loss": 1.6787, "step": 302 }, { "epoch": 0.4, "learning_rate": 1.9749950730745562e-06, "loss": 1.7241, "step": 304 }, { "epoch": 0.4, "learning_rate": 1.97452307955231e-06, "loss": 1.6624, "step": 306 }, { "epoch": 0.4, "learning_rate": 1.974046730368839e-06, "loss": 1.7419, "step": 308 }, { "epoch": 0.4, "learning_rate": 1.973566027653202e-06, "loss": 1.7145, "step": 310 }, { "epoch": 0.41, "learning_rate": 1.9730809735539135e-06, "loss": 1.6548, "step": 312 }, { "epoch": 0.41, "learning_rate": 1.9725915702389388e-06, "loss": 1.6818, "step": 314 }, { "epoch": 0.41, "learning_rate": 1.9720978198956806e-06, "loss": 1.6362, "step": 316 }, { "epoch": 0.42, "learning_rate": 1.971599724730972e-06, "loss": 1.6918, "step": 318 }, { "epoch": 0.42, "learning_rate": 1.9710972869710646e-06, "loss": 1.6339, "step": 320 }, { "epoch": 0.42, "learning_rate": 1.9705905088616194e-06, "loss": 1.6672, "step": 322 }, { "epoch": 0.42, "learning_rate": 1.9700793926676972e-06, "loss": 1.6967, "step": 324 }, { "epoch": 0.43, "learning_rate": 1.9695639406737476e-06, "loss": 1.7376, "step": 326 }, { "epoch": 0.43, "learning_rate": 1.9690441551835994e-06, "loss": 1.6528, "step": 328 }, { "epoch": 0.43, "learning_rate": 1.9685200385204496e-06, "loss": 1.6948, "step": 330 }, { "epoch": 0.43, "learning_rate": 1.967991593026855e-06, "loss": 1.718, "step": 332 }, { "epoch": 0.44, "learning_rate": 1.9674588210647182e-06, "loss": 1.7174, "step": 334 }, { "epoch": 0.44, "learning_rate": 1.96692172501528e-06, "loss": 1.6923, "step": 336 }, { "epoch": 0.44, "learning_rate": 1.966380307279109e-06, "loss": 1.646, "step": 338 }, { "epoch": 0.44, "learning_rate": 1.965834570276087e-06, "loss": 1.6976, "step": 340 }, { "epoch": 0.45, "learning_rate": 1.965284516445404e-06, "loss": 1.6921, "step": 342 }, { "epoch": 0.45, "learning_rate": 1.964730148245542e-06, "loss": 1.6751, "step": 344 }, { "epoch": 0.45, "learning_rate": 1.9641714681542667e-06, "loss": 1.6766, "step": 346 }, { "epoch": 0.45, "learning_rate": 1.963608478668617e-06, "loss": 1.7215, "step": 348 }, { "epoch": 0.46, "learning_rate": 1.963041182304891e-06, "loss": 1.6842, "step": 350 }, { "epoch": 0.46, "learning_rate": 1.962469581598638e-06, "loss": 1.6541, "step": 352 }, { "epoch": 0.46, "learning_rate": 1.961893679104645e-06, "loss": 1.6841, "step": 354 }, { "epoch": 0.46, "learning_rate": 1.9613134773969254e-06, "loss": 1.7075, "step": 356 }, { "epoch": 0.47, "learning_rate": 1.9607289790687104e-06, "loss": 1.7045, "step": 358 }, { "epoch": 0.47, "learning_rate": 1.960140186732432e-06, "loss": 1.679, "step": 360 }, { "epoch": 0.47, "learning_rate": 1.9595471030197163e-06, "loss": 1.7257, "step": 362 }, { "epoch": 0.48, "learning_rate": 1.95894973058137e-06, "loss": 1.691, "step": 364 }, { "epoch": 0.48, "learning_rate": 1.958348072087367e-06, "loss": 1.6648, "step": 366 }, { "epoch": 0.48, "learning_rate": 1.9577421302268393e-06, "loss": 1.697, "step": 368 }, { "epoch": 0.48, "learning_rate": 1.957131907708062e-06, "loss": 1.7119, "step": 370 }, { "epoch": 0.49, "learning_rate": 1.9565174072584445e-06, "loss": 1.6618, "step": 372 }, { "epoch": 0.49, "learning_rate": 1.9558986316245153e-06, "loss": 1.7103, "step": 374 }, { "epoch": 0.49, "learning_rate": 1.9552755835719113e-06, "loss": 1.6766, "step": 376 }, { "epoch": 0.49, "learning_rate": 1.954648265885366e-06, "loss": 1.6481, "step": 378 }, { "epoch": 0.5, "learning_rate": 1.9540166813686936e-06, "loss": 1.6343, "step": 380 }, { "epoch": 0.5, "learning_rate": 1.9533808328447825e-06, "loss": 1.6898, "step": 382 }, { "epoch": 0.5, "learning_rate": 1.9527407231555762e-06, "loss": 1.7079, "step": 384 }, { "epoch": 0.5, "learning_rate": 1.9520963551620657e-06, "loss": 1.656, "step": 386 }, { "epoch": 0.51, "learning_rate": 1.951447731744273e-06, "loss": 1.7079, "step": 388 }, { "epoch": 0.51, "learning_rate": 1.9507948558012416e-06, "loss": 1.6665, "step": 390 }, { "epoch": 0.51, "learning_rate": 1.9501377302510204e-06, "loss": 1.7302, "step": 392 }, { "epoch": 0.51, "learning_rate": 1.949476358030652e-06, "loss": 1.6232, "step": 394 }, { "epoch": 0.52, "learning_rate": 1.948810742096161e-06, "loss": 1.6972, "step": 396 }, { "epoch": 0.52, "learning_rate": 1.9481408854225377e-06, "loss": 1.7178, "step": 398 }, { "epoch": 0.52, "learning_rate": 1.947466791003728e-06, "loss": 1.6614, "step": 400 }, { "epoch": 0.52, "learning_rate": 1.9467884618526175e-06, "loss": 1.6718, "step": 402 }, { "epoch": 0.53, "learning_rate": 1.94610590100102e-06, "loss": 1.7231, "step": 404 }, { "epoch": 0.53, "learning_rate": 1.9454191114996614e-06, "loss": 1.6678, "step": 406 }, { "epoch": 0.53, "learning_rate": 1.9447280964181696e-06, "loss": 1.6821, "step": 408 }, { "epoch": 0.54, "learning_rate": 1.944032858845058e-06, "loss": 1.7083, "step": 410 }, { "epoch": 0.54, "learning_rate": 1.9433334018877117e-06, "loss": 1.6879, "step": 412 }, { "epoch": 0.54, "learning_rate": 1.9426297286723765e-06, "loss": 1.6622, "step": 414 }, { "epoch": 0.54, "learning_rate": 1.9419218423441413e-06, "loss": 1.739, "step": 416 }, { "epoch": 0.55, "learning_rate": 1.9412097460669255e-06, "loss": 1.6973, "step": 418 }, { "epoch": 0.55, "learning_rate": 1.940493443023466e-06, "loss": 1.6889, "step": 420 }, { "epoch": 0.55, "learning_rate": 1.939772936415302e-06, "loss": 1.6539, "step": 422 }, { "epoch": 0.55, "learning_rate": 1.93904822946276e-06, "loss": 1.6705, "step": 424 }, { "epoch": 0.56, "learning_rate": 1.9383193254049396e-06, "loss": 1.7566, "step": 426 }, { "epoch": 0.56, "learning_rate": 1.9375862274997007e-06, "loss": 1.606, "step": 428 }, { "epoch": 0.56, "learning_rate": 1.936848939023647e-06, "loss": 1.7073, "step": 430 }, { "epoch": 0.56, "learning_rate": 1.9361074632721124e-06, "loss": 1.6675, "step": 432 }, { "epoch": 0.57, "learning_rate": 1.935361803559146e-06, "loss": 1.7374, "step": 434 }, { "epoch": 0.57, "learning_rate": 1.934611963217497e-06, "loss": 1.6816, "step": 436 }, { "epoch": 0.57, "learning_rate": 1.9338579455985995e-06, "loss": 1.6804, "step": 438 }, { "epoch": 0.57, "learning_rate": 1.9330997540725597e-06, "loss": 1.6717, "step": 440 }, { "epoch": 0.58, "learning_rate": 1.932337392028138e-06, "loss": 1.6788, "step": 442 }, { "epoch": 0.58, "learning_rate": 1.9315708628727343e-06, "loss": 1.6859, "step": 444 }, { "epoch": 0.58, "learning_rate": 1.930800170032376e-06, "loss": 1.655, "step": 446 }, { "epoch": 0.58, "learning_rate": 1.9300253169516975e-06, "loss": 1.6668, "step": 448 }, { "epoch": 0.59, "learning_rate": 1.9292463070939292e-06, "loss": 1.6822, "step": 450 }, { "epoch": 0.59, "learning_rate": 1.9284631439408805e-06, "loss": 1.7345, "step": 452 }, { "epoch": 0.59, "learning_rate": 1.9276758309929225e-06, "loss": 1.7079, "step": 454 }, { "epoch": 0.6, "learning_rate": 1.9268843717689753e-06, "loss": 1.6829, "step": 456 }, { "epoch": 0.6, "learning_rate": 1.926088769806491e-06, "loss": 1.7123, "step": 458 }, { "epoch": 0.6, "learning_rate": 1.9252890286614366e-06, "loss": 1.6493, "step": 460 }, { "epoch": 0.6, "learning_rate": 1.9244851519082798e-06, "loss": 1.6968, "step": 462 }, { "epoch": 0.61, "learning_rate": 1.923677143139974e-06, "loss": 1.6282, "step": 464 }, { "epoch": 0.61, "learning_rate": 1.9228650059679387e-06, "loss": 1.6724, "step": 466 }, { "epoch": 0.61, "learning_rate": 1.922048744022046e-06, "loss": 1.6737, "step": 468 }, { "epoch": 0.61, "learning_rate": 1.9212283609506046e-06, "loss": 1.6696, "step": 470 }, { "epoch": 0.62, "learning_rate": 1.920403860420342e-06, "loss": 1.6488, "step": 472 }, { "epoch": 0.62, "learning_rate": 1.919575246116389e-06, "loss": 1.6218, "step": 474 }, { "epoch": 0.62, "learning_rate": 1.918742521742263e-06, "loss": 1.6535, "step": 476 }, { "epoch": 0.62, "learning_rate": 1.9179056910198514e-06, "loss": 1.6459, "step": 478 }, { "epoch": 0.63, "learning_rate": 1.9170647576893952e-06, "loss": 1.6519, "step": 480 }, { "epoch": 0.63, "learning_rate": 1.916219725509472e-06, "loss": 1.6875, "step": 482 }, { "epoch": 0.63, "learning_rate": 1.91537059825698e-06, "loss": 1.6848, "step": 484 }, { "epoch": 0.63, "learning_rate": 1.9145173797271186e-06, "loss": 1.6329, "step": 486 }, { "epoch": 0.64, "learning_rate": 1.913660073733376e-06, "loss": 1.6982, "step": 488 }, { "epoch": 0.64, "learning_rate": 1.912798684107507e-06, "loss": 1.6685, "step": 490 }, { "epoch": 0.64, "learning_rate": 1.9119332146995203e-06, "loss": 1.7009, "step": 492 }, { "epoch": 0.64, "learning_rate": 1.9110636693776573e-06, "loss": 1.6551, "step": 494 }, { "epoch": 0.65, "learning_rate": 1.9101900520283784e-06, "loss": 1.6704, "step": 496 }, { "epoch": 0.65, "learning_rate": 1.9093123665563434e-06, "loss": 1.6666, "step": 498 }, { "epoch": 0.65, "learning_rate": 1.9084306168843946e-06, "loss": 1.6699, "step": 500 }, { "epoch": 0.66, "learning_rate": 1.90754480695354e-06, "loss": 1.6998, "step": 502 }, { "epoch": 0.66, "learning_rate": 1.906654940722935e-06, "loss": 1.6992, "step": 504 }, { "epoch": 0.66, "learning_rate": 1.9057610221698634e-06, "loss": 1.7065, "step": 506 }, { "epoch": 0.66, "learning_rate": 1.9048630552897226e-06, "loss": 1.6426, "step": 508 }, { "epoch": 0.67, "learning_rate": 1.9039610440960034e-06, "loss": 1.6601, "step": 510 }, { "epoch": 0.67, "learning_rate": 1.903054992620273e-06, "loss": 1.6394, "step": 512 }, { "epoch": 0.67, "learning_rate": 1.9021449049121565e-06, "loss": 1.6503, "step": 514 }, { "epoch": 0.67, "learning_rate": 1.9012307850393193e-06, "loss": 1.6865, "step": 516 }, { "epoch": 0.68, "learning_rate": 1.900312637087449e-06, "loss": 1.6529, "step": 518 }, { "epoch": 0.68, "learning_rate": 1.8993904651602356e-06, "loss": 1.6776, "step": 520 }, { "epoch": 0.68, "learning_rate": 1.8984642733793554e-06, "loss": 1.6201, "step": 522 }, { "epoch": 0.68, "learning_rate": 1.8975340658844522e-06, "loss": 1.6725, "step": 524 }, { "epoch": 0.69, "learning_rate": 1.8965998468331165e-06, "loss": 1.6393, "step": 526 }, { "epoch": 0.69, "learning_rate": 1.8956616204008698e-06, "loss": 1.6464, "step": 528 }, { "epoch": 0.69, "learning_rate": 1.894719390781144e-06, "loss": 1.677, "step": 530 }, { "epoch": 0.69, "learning_rate": 1.893773162185264e-06, "loss": 1.6843, "step": 532 }, { "epoch": 0.7, "learning_rate": 1.892822938842428e-06, "loss": 1.63, "step": 534 }, { "epoch": 0.7, "learning_rate": 1.8918687249996885e-06, "loss": 1.6523, "step": 536 }, { "epoch": 0.7, "learning_rate": 1.8909105249219342e-06, "loss": 1.6744, "step": 538 }, { "epoch": 0.7, "learning_rate": 1.8899483428918701e-06, "loss": 1.6678, "step": 540 }, { "epoch": 0.71, "learning_rate": 1.8889821832099986e-06, "loss": 1.6631, "step": 542 }, { "epoch": 0.71, "learning_rate": 1.8880120501946008e-06, "loss": 1.6328, "step": 544 }, { "epoch": 0.71, "learning_rate": 1.887037948181716e-06, "loss": 1.6832, "step": 546 }, { "epoch": 0.72, "learning_rate": 1.8860598815251239e-06, "loss": 1.7224, "step": 548 }, { "epoch": 0.72, "learning_rate": 1.8850778545963234e-06, "loss": 1.7022, "step": 550 }, { "epoch": 0.72, "learning_rate": 1.8840918717845142e-06, "loss": 1.6811, "step": 552 }, { "epoch": 0.72, "learning_rate": 1.8831019374965779e-06, "loss": 1.6761, "step": 554 }, { "epoch": 0.73, "learning_rate": 1.8821080561570562e-06, "loss": 1.6503, "step": 556 }, { "epoch": 0.73, "learning_rate": 1.8811102322081327e-06, "loss": 1.744, "step": 558 }, { "epoch": 0.73, "learning_rate": 1.8801084701096124e-06, "loss": 1.6645, "step": 560 }, { "epoch": 0.73, "learning_rate": 1.8791027743389028e-06, "loss": 1.6282, "step": 562 }, { "epoch": 0.74, "learning_rate": 1.8780931493909916e-06, "loss": 1.6378, "step": 564 }, { "epoch": 0.74, "learning_rate": 1.8770795997784292e-06, "loss": 1.6921, "step": 566 }, { "epoch": 0.74, "learning_rate": 1.8760621300313076e-06, "loss": 1.6503, "step": 568 }, { "epoch": 0.74, "learning_rate": 1.8750407446972388e-06, "loss": 1.6626, "step": 570 }, { "epoch": 0.75, "learning_rate": 1.8740154483413368e-06, "loss": 1.7116, "step": 572 }, { "epoch": 0.75, "learning_rate": 1.8729862455461953e-06, "loss": 1.6613, "step": 574 }, { "epoch": 0.75, "learning_rate": 1.8719531409118688e-06, "loss": 1.6898, "step": 576 }, { "epoch": 0.75, "learning_rate": 1.8709161390558497e-06, "loss": 1.6868, "step": 578 }, { "epoch": 0.76, "learning_rate": 1.8698752446130505e-06, "loss": 1.6382, "step": 580 }, { "epoch": 0.76, "learning_rate": 1.8688304622357815e-06, "loss": 1.6634, "step": 582 }, { "epoch": 0.76, "learning_rate": 1.8677817965937294e-06, "loss": 1.6494, "step": 584 }, { "epoch": 0.76, "learning_rate": 1.8667292523739389e-06, "loss": 1.672, "step": 586 }, { "epoch": 0.77, "learning_rate": 1.8656728342807884e-06, "loss": 1.6626, "step": 588 }, { "epoch": 0.77, "learning_rate": 1.8646125470359714e-06, "loss": 1.6826, "step": 590 }, { "epoch": 0.77, "learning_rate": 1.8635483953784752e-06, "loss": 1.6692, "step": 592 }, { "epoch": 0.78, "learning_rate": 1.8624803840645585e-06, "loss": 1.6642, "step": 594 }, { "epoch": 0.78, "learning_rate": 1.8614085178677308e-06, "loss": 1.6654, "step": 596 }, { "epoch": 0.78, "learning_rate": 1.8603328015787317e-06, "loss": 1.684, "step": 598 }, { "epoch": 0.78, "learning_rate": 1.859253240005509e-06, "loss": 1.6485, "step": 600 }, { "epoch": 0.79, "learning_rate": 1.8581698379731962e-06, "loss": 1.6645, "step": 602 }, { "epoch": 0.79, "learning_rate": 1.8570826003240928e-06, "loss": 1.6559, "step": 604 }, { "epoch": 0.79, "learning_rate": 1.8559915319176405e-06, "loss": 1.6521, "step": 606 }, { "epoch": 0.79, "learning_rate": 1.8548966376304045e-06, "loss": 1.656, "step": 608 }, { "epoch": 0.8, "learning_rate": 1.853797922356048e-06, "loss": 1.6939, "step": 610 }, { "epoch": 0.8, "learning_rate": 1.852695391005314e-06, "loss": 1.677, "step": 612 }, { "epoch": 0.8, "learning_rate": 1.851589048506e-06, "loss": 1.6756, "step": 614 }, { "epoch": 0.8, "learning_rate": 1.8504788998029386e-06, "loss": 1.7516, "step": 616 }, { "epoch": 0.81, "learning_rate": 1.8493649498579735e-06, "loss": 1.6878, "step": 618 }, { "epoch": 0.81, "learning_rate": 1.848247203649939e-06, "loss": 1.7089, "step": 620 }, { "epoch": 0.81, "learning_rate": 1.8471256661746366e-06, "loss": 1.6735, "step": 622 }, { "epoch": 0.81, "learning_rate": 1.8460003424448127e-06, "loss": 1.6585, "step": 624 }, { "epoch": 0.82, "learning_rate": 1.844871237490137e-06, "loss": 1.6943, "step": 626 }, { "epoch": 0.82, "learning_rate": 1.8437383563571788e-06, "loss": 1.6922, "step": 628 }, { "epoch": 0.82, "learning_rate": 1.842601704109386e-06, "loss": 1.6556, "step": 630 }, { "epoch": 0.82, "learning_rate": 1.8414612858270613e-06, "loss": 1.6795, "step": 632 }, { "epoch": 0.83, "learning_rate": 1.8403171066073391e-06, "loss": 1.6719, "step": 634 }, { "epoch": 0.83, "learning_rate": 1.8391691715641646e-06, "loss": 1.6462, "step": 636 }, { "epoch": 0.83, "learning_rate": 1.8380174858282684e-06, "loss": 1.7138, "step": 638 }, { "epoch": 0.84, "learning_rate": 1.8368620545471468e-06, "loss": 1.6404, "step": 640 }, { "epoch": 0.84, "learning_rate": 1.8357028828850352e-06, "loss": 1.6931, "step": 642 }, { "epoch": 0.84, "learning_rate": 1.8345399760228874e-06, "loss": 1.6342, "step": 644 }, { "epoch": 0.84, "learning_rate": 1.8333733391583518e-06, "loss": 1.6657, "step": 646 }, { "epoch": 0.85, "learning_rate": 1.8322029775057489e-06, "loss": 1.6783, "step": 648 }, { "epoch": 0.85, "learning_rate": 1.8310288962960453e-06, "loss": 1.6747, "step": 650 }, { "epoch": 0.85, "learning_rate": 1.8298511007768344e-06, "loss": 1.6914, "step": 652 }, { "epoch": 0.85, "learning_rate": 1.8286695962123099e-06, "loss": 1.7069, "step": 654 }, { "epoch": 0.86, "learning_rate": 1.8274843878832424e-06, "loss": 1.6734, "step": 656 }, { "epoch": 0.86, "learning_rate": 1.826295481086958e-06, "loss": 1.6667, "step": 658 }, { "epoch": 0.86, "learning_rate": 1.8251028811373126e-06, "loss": 1.7158, "step": 660 }, { "epoch": 0.86, "learning_rate": 1.823906593364669e-06, "loss": 1.671, "step": 662 }, { "epoch": 0.87, "learning_rate": 1.8227066231158723e-06, "loss": 1.7136, "step": 664 }, { "epoch": 0.87, "learning_rate": 1.8215029757542269e-06, "loss": 1.6296, "step": 666 }, { "epoch": 0.87, "learning_rate": 1.8202956566594718e-06, "loss": 1.7197, "step": 668 }, { "epoch": 0.87, "learning_rate": 1.819084671227758e-06, "loss": 1.6423, "step": 670 }, { "epoch": 0.88, "learning_rate": 1.8178700248716222e-06, "loss": 1.643, "step": 672 }, { "epoch": 0.88, "learning_rate": 1.8166517230199635e-06, "loss": 1.6546, "step": 674 }, { "epoch": 0.88, "learning_rate": 1.815429771118021e-06, "loss": 1.6814, "step": 676 }, { "epoch": 0.88, "learning_rate": 1.8142041746273463e-06, "loss": 1.6313, "step": 678 }, { "epoch": 0.89, "learning_rate": 1.8129749390257806e-06, "loss": 1.6718, "step": 680 }, { "epoch": 0.89, "learning_rate": 1.8117420698074316e-06, "loss": 1.7085, "step": 682 }, { "epoch": 0.89, "learning_rate": 1.8105055724826453e-06, "loss": 1.6729, "step": 684 }, { "epoch": 0.9, "learning_rate": 1.8092654525779865e-06, "loss": 1.6683, "step": 686 }, { "epoch": 0.9, "learning_rate": 1.8080217156362083e-06, "loss": 1.6816, "step": 688 }, { "epoch": 0.9, "learning_rate": 1.8067743672162328e-06, "loss": 1.658, "step": 690 }, { "epoch": 0.9, "learning_rate": 1.8055234128931216e-06, "loss": 1.6674, "step": 692 }, { "epoch": 0.91, "learning_rate": 1.8042688582580547e-06, "loss": 1.6738, "step": 694 }, { "epoch": 0.91, "learning_rate": 1.8030107089183027e-06, "loss": 1.6648, "step": 696 }, { "epoch": 0.91, "learning_rate": 1.8017489704972033e-06, "loss": 1.6757, "step": 698 }, { "epoch": 0.91, "learning_rate": 1.8004836486341358e-06, "loss": 1.6839, "step": 700 }, { "epoch": 0.92, "learning_rate": 1.7992147489844954e-06, "loss": 1.6858, "step": 702 }, { "epoch": 0.92, "learning_rate": 1.7979422772196684e-06, "loss": 1.7143, "step": 704 }, { "epoch": 0.92, "learning_rate": 1.7966662390270076e-06, "loss": 1.6553, "step": 706 }, { "epoch": 0.92, "learning_rate": 1.7953866401098047e-06, "loss": 1.7045, "step": 708 }, { "epoch": 0.93, "learning_rate": 1.7941034861872674e-06, "loss": 1.6441, "step": 710 }, { "epoch": 0.93, "learning_rate": 1.7928167829944915e-06, "loss": 1.6946, "step": 712 }, { "epoch": 0.93, "learning_rate": 1.7915265362824377e-06, "loss": 1.6954, "step": 714 }, { "epoch": 0.93, "learning_rate": 1.7902327518179032e-06, "loss": 1.6722, "step": 716 }, { "epoch": 0.94, "learning_rate": 1.7889354353834982e-06, "loss": 1.6733, "step": 718 }, { "epoch": 0.94, "learning_rate": 1.7876345927776188e-06, "loss": 1.7008, "step": 720 }, { "epoch": 0.94, "learning_rate": 1.7863302298144216e-06, "loss": 1.6631, "step": 722 }, { "epoch": 0.94, "learning_rate": 1.785022352323797e-06, "loss": 1.6355, "step": 724 }, { "epoch": 0.95, "learning_rate": 1.7837109661513448e-06, "loss": 1.6214, "step": 726 }, { "epoch": 0.95, "learning_rate": 1.782396077158346e-06, "loss": 1.7022, "step": 728 }, { "epoch": 0.95, "learning_rate": 1.7810776912217378e-06, "loss": 1.7172, "step": 730 }, { "epoch": 0.96, "learning_rate": 1.779755814234087e-06, "loss": 1.7013, "step": 732 }, { "epoch": 0.96, "learning_rate": 1.7784304521035645e-06, "loss": 1.6754, "step": 734 }, { "epoch": 0.96, "learning_rate": 1.7771016107539167e-06, "loss": 1.6523, "step": 736 }, { "epoch": 0.96, "learning_rate": 1.775769296124442e-06, "loss": 1.6637, "step": 738 }, { "epoch": 0.97, "learning_rate": 1.7744335141699626e-06, "loss": 1.6844, "step": 740 }, { "epoch": 0.97, "learning_rate": 1.7730942708607964e-06, "loss": 1.6568, "step": 742 }, { "epoch": 0.97, "learning_rate": 1.7717515721827334e-06, "loss": 1.7084, "step": 744 }, { "epoch": 0.97, "learning_rate": 1.7704054241370072e-06, "loss": 1.6698, "step": 746 }, { "epoch": 0.98, "learning_rate": 1.7690558327402685e-06, "loss": 1.7337, "step": 748 }, { "epoch": 0.98, "learning_rate": 1.767702804024558e-06, "loss": 1.7196, "step": 750 }, { "epoch": 0.98, "learning_rate": 1.7663463440372795e-06, "loss": 1.6206, "step": 752 }, { "epoch": 0.98, "learning_rate": 1.7649864588411734e-06, "loss": 1.6425, "step": 754 }, { "epoch": 0.99, "learning_rate": 1.7636231545142884e-06, "loss": 1.6834, "step": 756 }, { "epoch": 0.99, "learning_rate": 1.762256437149956e-06, "loss": 1.6772, "step": 758 }, { "epoch": 0.99, "learning_rate": 1.7608863128567614e-06, "loss": 1.6882, "step": 760 }, { "epoch": 0.99, "learning_rate": 1.7595127877585188e-06, "loss": 1.7602, "step": 762 }, { "epoch": 1.0, "learning_rate": 1.7581358679942406e-06, "loss": 1.663, "step": 764 }, { "epoch": 1.0, "learning_rate": 1.7567555597181127e-06, "loss": 1.6718, "step": 766 }, { "epoch": 1.0, "learning_rate": 1.7553718690994659e-06, "loss": 1.6516, "step": 768 }, { "epoch": 1.0, "learning_rate": 1.7539848023227486e-06, "loss": 1.5969, "step": 770 }, { "epoch": 1.01, "learning_rate": 1.7525943655874986e-06, "loss": 1.6172, "step": 772 }, { "epoch": 1.01, "learning_rate": 1.7512005651083163e-06, "loss": 1.6425, "step": 774 }, { "epoch": 1.01, "learning_rate": 1.7498034071148364e-06, "loss": 1.6208, "step": 776 }, { "epoch": 1.02, "learning_rate": 1.7484028978517e-06, "loss": 1.6898, "step": 778 }, { "epoch": 1.02, "learning_rate": 1.7469990435785269e-06, "loss": 1.646, "step": 780 }, { "epoch": 1.02, "learning_rate": 1.7455918505698873e-06, "loss": 1.6232, "step": 782 }, { "epoch": 1.02, "learning_rate": 1.7441813251152739e-06, "loss": 1.6009, "step": 784 }, { "epoch": 1.03, "learning_rate": 1.7427674735190746e-06, "loss": 1.6495, "step": 786 }, { "epoch": 1.03, "learning_rate": 1.741350302100543e-06, "loss": 1.6192, "step": 788 }, { "epoch": 1.03, "learning_rate": 1.7399298171937703e-06, "loss": 1.6069, "step": 790 }, { "epoch": 1.03, "learning_rate": 1.7385060251476587e-06, "loss": 1.6259, "step": 792 }, { "epoch": 1.04, "learning_rate": 1.7370789323258905e-06, "loss": 1.6499, "step": 794 }, { "epoch": 1.04, "learning_rate": 1.735648545106902e-06, "loss": 1.6528, "step": 796 }, { "epoch": 1.04, "learning_rate": 1.734214869883853e-06, "loss": 1.6183, "step": 798 }, { "epoch": 1.04, "learning_rate": 1.7327779130645994e-06, "loss": 1.587, "step": 800 }, { "epoch": 1.05, "learning_rate": 1.7313376810716652e-06, "loss": 1.6316, "step": 802 }, { "epoch": 1.05, "learning_rate": 1.7298941803422108e-06, "loss": 1.6709, "step": 804 }, { "epoch": 1.05, "learning_rate": 1.7284474173280088e-06, "loss": 1.6394, "step": 806 }, { "epoch": 1.05, "learning_rate": 1.7269973984954112e-06, "loss": 1.6625, "step": 808 }, { "epoch": 1.06, "learning_rate": 1.7255441303253217e-06, "loss": 1.6603, "step": 810 }, { "epoch": 1.06, "learning_rate": 1.7240876193131682e-06, "loss": 1.7061, "step": 812 }, { "epoch": 1.06, "learning_rate": 1.7226278719688719e-06, "loss": 1.7066, "step": 814 }, { "epoch": 1.06, "learning_rate": 1.7211648948168187e-06, "loss": 1.6273, "step": 816 }, { "epoch": 1.07, "learning_rate": 1.719698694395831e-06, "loss": 1.6186, "step": 818 }, { "epoch": 1.07, "learning_rate": 1.7182292772591367e-06, "loss": 1.6412, "step": 820 }, { "epoch": 1.07, "learning_rate": 1.7167566499743416e-06, "loss": 1.6594, "step": 822 }, { "epoch": 1.08, "learning_rate": 1.7152808191233993e-06, "loss": 1.6368, "step": 824 }, { "epoch": 1.08, "learning_rate": 1.7138017913025814e-06, "loss": 1.6097, "step": 826 }, { "epoch": 1.08, "learning_rate": 1.7123195731224488e-06, "loss": 1.6532, "step": 828 }, { "epoch": 1.08, "learning_rate": 1.7108341712078223e-06, "loss": 1.614, "step": 830 }, { "epoch": 1.09, "learning_rate": 1.7093455921977514e-06, "loss": 1.6505, "step": 832 }, { "epoch": 1.09, "learning_rate": 1.7078538427454865e-06, "loss": 1.6498, "step": 834 }, { "epoch": 1.09, "learning_rate": 1.7063589295184482e-06, "loss": 1.6582, "step": 836 }, { "epoch": 1.09, "learning_rate": 1.7048608591981974e-06, "loss": 1.6596, "step": 838 }, { "epoch": 1.1, "learning_rate": 1.7033596384804066e-06, "loss": 1.6303, "step": 840 }, { "epoch": 1.1, "learning_rate": 1.701855274074828e-06, "loss": 1.6446, "step": 842 }, { "epoch": 1.1, "learning_rate": 1.7003477727052654e-06, "loss": 1.6726, "step": 844 }, { "epoch": 1.1, "learning_rate": 1.6988371411095428e-06, "loss": 1.6308, "step": 846 }, { "epoch": 1.11, "learning_rate": 1.6973233860394753e-06, "loss": 1.6503, "step": 848 }, { "epoch": 1.11, "learning_rate": 1.6958065142608381e-06, "loss": 1.6022, "step": 850 }, { "epoch": 1.11, "learning_rate": 1.6942865325533373e-06, "loss": 1.6028, "step": 852 }, { "epoch": 1.11, "learning_rate": 1.6927634477105778e-06, "loss": 1.6536, "step": 854 }, { "epoch": 1.12, "learning_rate": 1.6912372665400353e-06, "loss": 1.6423, "step": 856 }, { "epoch": 1.12, "learning_rate": 1.6897079958630234e-06, "loss": 1.6583, "step": 858 }, { "epoch": 1.12, "learning_rate": 1.6881756425146654e-06, "loss": 1.6365, "step": 860 }, { "epoch": 1.12, "learning_rate": 1.6866402133438623e-06, "loss": 1.6567, "step": 862 }, { "epoch": 1.13, "learning_rate": 1.6851017152132619e-06, "loss": 1.624, "step": 864 }, { "epoch": 1.13, "learning_rate": 1.6835601549992305e-06, "loss": 1.657, "step": 866 }, { "epoch": 1.13, "learning_rate": 1.6820155395918186e-06, "loss": 1.6555, "step": 868 }, { "epoch": 1.14, "learning_rate": 1.6804678758947333e-06, "loss": 1.6531, "step": 870 }, { "epoch": 1.14, "learning_rate": 1.6789171708253052e-06, "loss": 1.6555, "step": 872 }, { "epoch": 1.14, "learning_rate": 1.6773634313144593e-06, "loss": 1.7117, "step": 874 }, { "epoch": 1.14, "learning_rate": 1.6758066643066826e-06, "loss": 1.6367, "step": 876 }, { "epoch": 1.15, "learning_rate": 1.6742468767599933e-06, "loss": 1.6645, "step": 878 }, { "epoch": 1.15, "learning_rate": 1.6726840756459107e-06, "loss": 1.6659, "step": 880 }, { "epoch": 1.15, "learning_rate": 1.6711182679494232e-06, "loss": 1.6681, "step": 882 }, { "epoch": 1.15, "learning_rate": 1.6695494606689567e-06, "loss": 1.6421, "step": 884 }, { "epoch": 1.16, "learning_rate": 1.6679776608163441e-06, "loss": 1.6319, "step": 886 }, { "epoch": 1.16, "learning_rate": 1.6664028754167938e-06, "loss": 1.6776, "step": 888 }, { "epoch": 1.16, "learning_rate": 1.6648251115088583e-06, "loss": 1.6227, "step": 890 }, { "epoch": 1.16, "learning_rate": 1.6632443761444024e-06, "loss": 1.6288, "step": 892 }, { "epoch": 1.17, "learning_rate": 1.6616606763885717e-06, "loss": 1.6702, "step": 894 }, { "epoch": 1.17, "learning_rate": 1.6600740193197622e-06, "loss": 1.6661, "step": 896 }, { "epoch": 1.17, "learning_rate": 1.6584844120295865e-06, "loss": 1.5971, "step": 898 }, { "epoch": 1.17, "learning_rate": 1.656891861622844e-06, "loss": 1.6587, "step": 900 }, { "epoch": 1.18, "learning_rate": 1.655296375217488e-06, "loss": 1.6125, "step": 902 }, { "epoch": 1.18, "learning_rate": 1.6536979599445953e-06, "loss": 1.6308, "step": 904 }, { "epoch": 1.18, "learning_rate": 1.6520966229483322e-06, "loss": 1.622, "step": 906 }, { "epoch": 1.18, "learning_rate": 1.650492371385924e-06, "loss": 1.673, "step": 908 }, { "epoch": 1.19, "learning_rate": 1.6488852124276232e-06, "loss": 1.6179, "step": 910 }, { "epoch": 1.19, "learning_rate": 1.6472751532566775e-06, "loss": 1.5717, "step": 912 }, { "epoch": 1.19, "learning_rate": 1.6456622010692955e-06, "loss": 1.661, "step": 914 }, { "epoch": 1.2, "learning_rate": 1.6440463630746172e-06, "loss": 1.6027, "step": 916 }, { "epoch": 1.2, "learning_rate": 1.6424276464946813e-06, "loss": 1.6565, "step": 918 }, { "epoch": 1.2, "learning_rate": 1.640806058564392e-06, "loss": 1.6329, "step": 920 }, { "epoch": 1.2, "learning_rate": 1.6391816065314864e-06, "loss": 1.6458, "step": 922 }, { "epoch": 1.21, "learning_rate": 1.6375542976565038e-06, "loss": 1.6731, "step": 924 }, { "epoch": 1.21, "learning_rate": 1.6359241392127517e-06, "loss": 1.6625, "step": 926 }, { "epoch": 1.21, "learning_rate": 1.634291138486274e-06, "loss": 1.7185, "step": 928 }, { "epoch": 1.21, "learning_rate": 1.6326553027758183e-06, "loss": 1.6164, "step": 930 }, { "epoch": 1.22, "learning_rate": 1.6310166393928033e-06, "loss": 1.6475, "step": 932 }, { "epoch": 1.22, "learning_rate": 1.6293751556612858e-06, "loss": 1.6214, "step": 934 }, { "epoch": 1.22, "learning_rate": 1.6277308589179286e-06, "loss": 1.7042, "step": 936 }, { "epoch": 1.22, "learning_rate": 1.6260837565119668e-06, "loss": 1.6537, "step": 938 }, { "epoch": 1.23, "learning_rate": 1.6244338558051761e-06, "loss": 1.6571, "step": 940 }, { "epoch": 1.23, "learning_rate": 1.6227811641718393e-06, "loss": 1.6417, "step": 942 }, { "epoch": 1.23, "learning_rate": 1.6211256889987129e-06, "loss": 1.6794, "step": 944 }, { "epoch": 1.23, "learning_rate": 1.6194674376849941e-06, "loss": 1.6424, "step": 946 }, { "epoch": 1.24, "learning_rate": 1.6178064176422895e-06, "loss": 1.6647, "step": 948 }, { "epoch": 1.24, "learning_rate": 1.6161426362945796e-06, "loss": 1.6308, "step": 950 }, { "epoch": 1.24, "learning_rate": 1.6144761010781866e-06, "loss": 1.6578, "step": 952 }, { "epoch": 1.25, "learning_rate": 1.6128068194417424e-06, "loss": 1.6666, "step": 954 }, { "epoch": 1.25, "learning_rate": 1.6111347988461522e-06, "loss": 1.6561, "step": 956 }, { "epoch": 1.25, "learning_rate": 1.6094600467645643e-06, "loss": 1.6719, "step": 958 }, { "epoch": 1.25, "learning_rate": 1.6077825706823356e-06, "loss": 1.6735, "step": 960 }, { "epoch": 1.26, "learning_rate": 1.6061023780969975e-06, "loss": 1.6408, "step": 962 }, { "epoch": 1.26, "learning_rate": 1.604419476518223e-06, "loss": 1.6715, "step": 964 }, { "epoch": 1.26, "learning_rate": 1.6027338734677929e-06, "loss": 1.6253, "step": 966 }, { "epoch": 1.26, "learning_rate": 1.6010455764795627e-06, "loss": 1.6585, "step": 968 }, { "epoch": 1.27, "learning_rate": 1.5993545930994286e-06, "loss": 1.6267, "step": 970 }, { "epoch": 1.27, "learning_rate": 1.5976609308852927e-06, "loss": 1.6103, "step": 972 }, { "epoch": 1.27, "learning_rate": 1.5959645974070314e-06, "loss": 1.6536, "step": 974 }, { "epoch": 1.27, "learning_rate": 1.5942656002464596e-06, "loss": 1.646, "step": 976 }, { "epoch": 1.28, "learning_rate": 1.5925639469972986e-06, "loss": 1.6503, "step": 978 }, { "epoch": 1.28, "learning_rate": 1.59085964526514e-06, "loss": 1.6461, "step": 980 }, { "epoch": 1.28, "learning_rate": 1.5891527026674138e-06, "loss": 1.6556, "step": 982 }, { "epoch": 1.28, "learning_rate": 1.587443126833353e-06, "loss": 1.6606, "step": 984 }, { "epoch": 1.29, "learning_rate": 1.58573092540396e-06, "loss": 1.643, "step": 986 }, { "epoch": 1.29, "learning_rate": 1.5840161060319717e-06, "loss": 1.6463, "step": 988 }, { "epoch": 1.29, "learning_rate": 1.5822986763818271e-06, "loss": 1.6311, "step": 990 }, { "epoch": 1.29, "learning_rate": 1.5805786441296318e-06, "loss": 1.6517, "step": 992 }, { "epoch": 1.3, "learning_rate": 1.5788560169631234e-06, "loss": 1.6295, "step": 994 }, { "epoch": 1.3, "learning_rate": 1.577130802581637e-06, "loss": 1.6462, "step": 996 }, { "epoch": 1.3, "learning_rate": 1.5754030086960723e-06, "loss": 1.6615, "step": 998 }, { "epoch": 1.31, "learning_rate": 1.5736726430288578e-06, "loss": 1.6252, "step": 1000 }, { "epoch": 1.31, "learning_rate": 1.571939713313917e-06, "loss": 1.6489, "step": 1002 }, { "epoch": 1.31, "learning_rate": 1.5702042272966328e-06, "loss": 1.667, "step": 1004 }, { "epoch": 1.31, "learning_rate": 1.5684661927338144e-06, "loss": 1.6357, "step": 1006 }, { "epoch": 1.32, "learning_rate": 1.5667256173936605e-06, "loss": 1.6542, "step": 1008 }, { "epoch": 1.32, "learning_rate": 1.5649825090557276e-06, "loss": 1.6409, "step": 1010 }, { "epoch": 1.32, "learning_rate": 1.5632368755108925e-06, "loss": 1.6148, "step": 1012 }, { "epoch": 1.32, "learning_rate": 1.5614887245613181e-06, "loss": 1.636, "step": 1014 }, { "epoch": 1.33, "learning_rate": 1.55973806402042e-06, "loss": 1.6294, "step": 1016 }, { "epoch": 1.33, "learning_rate": 1.5579849017128294e-06, "loss": 1.6545, "step": 1018 }, { "epoch": 1.33, "learning_rate": 1.5562292454743606e-06, "loss": 1.6635, "step": 1020 }, { "epoch": 1.33, "learning_rate": 1.5544711031519732e-06, "loss": 1.6447, "step": 1022 }, { "epoch": 1.34, "learning_rate": 1.552710482603739e-06, "loss": 1.662, "step": 1024 }, { "epoch": 1.34, "learning_rate": 1.5509473916988067e-06, "loss": 1.6113, "step": 1026 }, { "epoch": 1.34, "learning_rate": 1.5491818383173656e-06, "loss": 1.6634, "step": 1028 }, { "epoch": 1.34, "learning_rate": 1.5474138303506115e-06, "loss": 1.6267, "step": 1030 }, { "epoch": 1.35, "learning_rate": 1.5456433757007112e-06, "loss": 1.6948, "step": 1032 }, { "epoch": 1.35, "learning_rate": 1.5438704822807669e-06, "loss": 1.5969, "step": 1034 }, { "epoch": 1.35, "learning_rate": 1.5420951580147805e-06, "loss": 1.6356, "step": 1036 }, { "epoch": 1.35, "learning_rate": 1.5403174108376194e-06, "loss": 1.5955, "step": 1038 }, { "epoch": 1.36, "learning_rate": 1.5385372486949795e-06, "loss": 1.6533, "step": 1040 }, { "epoch": 1.36, "learning_rate": 1.5367546795433514e-06, "loss": 1.6796, "step": 1042 }, { "epoch": 1.36, "learning_rate": 1.5349697113499833e-06, "loss": 1.6064, "step": 1044 }, { "epoch": 1.37, "learning_rate": 1.533182352092846e-06, "loss": 1.5981, "step": 1046 }, { "epoch": 1.37, "learning_rate": 1.5313926097605969e-06, "loss": 1.6574, "step": 1048 }, { "epoch": 1.37, "learning_rate": 1.5296004923525457e-06, "loss": 1.6784, "step": 1050 }, { "epoch": 1.37, "learning_rate": 1.5278060078786164e-06, "loss": 1.6619, "step": 1052 }, { "epoch": 1.38, "learning_rate": 1.5260091643593134e-06, "loss": 1.6148, "step": 1054 }, { "epoch": 1.38, "learning_rate": 1.524209969825685e-06, "loss": 1.6591, "step": 1056 }, { "epoch": 1.38, "learning_rate": 1.5224084323192864e-06, "loss": 1.5984, "step": 1058 }, { "epoch": 1.38, "learning_rate": 1.520604559892146e-06, "loss": 1.6718, "step": 1060 }, { "epoch": 1.39, "learning_rate": 1.5187983606067284e-06, "loss": 1.6287, "step": 1062 }, { "epoch": 1.39, "learning_rate": 1.5169898425358962e-06, "loss": 1.6369, "step": 1064 }, { "epoch": 1.39, "learning_rate": 1.5151790137628782e-06, "loss": 1.6217, "step": 1066 }, { "epoch": 1.39, "learning_rate": 1.5133658823812289e-06, "loss": 1.6222, "step": 1068 }, { "epoch": 1.4, "learning_rate": 1.5115504564947968e-06, "loss": 1.6274, "step": 1070 }, { "epoch": 1.4, "learning_rate": 1.5097327442176836e-06, "loss": 1.6423, "step": 1072 }, { "epoch": 1.4, "learning_rate": 1.5079127536742105e-06, "loss": 1.638, "step": 1074 }, { "epoch": 1.4, "learning_rate": 1.5060904929988823e-06, "loss": 1.5695, "step": 1076 }, { "epoch": 1.41, "learning_rate": 1.50426597033635e-06, "loss": 1.6434, "step": 1078 }, { "epoch": 1.41, "learning_rate": 1.5024391938413741e-06, "loss": 1.6651, "step": 1080 }, { "epoch": 1.41, "learning_rate": 1.5006101716787894e-06, "loss": 1.5832, "step": 1082 }, { "epoch": 1.41, "learning_rate": 1.4987789120234663e-06, "loss": 1.6371, "step": 1084 }, { "epoch": 1.42, "learning_rate": 1.496945423060278e-06, "loss": 1.6155, "step": 1086 }, { "epoch": 1.42, "learning_rate": 1.49510971298406e-06, "loss": 1.6308, "step": 1088 }, { "epoch": 1.42, "learning_rate": 1.4932717899995755e-06, "loss": 1.6545, "step": 1090 }, { "epoch": 1.43, "learning_rate": 1.4914316623214788e-06, "loss": 1.6111, "step": 1092 }, { "epoch": 1.43, "learning_rate": 1.4895893381742772e-06, "loss": 1.6386, "step": 1094 }, { "epoch": 1.43, "learning_rate": 1.4877448257922964e-06, "loss": 1.6522, "step": 1096 }, { "epoch": 1.43, "learning_rate": 1.4858981334196417e-06, "loss": 1.6319, "step": 1098 }, { "epoch": 1.44, "learning_rate": 1.4840492693101619e-06, "loss": 1.6326, "step": 1100 }, { "epoch": 1.44, "learning_rate": 1.4821982417274127e-06, "loss": 1.6525, "step": 1102 }, { "epoch": 1.44, "learning_rate": 1.4803450589446196e-06, "loss": 1.6357, "step": 1104 }, { "epoch": 1.44, "learning_rate": 1.478489729244641e-06, "loss": 1.6227, "step": 1106 }, { "epoch": 1.45, "learning_rate": 1.4766322609199302e-06, "loss": 1.5378, "step": 1108 }, { "epoch": 1.45, "learning_rate": 1.4747726622724996e-06, "loss": 1.6475, "step": 1110 }, { "epoch": 1.45, "learning_rate": 1.472910941613884e-06, "loss": 1.6425, "step": 1112 }, { "epoch": 1.45, "learning_rate": 1.4710471072651013e-06, "loss": 1.6182, "step": 1114 }, { "epoch": 1.46, "learning_rate": 1.4691811675566167e-06, "loss": 1.655, "step": 1116 }, { "epoch": 1.46, "learning_rate": 1.4673131308283062e-06, "loss": 1.6137, "step": 1118 }, { "epoch": 1.46, "learning_rate": 1.4654430054294182e-06, "loss": 1.6654, "step": 1120 }, { "epoch": 1.46, "learning_rate": 1.4635707997185365e-06, "loss": 1.6104, "step": 1122 }, { "epoch": 1.47, "learning_rate": 1.4616965220635425e-06, "loss": 1.6268, "step": 1124 }, { "epoch": 1.47, "learning_rate": 1.459820180841578e-06, "loss": 1.6756, "step": 1126 }, { "epoch": 1.47, "learning_rate": 1.4579417844390091e-06, "loss": 1.5819, "step": 1128 }, { "epoch": 1.47, "learning_rate": 1.456061341251387e-06, "loss": 1.6191, "step": 1130 }, { "epoch": 1.48, "learning_rate": 1.454178859683411e-06, "loss": 1.6041, "step": 1132 }, { "epoch": 1.48, "learning_rate": 1.4522943481488905e-06, "loss": 1.7297, "step": 1134 }, { "epoch": 1.48, "learning_rate": 1.450407815070709e-06, "loss": 1.5986, "step": 1136 }, { "epoch": 1.49, "learning_rate": 1.4485192688807849e-06, "loss": 1.6505, "step": 1138 }, { "epoch": 1.49, "learning_rate": 1.4466287180200334e-06, "loss": 1.6483, "step": 1140 }, { "epoch": 1.49, "learning_rate": 1.444736170938331e-06, "loss": 1.6244, "step": 1142 }, { "epoch": 1.49, "learning_rate": 1.4428416360944758e-06, "loss": 1.6216, "step": 1144 }, { "epoch": 1.5, "learning_rate": 1.44094512195615e-06, "loss": 1.6686, "step": 1146 }, { "epoch": 1.5, "learning_rate": 1.4390466369998825e-06, "loss": 1.6131, "step": 1148 }, { "epoch": 1.5, "learning_rate": 1.437146189711011e-06, "loss": 1.6217, "step": 1150 }, { "epoch": 1.5, "learning_rate": 1.4352437885836439e-06, "loss": 1.5977, "step": 1152 }, { "epoch": 1.51, "learning_rate": 1.433339442120622e-06, "loss": 1.6495, "step": 1154 }, { "epoch": 1.51, "learning_rate": 1.4314331588334812e-06, "loss": 1.625, "step": 1156 }, { "epoch": 1.51, "learning_rate": 1.4295249472424138e-06, "loss": 1.6697, "step": 1158 }, { "epoch": 1.51, "learning_rate": 1.4276148158762312e-06, "loss": 1.6801, "step": 1160 }, { "epoch": 1.52, "learning_rate": 1.4257027732723247e-06, "loss": 1.6695, "step": 1162 }, { "epoch": 1.52, "learning_rate": 1.4237888279766282e-06, "loss": 1.6368, "step": 1164 }, { "epoch": 1.52, "learning_rate": 1.4218729885435795e-06, "loss": 1.6403, "step": 1166 }, { "epoch": 1.52, "learning_rate": 1.4199552635360831e-06, "loss": 1.6456, "step": 1168 }, { "epoch": 1.53, "learning_rate": 1.418035661525471e-06, "loss": 1.6798, "step": 1170 }, { "epoch": 1.53, "learning_rate": 1.4161141910914628e-06, "loss": 1.6831, "step": 1172 }, { "epoch": 1.53, "learning_rate": 1.414190860822131e-06, "loss": 1.6382, "step": 1174 }, { "epoch": 1.53, "learning_rate": 1.4122656793138604e-06, "loss": 1.6395, "step": 1176 }, { "epoch": 1.54, "learning_rate": 1.4103386551713092e-06, "loss": 1.5895, "step": 1178 }, { "epoch": 1.54, "learning_rate": 1.4084097970073724e-06, "loss": 1.6587, "step": 1180 }, { "epoch": 1.54, "learning_rate": 1.4064791134431409e-06, "loss": 1.5971, "step": 1182 }, { "epoch": 1.55, "learning_rate": 1.4045466131078648e-06, "loss": 1.5755, "step": 1184 }, { "epoch": 1.55, "learning_rate": 1.4026123046389153e-06, "loss": 1.6285, "step": 1186 }, { "epoch": 1.55, "learning_rate": 1.4006761966817436e-06, "loss": 1.6766, "step": 1188 }, { "epoch": 1.55, "learning_rate": 1.398738297889845e-06, "loss": 1.6323, "step": 1190 }, { "epoch": 1.56, "learning_rate": 1.396798616924718e-06, "loss": 1.6665, "step": 1192 }, { "epoch": 1.56, "learning_rate": 1.394857162455827e-06, "loss": 1.6308, "step": 1194 }, { "epoch": 1.56, "learning_rate": 1.3929139431605633e-06, "loss": 1.6035, "step": 1196 }, { "epoch": 1.56, "learning_rate": 1.390968967724206e-06, "loss": 1.6314, "step": 1198 }, { "epoch": 1.57, "learning_rate": 1.3890222448398835e-06, "loss": 1.6227, "step": 1200 }, { "epoch": 1.57, "learning_rate": 1.3870737832085341e-06, "loss": 1.626, "step": 1202 }, { "epoch": 1.57, "learning_rate": 1.3851235915388678e-06, "loss": 1.6649, "step": 1204 }, { "epoch": 1.57, "learning_rate": 1.3831716785473266e-06, "loss": 1.6319, "step": 1206 }, { "epoch": 1.58, "learning_rate": 1.3812180529580468e-06, "loss": 1.7266, "step": 1208 }, { "epoch": 1.58, "learning_rate": 1.3792627235028187e-06, "loss": 1.6233, "step": 1210 }, { "epoch": 1.58, "learning_rate": 1.3773056989210478e-06, "loss": 1.6475, "step": 1212 }, { "epoch": 1.58, "learning_rate": 1.3753469879597166e-06, "loss": 1.617, "step": 1214 }, { "epoch": 1.59, "learning_rate": 1.3733865993733446e-06, "loss": 1.61, "step": 1216 }, { "epoch": 1.59, "learning_rate": 1.3714245419239494e-06, "loss": 1.658, "step": 1218 }, { "epoch": 1.59, "learning_rate": 1.3694608243810087e-06, "loss": 1.6522, "step": 1220 }, { "epoch": 1.59, "learning_rate": 1.367495455521418e-06, "loss": 1.6231, "step": 1222 }, { "epoch": 1.6, "learning_rate": 1.3655284441294552e-06, "loss": 1.5785, "step": 1224 }, { "epoch": 1.6, "learning_rate": 1.3635597989967384e-06, "loss": 1.6403, "step": 1226 }, { "epoch": 1.6, "learning_rate": 1.361589528922189e-06, "loss": 1.658, "step": 1228 }, { "epoch": 1.61, "learning_rate": 1.3596176427119902e-06, "loss": 1.6734, "step": 1230 }, { "epoch": 1.61, "learning_rate": 1.3576441491795482e-06, "loss": 1.6276, "step": 1232 }, { "epoch": 1.61, "learning_rate": 1.3556690571454543e-06, "loss": 1.6124, "step": 1234 }, { "epoch": 1.61, "learning_rate": 1.3536923754374436e-06, "loss": 1.6217, "step": 1236 }, { "epoch": 1.62, "learning_rate": 1.3517141128903566e-06, "loss": 1.6518, "step": 1238 }, { "epoch": 1.62, "learning_rate": 1.3497342783460996e-06, "loss": 1.662, "step": 1240 }, { "epoch": 1.62, "learning_rate": 1.3477528806536039e-06, "loss": 1.6347, "step": 1242 }, { "epoch": 1.62, "learning_rate": 1.3457699286687885e-06, "loss": 1.5949, "step": 1244 }, { "epoch": 1.63, "learning_rate": 1.3437854312545191e-06, "loss": 1.6731, "step": 1246 }, { "epoch": 1.63, "learning_rate": 1.3417993972805687e-06, "loss": 1.6209, "step": 1248 }, { "epoch": 1.63, "learning_rate": 1.3398118356235782e-06, "loss": 1.6307, "step": 1250 }, { "epoch": 1.63, "learning_rate": 1.3378227551670154e-06, "loss": 1.6018, "step": 1252 }, { "epoch": 1.64, "learning_rate": 1.335832164801138e-06, "loss": 1.6766, "step": 1254 }, { "epoch": 1.64, "learning_rate": 1.3338400734229513e-06, "loss": 1.6397, "step": 1256 }, { "epoch": 1.64, "learning_rate": 1.33184648993617e-06, "loss": 1.6589, "step": 1258 }, { "epoch": 1.64, "learning_rate": 1.3298514232511769e-06, "loss": 1.6371, "step": 1260 }, { "epoch": 1.65, "learning_rate": 1.3278548822849851e-06, "loss": 1.5834, "step": 1262 }, { "epoch": 1.65, "learning_rate": 1.3258568759611963e-06, "loss": 1.6901, "step": 1264 }, { "epoch": 1.65, "learning_rate": 1.3238574132099625e-06, "loss": 1.6112, "step": 1266 }, { "epoch": 1.65, "learning_rate": 1.321856502967944e-06, "loss": 1.6388, "step": 1268 }, { "epoch": 1.66, "learning_rate": 1.3198541541782718e-06, "loss": 1.6094, "step": 1270 }, { "epoch": 1.66, "learning_rate": 1.3178503757905058e-06, "loss": 1.6612, "step": 1272 }, { "epoch": 1.66, "learning_rate": 1.315845176760596e-06, "loss": 1.6111, "step": 1274 }, { "epoch": 1.67, "learning_rate": 1.3138385660508418e-06, "loss": 1.6964, "step": 1276 }, { "epoch": 1.67, "learning_rate": 1.3118305526298526e-06, "loss": 1.6161, "step": 1278 }, { "epoch": 1.67, "learning_rate": 1.3098211454725062e-06, "loss": 1.6516, "step": 1280 }, { "epoch": 1.67, "learning_rate": 1.3078103535599108e-06, "loss": 1.6194, "step": 1282 }, { "epoch": 1.68, "learning_rate": 1.3057981858793633e-06, "loss": 1.6381, "step": 1284 }, { "epoch": 1.68, "learning_rate": 1.3037846514243095e-06, "loss": 1.6109, "step": 1286 }, { "epoch": 1.68, "learning_rate": 1.3017697591943054e-06, "loss": 1.6274, "step": 1288 }, { "epoch": 1.68, "learning_rate": 1.299753518194973e-06, "loss": 1.635, "step": 1290 }, { "epoch": 1.69, "learning_rate": 1.297735937437965e-06, "loss": 1.6735, "step": 1292 }, { "epoch": 1.69, "learning_rate": 1.2957170259409214e-06, "loss": 1.6424, "step": 1294 }, { "epoch": 1.69, "learning_rate": 1.2936967927274296e-06, "loss": 1.6002, "step": 1296 }, { "epoch": 1.69, "learning_rate": 1.2916752468269854e-06, "loss": 1.636, "step": 1298 }, { "epoch": 1.7, "learning_rate": 1.2896523972749502e-06, "loss": 1.5958, "step": 1300 }, { "epoch": 1.7, "learning_rate": 1.2876282531125138e-06, "loss": 1.5757, "step": 1302 }, { "epoch": 1.7, "learning_rate": 1.2856028233866508e-06, "loss": 1.6604, "step": 1304 }, { "epoch": 1.7, "learning_rate": 1.2835761171500832e-06, "loss": 1.6398, "step": 1306 }, { "epoch": 1.71, "learning_rate": 1.281548143461237e-06, "loss": 1.6934, "step": 1308 }, { "epoch": 1.71, "learning_rate": 1.279518911384204e-06, "loss": 1.6705, "step": 1310 }, { "epoch": 1.71, "learning_rate": 1.2774884299887e-06, "loss": 1.5953, "step": 1312 }, { "epoch": 1.71, "learning_rate": 1.2754567083500245e-06, "loss": 1.614, "step": 1314 }, { "epoch": 1.72, "learning_rate": 1.2734237555490208e-06, "loss": 1.6368, "step": 1316 }, { "epoch": 1.72, "learning_rate": 1.271389580672035e-06, "loss": 1.6018, "step": 1318 }, { "epoch": 1.72, "learning_rate": 1.2693541928108747e-06, "loss": 1.6416, "step": 1320 }, { "epoch": 1.73, "learning_rate": 1.2673176010627689e-06, "loss": 1.6012, "step": 1322 }, { "epoch": 1.73, "learning_rate": 1.2652798145303284e-06, "loss": 1.6251, "step": 1324 }, { "epoch": 1.73, "learning_rate": 1.2632408423215032e-06, "loss": 1.6255, "step": 1326 }, { "epoch": 1.73, "learning_rate": 1.261200693549543e-06, "loss": 1.6101, "step": 1328 }, { "epoch": 1.74, "learning_rate": 1.2591593773329567e-06, "loss": 1.6254, "step": 1330 }, { "epoch": 1.74, "learning_rate": 1.25711690279547e-06, "loss": 1.599, "step": 1332 }, { "epoch": 1.74, "learning_rate": 1.2550732790659866e-06, "loss": 1.6217, "step": 1334 }, { "epoch": 1.74, "learning_rate": 1.2530285152785465e-06, "loss": 1.6199, "step": 1336 }, { "epoch": 1.75, "learning_rate": 1.2509826205722851e-06, "loss": 1.6429, "step": 1338 }, { "epoch": 1.75, "learning_rate": 1.248935604091392e-06, "loss": 1.6596, "step": 1340 }, { "epoch": 1.75, "learning_rate": 1.2468874749850713e-06, "loss": 1.6165, "step": 1342 }, { "epoch": 1.75, "learning_rate": 1.2448382424074999e-06, "loss": 1.6128, "step": 1344 }, { "epoch": 1.76, "learning_rate": 1.2427879155177865e-06, "loss": 1.6414, "step": 1346 }, { "epoch": 1.76, "learning_rate": 1.240736503479931e-06, "loss": 1.6124, "step": 1348 }, { "epoch": 1.76, "learning_rate": 1.238684015462783e-06, "loss": 1.5882, "step": 1350 }, { "epoch": 1.76, "learning_rate": 1.236630460640002e-06, "loss": 1.6906, "step": 1352 }, { "epoch": 1.77, "learning_rate": 1.2345758481900148e-06, "loss": 1.5847, "step": 1354 }, { "epoch": 1.77, "learning_rate": 1.2325201872959759e-06, "loss": 1.6506, "step": 1356 }, { "epoch": 1.77, "learning_rate": 1.2304634871457254e-06, "loss": 1.6201, "step": 1358 }, { "epoch": 1.77, "learning_rate": 1.228405756931749e-06, "loss": 1.6168, "step": 1360 }, { "epoch": 1.78, "learning_rate": 1.2263470058511353e-06, "loss": 1.6581, "step": 1362 }, { "epoch": 1.78, "learning_rate": 1.2242872431055372e-06, "loss": 1.6472, "step": 1364 }, { "epoch": 1.78, "learning_rate": 1.2222264779011274e-06, "loss": 1.596, "step": 1366 }, { "epoch": 1.79, "learning_rate": 1.2201647194485607e-06, "loss": 1.6063, "step": 1368 }, { "epoch": 1.79, "learning_rate": 1.2181019769629304e-06, "loss": 1.6184, "step": 1370 }, { "epoch": 1.79, "learning_rate": 1.2160382596637285e-06, "loss": 1.6643, "step": 1372 }, { "epoch": 1.79, "learning_rate": 1.213973576774803e-06, "loss": 1.6594, "step": 1374 }, { "epoch": 1.8, "learning_rate": 1.2119079375243192e-06, "loss": 1.5792, "step": 1376 }, { "epoch": 1.8, "learning_rate": 1.2098413511447158e-06, "loss": 1.6062, "step": 1378 }, { "epoch": 1.8, "learning_rate": 1.2077738268726644e-06, "loss": 1.6273, "step": 1380 }, { "epoch": 1.8, "learning_rate": 1.2057053739490295e-06, "loss": 1.6365, "step": 1382 }, { "epoch": 1.81, "learning_rate": 1.2036360016188254e-06, "loss": 1.6119, "step": 1384 }, { "epoch": 1.81, "learning_rate": 1.2015657191311767e-06, "loss": 1.6355, "step": 1386 }, { "epoch": 1.81, "learning_rate": 1.1994945357392752e-06, "loss": 1.6216, "step": 1388 }, { "epoch": 1.81, "learning_rate": 1.1974224607003392e-06, "loss": 1.6438, "step": 1390 }, { "epoch": 1.82, "learning_rate": 1.1953495032755726e-06, "loss": 1.6323, "step": 1392 }, { "epoch": 1.82, "learning_rate": 1.1932756727301235e-06, "loss": 1.6625, "step": 1394 }, { "epoch": 1.82, "learning_rate": 1.1912009783330416e-06, "loss": 1.6583, "step": 1396 }, { "epoch": 1.82, "learning_rate": 1.1891254293572387e-06, "loss": 1.6348, "step": 1398 }, { "epoch": 1.83, "learning_rate": 1.1870490350794442e-06, "loss": 1.6305, "step": 1400 }, { "epoch": 1.83, "learning_rate": 1.1849718047801685e-06, "loss": 1.636, "step": 1402 }, { "epoch": 1.83, "learning_rate": 1.182893747743656e-06, "loss": 1.6382, "step": 1404 }, { "epoch": 1.83, "learning_rate": 1.180814873257848e-06, "loss": 1.6281, "step": 1406 }, { "epoch": 1.84, "learning_rate": 1.1787351906143382e-06, "loss": 1.692, "step": 1408 }, { "epoch": 1.84, "learning_rate": 1.1766547091083328e-06, "loss": 1.6823, "step": 1410 }, { "epoch": 1.84, "learning_rate": 1.174573438038609e-06, "loss": 1.6749, "step": 1412 }, { "epoch": 1.85, "learning_rate": 1.1724913867074723e-06, "loss": 1.6037, "step": 1414 }, { "epoch": 1.85, "learning_rate": 1.1704085644207162e-06, "loss": 1.5969, "step": 1416 }, { "epoch": 1.85, "learning_rate": 1.1683249804875794e-06, "loss": 1.6382, "step": 1418 }, { "epoch": 1.85, "learning_rate": 1.1662406442207052e-06, "loss": 1.6633, "step": 1420 }, { "epoch": 1.86, "learning_rate": 1.1641555649360997e-06, "loss": 1.6059, "step": 1422 }, { "epoch": 1.86, "learning_rate": 1.1620697519530892e-06, "loss": 1.6296, "step": 1424 }, { "epoch": 1.86, "learning_rate": 1.15998321459428e-06, "loss": 1.6399, "step": 1426 }, { "epoch": 1.86, "learning_rate": 1.1578959621855158e-06, "loss": 1.6216, "step": 1428 }, { "epoch": 1.87, "learning_rate": 1.1558080040558358e-06, "loss": 1.5693, "step": 1430 }, { "epoch": 1.87, "learning_rate": 1.153719349537434e-06, "loss": 1.6424, "step": 1432 }, { "epoch": 1.87, "learning_rate": 1.1516300079656167e-06, "loss": 1.6442, "step": 1434 }, { "epoch": 1.87, "learning_rate": 1.1495399886787618e-06, "loss": 1.6205, "step": 1436 }, { "epoch": 1.88, "learning_rate": 1.1474493010182743e-06, "loss": 1.6661, "step": 1438 }, { "epoch": 1.88, "learning_rate": 1.1453579543285489e-06, "loss": 1.5974, "step": 1440 }, { "epoch": 1.88, "learning_rate": 1.1432659579569232e-06, "loss": 1.6562, "step": 1442 }, { "epoch": 1.88, "learning_rate": 1.1411733212536416e-06, "loss": 1.6273, "step": 1444 }, { "epoch": 1.89, "learning_rate": 1.1390800535718088e-06, "loss": 1.6656, "step": 1446 }, { "epoch": 1.89, "learning_rate": 1.1369861642673489e-06, "loss": 1.5945, "step": 1448 }, { "epoch": 1.89, "learning_rate": 1.1348916626989658e-06, "loss": 1.6511, "step": 1450 }, { "epoch": 1.89, "learning_rate": 1.1327965582280994e-06, "loss": 1.6789, "step": 1452 }, { "epoch": 1.9, "learning_rate": 1.1307008602188843e-06, "loss": 1.6192, "step": 1454 }, { "epoch": 1.9, "learning_rate": 1.1286045780381084e-06, "loss": 1.6453, "step": 1456 }, { "epoch": 1.9, "learning_rate": 1.1265077210551693e-06, "loss": 1.6298, "step": 1458 }, { "epoch": 1.91, "learning_rate": 1.124410298642035e-06, "loss": 1.6565, "step": 1460 }, { "epoch": 1.91, "learning_rate": 1.1223123201732e-06, "loss": 1.6615, "step": 1462 }, { "epoch": 1.91, "learning_rate": 1.120213795025645e-06, "loss": 1.6985, "step": 1464 }, { "epoch": 1.91, "learning_rate": 1.1181147325787924e-06, "loss": 1.6486, "step": 1466 }, { "epoch": 1.92, "learning_rate": 1.1160151422144682e-06, "loss": 1.6277, "step": 1468 }, { "epoch": 1.92, "learning_rate": 1.113915033316856e-06, "loss": 1.6892, "step": 1470 }, { "epoch": 1.92, "learning_rate": 1.1118144152724583e-06, "loss": 1.578, "step": 1472 }, { "epoch": 1.92, "learning_rate": 1.1097132974700527e-06, "loss": 1.6344, "step": 1474 }, { "epoch": 1.93, "learning_rate": 1.1076116893006505e-06, "loss": 1.6645, "step": 1476 }, { "epoch": 1.93, "learning_rate": 1.1055096001574549e-06, "loss": 1.6057, "step": 1478 }, { "epoch": 1.93, "learning_rate": 1.1034070394358187e-06, "loss": 1.569, "step": 1480 }, { "epoch": 1.93, "learning_rate": 1.1013040165332023e-06, "loss": 1.6646, "step": 1482 }, { "epoch": 1.94, "learning_rate": 1.0992005408491324e-06, "loss": 1.6215, "step": 1484 }, { "epoch": 1.94, "learning_rate": 1.0970966217851586e-06, "loss": 1.6169, "step": 1486 }, { "epoch": 1.94, "learning_rate": 1.094992268744813e-06, "loss": 1.6557, "step": 1488 }, { "epoch": 1.94, "learning_rate": 1.092887491133567e-06, "loss": 1.654, "step": 1490 }, { "epoch": 1.95, "learning_rate": 1.0907822983587887e-06, "loss": 1.6597, "step": 1492 }, { "epoch": 1.95, "learning_rate": 1.0886766998297036e-06, "loss": 1.6098, "step": 1494 }, { "epoch": 1.95, "learning_rate": 1.08657070495735e-06, "loss": 1.6635, "step": 1496 }, { "epoch": 1.95, "learning_rate": 1.084464323154537e-06, "loss": 1.6338, "step": 1498 }, { "epoch": 1.96, "learning_rate": 1.0823575638358033e-06, "loss": 1.6412, "step": 1500 }, { "epoch": 1.96, "learning_rate": 1.0802504364173762e-06, "loss": 1.5538, "step": 1502 }, { "epoch": 1.96, "learning_rate": 1.0781429503171264e-06, "loss": 1.5697, "step": 1504 }, { "epoch": 1.97, "learning_rate": 1.0760351149545295e-06, "loss": 1.6507, "step": 1506 }, { "epoch": 1.97, "learning_rate": 1.0739269397506198e-06, "loss": 1.6406, "step": 1508 }, { "epoch": 1.97, "learning_rate": 1.0718184341279532e-06, "loss": 1.5975, "step": 1510 }, { "epoch": 1.97, "learning_rate": 1.0697096075105609e-06, "loss": 1.6288, "step": 1512 }, { "epoch": 1.98, "learning_rate": 1.0676004693239089e-06, "loss": 1.6503, "step": 1514 }, { "epoch": 1.98, "learning_rate": 1.0654910289948561e-06, "loss": 1.653, "step": 1516 }, { "epoch": 1.98, "learning_rate": 1.0633812959516115e-06, "loss": 1.6504, "step": 1518 }, { "epoch": 1.98, "learning_rate": 1.0612712796236926e-06, "loss": 1.6386, "step": 1520 }, { "epoch": 1.99, "learning_rate": 1.0591609894418833e-06, "loss": 1.6573, "step": 1522 }, { "epoch": 1.99, "learning_rate": 1.057050434838191e-06, "loss": 1.602, "step": 1524 }, { "epoch": 1.99, "learning_rate": 1.054939625245805e-06, "loss": 1.6156, "step": 1526 }, { "epoch": 1.99, "learning_rate": 1.052828570099055e-06, "loss": 1.6396, "step": 1528 }, { "epoch": 2.0, "learning_rate": 1.0507172788333669e-06, "loss": 1.6265, "step": 1530 }, { "epoch": 2.0, "learning_rate": 1.0486057608852235e-06, "loss": 1.5756, "step": 1532 }, { "epoch": 2.0, "learning_rate": 1.0464940256921194e-06, "loss": 1.6054, "step": 1534 }, { "epoch": 2.0, "learning_rate": 1.0443820826925207e-06, "loss": 1.6173, "step": 1536 }, { "epoch": 2.01, "learning_rate": 1.0422699413258227e-06, "loss": 1.6201, "step": 1538 }, { "epoch": 2.01, "learning_rate": 1.0401576110323068e-06, "loss": 1.583, "step": 1540 }, { "epoch": 2.01, "learning_rate": 1.0380451012530988e-06, "loss": 1.6072, "step": 1542 }, { "epoch": 2.02, "learning_rate": 1.0359324214301274e-06, "loss": 1.6032, "step": 1544 }, { "epoch": 2.02, "learning_rate": 1.0338195810060804e-06, "loss": 1.6334, "step": 1546 }, { "epoch": 2.02, "learning_rate": 1.0317065894243638e-06, "loss": 1.6142, "step": 1548 }, { "epoch": 2.02, "learning_rate": 1.029593456129059e-06, "loss": 1.6001, "step": 1550 }, { "epoch": 2.03, "learning_rate": 1.0274801905648817e-06, "loss": 1.6233, "step": 1552 }, { "epoch": 2.03, "learning_rate": 1.0253668021771377e-06, "loss": 1.6106, "step": 1554 }, { "epoch": 2.03, "learning_rate": 1.0232533004116822e-06, "loss": 1.6218, "step": 1556 }, { "epoch": 2.03, "learning_rate": 1.0211396947148766e-06, "loss": 1.6109, "step": 1558 }, { "epoch": 2.04, "learning_rate": 1.0190259945335477e-06, "loss": 1.5625, "step": 1560 }, { "epoch": 2.04, "learning_rate": 1.0169122093149447e-06, "loss": 1.5947, "step": 1562 }, { "epoch": 2.04, "learning_rate": 1.0147983485066961e-06, "loss": 1.611, "step": 1564 }, { "epoch": 2.04, "learning_rate": 1.012684421556768e-06, "loss": 1.6111, "step": 1566 }, { "epoch": 2.05, "learning_rate": 1.0105704379134233e-06, "loss": 1.5724, "step": 1568 }, { "epoch": 2.05, "learning_rate": 1.0084564070251775e-06, "loss": 1.5952, "step": 1570 }, { "epoch": 2.05, "learning_rate": 1.0063423383407574e-06, "loss": 1.6118, "step": 1572 }, { "epoch": 2.05, "learning_rate": 1.0042282413090585e-06, "loss": 1.5657, "step": 1574 }, { "epoch": 2.06, "learning_rate": 1.002114125379104e-06, "loss": 1.626, "step": 1576 }, { "epoch": 2.06, "learning_rate": 1e-06, "loss": 1.5821, "step": 1578 }, { "epoch": 2.06, "learning_rate": 9.978858746208963e-07, "loss": 1.5955, "step": 1580 }, { "epoch": 2.06, "learning_rate": 9.957717586909414e-07, "loss": 1.5962, "step": 1582 }, { "epoch": 2.07, "learning_rate": 9.936576616592427e-07, "loss": 1.5815, "step": 1584 }, { "epoch": 2.07, "learning_rate": 9.915435929748224e-07, "loss": 1.5756, "step": 1586 }, { "epoch": 2.07, "learning_rate": 9.894295620865766e-07, "loss": 1.6007, "step": 1588 }, { "epoch": 2.08, "learning_rate": 9.87315578443232e-07, "loss": 1.5676, "step": 1590 }, { "epoch": 2.08, "learning_rate": 9.85201651493304e-07, "loss": 1.6159, "step": 1592 }, { "epoch": 2.08, "learning_rate": 9.830877906850554e-07, "loss": 1.6068, "step": 1594 }, { "epoch": 2.08, "learning_rate": 9.80974005466452e-07, "loss": 1.5853, "step": 1596 }, { "epoch": 2.09, "learning_rate": 9.788603052851235e-07, "loss": 1.5878, "step": 1598 }, { "epoch": 2.09, "learning_rate": 9.767466995883182e-07, "loss": 1.6147, "step": 1600 }, { "epoch": 2.09, "learning_rate": 9.746331978228622e-07, "loss": 1.5736, "step": 1602 }, { "epoch": 2.09, "learning_rate": 9.725198094351183e-07, "loss": 1.6053, "step": 1604 }, { "epoch": 2.1, "learning_rate": 9.704065438709408e-07, "loss": 1.6418, "step": 1606 }, { "epoch": 2.1, "learning_rate": 9.68293410575636e-07, "loss": 1.5614, "step": 1608 }, { "epoch": 2.1, "learning_rate": 9.6618041899392e-07, "loss": 1.5847, "step": 1610 }, { "epoch": 2.1, "learning_rate": 9.640675785698725e-07, "loss": 1.5962, "step": 1612 }, { "epoch": 2.11, "learning_rate": 9.619548987469013e-07, "loss": 1.5906, "step": 1614 }, { "epoch": 2.11, "learning_rate": 9.598423889676931e-07, "loss": 1.6022, "step": 1616 }, { "epoch": 2.11, "learning_rate": 9.577300586741774e-07, "loss": 1.6196, "step": 1618 }, { "epoch": 2.11, "learning_rate": 9.556179173074796e-07, "loss": 1.6029, "step": 1620 }, { "epoch": 2.12, "learning_rate": 9.535059743078808e-07, "loss": 1.6369, "step": 1622 }, { "epoch": 2.12, "learning_rate": 9.513942391147766e-07, "loss": 1.5692, "step": 1624 }, { "epoch": 2.12, "learning_rate": 9.492827211666327e-07, "loss": 1.6079, "step": 1626 }, { "epoch": 2.12, "learning_rate": 9.471714299009451e-07, "loss": 1.6144, "step": 1628 }, { "epoch": 2.13, "learning_rate": 9.450603747541951e-07, "loss": 1.5912, "step": 1630 }, { "epoch": 2.13, "learning_rate": 9.42949565161809e-07, "loss": 1.6431, "step": 1632 }, { "epoch": 2.13, "learning_rate": 9.408390105581167e-07, "loss": 1.5686, "step": 1634 }, { "epoch": 2.14, "learning_rate": 9.38728720376307e-07, "loss": 1.6041, "step": 1636 }, { "epoch": 2.14, "learning_rate": 9.366187040483885e-07, "loss": 1.5625, "step": 1638 }, { "epoch": 2.14, "learning_rate": 9.34508971005144e-07, "loss": 1.6035, "step": 1640 }, { "epoch": 2.14, "learning_rate": 9.323995306760907e-07, "loss": 1.5617, "step": 1642 }, { "epoch": 2.15, "learning_rate": 9.30290392489439e-07, "loss": 1.569, "step": 1644 }, { "epoch": 2.15, "learning_rate": 9.281815658720465e-07, "loss": 1.5936, "step": 1646 }, { "epoch": 2.15, "learning_rate": 9.260730602493802e-07, "loss": 1.6216, "step": 1648 }, { "epoch": 2.15, "learning_rate": 9.239648850454709e-07, "loss": 1.6273, "step": 1650 }, { "epoch": 2.16, "learning_rate": 9.218570496828733e-07, "loss": 1.6103, "step": 1652 }, { "epoch": 2.16, "learning_rate": 9.197495635826238e-07, "loss": 1.6061, "step": 1654 }, { "epoch": 2.16, "learning_rate": 9.176424361641965e-07, "loss": 1.5982, "step": 1656 }, { "epoch": 2.16, "learning_rate": 9.155356768454631e-07, "loss": 1.6264, "step": 1658 }, { "epoch": 2.17, "learning_rate": 9.134292950426502e-07, "loss": 1.6516, "step": 1660 }, { "epoch": 2.17, "learning_rate": 9.113233001702963e-07, "loss": 1.6002, "step": 1662 }, { "epoch": 2.17, "learning_rate": 9.092177016412115e-07, "loss": 1.5742, "step": 1664 }, { "epoch": 2.17, "learning_rate": 9.071125088664333e-07, "loss": 1.6369, "step": 1666 }, { "epoch": 2.18, "learning_rate": 9.050077312551869e-07, "loss": 1.6052, "step": 1668 }, { "epoch": 2.18, "learning_rate": 9.029033782148415e-07, "loss": 1.625, "step": 1670 }, { "epoch": 2.18, "learning_rate": 9.007994591508675e-07, "loss": 1.5875, "step": 1672 }, { "epoch": 2.18, "learning_rate": 8.986959834667975e-07, "loss": 1.5897, "step": 1674 }, { "epoch": 2.19, "learning_rate": 8.965929605641811e-07, "loss": 1.5835, "step": 1676 }, { "epoch": 2.19, "learning_rate": 8.944903998425451e-07, "loss": 1.5926, "step": 1678 }, { "epoch": 2.19, "learning_rate": 8.923883106993498e-07, "loss": 1.572, "step": 1680 }, { "epoch": 2.2, "learning_rate": 8.902867025299474e-07, "loss": 1.6465, "step": 1682 }, { "epoch": 2.2, "learning_rate": 8.881855847275417e-07, "loss": 1.623, "step": 1684 }, { "epoch": 2.2, "learning_rate": 8.860849666831438e-07, "loss": 1.6077, "step": 1686 }, { "epoch": 2.2, "learning_rate": 8.839848577855318e-07, "loss": 1.5757, "step": 1688 }, { "epoch": 2.21, "learning_rate": 8.818852674212074e-07, "loss": 1.618, "step": 1690 }, { "epoch": 2.21, "learning_rate": 8.79786204974355e-07, "loss": 1.6239, "step": 1692 }, { "epoch": 2.21, "learning_rate": 8.776876798267999e-07, "loss": 1.5725, "step": 1694 }, { "epoch": 2.21, "learning_rate": 8.75589701357965e-07, "loss": 1.6088, "step": 1696 }, { "epoch": 2.22, "learning_rate": 8.734922789448309e-07, "loss": 1.6064, "step": 1698 }, { "epoch": 2.22, "learning_rate": 8.713954219618918e-07, "loss": 1.5796, "step": 1700 }, { "epoch": 2.22, "learning_rate": 8.692991397811155e-07, "loss": 1.6071, "step": 1702 }, { "epoch": 2.22, "learning_rate": 8.672034417719007e-07, "loss": 1.6032, "step": 1704 }, { "epoch": 2.23, "learning_rate": 8.651083373010344e-07, "loss": 1.6373, "step": 1706 }, { "epoch": 2.23, "learning_rate": 8.63013835732651e-07, "loss": 1.5926, "step": 1708 }, { "epoch": 2.23, "learning_rate": 8.609199464281916e-07, "loss": 1.5944, "step": 1710 }, { "epoch": 2.23, "learning_rate": 8.58826678746358e-07, "loss": 1.6278, "step": 1712 }, { "epoch": 2.24, "learning_rate": 8.567340420430766e-07, "loss": 1.5819, "step": 1714 }, { "epoch": 2.24, "learning_rate": 8.546420456714516e-07, "loss": 1.5929, "step": 1716 }, { "epoch": 2.24, "learning_rate": 8.525506989817256e-07, "loss": 1.6337, "step": 1718 }, { "epoch": 2.24, "learning_rate": 8.504600113212386e-07, "loss": 1.6449, "step": 1720 }, { "epoch": 2.25, "learning_rate": 8.48369992034383e-07, "loss": 1.5987, "step": 1722 }, { "epoch": 2.25, "learning_rate": 8.462806504625659e-07, "loss": 1.6369, "step": 1724 }, { "epoch": 2.25, "learning_rate": 8.441919959441644e-07, "loss": 1.6061, "step": 1726 }, { "epoch": 2.26, "learning_rate": 8.421040378144843e-07, "loss": 1.646, "step": 1728 }, { "epoch": 2.26, "learning_rate": 8.400167854057202e-07, "loss": 1.6509, "step": 1730 }, { "epoch": 2.26, "learning_rate": 8.379302480469107e-07, "loss": 1.6275, "step": 1732 }, { "epoch": 2.26, "learning_rate": 8.358444350639002e-07, "loss": 1.6423, "step": 1734 }, { "epoch": 2.27, "learning_rate": 8.337593557792949e-07, "loss": 1.6447, "step": 1736 }, { "epoch": 2.27, "learning_rate": 8.316750195124206e-07, "loss": 1.5823, "step": 1738 }, { "epoch": 2.27, "learning_rate": 8.29591435579284e-07, "loss": 1.616, "step": 1740 }, { "epoch": 2.27, "learning_rate": 8.275086132925275e-07, "loss": 1.646, "step": 1742 }, { "epoch": 2.28, "learning_rate": 8.25426561961391e-07, "loss": 1.6423, "step": 1744 }, { "epoch": 2.28, "learning_rate": 8.233452908916674e-07, "loss": 1.5761, "step": 1746 }, { "epoch": 2.28, "learning_rate": 8.21264809385662e-07, "loss": 1.6237, "step": 1748 }, { "epoch": 2.28, "learning_rate": 8.191851267421521e-07, "loss": 1.5361, "step": 1750 }, { "epoch": 2.29, "learning_rate": 8.171062522563436e-07, "loss": 1.6289, "step": 1752 }, { "epoch": 2.29, "learning_rate": 8.150281952198315e-07, "loss": 1.6148, "step": 1754 }, { "epoch": 2.29, "learning_rate": 8.129509649205557e-07, "loss": 1.5721, "step": 1756 }, { "epoch": 2.29, "learning_rate": 8.108745706427615e-07, "loss": 1.5964, "step": 1758 }, { "epoch": 2.3, "learning_rate": 8.087990216669584e-07, "loss": 1.6142, "step": 1760 }, { "epoch": 2.3, "learning_rate": 8.067243272698765e-07, "loss": 1.6425, "step": 1762 }, { "epoch": 2.3, "learning_rate": 8.046504967244275e-07, "loss": 1.5647, "step": 1764 }, { "epoch": 2.3, "learning_rate": 8.02577539299661e-07, "loss": 1.6374, "step": 1766 }, { "epoch": 2.31, "learning_rate": 8.005054642607247e-07, "loss": 1.6104, "step": 1768 }, { "epoch": 2.31, "learning_rate": 7.984342808688234e-07, "loss": 1.597, "step": 1770 }, { "epoch": 2.31, "learning_rate": 7.963639983811743e-07, "loss": 1.6116, "step": 1772 }, { "epoch": 2.32, "learning_rate": 7.942946260509704e-07, "loss": 1.6451, "step": 1774 }, { "epoch": 2.32, "learning_rate": 7.922261731273357e-07, "loss": 1.5678, "step": 1776 }, { "epoch": 2.32, "learning_rate": 7.901586488552843e-07, "loss": 1.6032, "step": 1778 }, { "epoch": 2.32, "learning_rate": 7.880920624756808e-07, "loss": 1.5844, "step": 1780 }, { "epoch": 2.33, "learning_rate": 7.860264232251967e-07, "loss": 1.6054, "step": 1782 }, { "epoch": 2.33, "learning_rate": 7.839617403362716e-07, "loss": 1.6125, "step": 1784 }, { "epoch": 2.33, "learning_rate": 7.818980230370699e-07, "loss": 1.6237, "step": 1786 }, { "epoch": 2.33, "learning_rate": 7.798352805514394e-07, "loss": 1.625, "step": 1788 }, { "epoch": 2.34, "learning_rate": 7.777735220988728e-07, "loss": 1.641, "step": 1790 }, { "epoch": 2.34, "learning_rate": 7.757127568944628e-07, "loss": 1.6097, "step": 1792 }, { "epoch": 2.34, "learning_rate": 7.736529941488646e-07, "loss": 1.6077, "step": 1794 }, { "epoch": 2.34, "learning_rate": 7.715942430682514e-07, "loss": 1.578, "step": 1796 }, { "epoch": 2.35, "learning_rate": 7.695365128542745e-07, "loss": 1.5961, "step": 1798 }, { "epoch": 2.35, "learning_rate": 7.674798127040241e-07, "loss": 1.6232, "step": 1800 }, { "epoch": 2.35, "learning_rate": 7.654241518099851e-07, "loss": 1.6184, "step": 1802 }, { "epoch": 2.35, "learning_rate": 7.633695393599981e-07, "loss": 1.6431, "step": 1804 }, { "epoch": 2.36, "learning_rate": 7.61315984537217e-07, "loss": 1.6183, "step": 1806 }, { "epoch": 2.36, "learning_rate": 7.592634965200689e-07, "loss": 1.6466, "step": 1808 }, { "epoch": 2.36, "learning_rate": 7.572120844822136e-07, "loss": 1.6053, "step": 1810 }, { "epoch": 2.36, "learning_rate": 7.551617575925e-07, "loss": 1.6241, "step": 1812 }, { "epoch": 2.37, "learning_rate": 7.531125250149288e-07, "loss": 1.5994, "step": 1814 }, { "epoch": 2.37, "learning_rate": 7.510643959086082e-07, "loss": 1.6053, "step": 1816 }, { "epoch": 2.37, "learning_rate": 7.49017379427715e-07, "loss": 1.6266, "step": 1818 }, { "epoch": 2.38, "learning_rate": 7.469714847214537e-07, "loss": 1.6137, "step": 1820 }, { "epoch": 2.38, "learning_rate": 7.449267209340133e-07, "loss": 1.6115, "step": 1822 }, { "epoch": 2.38, "learning_rate": 7.428830972045299e-07, "loss": 1.5982, "step": 1824 }, { "epoch": 2.38, "learning_rate": 7.408406226670435e-07, "loss": 1.6685, "step": 1826 }, { "epoch": 2.39, "learning_rate": 7.387993064504567e-07, "loss": 1.6131, "step": 1828 }, { "epoch": 2.39, "learning_rate": 7.36759157678497e-07, "loss": 1.594, "step": 1830 }, { "epoch": 2.39, "learning_rate": 7.347201854696716e-07, "loss": 1.6526, "step": 1832 }, { "epoch": 2.39, "learning_rate": 7.32682398937231e-07, "loss": 1.6181, "step": 1834 }, { "epoch": 2.4, "learning_rate": 7.306458071891257e-07, "loss": 1.5931, "step": 1836 }, { "epoch": 2.4, "learning_rate": 7.286104193279651e-07, "loss": 1.6306, "step": 1838 }, { "epoch": 2.4, "learning_rate": 7.26576244450979e-07, "loss": 1.6434, "step": 1840 }, { "epoch": 2.4, "learning_rate": 7.245432916499754e-07, "loss": 1.6254, "step": 1842 }, { "epoch": 2.41, "learning_rate": 7.225115700113002e-07, "loss": 1.583, "step": 1844 }, { "epoch": 2.41, "learning_rate": 7.204810886157963e-07, "loss": 1.5963, "step": 1846 }, { "epoch": 2.41, "learning_rate": 7.18451856538763e-07, "loss": 1.6276, "step": 1848 }, { "epoch": 2.41, "learning_rate": 7.164238828499167e-07, "loss": 1.5814, "step": 1850 }, { "epoch": 2.42, "learning_rate": 7.143971766133489e-07, "loss": 1.6451, "step": 1852 }, { "epoch": 2.42, "learning_rate": 7.123717468874863e-07, "loss": 1.5674, "step": 1854 }, { "epoch": 2.42, "learning_rate": 7.103476027250499e-07, "loss": 1.6422, "step": 1856 }, { "epoch": 2.42, "learning_rate": 7.083247531730147e-07, "loss": 1.6395, "step": 1858 }, { "epoch": 2.43, "learning_rate": 7.063032072725703e-07, "loss": 1.6274, "step": 1860 }, { "epoch": 2.43, "learning_rate": 7.042829740590786e-07, "loss": 1.5986, "step": 1862 }, { "epoch": 2.43, "learning_rate": 7.022640625620351e-07, "loss": 1.5921, "step": 1864 }, { "epoch": 2.44, "learning_rate": 7.002464818050271e-07, "loss": 1.6184, "step": 1866 }, { "epoch": 2.44, "learning_rate": 6.982302408056947e-07, "loss": 1.5867, "step": 1868 }, { "epoch": 2.44, "learning_rate": 6.962153485756903e-07, "loss": 1.5978, "step": 1870 }, { "epoch": 2.44, "learning_rate": 6.942018141206367e-07, "loss": 1.6097, "step": 1872 }, { "epoch": 2.45, "learning_rate": 6.921896464400891e-07, "loss": 1.6044, "step": 1874 }, { "epoch": 2.45, "learning_rate": 6.90178854527494e-07, "loss": 1.5581, "step": 1876 }, { "epoch": 2.45, "learning_rate": 6.881694473701476e-07, "loss": 1.5993, "step": 1878 }, { "epoch": 2.45, "learning_rate": 6.861614339491584e-07, "loss": 1.6154, "step": 1880 }, { "epoch": 2.46, "learning_rate": 6.84154823239404e-07, "loss": 1.629, "step": 1882 }, { "epoch": 2.46, "learning_rate": 6.821496242094942e-07, "loss": 1.6388, "step": 1884 }, { "epoch": 2.46, "learning_rate": 6.801458458217285e-07, "loss": 1.6005, "step": 1886 }, { "epoch": 2.46, "learning_rate": 6.781434970320561e-07, "loss": 1.5789, "step": 1888 }, { "epoch": 2.47, "learning_rate": 6.761425867900377e-07, "loss": 1.6105, "step": 1890 }, { "epoch": 2.47, "learning_rate": 6.741431240388034e-07, "loss": 1.6177, "step": 1892 }, { "epoch": 2.47, "learning_rate": 6.72145117715015e-07, "loss": 1.6137, "step": 1894 }, { "epoch": 2.47, "learning_rate": 6.701485767488234e-07, "loss": 1.5801, "step": 1896 }, { "epoch": 2.48, "learning_rate": 6.681535100638302e-07, "loss": 1.6135, "step": 1898 }, { "epoch": 2.48, "learning_rate": 6.661599265770488e-07, "loss": 1.6488, "step": 1900 }, { "epoch": 2.48, "learning_rate": 6.641678351988618e-07, "loss": 1.6106, "step": 1902 }, { "epoch": 2.48, "learning_rate": 6.621772448329845e-07, "loss": 1.6269, "step": 1904 }, { "epoch": 2.49, "learning_rate": 6.60188164376422e-07, "loss": 1.5986, "step": 1906 }, { "epoch": 2.49, "learning_rate": 6.582006027194309e-07, "loss": 1.6331, "step": 1908 }, { "epoch": 2.49, "learning_rate": 6.562145687454808e-07, "loss": 1.6468, "step": 1910 }, { "epoch": 2.5, "learning_rate": 6.542300713312112e-07, "loss": 1.5422, "step": 1912 }, { "epoch": 2.5, "learning_rate": 6.522471193463964e-07, "loss": 1.5916, "step": 1914 }, { "epoch": 2.5, "learning_rate": 6.502657216539006e-07, "loss": 1.6554, "step": 1916 }, { "epoch": 2.5, "learning_rate": 6.482858871096432e-07, "loss": 1.6019, "step": 1918 }, { "epoch": 2.51, "learning_rate": 6.463076245625564e-07, "loss": 1.6108, "step": 1920 }, { "epoch": 2.51, "learning_rate": 6.443309428545456e-07, "loss": 1.626, "step": 1922 }, { "epoch": 2.51, "learning_rate": 6.423558508204516e-07, "loss": 1.6034, "step": 1924 }, { "epoch": 2.51, "learning_rate": 6.4038235728801e-07, "loss": 1.6722, "step": 1926 }, { "epoch": 2.52, "learning_rate": 6.384104710778109e-07, "loss": 1.5998, "step": 1928 }, { "epoch": 2.52, "learning_rate": 6.364402010032616e-07, "loss": 1.61, "step": 1930 }, { "epoch": 2.52, "learning_rate": 6.344715558705449e-07, "loss": 1.5932, "step": 1932 }, { "epoch": 2.52, "learning_rate": 6.32504544478582e-07, "loss": 1.6136, "step": 1934 }, { "epoch": 2.53, "learning_rate": 6.305391756189916e-07, "loss": 1.542, "step": 1936 }, { "epoch": 2.53, "learning_rate": 6.285754580760502e-07, "loss": 1.6062, "step": 1938 }, { "epoch": 2.53, "learning_rate": 6.266134006266554e-07, "loss": 1.5812, "step": 1940 }, { "epoch": 2.53, "learning_rate": 6.246530120402832e-07, "loss": 1.5957, "step": 1942 }, { "epoch": 2.54, "learning_rate": 6.226943010789522e-07, "loss": 1.5808, "step": 1944 }, { "epoch": 2.54, "learning_rate": 6.207372764971815e-07, "loss": 1.5974, "step": 1946 }, { "epoch": 2.54, "learning_rate": 6.187819470419531e-07, "loss": 1.5691, "step": 1948 }, { "epoch": 2.54, "learning_rate": 6.168283214526732e-07, "loss": 1.6269, "step": 1950 }, { "epoch": 2.55, "learning_rate": 6.148764084611325e-07, "loss": 1.6148, "step": 1952 }, { "epoch": 2.55, "learning_rate": 6.129262167914659e-07, "loss": 1.6568, "step": 1954 }, { "epoch": 2.55, "learning_rate": 6.109777551601165e-07, "loss": 1.6273, "step": 1956 }, { "epoch": 2.56, "learning_rate": 6.090310322757936e-07, "loss": 1.6499, "step": 1958 }, { "epoch": 2.56, "learning_rate": 6.070860568394367e-07, "loss": 1.5942, "step": 1960 }, { "epoch": 2.56, "learning_rate": 6.051428375441734e-07, "loss": 1.6227, "step": 1962 }, { "epoch": 2.56, "learning_rate": 6.032013830752822e-07, "loss": 1.6561, "step": 1964 }, { "epoch": 2.57, "learning_rate": 6.012617021101551e-07, "loss": 1.5966, "step": 1966 }, { "epoch": 2.57, "learning_rate": 5.993238033182562e-07, "loss": 1.6152, "step": 1968 }, { "epoch": 2.57, "learning_rate": 5.973876953610848e-07, "loss": 1.6279, "step": 1970 }, { "epoch": 2.57, "learning_rate": 5.954533868921352e-07, "loss": 1.5849, "step": 1972 }, { "epoch": 2.58, "learning_rate": 5.935208865568591e-07, "loss": 1.5829, "step": 1974 }, { "epoch": 2.58, "learning_rate": 5.915902029926279e-07, "loss": 1.5645, "step": 1976 }, { "epoch": 2.58, "learning_rate": 5.896613448286905e-07, "loss": 1.6053, "step": 1978 }, { "epoch": 2.58, "learning_rate": 5.877343206861396e-07, "loss": 1.5773, "step": 1980 }, { "epoch": 2.59, "learning_rate": 5.85809139177869e-07, "loss": 1.6491, "step": 1982 }, { "epoch": 2.59, "learning_rate": 5.838858089085372e-07, "loss": 1.5725, "step": 1984 }, { "epoch": 2.59, "learning_rate": 5.819643384745292e-07, "loss": 1.6302, "step": 1986 }, { "epoch": 2.59, "learning_rate": 5.800447364639166e-07, "loss": 1.6063, "step": 1988 }, { "epoch": 2.6, "learning_rate": 5.781270114564203e-07, "loss": 1.6031, "step": 1990 }, { "epoch": 2.6, "learning_rate": 5.76211172023372e-07, "loss": 1.6047, "step": 1992 }, { "epoch": 2.6, "learning_rate": 5.742972267276756e-07, "loss": 1.6326, "step": 1994 }, { "epoch": 2.6, "learning_rate": 5.72385184123769e-07, "loss": 1.5856, "step": 1996 }, { "epoch": 2.61, "learning_rate": 5.704750527575863e-07, "loss": 1.6225, "step": 1998 }, { "epoch": 2.61, "learning_rate": 5.68566841166519e-07, "loss": 1.5513, "step": 2000 }, { "epoch": 2.61, "learning_rate": 5.666605578793781e-07, "loss": 1.6497, "step": 2002 }, { "epoch": 2.62, "learning_rate": 5.657081420126779e-07, "loss": 1.6067, "step": 2004 }, { "epoch": 2.62, "learning_rate": 5.647562114163562e-07, "loss": 1.622, "step": 2006 }, { "epoch": 2.62, "learning_rate": 5.62853810288989e-07, "loss": 1.5964, "step": 2008 }, { "epoch": 2.62, "learning_rate": 5.609533630001176e-07, "loss": 1.5819, "step": 2010 }, { "epoch": 2.63, "learning_rate": 5.590548780438501e-07, "loss": 1.5751, "step": 2012 }, { "epoch": 2.63, "learning_rate": 5.571583639055243e-07, "loss": 1.6159, "step": 2014 }, { "epoch": 2.63, "learning_rate": 5.55263829061669e-07, "loss": 1.5722, "step": 2016 }, { "epoch": 2.63, "learning_rate": 5.533712819799666e-07, "loss": 1.5887, "step": 2018 }, { "epoch": 2.64, "learning_rate": 5.514807311192154e-07, "loss": 1.6163, "step": 2020 }, { "epoch": 2.64, "learning_rate": 5.495921849292911e-07, "loss": 1.5551, "step": 2022 }, { "epoch": 2.64, "learning_rate": 5.477056518511095e-07, "loss": 1.6046, "step": 2024 }, { "epoch": 2.64, "learning_rate": 5.458211403165892e-07, "loss": 1.5995, "step": 2026 }, { "epoch": 2.65, "learning_rate": 5.43938658748613e-07, "loss": 1.5944, "step": 2028 }, { "epoch": 2.65, "learning_rate": 5.420582155609906e-07, "loss": 1.574, "step": 2030 }, { "epoch": 2.65, "learning_rate": 5.401798191584221e-07, "loss": 1.6547, "step": 2032 }, { "epoch": 2.65, "learning_rate": 5.383034779364579e-07, "loss": 1.623, "step": 2034 }, { "epoch": 2.66, "learning_rate": 5.364292002814637e-07, "loss": 1.6148, "step": 2036 }, { "epoch": 2.66, "learning_rate": 5.345569945705817e-07, "loss": 1.5472, "step": 2038 }, { "epoch": 2.66, "learning_rate": 5.326868691716934e-07, "loss": 1.6321, "step": 2040 }, { "epoch": 2.66, "learning_rate": 5.308188324433834e-07, "loss": 1.5996, "step": 2042 }, { "epoch": 2.67, "learning_rate": 5.289528927348991e-07, "loss": 1.6188, "step": 2044 }, { "epoch": 2.67, "learning_rate": 5.270890583861159e-07, "loss": 1.5869, "step": 2046 }, { "epoch": 2.67, "learning_rate": 5.252273377275003e-07, "loss": 1.6318, "step": 2048 }, { "epoch": 2.68, "learning_rate": 5.233677390800697e-07, "loss": 1.5828, "step": 2050 }, { "epoch": 2.68, "learning_rate": 5.215102707553594e-07, "loss": 1.6117, "step": 2052 }, { "epoch": 2.68, "learning_rate": 5.196549410553805e-07, "loss": 1.6299, "step": 2054 }, { "epoch": 2.68, "learning_rate": 5.178017582725871e-07, "loss": 1.6214, "step": 2056 }, { "epoch": 2.69, "learning_rate": 5.159507306898385e-07, "loss": 1.6021, "step": 2058 }, { "epoch": 2.69, "learning_rate": 5.141018665803583e-07, "loss": 1.5559, "step": 2060 }, { "epoch": 2.69, "learning_rate": 5.122551742077035e-07, "loss": 1.6264, "step": 2062 }, { "epoch": 2.69, "learning_rate": 5.104106618257229e-07, "loss": 1.5952, "step": 2064 }, { "epoch": 2.7, "learning_rate": 5.085683376785212e-07, "loss": 1.6336, "step": 2066 }, { "epoch": 2.7, "learning_rate": 5.067282100004247e-07, "loss": 1.6012, "step": 2068 }, { "epoch": 2.7, "learning_rate": 5.048902870159398e-07, "loss": 1.6388, "step": 2070 }, { "epoch": 2.7, "learning_rate": 5.030545769397217e-07, "loss": 1.5987, "step": 2072 }, { "epoch": 2.71, "learning_rate": 5.012210879765338e-07, "loss": 1.6562, "step": 2074 }, { "epoch": 2.71, "learning_rate": 4.993898283212108e-07, "loss": 1.5812, "step": 2076 }, { "epoch": 2.71, "learning_rate": 4.975608061586257e-07, "loss": 1.6145, "step": 2078 }, { "epoch": 2.71, "learning_rate": 4.9573402966365e-07, "loss": 1.6089, "step": 2080 }, { "epoch": 2.72, "learning_rate": 4.939095070011176e-07, "loss": 1.6022, "step": 2082 }, { "epoch": 2.72, "learning_rate": 4.920872463257899e-07, "loss": 1.6055, "step": 2084 }, { "epoch": 2.72, "learning_rate": 4.902672557823167e-07, "loss": 1.6268, "step": 2086 }, { "epoch": 2.72, "learning_rate": 4.884495435052033e-07, "loss": 1.5951, "step": 2088 }, { "epoch": 2.73, "learning_rate": 4.866341176187708e-07, "loss": 1.6209, "step": 2090 }, { "epoch": 2.73, "learning_rate": 4.84820986237122e-07, "loss": 1.5918, "step": 2092 }, { "epoch": 2.73, "learning_rate": 4.830101574641038e-07, "loss": 1.6082, "step": 2094 }, { "epoch": 2.74, "learning_rate": 4.812016393932717e-07, "loss": 1.6074, "step": 2096 }, { "epoch": 2.74, "learning_rate": 4.793954401078538e-07, "loss": 1.6342, "step": 2098 }, { "epoch": 2.74, "learning_rate": 4.775915676807135e-07, "loss": 1.6049, "step": 2100 }, { "epoch": 2.74, "learning_rate": 4.757900301743152e-07, "loss": 1.5615, "step": 2102 }, { "epoch": 2.75, "learning_rate": 4.7399083564068655e-07, "loss": 1.5625, "step": 2104 }, { "epoch": 2.75, "learning_rate": 4.7219399212138366e-07, "loss": 1.5922, "step": 2106 }, { "epoch": 2.75, "learning_rate": 4.703995076474544e-07, "loss": 1.6264, "step": 2108 }, { "epoch": 2.75, "learning_rate": 4.686073902394031e-07, "loss": 1.5971, "step": 2110 }, { "epoch": 2.76, "learning_rate": 4.6681764790715416e-07, "loss": 1.5968, "step": 2112 }, { "epoch": 2.76, "learning_rate": 4.650302886500167e-07, "loss": 1.6098, "step": 2114 }, { "epoch": 2.76, "learning_rate": 4.632453204566485e-07, "loss": 1.606, "step": 2116 }, { "epoch": 2.76, "learning_rate": 4.6146275130502046e-07, "loss": 1.5781, "step": 2118 }, { "epoch": 2.77, "learning_rate": 4.596825891623808e-07, "loss": 1.5659, "step": 2120 }, { "epoch": 2.77, "learning_rate": 4.579048419852196e-07, "loss": 1.6135, "step": 2122 }, { "epoch": 2.77, "learning_rate": 4.5612951771923335e-07, "loss": 1.5918, "step": 2124 }, { "epoch": 2.77, "learning_rate": 4.543566242992889e-07, "loss": 1.6348, "step": 2126 }, { "epoch": 2.78, "learning_rate": 4.525861696493886e-07, "loss": 1.5651, "step": 2128 }, { "epoch": 2.78, "learning_rate": 4.508181616826342e-07, "loss": 1.5775, "step": 2130 }, { "epoch": 2.78, "learning_rate": 4.4905260830119343e-07, "loss": 1.5981, "step": 2132 }, { "epoch": 2.78, "learning_rate": 4.47289517396261e-07, "loss": 1.5891, "step": 2134 }, { "epoch": 2.79, "learning_rate": 4.4552889684802695e-07, "loss": 1.5942, "step": 2136 }, { "epoch": 2.79, "learning_rate": 4.437707545256395e-07, "loss": 1.5945, "step": 2138 }, { "epoch": 2.79, "learning_rate": 4.420150982871702e-07, "loss": 1.598, "step": 2140 }, { "epoch": 2.8, "learning_rate": 4.402619359795803e-07, "loss": 1.5905, "step": 2142 }, { "epoch": 2.8, "learning_rate": 4.3851127543868206e-07, "loss": 1.591, "step": 2144 }, { "epoch": 2.8, "learning_rate": 4.367631244891075e-07, "loss": 1.5987, "step": 2146 }, { "epoch": 2.8, "learning_rate": 4.350174909442724e-07, "loss": 1.5921, "step": 2148 }, { "epoch": 2.81, "learning_rate": 4.332743826063392e-07, "loss": 1.6046, "step": 2150 }, { "epoch": 2.81, "learning_rate": 4.315338072661859e-07, "loss": 1.6006, "step": 2152 }, { "epoch": 2.81, "learning_rate": 4.2979577270336717e-07, "loss": 1.6061, "step": 2154 }, { "epoch": 2.81, "learning_rate": 4.280602866860827e-07, "loss": 1.6212, "step": 2156 }, { "epoch": 2.82, "learning_rate": 4.263273569711422e-07, "loss": 1.6164, "step": 2158 }, { "epoch": 2.82, "learning_rate": 4.2459699130392757e-07, "loss": 1.6501, "step": 2160 }, { "epoch": 2.82, "learning_rate": 4.2286919741836305e-07, "loss": 1.5785, "step": 2162 }, { "epoch": 2.82, "learning_rate": 4.2114398303687703e-07, "loss": 1.6141, "step": 2164 }, { "epoch": 2.83, "learning_rate": 4.19421355870368e-07, "loss": 1.5799, "step": 2166 }, { "epoch": 2.83, "learning_rate": 4.1770132361817303e-07, "loss": 1.5517, "step": 2168 }, { "epoch": 2.83, "learning_rate": 4.1598389396802825e-07, "loss": 1.5986, "step": 2170 }, { "epoch": 2.83, "learning_rate": 4.142690745960401e-07, "loss": 1.6096, "step": 2172 }, { "epoch": 2.84, "learning_rate": 4.1255687316664724e-07, "loss": 1.6133, "step": 2174 }, { "epoch": 2.84, "learning_rate": 4.1084729733258584e-07, "loss": 1.6272, "step": 2176 }, { "epoch": 2.84, "learning_rate": 4.091403547348596e-07, "loss": 1.6366, "step": 2178 }, { "epoch": 2.85, "learning_rate": 4.074360530027012e-07, "loss": 1.6148, "step": 2180 }, { "epoch": 2.85, "learning_rate": 4.057343997535402e-07, "loss": 1.5835, "step": 2182 }, { "epoch": 2.85, "learning_rate": 4.0403540259296897e-07, "loss": 1.6169, "step": 2184 }, { "epoch": 2.85, "learning_rate": 4.0233906911470737e-07, "loss": 1.6529, "step": 2186 }, { "epoch": 2.86, "learning_rate": 4.0064540690057145e-07, "loss": 1.6738, "step": 2188 }, { "epoch": 2.86, "learning_rate": 3.9895442352043707e-07, "loss": 1.6212, "step": 2190 }, { "epoch": 2.86, "learning_rate": 3.9726612653220693e-07, "loss": 1.6021, "step": 2192 }, { "epoch": 2.86, "learning_rate": 3.955805234817769e-07, "loss": 1.6696, "step": 2194 }, { "epoch": 2.87, "learning_rate": 3.9389762190300245e-07, "loss": 1.617, "step": 2196 }, { "epoch": 2.87, "learning_rate": 3.9221742931766435e-07, "loss": 1.5683, "step": 2198 }, { "epoch": 2.87, "learning_rate": 3.90539953235436e-07, "loss": 1.6077, "step": 2200 }, { "epoch": 2.87, "learning_rate": 3.888652011538479e-07, "loss": 1.5631, "step": 2202 }, { "epoch": 2.88, "learning_rate": 3.8719318055825777e-07, "loss": 1.641, "step": 2204 }, { "epoch": 2.88, "learning_rate": 3.8552389892181323e-07, "loss": 1.6098, "step": 2206 }, { "epoch": 2.88, "learning_rate": 3.8385736370542054e-07, "loss": 1.6761, "step": 2208 }, { "epoch": 2.88, "learning_rate": 3.821935823577106e-07, "loss": 1.6118, "step": 2210 }, { "epoch": 2.89, "learning_rate": 3.8053256231500595e-07, "loss": 1.595, "step": 2212 }, { "epoch": 2.89, "learning_rate": 3.788743110012874e-07, "loss": 1.6574, "step": 2214 }, { "epoch": 2.89, "learning_rate": 3.7721883582816073e-07, "loss": 1.5992, "step": 2216 }, { "epoch": 2.89, "learning_rate": 3.755661441948238e-07, "loss": 1.5584, "step": 2218 }, { "epoch": 2.9, "learning_rate": 3.739162434880332e-07, "loss": 1.6191, "step": 2220 }, { "epoch": 2.9, "learning_rate": 3.722691410820715e-07, "loss": 1.6008, "step": 2222 }, { "epoch": 2.9, "learning_rate": 3.706248443387142e-07, "loss": 1.6182, "step": 2224 }, { "epoch": 2.91, "learning_rate": 3.689833606071967e-07, "loss": 1.5821, "step": 2226 }, { "epoch": 2.91, "learning_rate": 3.673446972241817e-07, "loss": 1.6215, "step": 2228 }, { "epoch": 2.91, "learning_rate": 3.65708861513726e-07, "loss": 1.616, "step": 2230 }, { "epoch": 2.91, "learning_rate": 3.640758607872484e-07, "loss": 1.5608, "step": 2232 }, { "epoch": 2.92, "learning_rate": 3.6244570234349636e-07, "loss": 1.5848, "step": 2234 }, { "epoch": 2.92, "learning_rate": 3.6081839346851374e-07, "loss": 1.5598, "step": 2236 }, { "epoch": 2.92, "learning_rate": 3.591939414356081e-07, "loss": 1.6165, "step": 2238 }, { "epoch": 2.92, "learning_rate": 3.575723535053186e-07, "loss": 1.6312, "step": 2240 }, { "epoch": 2.93, "learning_rate": 3.559536369253827e-07, "loss": 1.5831, "step": 2242 }, { "epoch": 2.93, "learning_rate": 3.543377989307047e-07, "loss": 1.6081, "step": 2244 }, { "epoch": 2.93, "learning_rate": 3.5272484674332223e-07, "loss": 1.5999, "step": 2246 }, { "epoch": 2.93, "learning_rate": 3.5111478757237656e-07, "loss": 1.5825, "step": 2248 }, { "epoch": 2.94, "learning_rate": 3.495076286140761e-07, "loss": 1.5792, "step": 2250 }, { "epoch": 2.94, "learning_rate": 3.4790337705166806e-07, "loss": 1.6666, "step": 2252 }, { "epoch": 2.94, "learning_rate": 3.4630204005540486e-07, "loss": 1.5955, "step": 2254 }, { "epoch": 2.94, "learning_rate": 3.4470362478251167e-07, "loss": 1.6012, "step": 2256 }, { "epoch": 2.95, "learning_rate": 3.431081383771562e-07, "loss": 1.6122, "step": 2258 }, { "epoch": 2.95, "learning_rate": 3.415155879704137e-07, "loss": 1.6524, "step": 2260 }, { "epoch": 2.95, "learning_rate": 3.399259806802376e-07, "loss": 1.6138, "step": 2262 }, { "epoch": 2.95, "learning_rate": 3.383393236114283e-07, "loss": 1.6209, "step": 2264 }, { "epoch": 2.96, "learning_rate": 3.367556238555974e-07, "loss": 1.5978, "step": 2266 }, { "epoch": 2.96, "learning_rate": 3.3517488849114184e-07, "loss": 1.6429, "step": 2268 }, { "epoch": 2.96, "learning_rate": 3.3359712458320634e-07, "loss": 1.6401, "step": 2270 }, { "epoch": 2.97, "learning_rate": 3.3202233918365585e-07, "loss": 1.6164, "step": 2272 }, { "epoch": 2.97, "learning_rate": 3.304505393310436e-07, "loss": 1.5974, "step": 2274 }, { "epoch": 2.97, "learning_rate": 3.2888173205057666e-07, "loss": 1.6189, "step": 2276 }, { "epoch": 2.97, "learning_rate": 3.273159243540891e-07, "loss": 1.6144, "step": 2278 }, { "epoch": 2.98, "learning_rate": 3.257531232400069e-07, "loss": 1.6307, "step": 2280 }, { "epoch": 2.98, "learning_rate": 3.241933356933174e-07, "loss": 1.5666, "step": 2282 }, { "epoch": 2.98, "learning_rate": 3.226365686855409e-07, "loss": 1.5699, "step": 2284 }, { "epoch": 2.98, "learning_rate": 3.210828291746946e-07, "loss": 1.6517, "step": 2286 }, { "epoch": 2.99, "learning_rate": 3.1953212410526675e-07, "loss": 1.593, "step": 2288 }, { "epoch": 2.99, "learning_rate": 3.179844604081817e-07, "loss": 1.609, "step": 2290 }, { "epoch": 2.99, "learning_rate": 3.164398450007695e-07, "loss": 1.6534, "step": 2292 }, { "epoch": 2.99, "learning_rate": 3.14898284786738e-07, "loss": 1.6303, "step": 2294 }, { "epoch": 3.0, "learning_rate": 3.1335978665613783e-07, "loss": 1.6058, "step": 2296 }, { "epoch": 3.0, "learning_rate": 3.1182435748533455e-07, "loss": 1.6229, "step": 2298 }, { "epoch": 3.0, "learning_rate": 3.1029200413697685e-07, "loss": 1.5656, "step": 2300 }, { "epoch": 3.0, "learning_rate": 3.087627334599647e-07, "loss": 1.5789, "step": 2302 }, { "epoch": 3.01, "learning_rate": 3.0723655228942203e-07, "loss": 1.534, "step": 2304 }, { "epoch": 3.01, "learning_rate": 3.057134674466627e-07, "loss": 1.6436, "step": 2306 }, { "epoch": 3.01, "learning_rate": 3.0419348573916173e-07, "loss": 1.5725, "step": 2308 }, { "epoch": 3.01, "learning_rate": 3.0267661396052467e-07, "loss": 1.5541, "step": 2310 }, { "epoch": 3.02, "learning_rate": 3.011628588904572e-07, "loss": 1.5688, "step": 2312 }, { "epoch": 3.02, "learning_rate": 2.9965222729473474e-07, "loss": 1.5797, "step": 2314 }, { "epoch": 3.02, "learning_rate": 2.9814472592517193e-07, "loss": 1.6164, "step": 2316 }, { "epoch": 3.03, "learning_rate": 2.966403615195934e-07, "loss": 1.5926, "step": 2318 }, { "epoch": 3.03, "learning_rate": 2.951391408018025e-07, "loss": 1.5881, "step": 2320 }, { "epoch": 3.03, "learning_rate": 2.9364107048155183e-07, "loss": 1.565, "step": 2322 }, { "epoch": 3.03, "learning_rate": 2.921461572545135e-07, "loss": 1.5748, "step": 2324 }, { "epoch": 3.04, "learning_rate": 2.906544078022486e-07, "loss": 1.5936, "step": 2326 }, { "epoch": 3.04, "learning_rate": 2.8916582879217776e-07, "loss": 1.5673, "step": 2328 }, { "epoch": 3.04, "learning_rate": 2.876804268775511e-07, "loss": 1.6123, "step": 2330 }, { "epoch": 3.04, "learning_rate": 2.8619820869741873e-07, "loss": 1.5789, "step": 2332 }, { "epoch": 3.05, "learning_rate": 2.8471918087660087e-07, "loss": 1.6252, "step": 2334 }, { "epoch": 3.05, "learning_rate": 2.8324335002565844e-07, "loss": 1.5652, "step": 2336 }, { "epoch": 3.05, "learning_rate": 2.8177072274086344e-07, "loss": 1.5618, "step": 2338 }, { "epoch": 3.05, "learning_rate": 2.803013056041691e-07, "loss": 1.58, "step": 2340 }, { "epoch": 3.06, "learning_rate": 2.788351051831813e-07, "loss": 1.5702, "step": 2342 }, { "epoch": 3.06, "learning_rate": 2.773721280311282e-07, "loss": 1.6083, "step": 2344 }, { "epoch": 3.06, "learning_rate": 2.7591238068683153e-07, "loss": 1.5572, "step": 2346 }, { "epoch": 3.06, "learning_rate": 2.744558696746784e-07, "loss": 1.6111, "step": 2348 }, { "epoch": 3.07, "learning_rate": 2.730026015045891e-07, "loss": 1.5847, "step": 2350 }, { "epoch": 3.07, "learning_rate": 2.715525826719912e-07, "loss": 1.6328, "step": 2352 }, { "epoch": 3.07, "learning_rate": 2.701058196577891e-07, "loss": 1.5917, "step": 2354 }, { "epoch": 3.07, "learning_rate": 2.686623189283348e-07, "loss": 1.5531, "step": 2356 }, { "epoch": 3.08, "learning_rate": 2.672220869354005e-07, "loss": 1.5591, "step": 2358 }, { "epoch": 3.08, "learning_rate": 2.6578513011614713e-07, "loss": 1.6146, "step": 2360 }, { "epoch": 3.08, "learning_rate": 2.643514548930977e-07, "loss": 1.5839, "step": 2362 }, { "epoch": 3.09, "learning_rate": 2.629210676741095e-07, "loss": 1.5523, "step": 2364 }, { "epoch": 3.09, "learning_rate": 2.6149397485234115e-07, "loss": 1.6192, "step": 2366 }, { "epoch": 3.09, "learning_rate": 2.600701828062297e-07, "loss": 1.6184, "step": 2368 }, { "epoch": 3.09, "learning_rate": 2.5864969789945737e-07, "loss": 1.6061, "step": 2370 }, { "epoch": 3.1, "learning_rate": 2.572325264809252e-07, "loss": 1.5894, "step": 2372 }, { "epoch": 3.1, "learning_rate": 2.558186748847262e-07, "loss": 1.5659, "step": 2374 }, { "epoch": 3.1, "learning_rate": 2.5440814943011255e-07, "loss": 1.5714, "step": 2376 }, { "epoch": 3.1, "learning_rate": 2.530009564214729e-07, "loss": 1.5887, "step": 2378 }, { "epoch": 3.11, "learning_rate": 2.515971021483e-07, "loss": 1.5637, "step": 2380 }, { "epoch": 3.11, "learning_rate": 2.501965928851633e-07, "loss": 1.6213, "step": 2382 }, { "epoch": 3.11, "learning_rate": 2.4879943489168367e-07, "loss": 1.6037, "step": 2384 }, { "epoch": 3.11, "learning_rate": 2.474056344125012e-07, "loss": 1.6078, "step": 2386 }, { "epoch": 3.12, "learning_rate": 2.460151976772513e-07, "loss": 1.5884, "step": 2388 }, { "epoch": 3.12, "learning_rate": 2.446281309005341e-07, "loss": 1.6303, "step": 2390 }, { "epoch": 3.12, "learning_rate": 2.4324444028188707e-07, "loss": 1.5694, "step": 2392 }, { "epoch": 3.12, "learning_rate": 2.4186413200575917e-07, "loss": 1.617, "step": 2394 }, { "epoch": 3.13, "learning_rate": 2.4048721224148105e-07, "loss": 1.6069, "step": 2396 }, { "epoch": 3.13, "learning_rate": 2.391136871432383e-07, "loss": 1.5479, "step": 2398 }, { "epoch": 3.13, "learning_rate": 2.3774356285004437e-07, "loss": 1.6079, "step": 2400 }, { "epoch": 3.13, "learning_rate": 2.363768454857117e-07, "loss": 1.5811, "step": 2402 }, { "epoch": 3.14, "learning_rate": 2.3501354115882666e-07, "loss": 1.6253, "step": 2404 }, { "epoch": 3.14, "learning_rate": 2.3365365596272036e-07, "loss": 1.6034, "step": 2406 }, { "epoch": 3.14, "learning_rate": 2.3229719597544195e-07, "loss": 1.6277, "step": 2408 }, { "epoch": 3.15, "learning_rate": 2.3094416725973133e-07, "loss": 1.6208, "step": 2410 }, { "epoch": 3.15, "learning_rate": 2.295945758629927e-07, "loss": 1.6101, "step": 2412 }, { "epoch": 3.15, "learning_rate": 2.282484278172666e-07, "loss": 1.6151, "step": 2414 }, { "epoch": 3.15, "learning_rate": 2.269057291392037e-07, "loss": 1.5795, "step": 2416 }, { "epoch": 3.16, "learning_rate": 2.2556648583003745e-07, "loss": 1.6193, "step": 2418 }, { "epoch": 3.16, "learning_rate": 2.2423070387555765e-07, "loss": 1.5931, "step": 2420 }, { "epoch": 3.16, "learning_rate": 2.2289838924608305e-07, "loss": 1.5982, "step": 2422 }, { "epoch": 3.16, "learning_rate": 2.2156954789643568e-07, "loss": 1.6249, "step": 2424 }, { "epoch": 3.17, "learning_rate": 2.2024418576591298e-07, "loss": 1.592, "step": 2426 }, { "epoch": 3.17, "learning_rate": 2.1892230877826235e-07, "loss": 1.5805, "step": 2428 }, { "epoch": 3.17, "learning_rate": 2.1760392284165408e-07, "loss": 1.5838, "step": 2430 }, { "epoch": 3.17, "learning_rate": 2.1628903384865515e-07, "loss": 1.5925, "step": 2432 }, { "epoch": 3.18, "learning_rate": 2.1497764767620286e-07, "loss": 1.5871, "step": 2434 }, { "epoch": 3.18, "learning_rate": 2.1366977018557853e-07, "loss": 1.5532, "step": 2436 }, { "epoch": 3.18, "learning_rate": 2.1236540722238117e-07, "loss": 1.5955, "step": 2438 }, { "epoch": 3.18, "learning_rate": 2.1106456461650178e-07, "loss": 1.5791, "step": 2440 }, { "epoch": 3.19, "learning_rate": 2.0976724818209679e-07, "loss": 1.5949, "step": 2442 }, { "epoch": 3.19, "learning_rate": 2.0847346371756235e-07, "loss": 1.5967, "step": 2444 }, { "epoch": 3.19, "learning_rate": 2.0718321700550813e-07, "loss": 1.5934, "step": 2446 }, { "epoch": 3.19, "learning_rate": 2.0589651381273265e-07, "loss": 1.6194, "step": 2448 }, { "epoch": 3.2, "learning_rate": 2.0461335989019524e-07, "loss": 1.5485, "step": 2450 }, { "epoch": 3.2, "learning_rate": 2.0333376097299248e-07, "loss": 1.6498, "step": 2452 }, { "epoch": 3.2, "learning_rate": 2.020577227803315e-07, "loss": 1.5994, "step": 2454 }, { "epoch": 3.21, "learning_rate": 2.007852510155047e-07, "loss": 1.5968, "step": 2456 }, { "epoch": 3.21, "learning_rate": 1.9951635136586431e-07, "loss": 1.5915, "step": 2458 }, { "epoch": 3.21, "learning_rate": 1.982510295027967e-07, "loss": 1.5243, "step": 2460 }, { "epoch": 3.21, "learning_rate": 1.9698929108169716e-07, "loss": 1.6405, "step": 2462 }, { "epoch": 3.22, "learning_rate": 1.957311417419455e-07, "loss": 1.5397, "step": 2464 }, { "epoch": 3.22, "learning_rate": 1.9447658710687852e-07, "loss": 1.6, "step": 2466 }, { "epoch": 3.22, "learning_rate": 1.9322563278376748e-07, "loss": 1.5785, "step": 2468 }, { "epoch": 3.22, "learning_rate": 1.919782843637917e-07, "loss": 1.5701, "step": 2470 }, { "epoch": 3.23, "learning_rate": 1.9073454742201356e-07, "loss": 1.5759, "step": 2472 }, { "epoch": 3.23, "learning_rate": 1.8949442751735468e-07, "loss": 1.5871, "step": 2474 }, { "epoch": 3.23, "learning_rate": 1.8825793019256876e-07, "loss": 1.5798, "step": 2476 }, { "epoch": 3.23, "learning_rate": 1.870250609742191e-07, "loss": 1.5643, "step": 2478 }, { "epoch": 3.24, "learning_rate": 1.8579582537265392e-07, "loss": 1.567, "step": 2480 }, { "epoch": 3.24, "learning_rate": 1.8457022888197872e-07, "loss": 1.5608, "step": 2482 }, { "epoch": 3.24, "learning_rate": 1.8334827698003642e-07, "loss": 1.6361, "step": 2484 }, { "epoch": 3.24, "learning_rate": 1.8212997512837813e-07, "loss": 1.581, "step": 2486 }, { "epoch": 3.25, "learning_rate": 1.8091532877224192e-07, "loss": 1.5575, "step": 2488 }, { "epoch": 3.25, "learning_rate": 1.797043433405282e-07, "loss": 1.5808, "step": 2490 }, { "epoch": 3.25, "learning_rate": 1.7849702424577317e-07, "loss": 1.5217, "step": 2492 }, { "epoch": 3.25, "learning_rate": 1.772933768841277e-07, "loss": 1.5584, "step": 2494 }, { "epoch": 3.26, "learning_rate": 1.7609340663533112e-07, "loss": 1.5996, "step": 2496 }, { "epoch": 3.26, "learning_rate": 1.748971188626871e-07, "loss": 1.5791, "step": 2498 }, { "epoch": 3.26, "learning_rate": 1.7370451891304204e-07, "loss": 1.6198, "step": 2500 }, { "epoch": 3.27, "learning_rate": 1.725156121167576e-07, "loss": 1.6167, "step": 2502 }, { "epoch": 3.27, "learning_rate": 1.7133040378769037e-07, "loss": 1.559, "step": 2504 }, { "epoch": 3.27, "learning_rate": 1.7014889922316579e-07, "loss": 1.6096, "step": 2506 }, { "epoch": 3.27, "learning_rate": 1.689711037039545e-07, "loss": 1.5766, "step": 2508 }, { "epoch": 3.28, "learning_rate": 1.677970224942512e-07, "loss": 1.6213, "step": 2510 }, { "epoch": 3.28, "learning_rate": 1.666266608416479e-07, "loss": 1.5691, "step": 2512 }, { "epoch": 3.28, "learning_rate": 1.6546002397711246e-07, "loss": 1.5159, "step": 2514 }, { "epoch": 3.28, "learning_rate": 1.6429711711496496e-07, "loss": 1.5878, "step": 2516 }, { "epoch": 3.29, "learning_rate": 1.631379454528531e-07, "loss": 1.5404, "step": 2518 }, { "epoch": 3.29, "learning_rate": 1.6198251417173136e-07, "loss": 1.5686, "step": 2520 }, { "epoch": 3.29, "learning_rate": 1.608308284358355e-07, "loss": 1.5627, "step": 2522 }, { "epoch": 3.29, "learning_rate": 1.5968289339266083e-07, "loss": 1.5893, "step": 2524 }, { "epoch": 3.3, "learning_rate": 1.5853871417293873e-07, "loss": 1.6329, "step": 2526 }, { "epoch": 3.3, "learning_rate": 1.5739829589061383e-07, "loss": 1.6152, "step": 2528 }, { "epoch": 3.3, "learning_rate": 1.56261643642821e-07, "loss": 1.5956, "step": 2530 }, { "epoch": 3.3, "learning_rate": 1.5512876250986306e-07, "loss": 1.6358, "step": 2532 }, { "epoch": 3.31, "learning_rate": 1.539996575551872e-07, "loss": 1.6428, "step": 2534 }, { "epoch": 3.31, "learning_rate": 1.5287433382536342e-07, "loss": 1.5845, "step": 2536 }, { "epoch": 3.31, "learning_rate": 1.51752796350061e-07, "loss": 1.5707, "step": 2538 }, { "epoch": 3.31, "learning_rate": 1.5063505014202648e-07, "loss": 1.5925, "step": 2540 }, { "epoch": 3.32, "learning_rate": 1.4952110019706155e-07, "loss": 1.5684, "step": 2542 }, { "epoch": 3.32, "learning_rate": 1.4841095149399996e-07, "loss": 1.6393, "step": 2544 }, { "epoch": 3.32, "learning_rate": 1.47304608994686e-07, "loss": 1.5949, "step": 2546 }, { "epoch": 3.33, "learning_rate": 1.4620207764395176e-07, "loss": 1.5885, "step": 2548 }, { "epoch": 3.33, "learning_rate": 1.4510336236959554e-07, "loss": 1.5891, "step": 2550 }, { "epoch": 3.33, "learning_rate": 1.4400846808235945e-07, "loss": 1.5994, "step": 2552 }, { "epoch": 3.33, "learning_rate": 1.4291739967590744e-07, "loss": 1.5805, "step": 2554 }, { "epoch": 3.34, "learning_rate": 1.4183016202680375e-07, "loss": 1.5854, "step": 2556 }, { "epoch": 3.34, "learning_rate": 1.4074675999449094e-07, "loss": 1.5896, "step": 2558 }, { "epoch": 3.34, "learning_rate": 1.3966719842126806e-07, "loss": 1.6235, "step": 2560 }, { "epoch": 3.34, "learning_rate": 1.3859148213226901e-07, "loss": 1.5652, "step": 2562 }, { "epoch": 3.35, "learning_rate": 1.375196159354417e-07, "loss": 1.5949, "step": 2564 }, { "epoch": 3.35, "learning_rate": 1.3645160462152493e-07, "loss": 1.5713, "step": 2566 }, { "epoch": 3.35, "learning_rate": 1.3538745296402865e-07, "loss": 1.5817, "step": 2568 }, { "epoch": 3.35, "learning_rate": 1.3432716571921176e-07, "loss": 1.5355, "step": 2570 }, { "epoch": 3.36, "learning_rate": 1.3327074762606094e-07, "loss": 1.6249, "step": 2572 }, { "epoch": 3.36, "learning_rate": 1.3221820340627043e-07, "loss": 1.6183, "step": 2574 }, { "epoch": 3.36, "learning_rate": 1.3116953776421869e-07, "loss": 1.5645, "step": 2576 }, { "epoch": 3.36, "learning_rate": 1.3012475538694934e-07, "loss": 1.5974, "step": 2578 }, { "epoch": 3.37, "learning_rate": 1.2908386094415048e-07, "loss": 1.5769, "step": 2580 }, { "epoch": 3.37, "learning_rate": 1.2804685908813118e-07, "loss": 1.6138, "step": 2582 }, { "epoch": 3.37, "learning_rate": 1.2701375445380458e-07, "loss": 1.5989, "step": 2584 }, { "epoch": 3.37, "learning_rate": 1.2598455165866318e-07, "loss": 1.6144, "step": 2586 }, { "epoch": 3.38, "learning_rate": 1.2495925530276097e-07, "loss": 1.6266, "step": 2588 }, { "epoch": 3.38, "learning_rate": 1.239378699686926e-07, "loss": 1.589, "step": 2590 }, { "epoch": 3.38, "learning_rate": 1.2292040022157057e-07, "loss": 1.5466, "step": 2592 }, { "epoch": 3.39, "learning_rate": 1.219068506090084e-07, "loss": 1.6047, "step": 2594 }, { "epoch": 3.39, "learning_rate": 1.208972256610975e-07, "loss": 1.5564, "step": 2596 }, { "epoch": 3.39, "learning_rate": 1.1989152989038741e-07, "loss": 1.5896, "step": 2598 }, { "epoch": 3.39, "learning_rate": 1.1888976779186743e-07, "loss": 1.5955, "step": 2600 }, { "epoch": 3.4, "learning_rate": 1.1789194384294376e-07, "loss": 1.6147, "step": 2602 }, { "epoch": 3.4, "learning_rate": 1.1689806250342194e-07, "loss": 1.5934, "step": 2604 }, { "epoch": 3.4, "learning_rate": 1.1590812821548579e-07, "loss": 1.5618, "step": 2606 }, { "epoch": 3.4, "learning_rate": 1.1492214540367672e-07, "loss": 1.5586, "step": 2608 }, { "epoch": 3.41, "learning_rate": 1.1394011847487616e-07, "loss": 1.6067, "step": 2610 }, { "epoch": 3.41, "learning_rate": 1.1296205181828378e-07, "loss": 1.5777, "step": 2612 }, { "epoch": 3.41, "learning_rate": 1.1198794980539905e-07, "loss": 1.5578, "step": 2614 }, { "epoch": 3.41, "learning_rate": 1.1101781679000133e-07, "loss": 1.583, "step": 2616 }, { "epoch": 3.42, "learning_rate": 1.100516571081298e-07, "loss": 1.589, "step": 2618 }, { "epoch": 3.42, "learning_rate": 1.0908947507806565e-07, "loss": 1.5534, "step": 2620 }, { "epoch": 3.42, "learning_rate": 1.0813127500031139e-07, "loss": 1.5654, "step": 2622 }, { "epoch": 3.42, "learning_rate": 1.0717706115757207e-07, "loss": 1.653, "step": 2624 }, { "epoch": 3.43, "learning_rate": 1.0622683781473596e-07, "loss": 1.5528, "step": 2626 }, { "epoch": 3.43, "learning_rate": 1.0528060921885607e-07, "loss": 1.5619, "step": 2628 }, { "epoch": 3.43, "learning_rate": 1.0433837959913039e-07, "loss": 1.5973, "step": 2630 }, { "epoch": 3.43, "learning_rate": 1.0340015316688355e-07, "loss": 1.6352, "step": 2632 }, { "epoch": 3.44, "learning_rate": 1.0246593411554793e-07, "loss": 1.598, "step": 2634 }, { "epoch": 3.44, "learning_rate": 1.0153572662064447e-07, "loss": 1.5738, "step": 2636 }, { "epoch": 3.44, "learning_rate": 1.0060953483976453e-07, "loss": 1.5879, "step": 2638 }, { "epoch": 3.45, "learning_rate": 9.96873629125512e-08, "loss": 1.5915, "step": 2640 }, { "epoch": 3.45, "learning_rate": 9.876921496068059e-08, "loss": 1.6072, "step": 2642 }, { "epoch": 3.45, "learning_rate": 9.785509508784328e-08, "loss": 1.5988, "step": 2644 }, { "epoch": 3.45, "learning_rate": 9.694500737972688e-08, "loss": 1.5733, "step": 2646 }, { "epoch": 3.46, "learning_rate": 9.603895590399647e-08, "loss": 1.5686, "step": 2648 }, { "epoch": 3.46, "learning_rate": 9.513694471027733e-08, "loss": 1.5856, "step": 2650 }, { "epoch": 3.46, "learning_rate": 9.423897783013657e-08, "loss": 1.5943, "step": 2652 }, { "epoch": 3.46, "learning_rate": 9.334505927706515e-08, "loss": 1.6321, "step": 2654 }, { "epoch": 3.47, "learning_rate": 9.245519304645977e-08, "loss": 1.606, "step": 2656 }, { "epoch": 3.47, "learning_rate": 9.156938311560524e-08, "loss": 1.5451, "step": 2658 }, { "epoch": 3.47, "learning_rate": 9.068763344365682e-08, "loss": 1.5991, "step": 2660 }, { "epoch": 3.47, "learning_rate": 8.980994797162156e-08, "loss": 1.6528, "step": 2662 }, { "epoch": 3.48, "learning_rate": 8.893633062234285e-08, "loss": 1.5927, "step": 2664 }, { "epoch": 3.48, "learning_rate": 8.806678530047983e-08, "loss": 1.618, "step": 2666 }, { "epoch": 3.48, "learning_rate": 8.72013158924928e-08, "loss": 1.6322, "step": 2668 }, { "epoch": 3.48, "learning_rate": 8.6339926266624e-08, "loss": 1.5926, "step": 2670 }, { "epoch": 3.49, "learning_rate": 8.5482620272881e-08, "loss": 1.5543, "step": 2672 }, { "epoch": 3.49, "learning_rate": 8.462940174302025e-08, "loss": 1.594, "step": 2674 }, { "epoch": 3.49, "learning_rate": 8.378027449052782e-08, "loss": 1.5964, "step": 2676 }, { "epoch": 3.49, "learning_rate": 8.293524231060466e-08, "loss": 1.6331, "step": 2678 }, { "epoch": 3.5, "learning_rate": 8.209430898014869e-08, "loss": 1.5641, "step": 2680 }, { "epoch": 3.5, "learning_rate": 8.125747825773688e-08, "loss": 1.6072, "step": 2682 }, { "epoch": 3.5, "learning_rate": 8.042475388361103e-08, "loss": 1.54, "step": 2684 }, { "epoch": 3.51, "learning_rate": 7.959613957965794e-08, "loss": 1.6152, "step": 2686 }, { "epoch": 3.51, "learning_rate": 7.877163904939522e-08, "loss": 1.6052, "step": 2688 }, { "epoch": 3.51, "learning_rate": 7.795125597795405e-08, "loss": 1.6275, "step": 2690 }, { "epoch": 3.51, "learning_rate": 7.713499403206136e-08, "loss": 1.5732, "step": 2692 }, { "epoch": 3.52, "learning_rate": 7.632285686002592e-08, "loss": 1.5795, "step": 2694 }, { "epoch": 3.52, "learning_rate": 7.551484809172004e-08, "loss": 1.5856, "step": 2696 }, { "epoch": 3.52, "learning_rate": 7.471097133856352e-08, "loss": 1.5807, "step": 2698 }, { "epoch": 3.52, "learning_rate": 7.391123019350932e-08, "loss": 1.6053, "step": 2700 }, { "epoch": 3.53, "learning_rate": 7.311562823102468e-08, "loss": 1.5852, "step": 2702 }, { "epoch": 3.53, "learning_rate": 7.232416900707738e-08, "loss": 1.5762, "step": 2704 }, { "epoch": 3.53, "learning_rate": 7.153685605911964e-08, "loss": 1.5632, "step": 2706 }, { "epoch": 3.53, "learning_rate": 7.075369290607048e-08, "loss": 1.591, "step": 2708 }, { "epoch": 3.54, "learning_rate": 6.997468304830246e-08, "loss": 1.6077, "step": 2710 }, { "epoch": 3.54, "learning_rate": 6.91998299676243e-08, "loss": 1.6017, "step": 2712 }, { "epoch": 3.54, "learning_rate": 6.84291371272655e-08, "loss": 1.6394, "step": 2714 }, { "epoch": 3.54, "learning_rate": 6.766260797186241e-08, "loss": 1.5944, "step": 2716 }, { "epoch": 3.55, "learning_rate": 6.690024592744026e-08, "loss": 1.6005, "step": 2718 }, { "epoch": 3.55, "learning_rate": 6.614205440140041e-08, "loss": 1.5622, "step": 2720 }, { "epoch": 3.55, "learning_rate": 6.538803678250337e-08, "loss": 1.599, "step": 2722 }, { "epoch": 3.55, "learning_rate": 6.463819644085411e-08, "loss": 1.6174, "step": 2724 }, { "epoch": 3.56, "learning_rate": 6.389253672788752e-08, "loss": 1.5911, "step": 2726 }, { "epoch": 3.56, "learning_rate": 6.315106097635303e-08, "loss": 1.5908, "step": 2728 }, { "epoch": 3.56, "learning_rate": 6.241377250029933e-08, "loss": 1.5892, "step": 2730 }, { "epoch": 3.57, "learning_rate": 6.168067459506066e-08, "loss": 1.6196, "step": 2732 }, { "epoch": 3.57, "learning_rate": 6.09517705372401e-08, "loss": 1.5886, "step": 2734 }, { "epoch": 3.57, "learning_rate": 6.022706358469776e-08, "loss": 1.6137, "step": 2736 }, { "epoch": 3.57, "learning_rate": 5.950655697653362e-08, "loss": 1.5554, "step": 2738 }, { "epoch": 3.58, "learning_rate": 5.879025393307435e-08, "loss": 1.6075, "step": 2740 }, { "epoch": 3.58, "learning_rate": 5.8078157655858775e-08, "loss": 1.6098, "step": 2742 }, { "epoch": 3.58, "learning_rate": 5.73702713276234e-08, "loss": 1.5819, "step": 2744 }, { "epoch": 3.58, "learning_rate": 5.666659811228802e-08, "loss": 1.6254, "step": 2746 }, { "epoch": 3.59, "learning_rate": 5.596714115494217e-08, "loss": 1.6061, "step": 2748 }, { "epoch": 3.59, "learning_rate": 5.527190358183031e-08, "loss": 1.6087, "step": 2750 }, { "epoch": 3.59, "learning_rate": 5.4580888500338486e-08, "loss": 1.5698, "step": 2752 }, { "epoch": 3.59, "learning_rate": 5.389409899898012e-08, "loss": 1.6054, "step": 2754 }, { "epoch": 3.6, "learning_rate": 5.3211538147382216e-08, "loss": 1.5646, "step": 2756 }, { "epoch": 3.6, "learning_rate": 5.253320899627178e-08, "loss": 1.6152, "step": 2758 }, { "epoch": 3.6, "learning_rate": 5.185911457746206e-08, "loss": 1.6431, "step": 2760 }, { "epoch": 3.6, "learning_rate": 5.118925790383899e-08, "loss": 1.5424, "step": 2762 }, { "epoch": 3.61, "learning_rate": 5.052364196934777e-08, "loss": 1.6433, "step": 2764 }, { "epoch": 3.61, "learning_rate": 4.986226974897967e-08, "loss": 1.5994, "step": 2766 }, { "epoch": 3.61, "learning_rate": 4.920514419875821e-08, "loss": 1.5944, "step": 2768 }, { "epoch": 3.62, "learning_rate": 4.855226825572667e-08, "loss": 1.5903, "step": 2770 }, { "epoch": 3.62, "learning_rate": 4.79036448379343e-08, "loss": 1.5756, "step": 2772 }, { "epoch": 3.62, "learning_rate": 4.725927684442366e-08, "loss": 1.6207, "step": 2774 }, { "epoch": 3.62, "learning_rate": 4.661916715521763e-08, "loss": 1.6053, "step": 2776 }, { "epoch": 3.63, "learning_rate": 4.5983318631306114e-08, "loss": 1.5439, "step": 2778 }, { "epoch": 3.63, "learning_rate": 4.535173411463422e-08, "loss": 1.5701, "step": 2780 }, { "epoch": 3.63, "learning_rate": 4.472441642808844e-08, "loss": 1.5878, "step": 2782 }, { "epoch": 3.63, "learning_rate": 4.4101368375484614e-08, "loss": 1.61, "step": 2784 }, { "epoch": 3.64, "learning_rate": 4.348259274155541e-08, "loss": 1.5514, "step": 2786 }, { "epoch": 3.64, "learning_rate": 4.2868092291937773e-08, "loss": 1.621, "step": 2788 }, { "epoch": 3.64, "learning_rate": 4.225786977316092e-08, "loss": 1.6143, "step": 2790 }, { "epoch": 3.64, "learning_rate": 4.1651927912632944e-08, "loss": 1.5332, "step": 2792 }, { "epoch": 3.65, "learning_rate": 4.105026941862988e-08, "loss": 1.5658, "step": 2794 }, { "epoch": 3.65, "learning_rate": 4.0452896980283424e-08, "loss": 1.5495, "step": 2796 }, { "epoch": 3.65, "learning_rate": 3.985981326756793e-08, "loss": 1.6149, "step": 2798 }, { "epoch": 3.65, "learning_rate": 3.927102093128976e-08, "loss": 1.6166, "step": 2800 }, { "epoch": 3.66, "learning_rate": 3.868652260307437e-08, "loss": 1.6043, "step": 2802 }, { "epoch": 3.66, "learning_rate": 3.810632089535526e-08, "loss": 1.6229, "step": 2804 }, { "epoch": 3.66, "learning_rate": 3.7530418401362175e-08, "loss": 1.5762, "step": 2806 }, { "epoch": 3.66, "learning_rate": 3.6958817695109004e-08, "loss": 1.6508, "step": 2808 }, { "epoch": 3.67, "learning_rate": 3.639152133138312e-08, "loss": 1.5482, "step": 2810 }, { "epoch": 3.67, "learning_rate": 3.5828531845733204e-08, "loss": 1.5399, "step": 2812 }, { "epoch": 3.67, "learning_rate": 3.5269851754457957e-08, "loss": 1.6152, "step": 2814 }, { "epoch": 3.68, "learning_rate": 3.471548355459597e-08, "loss": 1.5888, "step": 2816 }, { "epoch": 3.68, "learning_rate": 3.4165429723912675e-08, "loss": 1.5982, "step": 2818 }, { "epoch": 3.68, "learning_rate": 3.361969272089116e-08, "loss": 1.5847, "step": 2820 }, { "epoch": 3.68, "learning_rate": 3.307827498471982e-08, "loss": 1.5995, "step": 2822 }, { "epoch": 3.69, "learning_rate": 3.254117893528185e-08, "loss": 1.6057, "step": 2824 }, { "epoch": 3.69, "learning_rate": 3.2008406973145e-08, "loss": 1.6209, "step": 2826 }, { "epoch": 3.69, "learning_rate": 3.1479961479550124e-08, "loss": 1.5954, "step": 2828 }, { "epoch": 3.69, "learning_rate": 3.0955844816400675e-08, "loss": 1.6317, "step": 2830 }, { "epoch": 3.7, "learning_rate": 3.043605932625259e-08, "loss": 1.5995, "step": 2832 }, { "epoch": 3.7, "learning_rate": 2.992060733230284e-08, "loss": 1.5874, "step": 2834 }, { "epoch": 3.7, "learning_rate": 2.940949113838065e-08, "loss": 1.6047, "step": 2836 }, { "epoch": 3.7, "learning_rate": 2.8902713028935544e-08, "loss": 1.6055, "step": 2838 }, { "epoch": 3.71, "learning_rate": 2.8400275269028108e-08, "loss": 1.5751, "step": 2840 }, { "epoch": 3.71, "learning_rate": 2.7902180104319438e-08, "loss": 1.6235, "step": 2842 }, { "epoch": 3.71, "learning_rate": 2.740842976106139e-08, "loss": 1.6023, "step": 2844 }, { "epoch": 3.71, "learning_rate": 2.691902644608657e-08, "loss": 1.6301, "step": 2846 }, { "epoch": 3.72, "learning_rate": 2.643397234679823e-08, "loss": 1.5737, "step": 2848 }, { "epoch": 3.72, "learning_rate": 2.5953269631160844e-08, "loss": 1.593, "step": 2850 }, { "epoch": 3.72, "learning_rate": 2.547692044769012e-08, "loss": 1.5591, "step": 2852 }, { "epoch": 3.72, "learning_rate": 2.5004926925443536e-08, "loss": 1.6058, "step": 2854 }, { "epoch": 3.73, "learning_rate": 2.4537291174010822e-08, "loss": 1.623, "step": 2856 }, { "epoch": 3.73, "learning_rate": 2.40740152835045e-08, "loss": 1.6441, "step": 2858 }, { "epoch": 3.73, "learning_rate": 2.3615101324550692e-08, "loss": 1.5893, "step": 2860 }, { "epoch": 3.74, "learning_rate": 2.3160551348279434e-08, "loss": 1.5671, "step": 2862 }, { "epoch": 3.74, "learning_rate": 2.2710367386316154e-08, "loss": 1.5523, "step": 2864 }, { "epoch": 3.74, "learning_rate": 2.2264551450771996e-08, "loss": 1.642, "step": 2866 }, { "epoch": 3.74, "learning_rate": 2.1823105534235164e-08, "loss": 1.5946, "step": 2868 }, { "epoch": 3.75, "learning_rate": 2.1386031609761933e-08, "loss": 1.5919, "step": 2870 }, { "epoch": 3.75, "learning_rate": 2.0953331630867765e-08, "loss": 1.565, "step": 2872 }, { "epoch": 3.75, "learning_rate": 2.0525007531518755e-08, "loss": 1.5913, "step": 2874 }, { "epoch": 3.75, "learning_rate": 2.010106122612265e-08, "loss": 1.5931, "step": 2876 }, { "epoch": 3.76, "learning_rate": 1.968149460952073e-08, "loss": 1.5929, "step": 2878 }, { "epoch": 3.76, "learning_rate": 1.9266309556979165e-08, "loss": 1.6019, "step": 2880 }, { "epoch": 3.76, "learning_rate": 1.8855507924180336e-08, "loss": 1.5926, "step": 2882 }, { "epoch": 3.76, "learning_rate": 1.8449091547214966e-08, "loss": 1.5855, "step": 2884 }, { "epoch": 3.77, "learning_rate": 1.8047062242573573e-08, "loss": 1.6388, "step": 2886 }, { "epoch": 3.77, "learning_rate": 1.7649421807138688e-08, "loss": 1.5874, "step": 2888 }, { "epoch": 3.77, "learning_rate": 1.725617201817686e-08, "loss": 1.6315, "step": 2890 }, { "epoch": 3.77, "learning_rate": 1.686731463333002e-08, "loss": 1.5194, "step": 2892 }, { "epoch": 3.78, "learning_rate": 1.6482851390608233e-08, "loss": 1.6289, "step": 2894 }, { "epoch": 3.78, "learning_rate": 1.6102784008382274e-08, "loss": 1.5996, "step": 2896 }, { "epoch": 3.78, "learning_rate": 1.5727114185374758e-08, "loss": 1.6209, "step": 2898 }, { "epoch": 3.78, "learning_rate": 1.5355843600653895e-08, "loss": 1.6088, "step": 2900 }, { "epoch": 3.79, "learning_rate": 1.498897391362508e-08, "loss": 1.6058, "step": 2902 }, { "epoch": 3.79, "learning_rate": 1.4626506764023661e-08, "loss": 1.6141, "step": 2904 }, { "epoch": 3.79, "learning_rate": 1.4268443771908056e-08, "loss": 1.5968, "step": 2906 }, { "epoch": 3.8, "learning_rate": 1.3914786537651768e-08, "loss": 1.6004, "step": 2908 }, { "epoch": 3.8, "learning_rate": 1.3565536641936826e-08, "loss": 1.6031, "step": 2910 }, { "epoch": 3.8, "learning_rate": 1.3220695645746681e-08, "loss": 1.5953, "step": 2912 }, { "epoch": 3.8, "learning_rate": 1.2880265090358666e-08, "loss": 1.6611, "step": 2914 }, { "epoch": 3.81, "learning_rate": 1.2544246497337984e-08, "loss": 1.5924, "step": 2916 }, { "epoch": 3.81, "learning_rate": 1.221264136852984e-08, "loss": 1.6318, "step": 2918 }, { "epoch": 3.81, "learning_rate": 1.1885451186053886e-08, "loss": 1.5874, "step": 2920 }, { "epoch": 3.81, "learning_rate": 1.1562677412296995e-08, "loss": 1.6377, "step": 2922 }, { "epoch": 3.82, "learning_rate": 1.1244321489906283e-08, "loss": 1.5846, "step": 2924 }, { "epoch": 3.82, "learning_rate": 1.0930384841783546e-08, "loss": 1.5597, "step": 2926 }, { "epoch": 3.82, "learning_rate": 1.0620868871078493e-08, "loss": 1.6085, "step": 2928 }, { "epoch": 3.82, "learning_rate": 1.0315774961182411e-08, "loss": 1.558, "step": 2930 }, { "epoch": 3.83, "learning_rate": 1.0015104475721848e-08, "loss": 1.5542, "step": 2932 }, { "epoch": 3.83, "learning_rate": 9.71885875855294e-09, "loss": 1.6161, "step": 2934 }, { "epoch": 3.83, "learning_rate": 9.4270391337552e-09, "loss": 1.6006, "step": 2936 }, { "epoch": 3.83, "learning_rate": 9.139646905625519e-09, "loss": 1.5944, "step": 2938 }, { "epoch": 3.84, "learning_rate": 8.856683358672402e-09, "loss": 1.6155, "step": 2940 }, { "epoch": 3.84, "learning_rate": 8.578149757610175e-09, "loss": 1.6357, "step": 2942 }, { "epoch": 3.84, "learning_rate": 8.30404734735346e-09, "loss": 1.584, "step": 2944 }, { "epoch": 3.84, "learning_rate": 8.0343773530116e-09, "loss": 1.5639, "step": 2946 }, { "epoch": 3.85, "learning_rate": 7.769140979882905e-09, "loss": 1.6243, "step": 2948 }, { "epoch": 3.85, "learning_rate": 7.508339413449527e-09, "loss": 1.6321, "step": 2950 }, { "epoch": 3.85, "learning_rate": 7.25197381937237e-09, "loss": 1.6286, "step": 2952 }, { "epoch": 3.86, "learning_rate": 7.000045343485306e-09, "loss": 1.5902, "step": 2954 }, { "epoch": 3.86, "learning_rate": 6.752555111790515e-09, "loss": 1.5719, "step": 2956 }, { "epoch": 3.86, "learning_rate": 6.509504230453377e-09, "loss": 1.6002, "step": 2958 }, { "epoch": 3.86, "learning_rate": 6.27089378579726e-09, "loss": 1.6154, "step": 2960 }, { "epoch": 3.87, "learning_rate": 6.0367248442990684e-09, "loss": 1.5909, "step": 2962 }, { "epoch": 3.87, "learning_rate": 5.8069984525840335e-09, "loss": 1.6277, "step": 2964 }, { "epoch": 3.87, "learning_rate": 5.5817156374214916e-09, "loss": 1.6109, "step": 2966 }, { "epoch": 3.87, "learning_rate": 5.36087740572011e-09, "loss": 1.6023, "step": 2968 }, { "epoch": 3.88, "learning_rate": 5.144484744523003e-09, "loss": 1.6188, "step": 2970 }, { "epoch": 3.88, "learning_rate": 4.9325386210040675e-09, "loss": 1.62, "step": 2972 }, { "epoch": 3.88, "learning_rate": 4.7250399824629865e-09, "loss": 1.5778, "step": 2974 }, { "epoch": 3.88, "learning_rate": 4.521989756321565e-09, "loss": 1.6444, "step": 2976 }, { "epoch": 3.89, "learning_rate": 4.323388850118848e-09, "loss": 1.579, "step": 2978 }, { "epoch": 3.89, "learning_rate": 4.129238151508008e-09, "loss": 1.6021, "step": 2980 }, { "epoch": 3.89, "learning_rate": 3.939538528251462e-09, "loss": 1.572, "step": 2982 }, { "epoch": 3.89, "learning_rate": 3.7542908282176545e-09, "loss": 1.5995, "step": 2984 }, { "epoch": 3.9, "learning_rate": 3.573495879376942e-09, "loss": 1.6413, "step": 2986 }, { "epoch": 3.9, "learning_rate": 3.3971544897980488e-09, "loss": 1.5797, "step": 2988 }, { "epoch": 3.9, "learning_rate": 3.2252674476440643e-09, "loss": 1.6453, "step": 2990 }, { "epoch": 3.9, "learning_rate": 3.0578355211697824e-09, "loss": 1.5531, "step": 2992 }, { "epoch": 3.91, "learning_rate": 2.894859458717036e-09, "loss": 1.5881, "step": 2994 }, { "epoch": 3.91, "learning_rate": 2.7363399887128104e-09, "loss": 1.5912, "step": 2996 }, { "epoch": 3.91, "learning_rate": 2.58227781966458e-09, "loss": 1.6239, "step": 2998 }, { "epoch": 3.92, "learning_rate": 2.4326736401579784e-09, "loss": 1.6034, "step": 3000 }, { "epoch": 3.92, "learning_rate": 2.2875281188536875e-09, "loss": 1.6309, "step": 3002 }, { "epoch": 3.92, "learning_rate": 2.1468419044839982e-09, "loss": 1.5689, "step": 3004 }, { "epoch": 3.92, "learning_rate": 2.010615625850365e-09, "loss": 1.6326, "step": 3006 }, { "epoch": 3.93, "learning_rate": 1.878849891820411e-09, "loss": 1.5711, "step": 3008 }, { "epoch": 3.93, "learning_rate": 1.7515452913250405e-09, "loss": 1.5815, "step": 3010 }, { "epoch": 3.93, "learning_rate": 1.689566094741912e-09, "loss": 1.5682, "step": 3012 }, { "epoch": 3.93, "learning_rate": 1.6287023933564403e-09, "loss": 1.5606, "step": 3014 }, { "epoch": 3.94, "learning_rate": 1.5103217469644158e-09, "loss": 1.5411, "step": 3016 }, { "epoch": 3.94, "learning_rate": 1.3964038812551705e-09, "loss": 1.6349, "step": 3018 }, { "epoch": 3.94, "learning_rate": 1.2869493053880853e-09, "loss": 1.6072, "step": 3020 }, { "epoch": 3.94, "learning_rate": 1.1819585085737215e-09, "loss": 1.599, "step": 3022 }, { "epoch": 3.95, "learning_rate": 1.0814319600718213e-09, "loss": 1.6435, "step": 3024 }, { "epoch": 3.95, "learning_rate": 9.853701091888656e-10, "loss": 1.6176, "step": 3026 }, { "epoch": 3.95, "learning_rate": 8.937733852764085e-10, "loss": 1.5915, "step": 3028 }, { "epoch": 3.95, "learning_rate": 8.066421977286352e-10, "loss": 1.5667, "step": 3030 }, { "epoch": 3.96, "learning_rate": 7.23976935981141e-10, "loss": 1.5865, "step": 3032 }, { "epoch": 3.96, "learning_rate": 6.457779695090426e-10, "loss": 1.594, "step": 3034 }, { "epoch": 3.96, "learning_rate": 5.720456478249813e-10, "loss": 1.6078, "step": 3036 }, { "epoch": 3.96, "learning_rate": 5.027803004779008e-10, "loss": 1.6193, "step": 3038 }, { "epoch": 3.97, "learning_rate": 4.379822370512709e-10, "loss": 1.5808, "step": 3040 }, { "epoch": 3.97, "learning_rate": 3.776517471621998e-10, "loss": 1.5668, "step": 3042 }, { "epoch": 3.97, "learning_rate": 3.2178910045965734e-10, "loss": 1.5583, "step": 3044 }, { "epoch": 3.98, "learning_rate": 2.703945466233648e-10, "loss": 1.6006, "step": 3046 }, { "epoch": 3.98, "learning_rate": 2.2346831536312892e-10, "loss": 1.6075, "step": 3048 }, { "epoch": 3.98, "learning_rate": 1.8101061641695447e-10, "loss": 1.6492, "step": 3050 }, { "epoch": 3.98, "learning_rate": 1.4302163955093316e-10, "loss": 1.5577, "step": 3052 }, { "epoch": 3.99, "learning_rate": 1.0950155455802245e-10, "loss": 1.5638, "step": 3054 }, { "epoch": 3.99, "learning_rate": 8.045051125726843e-11, "loss": 1.5856, "step": 3056 }, { "epoch": 3.99, "learning_rate": 5.5868639493250603e-11, "loss": 1.5727, "step": 3058 }, { "epoch": 3.99, "learning_rate": 3.5756049135304836e-11, "loss": 1.6079, "step": 3060 }, { "epoch": 4.0, "learning_rate": 2.0112830077301247e-11, "loss": 1.5869, "step": 3062 }, { "epoch": 4.0, "learning_rate": 8.939052237089128e-12, "loss": 1.5579, "step": 3064 }, { "epoch": 4.0, "step": 3064, "total_flos": 1.7885944480792576e+17, "train_loss": 1.6310015863133474, "train_runtime": 32557.3578, "train_samples_per_second": 6.024, "train_steps_per_second": 0.094 } ], "logging_steps": 2, "max_steps": 3064, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 4000, "total_flos": 1.7885944480792576e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }