{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.913265306122449, "eval_steps": 800, "global_step": 4500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0, "loss": 2.7231, "step": 4 }, { "epoch": 0.0, "learning_rate": 0, "loss": 2.8688, "step": 8 }, { "epoch": 0.01, "learning_rate": 0, "loss": 3.0604, "step": 12 }, { "epoch": 0.01, "learning_rate": 0, "loss": 3.3619, "step": 16 }, { "epoch": 0.01, "learning_rate": 0, "loss": 3.1263, "step": 20 }, { "epoch": 0.01, "learning_rate": 0.0, "loss": 3.1305, "step": 24 }, { "epoch": 0.01, "learning_rate": 1.5188198451414044e-06, "loss": 1.2543, "step": 28 }, { "epoch": 0.01, "learning_rate": 2.0735116692035353e-06, "loss": 1.0489, "step": 32 }, { "epoch": 0.02, "learning_rate": 2.420532010460384e-06, "loss": 0.8238, "step": 36 }, { "epoch": 0.02, "learning_rate": 2.6736916157045096e-06, "loss": 0.6357, "step": 40 }, { "epoch": 0.02, "learning_rate": 2.873103126046782e-06, "loss": 0.9375, "step": 44 }, { "epoch": 0.02, "learning_rate": 3.0376396902828088e-06, "loss": 1.0042, "step": 48 }, { "epoch": 0.02, "learning_rate": 3.177703030066258e-06, "loss": 0.7668, "step": 52 }, { "epoch": 0.02, "learning_rate": 3.299639602133279e-06, "loss": 0.9168, "step": 56 }, { "epoch": 0.03, "learning_rate": 3.407608173344298e-06, "loss": 0.8997, "step": 60 }, { "epoch": 0.03, "learning_rate": 3.5044824703694353e-06, "loss": 1.0025, "step": 64 }, { "epoch": 0.03, "learning_rate": 3.5923315143449394e-06, "loss": 0.6651, "step": 68 }, { "epoch": 0.03, "learning_rate": 3.6726945828900284e-06, "loss": 0.699, "step": 72 }, { "epoch": 0.03, "learning_rate": 3.746747919084026e-06, "loss": 0.886, "step": 76 }, { "epoch": 0.03, "learning_rate": 3.8154106182475455e-06, "loss": 0.8937, "step": 80 }, { "epoch": 0.04, "learning_rate": 3.8794145195304064e-06, "loss": 0.6582, "step": 84 }, { "epoch": 0.04, "learning_rate": 3.9393518556017876e-06, "loss": 0.7974, "step": 88 }, { "epoch": 0.04, "learning_rate": 3.995708648306083e-06, "loss": 0.9472, "step": 92 }, { "epoch": 0.04, "learning_rate": 4.0488886794862905e-06, "loss": 0.8867, "step": 96 }, { "epoch": 0.04, "learning_rate": 4.099231058976525e-06, "loss": 1.061, "step": 100 }, { "epoch": 0.04, "learning_rate": 4.1470233384070705e-06, "loss": 0.7292, "step": 104 }, { "epoch": 0.05, "learning_rate": 4.192511460845915e-06, "loss": 0.9262, "step": 108 }, { "epoch": 0.05, "learning_rate": 4.235907420412398e-06, "loss": 0.7092, "step": 112 }, { "epoch": 0.05, "learning_rate": 4.27739523661862e-06, "loss": 0.7174, "step": 116 }, { "epoch": 0.05, "learning_rate": 4.317135669700268e-06, "loss": 0.8711, "step": 120 }, { "epoch": 0.05, "learning_rate": 4.355269982485126e-06, "loss": 0.9286, "step": 124 }, { "epoch": 0.05, "learning_rate": 4.391922971188186e-06, "loss": 0.9947, "step": 128 }, { "epoch": 0.06, "learning_rate": 4.427205429264097e-06, "loss": 0.8179, "step": 132 }, { "epoch": 0.06, "learning_rate": 4.461216167003915e-06, "loss": 0.6159, "step": 136 }, { "epoch": 0.06, "learning_rate": 4.494043679663919e-06, "loss": 0.8981, "step": 140 }, { "epoch": 0.06, "learning_rate": 4.525767535063022e-06, "loss": 0.9206, "step": 144 }, { "epoch": 0.06, "learning_rate": 4.556459535424214e-06, "loss": 0.7768, "step": 148 }, { "epoch": 0.06, "learning_rate": 4.5861846961499975e-06, "loss": 0.8515, "step": 152 }, { "epoch": 0.07, "learning_rate": 4.6150020750907925e-06, "loss": 0.8131, "step": 156 }, { "epoch": 0.07, "learning_rate": 4.642965478900328e-06, "loss": 1.1253, "step": 160 }, { "epoch": 0.07, "learning_rate": 4.670124067711698e-06, "loss": 0.9236, "step": 164 }, { "epoch": 0.07, "learning_rate": 4.689992082159791e-06, "loss": 0.8775, "step": 168 }, { "epoch": 0.07, "learning_rate": 4.715848379822425e-06, "loss": 0.8848, "step": 172 }, { "epoch": 0.07, "learning_rate": 4.74101509336297e-06, "loss": 0.829, "step": 176 }, { "epoch": 0.08, "learning_rate": 4.76552805154028e-06, "loss": 0.9367, "step": 180 }, { "epoch": 0.08, "learning_rate": 4.789420361336724e-06, "loss": 0.6566, "step": 184 }, { "epoch": 0.08, "learning_rate": 4.812722676847563e-06, "loss": 0.8104, "step": 188 }, { "epoch": 0.08, "learning_rate": 4.835463435763974e-06, "loss": 0.74, "step": 192 }, { "epoch": 0.08, "learning_rate": 4.857669068026358e-06, "loss": 0.6171, "step": 196 }, { "epoch": 0.09, "learning_rate": 4.879364180487766e-06, "loss": 1.0545, "step": 200 }, { "epoch": 0.09, "learning_rate": 4.900571720823068e-06, "loss": 0.9191, "step": 204 }, { "epoch": 0.09, "learning_rate": 4.921313123421507e-06, "loss": 0.6995, "step": 208 }, { "epoch": 0.09, "learning_rate": 4.941608439588058e-06, "loss": 0.8479, "step": 212 }, { "epoch": 0.09, "learning_rate": 4.9614764540361516e-06, "loss": 0.8976, "step": 216 }, { "epoch": 0.09, "learning_rate": 4.980934789368156e-06, "loss": 0.851, "step": 220 }, { "epoch": 0.1, "learning_rate": 5e-06, "loss": 0.7368, "step": 224 }, { "epoch": 0.1, "learning_rate": 4.997812135355893e-06, "loss": 1.0125, "step": 228 }, { "epoch": 0.1, "learning_rate": 4.994894982497083e-06, "loss": 0.9607, "step": 232 }, { "epoch": 0.1, "learning_rate": 4.991977829638274e-06, "loss": 0.6321, "step": 236 }, { "epoch": 0.1, "learning_rate": 4.989060676779464e-06, "loss": 0.7108, "step": 240 }, { "epoch": 0.1, "learning_rate": 4.986143523920654e-06, "loss": 0.7881, "step": 244 }, { "epoch": 0.11, "learning_rate": 4.983226371061844e-06, "loss": 0.7147, "step": 248 }, { "epoch": 0.11, "learning_rate": 4.980309218203034e-06, "loss": 0.9225, "step": 252 }, { "epoch": 0.11, "learning_rate": 4.977392065344224e-06, "loss": 0.8849, "step": 256 }, { "epoch": 0.11, "learning_rate": 4.974474912485414e-06, "loss": 0.942, "step": 260 }, { "epoch": 0.11, "learning_rate": 4.971557759626604e-06, "loss": 1.1629, "step": 264 }, { "epoch": 0.11, "learning_rate": 4.968640606767795e-06, "loss": 0.8053, "step": 268 }, { "epoch": 0.12, "learning_rate": 4.965723453908986e-06, "loss": 0.6558, "step": 272 }, { "epoch": 0.12, "learning_rate": 4.962806301050176e-06, "loss": 0.9606, "step": 276 }, { "epoch": 0.12, "learning_rate": 4.959889148191366e-06, "loss": 0.9533, "step": 280 }, { "epoch": 0.12, "learning_rate": 4.956971995332556e-06, "loss": 0.8758, "step": 284 }, { "epoch": 0.12, "learning_rate": 4.954054842473746e-06, "loss": 0.5825, "step": 288 }, { "epoch": 0.12, "learning_rate": 4.951137689614936e-06, "loss": 0.8221, "step": 292 }, { "epoch": 0.13, "learning_rate": 4.948220536756126e-06, "loss": 0.8954, "step": 296 }, { "epoch": 0.13, "learning_rate": 4.945303383897317e-06, "loss": 0.9549, "step": 300 }, { "epoch": 0.13, "learning_rate": 4.942386231038507e-06, "loss": 0.8821, "step": 304 }, { "epoch": 0.13, "learning_rate": 4.939469078179697e-06, "loss": 0.9861, "step": 308 }, { "epoch": 0.13, "learning_rate": 4.936551925320887e-06, "loss": 0.9681, "step": 312 }, { "epoch": 0.13, "learning_rate": 4.933634772462078e-06, "loss": 0.647, "step": 316 }, { "epoch": 0.14, "learning_rate": 4.930717619603268e-06, "loss": 0.8151, "step": 320 }, { "epoch": 0.14, "learning_rate": 4.9278004667444575e-06, "loss": 1.0373, "step": 324 }, { "epoch": 0.14, "learning_rate": 4.9248833138856475e-06, "loss": 0.8858, "step": 328 }, { "epoch": 0.14, "learning_rate": 4.921966161026838e-06, "loss": 0.8392, "step": 332 }, { "epoch": 0.14, "learning_rate": 4.919049008168029e-06, "loss": 1.0194, "step": 336 }, { "epoch": 0.14, "learning_rate": 4.916131855309218e-06, "loss": 0.7719, "step": 340 }, { "epoch": 0.15, "learning_rate": 4.913214702450409e-06, "loss": 0.6672, "step": 344 }, { "epoch": 0.15, "learning_rate": 4.910297549591599e-06, "loss": 0.8939, "step": 348 }, { "epoch": 0.15, "learning_rate": 4.90738039673279e-06, "loss": 0.6781, "step": 352 }, { "epoch": 0.15, "learning_rate": 4.90446324387398e-06, "loss": 0.7391, "step": 356 }, { "epoch": 0.15, "learning_rate": 4.9015460910151695e-06, "loss": 0.9476, "step": 360 }, { "epoch": 0.15, "learning_rate": 4.8986289381563595e-06, "loss": 0.6729, "step": 364 }, { "epoch": 0.16, "learning_rate": 4.89571178529755e-06, "loss": 0.8046, "step": 368 }, { "epoch": 0.16, "learning_rate": 4.89279463243874e-06, "loss": 0.8521, "step": 372 }, { "epoch": 0.16, "learning_rate": 4.88987747957993e-06, "loss": 0.8688, "step": 376 }, { "epoch": 0.16, "learning_rate": 4.886960326721121e-06, "loss": 0.875, "step": 380 }, { "epoch": 0.16, "learning_rate": 4.884043173862311e-06, "loss": 0.6733, "step": 384 }, { "epoch": 0.16, "learning_rate": 4.881126021003501e-06, "loss": 0.7755, "step": 388 }, { "epoch": 0.17, "learning_rate": 4.878208868144691e-06, "loss": 0.7047, "step": 392 }, { "epoch": 0.17, "learning_rate": 4.8752917152858815e-06, "loss": 0.6979, "step": 396 }, { "epoch": 0.17, "learning_rate": 4.8723745624270714e-06, "loss": 0.8033, "step": 400 }, { "epoch": 0.17, "learning_rate": 4.869457409568261e-06, "loss": 0.8039, "step": 404 }, { "epoch": 0.17, "learning_rate": 4.866540256709452e-06, "loss": 0.9366, "step": 408 }, { "epoch": 0.18, "learning_rate": 4.863623103850642e-06, "loss": 0.609, "step": 412 }, { "epoch": 0.18, "learning_rate": 4.860705950991833e-06, "loss": 0.7258, "step": 416 }, { "epoch": 0.18, "learning_rate": 4.857788798133022e-06, "loss": 1.018, "step": 420 }, { "epoch": 0.18, "learning_rate": 4.854871645274213e-06, "loss": 0.7338, "step": 424 }, { "epoch": 0.18, "learning_rate": 4.851954492415403e-06, "loss": 0.6595, "step": 428 }, { "epoch": 0.18, "learning_rate": 4.8490373395565935e-06, "loss": 0.7456, "step": 432 }, { "epoch": 0.19, "learning_rate": 4.8461201866977834e-06, "loss": 0.637, "step": 436 }, { "epoch": 0.19, "learning_rate": 4.843203033838973e-06, "loss": 0.8026, "step": 440 }, { "epoch": 0.19, "learning_rate": 4.840285880980164e-06, "loss": 0.8439, "step": 444 }, { "epoch": 0.19, "learning_rate": 4.837368728121354e-06, "loss": 0.646, "step": 448 }, { "epoch": 0.19, "learning_rate": 4.834451575262544e-06, "loss": 0.8644, "step": 452 }, { "epoch": 0.19, "learning_rate": 4.831534422403734e-06, "loss": 0.8595, "step": 456 }, { "epoch": 0.2, "learning_rate": 4.828617269544925e-06, "loss": 1.023, "step": 460 }, { "epoch": 0.2, "learning_rate": 4.825700116686115e-06, "loss": 0.7745, "step": 464 }, { "epoch": 0.2, "learning_rate": 4.822782963827305e-06, "loss": 0.9293, "step": 468 }, { "epoch": 0.2, "learning_rate": 4.8198658109684954e-06, "loss": 0.7542, "step": 472 }, { "epoch": 0.2, "learning_rate": 4.816948658109685e-06, "loss": 1.0059, "step": 476 }, { "epoch": 0.2, "learning_rate": 4.814031505250875e-06, "loss": 0.8302, "step": 480 }, { "epoch": 0.21, "learning_rate": 4.811114352392065e-06, "loss": 0.809, "step": 484 }, { "epoch": 0.21, "learning_rate": 4.808197199533256e-06, "loss": 0.7628, "step": 488 }, { "epoch": 0.21, "learning_rate": 4.805280046674446e-06, "loss": 0.6753, "step": 492 }, { "epoch": 0.21, "learning_rate": 4.802362893815637e-06, "loss": 0.8719, "step": 496 }, { "epoch": 0.21, "learning_rate": 4.799445740956827e-06, "loss": 0.7059, "step": 500 }, { "epoch": 0.21, "learning_rate": 4.796528588098017e-06, "loss": 0.6902, "step": 504 }, { "epoch": 0.22, "learning_rate": 4.793611435239207e-06, "loss": 1.104, "step": 508 }, { "epoch": 0.22, "learning_rate": 4.790694282380397e-06, "loss": 0.837, "step": 512 }, { "epoch": 0.22, "learning_rate": 4.787777129521587e-06, "loss": 0.794, "step": 516 }, { "epoch": 0.22, "learning_rate": 4.784859976662777e-06, "loss": 0.7997, "step": 520 }, { "epoch": 0.22, "learning_rate": 4.781942823803968e-06, "loss": 0.6738, "step": 524 }, { "epoch": 0.22, "learning_rate": 4.779025670945158e-06, "loss": 0.963, "step": 528 }, { "epoch": 0.23, "learning_rate": 4.776108518086348e-06, "loss": 0.6323, "step": 532 }, { "epoch": 0.23, "learning_rate": 4.773191365227539e-06, "loss": 0.7243, "step": 536 }, { "epoch": 0.23, "learning_rate": 4.770274212368729e-06, "loss": 0.8873, "step": 540 }, { "epoch": 0.23, "learning_rate": 4.7673570595099186e-06, "loss": 0.7435, "step": 544 }, { "epoch": 0.23, "learning_rate": 4.7644399066511085e-06, "loss": 0.8006, "step": 548 }, { "epoch": 0.23, "learning_rate": 4.761522753792299e-06, "loss": 0.9557, "step": 552 }, { "epoch": 0.24, "learning_rate": 4.758605600933489e-06, "loss": 0.7131, "step": 556 }, { "epoch": 0.24, "learning_rate": 4.755688448074679e-06, "loss": 0.8007, "step": 560 }, { "epoch": 0.24, "learning_rate": 4.752771295215869e-06, "loss": 0.8634, "step": 564 }, { "epoch": 0.24, "learning_rate": 4.74985414235706e-06, "loss": 0.7848, "step": 568 }, { "epoch": 0.24, "learning_rate": 4.746936989498251e-06, "loss": 0.6159, "step": 572 }, { "epoch": 0.24, "learning_rate": 4.744019836639441e-06, "loss": 0.7327, "step": 576 }, { "epoch": 0.25, "learning_rate": 4.7411026837806305e-06, "loss": 0.733, "step": 580 }, { "epoch": 0.25, "learning_rate": 4.7381855309218205e-06, "loss": 0.8081, "step": 584 }, { "epoch": 0.25, "learning_rate": 4.735268378063011e-06, "loss": 0.9809, "step": 588 }, { "epoch": 0.25, "learning_rate": 4.732351225204201e-06, "loss": 0.7101, "step": 592 }, { "epoch": 0.25, "learning_rate": 4.729434072345391e-06, "loss": 0.6195, "step": 596 }, { "epoch": 0.26, "learning_rate": 4.726516919486582e-06, "loss": 0.6483, "step": 600 }, { "epoch": 0.26, "learning_rate": 4.723599766627772e-06, "loss": 0.819, "step": 604 }, { "epoch": 0.26, "learning_rate": 4.720682613768962e-06, "loss": 0.7032, "step": 608 }, { "epoch": 0.26, "learning_rate": 4.717765460910152e-06, "loss": 0.7933, "step": 612 }, { "epoch": 0.26, "learning_rate": 4.7148483080513425e-06, "loss": 0.9605, "step": 616 }, { "epoch": 0.26, "learning_rate": 4.7119311551925325e-06, "loss": 0.7783, "step": 620 }, { "epoch": 0.27, "learning_rate": 4.709014002333722e-06, "loss": 0.7616, "step": 624 }, { "epoch": 0.27, "learning_rate": 4.706096849474912e-06, "loss": 0.6611, "step": 628 }, { "epoch": 0.27, "learning_rate": 4.703179696616103e-06, "loss": 0.656, "step": 632 }, { "epoch": 0.27, "learning_rate": 4.700262543757294e-06, "loss": 0.731, "step": 636 }, { "epoch": 0.27, "learning_rate": 4.697345390898483e-06, "loss": 0.6204, "step": 640 }, { "epoch": 0.27, "learning_rate": 4.694428238039674e-06, "loss": 0.695, "step": 644 }, { "epoch": 0.28, "learning_rate": 4.691511085180864e-06, "loss": 0.9691, "step": 648 }, { "epoch": 0.28, "learning_rate": 4.6885939323220545e-06, "loss": 0.5099, "step": 652 }, { "epoch": 0.28, "learning_rate": 4.6856767794632445e-06, "loss": 0.6427, "step": 656 }, { "epoch": 0.28, "learning_rate": 4.682759626604434e-06, "loss": 0.9209, "step": 660 }, { "epoch": 0.28, "learning_rate": 4.679842473745624e-06, "loss": 0.891, "step": 664 }, { "epoch": 0.28, "learning_rate": 4.676925320886815e-06, "loss": 0.7068, "step": 668 }, { "epoch": 0.29, "learning_rate": 4.674008168028005e-06, "loss": 0.4388, "step": 672 }, { "epoch": 0.29, "learning_rate": 4.671091015169195e-06, "loss": 0.7769, "step": 676 }, { "epoch": 0.29, "learning_rate": 4.668173862310386e-06, "loss": 1.03, "step": 680 }, { "epoch": 0.29, "learning_rate": 4.665256709451576e-06, "loss": 0.9345, "step": 684 }, { "epoch": 0.29, "learning_rate": 4.662339556592766e-06, "loss": 0.8261, "step": 688 }, { "epoch": 0.29, "learning_rate": 4.659422403733956e-06, "loss": 0.5926, "step": 692 }, { "epoch": 0.3, "learning_rate": 4.656505250875146e-06, "loss": 0.618, "step": 696 }, { "epoch": 0.3, "learning_rate": 4.653588098016336e-06, "loss": 0.8133, "step": 700 }, { "epoch": 0.3, "learning_rate": 4.650670945157526e-06, "loss": 0.7822, "step": 704 }, { "epoch": 0.3, "learning_rate": 4.647753792298717e-06, "loss": 0.6367, "step": 708 }, { "epoch": 0.3, "learning_rate": 4.644836639439907e-06, "loss": 0.7457, "step": 712 }, { "epoch": 0.3, "learning_rate": 4.641919486581098e-06, "loss": 0.5508, "step": 716 }, { "epoch": 0.31, "learning_rate": 4.639002333722287e-06, "loss": 0.8247, "step": 720 }, { "epoch": 0.31, "learning_rate": 4.636085180863478e-06, "loss": 0.8024, "step": 724 }, { "epoch": 0.31, "learning_rate": 4.633168028004668e-06, "loss": 0.5489, "step": 728 }, { "epoch": 0.31, "learning_rate": 4.630250875145858e-06, "loss": 0.5789, "step": 732 }, { "epoch": 0.31, "learning_rate": 4.627333722287048e-06, "loss": 0.7541, "step": 736 }, { "epoch": 0.31, "learning_rate": 4.624416569428238e-06, "loss": 0.6148, "step": 740 }, { "epoch": 0.32, "learning_rate": 4.621499416569429e-06, "loss": 0.98, "step": 744 }, { "epoch": 0.32, "learning_rate": 4.618582263710619e-06, "loss": 0.635, "step": 748 }, { "epoch": 0.32, "learning_rate": 4.615665110851809e-06, "loss": 0.9664, "step": 752 }, { "epoch": 0.32, "learning_rate": 4.612747957992999e-06, "loss": 0.8736, "step": 756 }, { "epoch": 0.32, "learning_rate": 4.60983080513419e-06, "loss": 0.6281, "step": 760 }, { "epoch": 0.32, "learning_rate": 4.60691365227538e-06, "loss": 0.9843, "step": 764 }, { "epoch": 0.33, "learning_rate": 4.6039964994165695e-06, "loss": 0.7795, "step": 768 }, { "epoch": 0.33, "learning_rate": 4.60107934655776e-06, "loss": 0.8437, "step": 772 }, { "epoch": 0.33, "learning_rate": 4.59816219369895e-06, "loss": 0.6626, "step": 776 }, { "epoch": 0.33, "learning_rate": 4.59524504084014e-06, "loss": 0.8164, "step": 780 }, { "epoch": 0.33, "learning_rate": 4.59232788798133e-06, "loss": 0.7359, "step": 784 }, { "epoch": 0.34, "learning_rate": 4.589410735122521e-06, "loss": 0.7709, "step": 788 }, { "epoch": 0.34, "learning_rate": 4.586493582263711e-06, "loss": 0.8953, "step": 792 }, { "epoch": 0.34, "learning_rate": 4.583576429404902e-06, "loss": 0.57, "step": 796 }, { "epoch": 0.34, "learning_rate": 4.5806592765460916e-06, "loss": 0.9151, "step": 800 }, { "epoch": 0.34, "learning_rate": 4.5777421236872815e-06, "loss": 0.7838, "step": 804 }, { "epoch": 0.34, "learning_rate": 4.574824970828472e-06, "loss": 0.8183, "step": 808 }, { "epoch": 0.35, "learning_rate": 4.571907817969662e-06, "loss": 0.9169, "step": 812 }, { "epoch": 0.35, "learning_rate": 4.568990665110852e-06, "loss": 0.6786, "step": 816 }, { "epoch": 0.35, "learning_rate": 4.566073512252042e-06, "loss": 0.7783, "step": 820 }, { "epoch": 0.35, "learning_rate": 4.563156359393233e-06, "loss": 0.811, "step": 824 }, { "epoch": 0.35, "learning_rate": 4.560239206534423e-06, "loss": 0.8965, "step": 828 }, { "epoch": 0.35, "learning_rate": 4.557322053675613e-06, "loss": 0.7526, "step": 832 }, { "epoch": 0.36, "learning_rate": 4.5544049008168036e-06, "loss": 0.6549, "step": 836 }, { "epoch": 0.36, "learning_rate": 4.5514877479579935e-06, "loss": 0.77, "step": 840 }, { "epoch": 0.36, "learning_rate": 4.5485705950991834e-06, "loss": 0.5399, "step": 844 }, { "epoch": 0.36, "learning_rate": 4.545653442240373e-06, "loss": 0.7214, "step": 848 }, { "epoch": 0.36, "learning_rate": 4.542736289381564e-06, "loss": 0.7454, "step": 852 }, { "epoch": 0.36, "learning_rate": 4.539819136522754e-06, "loss": 0.6774, "step": 856 }, { "epoch": 0.37, "learning_rate": 4.536901983663944e-06, "loss": 0.855, "step": 860 }, { "epoch": 0.37, "learning_rate": 4.533984830805134e-06, "loss": 0.8038, "step": 864 }, { "epoch": 0.37, "learning_rate": 4.531067677946325e-06, "loss": 0.6897, "step": 868 }, { "epoch": 0.37, "learning_rate": 4.5281505250875156e-06, "loss": 0.774, "step": 872 }, { "epoch": 0.37, "learning_rate": 4.525233372228705e-06, "loss": 0.9166, "step": 876 }, { "epoch": 0.37, "learning_rate": 4.5223162193698954e-06, "loss": 0.5643, "step": 880 }, { "epoch": 0.38, "learning_rate": 4.519399066511085e-06, "loss": 0.5427, "step": 884 }, { "epoch": 0.38, "learning_rate": 4.516481913652276e-06, "loss": 0.6834, "step": 888 }, { "epoch": 0.38, "learning_rate": 4.513564760793466e-06, "loss": 0.8979, "step": 892 }, { "epoch": 0.38, "learning_rate": 4.510647607934656e-06, "loss": 0.8804, "step": 896 }, { "epoch": 0.38, "learning_rate": 4.507730455075847e-06, "loss": 0.6715, "step": 900 }, { "epoch": 0.38, "learning_rate": 4.504813302217037e-06, "loss": 0.8331, "step": 904 }, { "epoch": 0.39, "learning_rate": 4.501896149358227e-06, "loss": 0.9623, "step": 908 }, { "epoch": 0.39, "learning_rate": 4.498978996499417e-06, "loss": 0.7293, "step": 912 }, { "epoch": 0.39, "learning_rate": 4.496061843640607e-06, "loss": 0.6704, "step": 916 }, { "epoch": 0.39, "learning_rate": 4.493144690781797e-06, "loss": 0.6452, "step": 920 }, { "epoch": 0.39, "learning_rate": 4.490227537922987e-06, "loss": 0.7556, "step": 924 }, { "epoch": 0.39, "learning_rate": 4.487310385064177e-06, "loss": 0.6235, "step": 928 }, { "epoch": 0.4, "learning_rate": 4.484393232205368e-06, "loss": 0.789, "step": 932 }, { "epoch": 0.4, "learning_rate": 4.481476079346558e-06, "loss": 0.7959, "step": 936 }, { "epoch": 0.4, "learning_rate": 4.478558926487748e-06, "loss": 0.8286, "step": 940 }, { "epoch": 0.4, "learning_rate": 4.475641773628939e-06, "loss": 0.8604, "step": 944 }, { "epoch": 0.4, "learning_rate": 4.472724620770129e-06, "loss": 0.6135, "step": 948 }, { "epoch": 0.4, "learning_rate": 4.469807467911319e-06, "loss": 0.4793, "step": 952 }, { "epoch": 0.41, "learning_rate": 4.4668903150525085e-06, "loss": 0.7225, "step": 956 }, { "epoch": 0.41, "learning_rate": 4.463973162193699e-06, "loss": 0.5255, "step": 960 }, { "epoch": 0.41, "learning_rate": 4.461056009334889e-06, "loss": 0.4695, "step": 964 }, { "epoch": 0.41, "learning_rate": 4.45813885647608e-06, "loss": 0.8741, "step": 968 }, { "epoch": 0.41, "learning_rate": 4.45522170361727e-06, "loss": 0.6311, "step": 972 }, { "epoch": 0.41, "learning_rate": 4.45230455075846e-06, "loss": 0.6423, "step": 976 }, { "epoch": 0.42, "learning_rate": 4.449387397899651e-06, "loss": 0.7879, "step": 980 }, { "epoch": 0.42, "learning_rate": 4.446470245040841e-06, "loss": 0.9635, "step": 984 }, { "epoch": 0.42, "learning_rate": 4.4435530921820306e-06, "loss": 0.6497, "step": 988 }, { "epoch": 0.42, "learning_rate": 4.4406359393232205e-06, "loss": 0.673, "step": 992 }, { "epoch": 0.42, "learning_rate": 4.437718786464411e-06, "loss": 0.7035, "step": 996 }, { "epoch": 0.43, "learning_rate": 4.434801633605601e-06, "loss": 0.7664, "step": 1000 }, { "epoch": 0.43, "learning_rate": 4.431884480746791e-06, "loss": 0.6817, "step": 1004 }, { "epoch": 0.43, "learning_rate": 4.428967327887982e-06, "loss": 0.7443, "step": 1008 }, { "epoch": 0.43, "learning_rate": 4.426050175029172e-06, "loss": 0.7205, "step": 1012 }, { "epoch": 0.43, "learning_rate": 4.423133022170362e-06, "loss": 0.6672, "step": 1016 }, { "epoch": 0.43, "learning_rate": 4.420215869311552e-06, "loss": 0.7427, "step": 1020 }, { "epoch": 0.44, "learning_rate": 4.4172987164527425e-06, "loss": 0.659, "step": 1024 }, { "epoch": 0.44, "learning_rate": 4.4143815635939325e-06, "loss": 0.832, "step": 1028 }, { "epoch": 0.44, "learning_rate": 4.411464410735123e-06, "loss": 0.7823, "step": 1032 }, { "epoch": 0.44, "learning_rate": 4.408547257876313e-06, "loss": 0.712, "step": 1036 }, { "epoch": 0.44, "learning_rate": 4.405630105017503e-06, "loss": 0.8165, "step": 1040 }, { "epoch": 0.44, "learning_rate": 4.402712952158694e-06, "loss": 0.6221, "step": 1044 }, { "epoch": 0.45, "learning_rate": 4.399795799299884e-06, "loss": 0.684, "step": 1048 }, { "epoch": 0.45, "learning_rate": 4.396878646441074e-06, "loss": 0.6845, "step": 1052 }, { "epoch": 0.45, "learning_rate": 4.393961493582264e-06, "loss": 0.8462, "step": 1056 }, { "epoch": 0.45, "learning_rate": 4.3910443407234545e-06, "loss": 0.6242, "step": 1060 }, { "epoch": 0.45, "learning_rate": 4.3881271878646445e-06, "loss": 0.5025, "step": 1064 }, { "epoch": 0.45, "learning_rate": 4.385210035005834e-06, "loss": 0.6651, "step": 1068 }, { "epoch": 0.46, "learning_rate": 4.382292882147025e-06, "loss": 0.7653, "step": 1072 }, { "epoch": 0.46, "learning_rate": 4.379375729288215e-06, "loss": 0.9375, "step": 1076 }, { "epoch": 0.46, "learning_rate": 4.376458576429405e-06, "loss": 0.6936, "step": 1080 }, { "epoch": 0.46, "learning_rate": 4.373541423570595e-06, "loss": 0.6709, "step": 1084 }, { "epoch": 0.46, "learning_rate": 4.370624270711786e-06, "loss": 0.7321, "step": 1088 }, { "epoch": 0.46, "learning_rate": 4.367707117852976e-06, "loss": 1.0432, "step": 1092 }, { "epoch": 0.47, "learning_rate": 4.364789964994166e-06, "loss": 0.6687, "step": 1096 }, { "epoch": 0.47, "learning_rate": 4.3618728121353565e-06, "loss": 0.9193, "step": 1100 }, { "epoch": 0.47, "learning_rate": 4.358955659276546e-06, "loss": 0.5975, "step": 1104 }, { "epoch": 0.47, "learning_rate": 4.356038506417737e-06, "loss": 0.7527, "step": 1108 }, { "epoch": 0.47, "learning_rate": 4.353121353558927e-06, "loss": 0.9754, "step": 1112 }, { "epoch": 0.47, "learning_rate": 4.350204200700117e-06, "loss": 0.8299, "step": 1116 }, { "epoch": 0.48, "learning_rate": 4.347287047841307e-06, "loss": 0.8524, "step": 1120 }, { "epoch": 0.48, "learning_rate": 4.344369894982498e-06, "loss": 0.8139, "step": 1124 }, { "epoch": 0.48, "learning_rate": 4.341452742123688e-06, "loss": 0.6144, "step": 1128 }, { "epoch": 0.48, "learning_rate": 4.338535589264878e-06, "loss": 0.8328, "step": 1132 }, { "epoch": 0.48, "learning_rate": 4.3356184364060684e-06, "loss": 0.5855, "step": 1136 }, { "epoch": 0.48, "learning_rate": 4.332701283547258e-06, "loss": 0.8227, "step": 1140 }, { "epoch": 0.49, "learning_rate": 4.329784130688448e-06, "loss": 0.6442, "step": 1144 }, { "epoch": 0.49, "learning_rate": 4.326866977829638e-06, "loss": 0.5148, "step": 1148 }, { "epoch": 0.49, "learning_rate": 4.323949824970829e-06, "loss": 0.8127, "step": 1152 }, { "epoch": 0.49, "learning_rate": 4.321032672112019e-06, "loss": 0.8909, "step": 1156 }, { "epoch": 0.49, "learning_rate": 4.318115519253209e-06, "loss": 0.6379, "step": 1160 }, { "epoch": 0.49, "learning_rate": 4.315198366394399e-06, "loss": 0.5912, "step": 1164 }, { "epoch": 0.5, "learning_rate": 4.31228121353559e-06, "loss": 0.6885, "step": 1168 }, { "epoch": 0.5, "learning_rate": 4.3093640606767804e-06, "loss": 0.8584, "step": 1172 }, { "epoch": 0.5, "learning_rate": 4.3064469078179695e-06, "loss": 0.6441, "step": 1176 }, { "epoch": 0.5, "learning_rate": 4.30352975495916e-06, "loss": 0.6933, "step": 1180 }, { "epoch": 0.5, "learning_rate": 4.30061260210035e-06, "loss": 0.6647, "step": 1184 }, { "epoch": 0.51, "learning_rate": 4.297695449241541e-06, "loss": 0.7737, "step": 1188 }, { "epoch": 0.51, "learning_rate": 4.294778296382731e-06, "loss": 0.8209, "step": 1192 }, { "epoch": 0.51, "learning_rate": 4.291861143523921e-06, "loss": 0.9695, "step": 1196 }, { "epoch": 0.51, "learning_rate": 4.288943990665112e-06, "loss": 0.855, "step": 1200 }, { "epoch": 0.51, "learning_rate": 4.286026837806302e-06, "loss": 0.7051, "step": 1204 }, { "epoch": 0.51, "learning_rate": 4.283109684947492e-06, "loss": 0.4676, "step": 1208 }, { "epoch": 0.52, "learning_rate": 4.2801925320886815e-06, "loss": 0.6563, "step": 1212 }, { "epoch": 0.52, "learning_rate": 4.277275379229872e-06, "loss": 0.6791, "step": 1216 }, { "epoch": 0.52, "learning_rate": 4.274358226371062e-06, "loss": 0.6531, "step": 1220 }, { "epoch": 0.52, "learning_rate": 4.271441073512252e-06, "loss": 0.7638, "step": 1224 }, { "epoch": 0.52, "learning_rate": 4.268523920653442e-06, "loss": 0.8035, "step": 1228 }, { "epoch": 0.52, "learning_rate": 4.265606767794633e-06, "loss": 0.6947, "step": 1232 }, { "epoch": 0.53, "learning_rate": 4.262689614935823e-06, "loss": 0.4111, "step": 1236 }, { "epoch": 0.53, "learning_rate": 4.259772462077013e-06, "loss": 0.787, "step": 1240 }, { "epoch": 0.53, "learning_rate": 4.2568553092182036e-06, "loss": 0.7891, "step": 1244 }, { "epoch": 0.53, "learning_rate": 4.2539381563593935e-06, "loss": 0.6001, "step": 1248 }, { "epoch": 0.53, "learning_rate": 4.251021003500584e-06, "loss": 0.5617, "step": 1252 }, { "epoch": 0.53, "learning_rate": 4.248103850641773e-06, "loss": 0.9523, "step": 1256 }, { "epoch": 0.54, "learning_rate": 4.245186697782964e-06, "loss": 0.8455, "step": 1260 }, { "epoch": 0.54, "learning_rate": 4.242269544924154e-06, "loss": 0.7674, "step": 1264 }, { "epoch": 0.54, "learning_rate": 4.239352392065345e-06, "loss": 0.5396, "step": 1268 }, { "epoch": 0.54, "learning_rate": 4.236435239206535e-06, "loss": 0.5736, "step": 1272 }, { "epoch": 0.54, "learning_rate": 4.233518086347725e-06, "loss": 0.5877, "step": 1276 }, { "epoch": 0.54, "learning_rate": 4.2306009334889156e-06, "loss": 0.6466, "step": 1280 }, { "epoch": 0.55, "learning_rate": 4.2276837806301055e-06, "loss": 0.4551, "step": 1284 }, { "epoch": 0.55, "learning_rate": 4.2247666277712954e-06, "loss": 0.5959, "step": 1288 }, { "epoch": 0.55, "learning_rate": 4.221849474912485e-06, "loss": 0.6903, "step": 1292 }, { "epoch": 0.55, "learning_rate": 4.218932322053676e-06, "loss": 0.7372, "step": 1296 }, { "epoch": 0.55, "learning_rate": 4.216015169194866e-06, "loss": 0.7741, "step": 1300 }, { "epoch": 0.55, "learning_rate": 4.213098016336056e-06, "loss": 0.6861, "step": 1304 }, { "epoch": 0.56, "learning_rate": 4.210180863477247e-06, "loss": 0.7325, "step": 1308 }, { "epoch": 0.56, "learning_rate": 4.207263710618437e-06, "loss": 0.6446, "step": 1312 }, { "epoch": 0.56, "learning_rate": 4.204346557759627e-06, "loss": 0.61, "step": 1316 }, { "epoch": 0.56, "learning_rate": 4.201429404900817e-06, "loss": 0.6286, "step": 1320 }, { "epoch": 0.56, "learning_rate": 4.1985122520420074e-06, "loss": 0.7191, "step": 1324 }, { "epoch": 0.56, "learning_rate": 4.195595099183197e-06, "loss": 0.8621, "step": 1328 }, { "epoch": 0.57, "learning_rate": 4.192677946324388e-06, "loss": 0.7634, "step": 1332 }, { "epoch": 0.57, "learning_rate": 4.189760793465578e-06, "loss": 0.7067, "step": 1336 }, { "epoch": 0.57, "learning_rate": 4.186843640606768e-06, "loss": 0.6049, "step": 1340 }, { "epoch": 0.57, "learning_rate": 4.183926487747959e-06, "loss": 0.3731, "step": 1344 }, { "epoch": 0.57, "learning_rate": 4.181009334889149e-06, "loss": 0.5955, "step": 1348 }, { "epoch": 0.57, "learning_rate": 4.178092182030339e-06, "loss": 0.5576, "step": 1352 }, { "epoch": 0.58, "learning_rate": 4.175175029171529e-06, "loss": 0.6731, "step": 1356 }, { "epoch": 0.58, "learning_rate": 4.172257876312719e-06, "loss": 0.8847, "step": 1360 }, { "epoch": 0.58, "learning_rate": 4.169340723453909e-06, "loss": 0.6158, "step": 1364 }, { "epoch": 0.58, "learning_rate": 4.166423570595099e-06, "loss": 0.7721, "step": 1368 }, { "epoch": 0.58, "learning_rate": 4.16350641773629e-06, "loss": 0.6517, "step": 1372 }, { "epoch": 0.59, "learning_rate": 4.16058926487748e-06, "loss": 0.6014, "step": 1376 }, { "epoch": 0.59, "learning_rate": 4.15767211201867e-06, "loss": 0.8049, "step": 1380 }, { "epoch": 0.59, "learning_rate": 4.15475495915986e-06, "loss": 0.7245, "step": 1384 }, { "epoch": 0.59, "learning_rate": 4.151837806301051e-06, "loss": 0.8411, "step": 1388 }, { "epoch": 0.59, "learning_rate": 4.148920653442241e-06, "loss": 0.6463, "step": 1392 }, { "epoch": 0.59, "learning_rate": 4.1460035005834306e-06, "loss": 0.5053, "step": 1396 }, { "epoch": 0.6, "learning_rate": 4.143086347724621e-06, "loss": 0.4919, "step": 1400 }, { "epoch": 0.6, "learning_rate": 4.140169194865811e-06, "loss": 0.7352, "step": 1404 }, { "epoch": 0.6, "learning_rate": 4.137252042007002e-06, "loss": 0.5577, "step": 1408 }, { "epoch": 0.6, "learning_rate": 4.134334889148192e-06, "loss": 0.7224, "step": 1412 }, { "epoch": 0.6, "learning_rate": 4.131417736289382e-06, "loss": 0.9195, "step": 1416 }, { "epoch": 0.6, "learning_rate": 4.128500583430572e-06, "loss": 0.7652, "step": 1420 }, { "epoch": 0.61, "learning_rate": 4.125583430571763e-06, "loss": 0.756, "step": 1424 }, { "epoch": 0.61, "learning_rate": 4.122666277712953e-06, "loss": 0.7378, "step": 1428 }, { "epoch": 0.61, "learning_rate": 4.1197491248541425e-06, "loss": 0.7264, "step": 1432 }, { "epoch": 0.61, "learning_rate": 4.116831971995333e-06, "loss": 0.7489, "step": 1436 }, { "epoch": 0.61, "learning_rate": 4.113914819136523e-06, "loss": 0.661, "step": 1440 }, { "epoch": 0.61, "learning_rate": 4.110997666277713e-06, "loss": 0.6761, "step": 1444 }, { "epoch": 0.62, "learning_rate": 4.108080513418903e-06, "loss": 0.62, "step": 1448 }, { "epoch": 0.62, "learning_rate": 4.105163360560094e-06, "loss": 0.5477, "step": 1452 }, { "epoch": 0.62, "learning_rate": 4.102246207701284e-06, "loss": 0.7107, "step": 1456 }, { "epoch": 0.62, "learning_rate": 4.099329054842474e-06, "loss": 0.9508, "step": 1460 }, { "epoch": 0.62, "learning_rate": 4.096411901983664e-06, "loss": 0.7668, "step": 1464 }, { "epoch": 0.62, "learning_rate": 4.0934947491248545e-06, "loss": 0.7971, "step": 1468 }, { "epoch": 0.63, "learning_rate": 4.0905775962660445e-06, "loss": 0.7532, "step": 1472 }, { "epoch": 0.63, "learning_rate": 4.087660443407234e-06, "loss": 0.6721, "step": 1476 }, { "epoch": 0.63, "learning_rate": 4.084743290548425e-06, "loss": 0.702, "step": 1480 }, { "epoch": 0.63, "learning_rate": 4.081826137689615e-06, "loss": 0.5931, "step": 1484 }, { "epoch": 0.63, "learning_rate": 4.078908984830806e-06, "loss": 0.8941, "step": 1488 }, { "epoch": 0.63, "learning_rate": 4.075991831971996e-06, "loss": 0.4728, "step": 1492 }, { "epoch": 0.64, "learning_rate": 4.073074679113186e-06, "loss": 0.5366, "step": 1496 }, { "epoch": 0.64, "learning_rate": 4.070157526254377e-06, "loss": 0.6567, "step": 1500 }, { "epoch": 0.64, "learning_rate": 4.0672403733955665e-06, "loss": 0.6594, "step": 1504 }, { "epoch": 0.64, "learning_rate": 4.0643232205367565e-06, "loss": 0.5096, "step": 1508 }, { "epoch": 0.64, "learning_rate": 4.061406067677946e-06, "loss": 0.6852, "step": 1512 }, { "epoch": 0.64, "learning_rate": 4.058488914819137e-06, "loss": 0.7667, "step": 1516 }, { "epoch": 0.65, "learning_rate": 4.055571761960327e-06, "loss": 0.662, "step": 1520 }, { "epoch": 0.65, "learning_rate": 4.052654609101517e-06, "loss": 0.6275, "step": 1524 }, { "epoch": 0.65, "learning_rate": 4.049737456242707e-06, "loss": 0.715, "step": 1528 }, { "epoch": 0.65, "learning_rate": 4.046820303383898e-06, "loss": 0.4791, "step": 1532 }, { "epoch": 0.65, "learning_rate": 4.043903150525088e-06, "loss": 0.6893, "step": 1536 }, { "epoch": 0.65, "learning_rate": 4.040985997666278e-06, "loss": 0.4941, "step": 1540 }, { "epoch": 0.66, "learning_rate": 4.0380688448074685e-06, "loss": 0.5576, "step": 1544 }, { "epoch": 0.66, "learning_rate": 4.035151691948658e-06, "loss": 0.7089, "step": 1548 }, { "epoch": 0.66, "learning_rate": 4.032234539089848e-06, "loss": 0.4944, "step": 1552 }, { "epoch": 0.66, "learning_rate": 4.029317386231038e-06, "loss": 0.4799, "step": 1556 }, { "epoch": 0.66, "learning_rate": 4.026400233372229e-06, "loss": 0.6483, "step": 1560 }, { "epoch": 0.66, "learning_rate": 4.023483080513419e-06, "loss": 0.7882, "step": 1564 }, { "epoch": 0.67, "learning_rate": 4.02056592765461e-06, "loss": 0.6979, "step": 1568 }, { "epoch": 0.67, "learning_rate": 4.0176487747958e-06, "loss": 0.6079, "step": 1572 }, { "epoch": 0.67, "learning_rate": 4.01473162193699e-06, "loss": 0.7046, "step": 1576 }, { "epoch": 0.67, "learning_rate": 4.0118144690781804e-06, "loss": 0.5994, "step": 1580 }, { "epoch": 0.67, "learning_rate": 4.00889731621937e-06, "loss": 0.628, "step": 1584 }, { "epoch": 0.68, "learning_rate": 4.00598016336056e-06, "loss": 0.4581, "step": 1588 }, { "epoch": 0.68, "learning_rate": 4.00306301050175e-06, "loss": 0.6283, "step": 1592 }, { "epoch": 0.68, "learning_rate": 4.000145857642941e-06, "loss": 0.6736, "step": 1596 }, { "epoch": 0.68, "learning_rate": 3.997228704784131e-06, "loss": 0.6118, "step": 1600 }, { "epoch": 0.68, "learning_rate": 3.994311551925321e-06, "loss": 0.8692, "step": 1604 }, { "epoch": 0.68, "learning_rate": 3.991394399066512e-06, "loss": 0.7431, "step": 1608 }, { "epoch": 0.69, "learning_rate": 3.988477246207702e-06, "loss": 0.6712, "step": 1612 }, { "epoch": 0.69, "learning_rate": 3.985560093348892e-06, "loss": 0.5141, "step": 1616 }, { "epoch": 0.69, "learning_rate": 3.9826429404900815e-06, "loss": 0.4037, "step": 1620 }, { "epoch": 0.69, "learning_rate": 3.979725787631272e-06, "loss": 0.5442, "step": 1624 }, { "epoch": 0.69, "learning_rate": 3.976808634772462e-06, "loss": 0.7256, "step": 1628 }, { "epoch": 0.69, "learning_rate": 3.973891481913652e-06, "loss": 0.7292, "step": 1632 }, { "epoch": 0.7, "learning_rate": 3.970974329054843e-06, "loss": 0.7739, "step": 1636 }, { "epoch": 0.7, "learning_rate": 3.968057176196033e-06, "loss": 0.6673, "step": 1640 }, { "epoch": 0.7, "learning_rate": 3.965140023337224e-06, "loss": 0.8099, "step": 1644 }, { "epoch": 0.7, "learning_rate": 3.962222870478414e-06, "loss": 0.5271, "step": 1648 }, { "epoch": 0.7, "learning_rate": 3.9593057176196036e-06, "loss": 0.7032, "step": 1652 }, { "epoch": 0.7, "learning_rate": 3.9563885647607935e-06, "loss": 0.6766, "step": 1656 }, { "epoch": 0.71, "learning_rate": 3.953471411901984e-06, "loss": 0.6286, "step": 1660 }, { "epoch": 0.71, "learning_rate": 3.950554259043174e-06, "loss": 0.6236, "step": 1664 }, { "epoch": 0.71, "learning_rate": 3.947637106184364e-06, "loss": 0.7438, "step": 1668 }, { "epoch": 0.71, "learning_rate": 3.944719953325555e-06, "loss": 0.6499, "step": 1672 }, { "epoch": 0.71, "learning_rate": 3.941802800466745e-06, "loss": 0.7669, "step": 1676 }, { "epoch": 0.71, "learning_rate": 3.938885647607935e-06, "loss": 0.4944, "step": 1680 }, { "epoch": 0.72, "learning_rate": 3.935968494749125e-06, "loss": 0.4765, "step": 1684 }, { "epoch": 0.72, "learning_rate": 3.9330513418903156e-06, "loss": 0.7785, "step": 1688 }, { "epoch": 0.72, "learning_rate": 3.9301341890315055e-06, "loss": 0.6521, "step": 1692 }, { "epoch": 0.72, "learning_rate": 3.9272170361726954e-06, "loss": 0.6744, "step": 1696 }, { "epoch": 0.72, "learning_rate": 3.924299883313886e-06, "loss": 0.6819, "step": 1700 }, { "epoch": 0.72, "learning_rate": 3.921382730455076e-06, "loss": 0.6566, "step": 1704 }, { "epoch": 0.73, "learning_rate": 3.918465577596267e-06, "loss": 0.7201, "step": 1708 }, { "epoch": 0.73, "learning_rate": 3.915548424737456e-06, "loss": 0.373, "step": 1712 }, { "epoch": 0.73, "learning_rate": 3.912631271878647e-06, "loss": 0.8481, "step": 1716 }, { "epoch": 0.73, "learning_rate": 3.909714119019837e-06, "loss": 0.6479, "step": 1720 }, { "epoch": 0.73, "learning_rate": 3.9067969661610276e-06, "loss": 0.5682, "step": 1724 }, { "epoch": 0.73, "learning_rate": 3.9038798133022175e-06, "loss": 0.651, "step": 1728 }, { "epoch": 0.74, "learning_rate": 3.9009626604434074e-06, "loss": 0.637, "step": 1732 }, { "epoch": 0.74, "learning_rate": 3.898045507584598e-06, "loss": 0.5119, "step": 1736 }, { "epoch": 0.74, "learning_rate": 3.895128354725788e-06, "loss": 0.5434, "step": 1740 }, { "epoch": 0.74, "learning_rate": 3.892211201866978e-06, "loss": 0.7295, "step": 1744 }, { "epoch": 0.74, "learning_rate": 3.889294049008168e-06, "loss": 0.7214, "step": 1748 }, { "epoch": 0.74, "learning_rate": 3.886376896149359e-06, "loss": 0.5946, "step": 1752 }, { "epoch": 0.75, "learning_rate": 3.883459743290549e-06, "loss": 0.6668, "step": 1756 }, { "epoch": 0.75, "learning_rate": 3.880542590431739e-06, "loss": 0.6054, "step": 1760 }, { "epoch": 0.75, "learning_rate": 3.877625437572929e-06, "loss": 0.6904, "step": 1764 }, { "epoch": 0.75, "learning_rate": 3.874708284714119e-06, "loss": 0.7803, "step": 1768 }, { "epoch": 0.75, "learning_rate": 3.871791131855309e-06, "loss": 0.6766, "step": 1772 }, { "epoch": 0.76, "learning_rate": 3.868873978996499e-06, "loss": 0.6957, "step": 1776 }, { "epoch": 0.76, "learning_rate": 3.86595682613769e-06, "loss": 0.8129, "step": 1780 }, { "epoch": 0.76, "learning_rate": 3.86303967327888e-06, "loss": 0.7652, "step": 1784 }, { "epoch": 0.76, "learning_rate": 3.860122520420071e-06, "loss": 0.4505, "step": 1788 }, { "epoch": 0.76, "learning_rate": 3.85720536756126e-06, "loss": 0.6318, "step": 1792 }, { "epoch": 0.76, "learning_rate": 3.854288214702451e-06, "loss": 0.7018, "step": 1796 }, { "epoch": 0.77, "learning_rate": 3.8513710618436415e-06, "loss": 0.8348, "step": 1800 }, { "epoch": 0.77, "learning_rate": 3.848453908984831e-06, "loss": 0.5523, "step": 1804 }, { "epoch": 0.77, "learning_rate": 3.845536756126021e-06, "loss": 0.5858, "step": 1808 }, { "epoch": 0.77, "learning_rate": 3.842619603267211e-06, "loss": 0.3905, "step": 1812 }, { "epoch": 0.77, "learning_rate": 3.839702450408402e-06, "loss": 0.7015, "step": 1816 }, { "epoch": 0.77, "learning_rate": 3.836785297549592e-06, "loss": 0.4228, "step": 1820 }, { "epoch": 0.78, "learning_rate": 3.833868144690782e-06, "loss": 0.7709, "step": 1824 }, { "epoch": 0.78, "learning_rate": 3.830950991831972e-06, "loss": 0.5833, "step": 1828 }, { "epoch": 0.78, "learning_rate": 3.828033838973163e-06, "loss": 0.6342, "step": 1832 }, { "epoch": 0.78, "learning_rate": 3.825116686114353e-06, "loss": 0.843, "step": 1836 }, { "epoch": 0.78, "learning_rate": 3.8221995332555425e-06, "loss": 0.5876, "step": 1840 }, { "epoch": 0.78, "learning_rate": 3.819282380396733e-06, "loss": 0.5432, "step": 1844 }, { "epoch": 0.79, "learning_rate": 3.816365227537923e-06, "loss": 0.7204, "step": 1848 }, { "epoch": 0.79, "learning_rate": 3.8134480746791136e-06, "loss": 0.4466, "step": 1852 }, { "epoch": 0.79, "learning_rate": 3.8105309218203036e-06, "loss": 0.623, "step": 1856 }, { "epoch": 0.79, "learning_rate": 3.807613768961494e-06, "loss": 0.3623, "step": 1860 }, { "epoch": 0.79, "learning_rate": 3.804696616102684e-06, "loss": 0.6491, "step": 1864 }, { "epoch": 0.79, "learning_rate": 3.8017794632438742e-06, "loss": 0.6469, "step": 1868 }, { "epoch": 0.8, "learning_rate": 3.7988623103850646e-06, "loss": 0.5412, "step": 1872 }, { "epoch": 0.8, "learning_rate": 3.7959451575262545e-06, "loss": 0.995, "step": 1876 }, { "epoch": 0.8, "learning_rate": 3.793028004667445e-06, "loss": 0.6152, "step": 1880 }, { "epoch": 0.8, "learning_rate": 3.790110851808635e-06, "loss": 0.7216, "step": 1884 }, { "epoch": 0.8, "learning_rate": 3.787193698949825e-06, "loss": 0.5137, "step": 1888 }, { "epoch": 0.8, "learning_rate": 3.784276546091015e-06, "loss": 0.6703, "step": 1892 }, { "epoch": 0.81, "learning_rate": 3.7813593932322055e-06, "loss": 0.5394, "step": 1896 }, { "epoch": 0.81, "learning_rate": 3.7784422403733963e-06, "loss": 0.6228, "step": 1900 }, { "epoch": 0.81, "learning_rate": 3.7755250875145862e-06, "loss": 0.6231, "step": 1904 }, { "epoch": 0.81, "learning_rate": 3.7726079346557766e-06, "loss": 0.6658, "step": 1908 }, { "epoch": 0.81, "learning_rate": 3.7696907817969665e-06, "loss": 0.5279, "step": 1912 }, { "epoch": 0.81, "learning_rate": 3.766773628938157e-06, "loss": 0.4709, "step": 1916 }, { "epoch": 0.82, "learning_rate": 3.763856476079347e-06, "loss": 0.6442, "step": 1920 }, { "epoch": 0.82, "learning_rate": 3.760939323220537e-06, "loss": 0.6787, "step": 1924 }, { "epoch": 0.82, "learning_rate": 3.758022170361727e-06, "loss": 0.6453, "step": 1928 }, { "epoch": 0.82, "learning_rate": 3.7551050175029175e-06, "loss": 0.7487, "step": 1932 }, { "epoch": 0.82, "learning_rate": 3.752187864644108e-06, "loss": 0.6078, "step": 1936 }, { "epoch": 0.82, "learning_rate": 3.749270711785298e-06, "loss": 0.5252, "step": 1940 }, { "epoch": 0.83, "learning_rate": 3.746353558926488e-06, "loss": 0.4936, "step": 1944 }, { "epoch": 0.83, "learning_rate": 3.743436406067678e-06, "loss": 0.4545, "step": 1948 }, { "epoch": 0.83, "learning_rate": 3.7405192532088685e-06, "loss": 0.7937, "step": 1952 }, { "epoch": 0.83, "learning_rate": 3.7376021003500584e-06, "loss": 0.8007, "step": 1956 }, { "epoch": 0.83, "learning_rate": 3.7346849474912488e-06, "loss": 0.4401, "step": 1960 }, { "epoch": 0.84, "learning_rate": 3.731767794632439e-06, "loss": 0.8051, "step": 1964 }, { "epoch": 0.84, "learning_rate": 3.728850641773629e-06, "loss": 0.7178, "step": 1968 }, { "epoch": 0.84, "learning_rate": 3.72593348891482e-06, "loss": 0.5673, "step": 1972 }, { "epoch": 0.84, "learning_rate": 3.7230163360560094e-06, "loss": 0.8238, "step": 1976 }, { "epoch": 0.84, "learning_rate": 3.7200991831972e-06, "loss": 0.646, "step": 1980 }, { "epoch": 0.84, "learning_rate": 3.7171820303383897e-06, "loss": 0.5166, "step": 1984 }, { "epoch": 0.85, "learning_rate": 3.7142648774795804e-06, "loss": 0.5725, "step": 1988 }, { "epoch": 0.85, "learning_rate": 3.7113477246207704e-06, "loss": 0.8298, "step": 1992 }, { "epoch": 0.85, "learning_rate": 3.7084305717619607e-06, "loss": 0.5858, "step": 1996 }, { "epoch": 0.85, "learning_rate": 3.705513418903151e-06, "loss": 0.5432, "step": 2000 }, { "epoch": 0.85, "learning_rate": 3.702596266044341e-06, "loss": 0.5454, "step": 2004 }, { "epoch": 0.85, "learning_rate": 3.6996791131855314e-06, "loss": 0.6, "step": 2008 }, { "epoch": 0.86, "learning_rate": 3.6967619603267213e-06, "loss": 0.5889, "step": 2012 }, { "epoch": 0.86, "learning_rate": 3.6938448074679117e-06, "loss": 0.7542, "step": 2016 }, { "epoch": 0.86, "learning_rate": 3.6909276546091016e-06, "loss": 0.7573, "step": 2020 }, { "epoch": 0.86, "learning_rate": 3.688010501750292e-06, "loss": 0.7429, "step": 2024 }, { "epoch": 0.86, "learning_rate": 3.685093348891482e-06, "loss": 0.6015, "step": 2028 }, { "epoch": 0.86, "learning_rate": 3.6821761960326723e-06, "loss": 0.6009, "step": 2032 }, { "epoch": 0.87, "learning_rate": 3.6792590431738627e-06, "loss": 0.4447, "step": 2036 }, { "epoch": 0.87, "learning_rate": 3.6763418903150526e-06, "loss": 0.667, "step": 2040 }, { "epoch": 0.87, "learning_rate": 3.673424737456243e-06, "loss": 0.594, "step": 2044 }, { "epoch": 0.87, "learning_rate": 3.670507584597433e-06, "loss": 0.6368, "step": 2048 }, { "epoch": 0.87, "learning_rate": 3.6675904317386237e-06, "loss": 0.5883, "step": 2052 }, { "epoch": 0.87, "learning_rate": 3.6646732788798132e-06, "loss": 0.7004, "step": 2056 }, { "epoch": 0.88, "learning_rate": 3.661756126021004e-06, "loss": 0.6169, "step": 2060 }, { "epoch": 0.88, "learning_rate": 3.6588389731621944e-06, "loss": 0.6757, "step": 2064 }, { "epoch": 0.88, "learning_rate": 3.6559218203033843e-06, "loss": 0.6134, "step": 2068 }, { "epoch": 0.88, "learning_rate": 3.6530046674445747e-06, "loss": 0.6381, "step": 2072 }, { "epoch": 0.88, "learning_rate": 3.6500875145857646e-06, "loss": 0.715, "step": 2076 }, { "epoch": 0.88, "learning_rate": 3.647170361726955e-06, "loss": 0.6319, "step": 2080 }, { "epoch": 0.89, "learning_rate": 3.644253208868145e-06, "loss": 0.4187, "step": 2084 }, { "epoch": 0.89, "learning_rate": 3.6413360560093353e-06, "loss": 0.5733, "step": 2088 }, { "epoch": 0.89, "learning_rate": 3.638418903150525e-06, "loss": 0.6943, "step": 2092 }, { "epoch": 0.89, "learning_rate": 3.6355017502917156e-06, "loss": 0.4808, "step": 2096 }, { "epoch": 0.89, "learning_rate": 3.632584597432906e-06, "loss": 0.4813, "step": 2100 }, { "epoch": 0.89, "learning_rate": 3.629667444574096e-06, "loss": 0.5777, "step": 2104 }, { "epoch": 0.9, "learning_rate": 3.6267502917152862e-06, "loss": 0.5967, "step": 2108 }, { "epoch": 0.9, "learning_rate": 3.623833138856476e-06, "loss": 0.5644, "step": 2112 }, { "epoch": 0.9, "learning_rate": 3.6209159859976665e-06, "loss": 0.6584, "step": 2116 }, { "epoch": 0.9, "learning_rate": 3.6179988331388565e-06, "loss": 0.5162, "step": 2120 }, { "epoch": 0.9, "learning_rate": 3.615081680280047e-06, "loss": 0.6585, "step": 2124 }, { "epoch": 0.9, "learning_rate": 3.6121645274212368e-06, "loss": 0.7716, "step": 2128 }, { "epoch": 0.91, "learning_rate": 3.6092473745624276e-06, "loss": 0.5249, "step": 2132 }, { "epoch": 0.91, "learning_rate": 3.606330221703618e-06, "loss": 0.722, "step": 2136 }, { "epoch": 0.91, "learning_rate": 3.603413068844808e-06, "loss": 0.6634, "step": 2140 }, { "epoch": 0.91, "learning_rate": 3.6004959159859982e-06, "loss": 0.6547, "step": 2144 }, { "epoch": 0.91, "learning_rate": 3.597578763127188e-06, "loss": 0.7804, "step": 2148 }, { "epoch": 0.91, "learning_rate": 3.5946616102683785e-06, "loss": 0.5373, "step": 2152 }, { "epoch": 0.92, "learning_rate": 3.5917444574095685e-06, "loss": 0.6122, "step": 2156 }, { "epoch": 0.92, "learning_rate": 3.588827304550759e-06, "loss": 0.5015, "step": 2160 }, { "epoch": 0.92, "learning_rate": 3.585910151691949e-06, "loss": 0.6872, "step": 2164 }, { "epoch": 0.92, "learning_rate": 3.582992998833139e-06, "loss": 0.3367, "step": 2168 }, { "epoch": 0.92, "learning_rate": 3.5800758459743295e-06, "loss": 0.4729, "step": 2172 }, { "epoch": 0.93, "learning_rate": 3.5771586931155194e-06, "loss": 0.7766, "step": 2176 }, { "epoch": 0.93, "learning_rate": 3.5742415402567098e-06, "loss": 0.7324, "step": 2180 }, { "epoch": 0.93, "learning_rate": 3.5713243873978997e-06, "loss": 0.5244, "step": 2184 }, { "epoch": 0.93, "learning_rate": 3.56840723453909e-06, "loss": 0.6321, "step": 2188 }, { "epoch": 0.93, "learning_rate": 3.56549008168028e-06, "loss": 0.6109, "step": 2192 }, { "epoch": 0.93, "learning_rate": 3.5625729288214704e-06, "loss": 0.5534, "step": 2196 }, { "epoch": 0.94, "learning_rate": 3.559655775962661e-06, "loss": 0.5453, "step": 2200 }, { "epoch": 0.94, "learning_rate": 3.5567386231038507e-06, "loss": 0.4379, "step": 2204 }, { "epoch": 0.94, "learning_rate": 3.5538214702450415e-06, "loss": 0.5268, "step": 2208 }, { "epoch": 0.94, "learning_rate": 3.5509043173862314e-06, "loss": 0.7081, "step": 2212 }, { "epoch": 0.94, "learning_rate": 3.5479871645274218e-06, "loss": 0.5149, "step": 2216 }, { "epoch": 0.94, "learning_rate": 3.5450700116686117e-06, "loss": 0.4048, "step": 2220 }, { "epoch": 0.95, "learning_rate": 3.542152858809802e-06, "loss": 0.7552, "step": 2224 }, { "epoch": 0.95, "learning_rate": 3.539235705950992e-06, "loss": 0.4624, "step": 2228 }, { "epoch": 0.95, "learning_rate": 3.5363185530921824e-06, "loss": 0.5872, "step": 2232 }, { "epoch": 0.95, "learning_rate": 3.5334014002333727e-06, "loss": 0.5265, "step": 2236 }, { "epoch": 0.95, "learning_rate": 3.5304842473745627e-06, "loss": 0.4802, "step": 2240 }, { "epoch": 0.95, "learning_rate": 3.527567094515753e-06, "loss": 0.6172, "step": 2244 }, { "epoch": 0.96, "learning_rate": 3.524649941656943e-06, "loss": 0.5788, "step": 2248 }, { "epoch": 0.96, "learning_rate": 3.5217327887981333e-06, "loss": 0.5426, "step": 2252 }, { "epoch": 0.96, "learning_rate": 3.5188156359393233e-06, "loss": 0.6427, "step": 2256 }, { "epoch": 0.96, "learning_rate": 3.5158984830805136e-06, "loss": 0.6474, "step": 2260 }, { "epoch": 0.96, "learning_rate": 3.512981330221704e-06, "loss": 0.5518, "step": 2264 }, { "epoch": 0.96, "learning_rate": 3.510064177362894e-06, "loss": 0.6028, "step": 2268 }, { "epoch": 0.97, "learning_rate": 3.5071470245040843e-06, "loss": 0.4178, "step": 2272 }, { "epoch": 0.97, "learning_rate": 3.5042298716452742e-06, "loss": 0.761, "step": 2276 }, { "epoch": 0.97, "learning_rate": 3.501312718786465e-06, "loss": 0.7076, "step": 2280 }, { "epoch": 0.97, "learning_rate": 3.4983955659276545e-06, "loss": 0.5466, "step": 2284 }, { "epoch": 0.97, "learning_rate": 3.4954784130688453e-06, "loss": 0.4539, "step": 2288 }, { "epoch": 0.97, "learning_rate": 3.4925612602100353e-06, "loss": 0.6436, "step": 2292 }, { "epoch": 0.98, "learning_rate": 3.4896441073512256e-06, "loss": 0.471, "step": 2296 }, { "epoch": 0.98, "learning_rate": 3.486726954492416e-06, "loss": 0.6848, "step": 2300 }, { "epoch": 0.98, "learning_rate": 3.483809801633606e-06, "loss": 0.6043, "step": 2304 }, { "epoch": 0.98, "learning_rate": 3.4808926487747963e-06, "loss": 0.6458, "step": 2308 }, { "epoch": 0.98, "learning_rate": 3.4779754959159862e-06, "loss": 0.6336, "step": 2312 }, { "epoch": 0.98, "learning_rate": 3.4750583430571766e-06, "loss": 0.5843, "step": 2316 }, { "epoch": 0.99, "learning_rate": 3.4721411901983665e-06, "loss": 0.4364, "step": 2320 }, { "epoch": 0.99, "learning_rate": 3.469224037339557e-06, "loss": 0.5796, "step": 2324 }, { "epoch": 0.99, "learning_rate": 3.466306884480747e-06, "loss": 0.5754, "step": 2328 }, { "epoch": 0.99, "learning_rate": 3.463389731621937e-06, "loss": 0.6848, "step": 2332 }, { "epoch": 0.99, "learning_rate": 3.4604725787631276e-06, "loss": 0.6489, "step": 2336 }, { "epoch": 0.99, "learning_rate": 3.4575554259043175e-06, "loss": 0.6255, "step": 2340 }, { "epoch": 1.0, "learning_rate": 3.454638273045508e-06, "loss": 0.4827, "step": 2344 }, { "epoch": 1.0, "learning_rate": 3.451721120186698e-06, "loss": 0.5178, "step": 2348 }, { "epoch": 1.0, "learning_rate": 3.448803967327888e-06, "loss": 0.5294, "step": 2352 }, { "epoch": 1.0, "learning_rate": 3.445886814469078e-06, "loss": 0.4968, "step": 2356 }, { "epoch": 1.0, "learning_rate": 3.442969661610269e-06, "loss": 0.7301, "step": 2360 }, { "epoch": 1.01, "learning_rate": 3.4400525087514592e-06, "loss": 0.5503, "step": 2364 }, { "epoch": 1.01, "learning_rate": 3.437135355892649e-06, "loss": 0.4724, "step": 2368 }, { "epoch": 1.01, "learning_rate": 3.4342182030338395e-06, "loss": 0.5471, "step": 2372 }, { "epoch": 1.01, "learning_rate": 3.4313010501750295e-06, "loss": 0.5414, "step": 2376 }, { "epoch": 1.01, "learning_rate": 3.42838389731622e-06, "loss": 0.4228, "step": 2380 }, { "epoch": 1.01, "learning_rate": 3.4254667444574098e-06, "loss": 0.6098, "step": 2384 }, { "epoch": 1.02, "learning_rate": 3.4225495915986e-06, "loss": 0.5935, "step": 2388 }, { "epoch": 1.02, "learning_rate": 3.41963243873979e-06, "loss": 0.5567, "step": 2392 }, { "epoch": 1.02, "learning_rate": 3.4167152858809804e-06, "loss": 0.4576, "step": 2396 }, { "epoch": 1.02, "learning_rate": 3.413798133022171e-06, "loss": 0.5125, "step": 2400 }, { "epoch": 1.02, "learning_rate": 3.4108809801633607e-06, "loss": 0.4188, "step": 2404 }, { "epoch": 1.02, "learning_rate": 3.407963827304551e-06, "loss": 0.6133, "step": 2408 }, { "epoch": 1.03, "learning_rate": 3.405046674445741e-06, "loss": 0.5203, "step": 2412 }, { "epoch": 1.03, "learning_rate": 3.4021295215869314e-06, "loss": 0.5936, "step": 2416 }, { "epoch": 1.03, "learning_rate": 3.3992123687281213e-06, "loss": 0.5783, "step": 2420 }, { "epoch": 1.03, "learning_rate": 3.3962952158693117e-06, "loss": 0.5501, "step": 2424 }, { "epoch": 1.03, "learning_rate": 3.3933780630105017e-06, "loss": 0.6859, "step": 2428 }, { "epoch": 1.03, "learning_rate": 3.390460910151692e-06, "loss": 0.5361, "step": 2432 }, { "epoch": 1.04, "learning_rate": 3.387543757292883e-06, "loss": 0.3481, "step": 2436 }, { "epoch": 1.04, "learning_rate": 3.3846266044340727e-06, "loss": 0.5475, "step": 2440 }, { "epoch": 1.04, "learning_rate": 3.381709451575263e-06, "loss": 0.4606, "step": 2444 }, { "epoch": 1.04, "learning_rate": 3.378792298716453e-06, "loss": 0.3753, "step": 2448 }, { "epoch": 1.04, "learning_rate": 3.3758751458576434e-06, "loss": 0.5286, "step": 2452 }, { "epoch": 1.04, "learning_rate": 3.3729579929988333e-06, "loss": 0.5214, "step": 2456 }, { "epoch": 1.05, "learning_rate": 3.3700408401400237e-06, "loss": 0.4971, "step": 2460 }, { "epoch": 1.05, "learning_rate": 3.367123687281214e-06, "loss": 0.5731, "step": 2464 }, { "epoch": 1.05, "learning_rate": 3.364206534422404e-06, "loss": 0.6563, "step": 2468 }, { "epoch": 1.05, "learning_rate": 3.3612893815635944e-06, "loss": 0.3885, "step": 2472 }, { "epoch": 1.05, "learning_rate": 3.3583722287047843e-06, "loss": 0.7304, "step": 2476 }, { "epoch": 1.05, "learning_rate": 3.3554550758459747e-06, "loss": 0.6601, "step": 2480 }, { "epoch": 1.06, "learning_rate": 3.3525379229871646e-06, "loss": 0.4629, "step": 2484 }, { "epoch": 1.06, "learning_rate": 3.349620770128355e-06, "loss": 0.5769, "step": 2488 }, { "epoch": 1.06, "learning_rate": 3.346703617269545e-06, "loss": 0.5616, "step": 2492 }, { "epoch": 1.06, "learning_rate": 3.3437864644107353e-06, "loss": 0.4976, "step": 2496 }, { "epoch": 1.06, "learning_rate": 3.340869311551926e-06, "loss": 0.6104, "step": 2500 }, { "epoch": 1.06, "learning_rate": 3.3379521586931156e-06, "loss": 0.4875, "step": 2504 }, { "epoch": 1.07, "learning_rate": 3.3350350058343064e-06, "loss": 0.3093, "step": 2508 }, { "epoch": 1.07, "learning_rate": 3.332117852975496e-06, "loss": 0.3887, "step": 2512 }, { "epoch": 1.07, "learning_rate": 3.3292007001166867e-06, "loss": 0.6688, "step": 2516 }, { "epoch": 1.07, "learning_rate": 3.3262835472578766e-06, "loss": 0.479, "step": 2520 }, { "epoch": 1.07, "learning_rate": 3.323366394399067e-06, "loss": 0.616, "step": 2524 }, { "epoch": 1.07, "learning_rate": 3.320449241540257e-06, "loss": 0.8513, "step": 2528 }, { "epoch": 1.08, "learning_rate": 3.3175320886814473e-06, "loss": 0.4224, "step": 2532 }, { "epoch": 1.08, "learning_rate": 3.3146149358226376e-06, "loss": 0.4577, "step": 2536 }, { "epoch": 1.08, "learning_rate": 3.3116977829638276e-06, "loss": 0.4286, "step": 2540 }, { "epoch": 1.08, "learning_rate": 3.308780630105018e-06, "loss": 0.5298, "step": 2544 }, { "epoch": 1.08, "learning_rate": 3.305863477246208e-06, "loss": 0.4531, "step": 2548 }, { "epoch": 1.09, "learning_rate": 3.3029463243873982e-06, "loss": 0.561, "step": 2552 }, { "epoch": 1.09, "learning_rate": 3.300029171528588e-06, "loss": 0.4931, "step": 2556 }, { "epoch": 1.09, "learning_rate": 3.2971120186697785e-06, "loss": 0.6341, "step": 2560 }, { "epoch": 1.09, "learning_rate": 3.294194865810969e-06, "loss": 0.5096, "step": 2564 }, { "epoch": 1.09, "learning_rate": 3.291277712952159e-06, "loss": 0.3863, "step": 2568 }, { "epoch": 1.09, "learning_rate": 3.288360560093349e-06, "loss": 0.6633, "step": 2572 }, { "epoch": 1.1, "learning_rate": 3.285443407234539e-06, "loss": 0.7305, "step": 2576 }, { "epoch": 1.1, "learning_rate": 3.2825262543757295e-06, "loss": 0.627, "step": 2580 }, { "epoch": 1.1, "learning_rate": 3.2796091015169194e-06, "loss": 0.4865, "step": 2584 }, { "epoch": 1.1, "learning_rate": 3.27669194865811e-06, "loss": 0.507, "step": 2588 }, { "epoch": 1.1, "learning_rate": 3.2737747957992997e-06, "loss": 0.5226, "step": 2592 }, { "epoch": 1.1, "learning_rate": 3.2708576429404905e-06, "loss": 0.5523, "step": 2596 }, { "epoch": 1.11, "learning_rate": 3.267940490081681e-06, "loss": 0.5195, "step": 2600 }, { "epoch": 1.11, "learning_rate": 3.265023337222871e-06, "loss": 0.3034, "step": 2604 }, { "epoch": 1.11, "learning_rate": 3.262106184364061e-06, "loss": 0.7056, "step": 2608 }, { "epoch": 1.11, "learning_rate": 3.259189031505251e-06, "loss": 0.6256, "step": 2612 }, { "epoch": 1.11, "learning_rate": 3.2562718786464415e-06, "loss": 0.4434, "step": 2616 }, { "epoch": 1.11, "learning_rate": 3.2533547257876314e-06, "loss": 0.3871, "step": 2620 }, { "epoch": 1.12, "learning_rate": 3.2504375729288218e-06, "loss": 0.6902, "step": 2624 }, { "epoch": 1.12, "learning_rate": 3.2475204200700117e-06, "loss": 0.3734, "step": 2628 }, { "epoch": 1.12, "learning_rate": 3.244603267211202e-06, "loss": 0.4771, "step": 2632 }, { "epoch": 1.12, "learning_rate": 3.2416861143523924e-06, "loss": 0.5457, "step": 2636 }, { "epoch": 1.12, "learning_rate": 3.2387689614935824e-06, "loss": 0.5787, "step": 2640 }, { "epoch": 1.12, "learning_rate": 3.2358518086347727e-06, "loss": 0.7326, "step": 2644 }, { "epoch": 1.13, "learning_rate": 3.2329346557759627e-06, "loss": 0.4606, "step": 2648 }, { "epoch": 1.13, "learning_rate": 3.230017502917153e-06, "loss": 0.5832, "step": 2652 }, { "epoch": 1.13, "learning_rate": 3.227100350058343e-06, "loss": 0.4674, "step": 2656 }, { "epoch": 1.13, "learning_rate": 3.2241831971995333e-06, "loss": 0.3263, "step": 2660 }, { "epoch": 1.13, "learning_rate": 3.221266044340724e-06, "loss": 0.433, "step": 2664 }, { "epoch": 1.13, "learning_rate": 3.218348891481914e-06, "loss": 0.562, "step": 2668 }, { "epoch": 1.14, "learning_rate": 3.2154317386231044e-06, "loss": 0.5465, "step": 2672 }, { "epoch": 1.14, "learning_rate": 3.2125145857642944e-06, "loss": 0.5806, "step": 2676 }, { "epoch": 1.14, "learning_rate": 3.2095974329054847e-06, "loss": 0.5941, "step": 2680 }, { "epoch": 1.14, "learning_rate": 3.2066802800466747e-06, "loss": 0.5672, "step": 2684 }, { "epoch": 1.14, "learning_rate": 3.203763127187865e-06, "loss": 0.4044, "step": 2688 }, { "epoch": 1.14, "learning_rate": 3.200845974329055e-06, "loss": 0.6372, "step": 2692 }, { "epoch": 1.15, "learning_rate": 3.1979288214702453e-06, "loss": 0.5306, "step": 2696 }, { "epoch": 1.15, "learning_rate": 3.1950116686114357e-06, "loss": 0.6751, "step": 2700 }, { "epoch": 1.15, "learning_rate": 3.1920945157526256e-06, "loss": 0.3999, "step": 2704 }, { "epoch": 1.15, "learning_rate": 3.189177362893816e-06, "loss": 0.6431, "step": 2708 }, { "epoch": 1.15, "learning_rate": 3.186260210035006e-06, "loss": 0.7637, "step": 2712 }, { "epoch": 1.15, "learning_rate": 3.1833430571761963e-06, "loss": 0.4698, "step": 2716 }, { "epoch": 1.16, "learning_rate": 3.1804259043173862e-06, "loss": 0.5558, "step": 2720 }, { "epoch": 1.16, "learning_rate": 3.1775087514585766e-06, "loss": 0.5202, "step": 2724 }, { "epoch": 1.16, "learning_rate": 3.1745915985997665e-06, "loss": 0.4455, "step": 2728 }, { "epoch": 1.16, "learning_rate": 3.171674445740957e-06, "loss": 0.6568, "step": 2732 }, { "epoch": 1.16, "learning_rate": 3.1687572928821477e-06, "loss": 0.6879, "step": 2736 }, { "epoch": 1.16, "learning_rate": 3.165840140023337e-06, "loss": 0.474, "step": 2740 }, { "epoch": 1.17, "learning_rate": 3.162922987164528e-06, "loss": 0.5385, "step": 2744 }, { "epoch": 1.17, "learning_rate": 3.160005834305718e-06, "loss": 0.6955, "step": 2748 }, { "epoch": 1.17, "learning_rate": 3.1570886814469083e-06, "loss": 0.501, "step": 2752 }, { "epoch": 1.17, "learning_rate": 3.1541715285880982e-06, "loss": 0.5093, "step": 2756 }, { "epoch": 1.17, "learning_rate": 3.1512543757292886e-06, "loss": 0.3698, "step": 2760 }, { "epoch": 1.18, "learning_rate": 3.148337222870479e-06, "loss": 0.4271, "step": 2764 }, { "epoch": 1.18, "learning_rate": 3.145420070011669e-06, "loss": 0.5814, "step": 2768 }, { "epoch": 1.18, "learning_rate": 3.1425029171528592e-06, "loss": 0.3312, "step": 2772 }, { "epoch": 1.18, "learning_rate": 3.139585764294049e-06, "loss": 0.5727, "step": 2776 }, { "epoch": 1.18, "learning_rate": 3.1366686114352395e-06, "loss": 0.6723, "step": 2780 }, { "epoch": 1.18, "learning_rate": 3.1337514585764295e-06, "loss": 0.5772, "step": 2784 }, { "epoch": 1.19, "learning_rate": 3.13083430571762e-06, "loss": 0.2121, "step": 2788 }, { "epoch": 1.19, "learning_rate": 3.1279171528588098e-06, "loss": 0.5148, "step": 2792 }, { "epoch": 1.19, "learning_rate": 3.125e-06, "loss": 0.402, "step": 2796 }, { "epoch": 1.19, "learning_rate": 3.1220828471411905e-06, "loss": 0.4757, "step": 2800 }, { "epoch": 1.19, "learning_rate": 3.1191656942823805e-06, "loss": 0.5157, "step": 2804 }, { "epoch": 1.19, "learning_rate": 3.1162485414235712e-06, "loss": 0.5417, "step": 2808 }, { "epoch": 1.2, "learning_rate": 3.1133313885647608e-06, "loss": 0.3401, "step": 2812 }, { "epoch": 1.2, "learning_rate": 3.1104142357059515e-06, "loss": 0.6862, "step": 2816 }, { "epoch": 1.2, "learning_rate": 3.107497082847141e-06, "loss": 0.516, "step": 2820 }, { "epoch": 1.2, "learning_rate": 3.104579929988332e-06, "loss": 0.5201, "step": 2824 }, { "epoch": 1.2, "learning_rate": 3.1016627771295218e-06, "loss": 0.4436, "step": 2828 }, { "epoch": 1.2, "learning_rate": 3.098745624270712e-06, "loss": 0.5983, "step": 2832 }, { "epoch": 1.21, "learning_rate": 3.0958284714119025e-06, "loss": 0.5076, "step": 2836 }, { "epoch": 1.21, "learning_rate": 3.0929113185530924e-06, "loss": 0.4357, "step": 2840 }, { "epoch": 1.21, "learning_rate": 3.089994165694283e-06, "loss": 0.5015, "step": 2844 }, { "epoch": 1.21, "learning_rate": 3.0870770128354727e-06, "loss": 0.5186, "step": 2848 }, { "epoch": 1.21, "learning_rate": 3.084159859976663e-06, "loss": 0.5653, "step": 2852 }, { "epoch": 1.21, "learning_rate": 3.081242707117853e-06, "loss": 0.6339, "step": 2856 }, { "epoch": 1.22, "learning_rate": 3.0783255542590434e-06, "loss": 0.5534, "step": 2860 }, { "epoch": 1.22, "learning_rate": 3.0754084014002338e-06, "loss": 0.5144, "step": 2864 }, { "epoch": 1.22, "learning_rate": 3.0724912485414237e-06, "loss": 0.393, "step": 2868 }, { "epoch": 1.22, "learning_rate": 3.069574095682614e-06, "loss": 0.4453, "step": 2872 }, { "epoch": 1.22, "learning_rate": 3.066656942823804e-06, "loss": 0.5278, "step": 2876 }, { "epoch": 1.22, "learning_rate": 3.0637397899649944e-06, "loss": 0.7833, "step": 2880 }, { "epoch": 1.23, "learning_rate": 3.0608226371061843e-06, "loss": 0.4581, "step": 2884 }, { "epoch": 1.23, "learning_rate": 3.0579054842473747e-06, "loss": 0.3942, "step": 2888 }, { "epoch": 1.23, "learning_rate": 3.0549883313885646e-06, "loss": 0.6298, "step": 2892 }, { "epoch": 1.23, "learning_rate": 3.0520711785297554e-06, "loss": 0.6276, "step": 2896 }, { "epoch": 1.23, "learning_rate": 3.0491540256709458e-06, "loss": 0.737, "step": 2900 }, { "epoch": 1.23, "learning_rate": 3.0462368728121357e-06, "loss": 0.5208, "step": 2904 }, { "epoch": 1.24, "learning_rate": 3.043319719953326e-06, "loss": 0.5953, "step": 2908 }, { "epoch": 1.24, "learning_rate": 3.040402567094516e-06, "loss": 0.5373, "step": 2912 }, { "epoch": 1.24, "learning_rate": 3.0374854142357064e-06, "loss": 0.3865, "step": 2916 }, { "epoch": 1.24, "learning_rate": 3.0345682613768963e-06, "loss": 0.5451, "step": 2920 }, { "epoch": 1.24, "learning_rate": 3.0316511085180867e-06, "loss": 0.4434, "step": 2924 }, { "epoch": 1.24, "learning_rate": 3.0287339556592766e-06, "loss": 0.6256, "step": 2928 }, { "epoch": 1.25, "learning_rate": 3.025816802800467e-06, "loss": 0.4372, "step": 2932 }, { "epoch": 1.25, "learning_rate": 3.0228996499416573e-06, "loss": 0.4225, "step": 2936 }, { "epoch": 1.25, "learning_rate": 3.0199824970828473e-06, "loss": 0.6176, "step": 2940 }, { "epoch": 1.25, "learning_rate": 3.0170653442240376e-06, "loss": 0.5814, "step": 2944 }, { "epoch": 1.25, "learning_rate": 3.0141481913652276e-06, "loss": 0.6453, "step": 2948 }, { "epoch": 1.26, "learning_rate": 3.011231038506418e-06, "loss": 0.599, "step": 2952 }, { "epoch": 1.26, "learning_rate": 3.008313885647608e-06, "loss": 0.4661, "step": 2956 }, { "epoch": 1.26, "learning_rate": 3.0053967327887982e-06, "loss": 0.3218, "step": 2960 }, { "epoch": 1.26, "learning_rate": 3.003208868144691e-06, "loss": 0.6456, "step": 2964 }, { "epoch": 1.26, "learning_rate": 3.0002917152858813e-06, "loss": 0.2891, "step": 2968 }, { "epoch": 1.26, "learning_rate": 2.9973745624270716e-06, "loss": 0.4821, "step": 2972 }, { "epoch": 1.27, "learning_rate": 2.9944574095682616e-06, "loss": 0.733, "step": 2976 }, { "epoch": 1.27, "learning_rate": 2.991540256709452e-06, "loss": 0.2193, "step": 2980 }, { "epoch": 1.27, "learning_rate": 2.988623103850642e-06, "loss": 0.4148, "step": 2984 }, { "epoch": 1.27, "learning_rate": 2.9857059509918322e-06, "loss": 0.5474, "step": 2988 }, { "epoch": 1.27, "learning_rate": 2.982788798133022e-06, "loss": 0.4005, "step": 2992 }, { "epoch": 1.27, "learning_rate": 2.9798716452742125e-06, "loss": 0.5799, "step": 2996 }, { "epoch": 1.28, "learning_rate": 2.9769544924154025e-06, "loss": 0.4967, "step": 3000 }, { "epoch": 1.28, "learning_rate": 2.974037339556593e-06, "loss": 0.4672, "step": 3004 }, { "epoch": 1.28, "learning_rate": 2.9711201866977836e-06, "loss": 0.4892, "step": 3008 }, { "epoch": 1.28, "learning_rate": 2.9682030338389735e-06, "loss": 0.554, "step": 3012 }, { "epoch": 1.28, "learning_rate": 2.965285880980164e-06, "loss": 0.3553, "step": 3016 }, { "epoch": 1.28, "learning_rate": 2.962368728121354e-06, "loss": 0.5259, "step": 3020 }, { "epoch": 1.29, "learning_rate": 2.959451575262544e-06, "loss": 0.4461, "step": 3024 }, { "epoch": 1.29, "learning_rate": 2.956534422403734e-06, "loss": 0.4768, "step": 3028 }, { "epoch": 1.29, "learning_rate": 2.9536172695449245e-06, "loss": 0.4537, "step": 3032 }, { "epoch": 1.29, "learning_rate": 2.9507001166861144e-06, "loss": 0.5611, "step": 3036 }, { "epoch": 1.29, "learning_rate": 2.947782963827305e-06, "loss": 0.5684, "step": 3040 }, { "epoch": 1.29, "learning_rate": 2.944865810968495e-06, "loss": 0.2357, "step": 3044 }, { "epoch": 1.3, "learning_rate": 2.941948658109685e-06, "loss": 0.4909, "step": 3048 }, { "epoch": 1.3, "learning_rate": 2.9390315052508755e-06, "loss": 0.5225, "step": 3052 }, { "epoch": 1.3, "learning_rate": 2.9361143523920654e-06, "loss": 0.391, "step": 3056 }, { "epoch": 1.3, "learning_rate": 2.9331971995332558e-06, "loss": 0.4058, "step": 3060 }, { "epoch": 1.3, "learning_rate": 2.9302800466744457e-06, "loss": 0.446, "step": 3064 }, { "epoch": 1.3, "learning_rate": 2.927362893815636e-06, "loss": 0.3136, "step": 3068 }, { "epoch": 1.31, "learning_rate": 2.924445740956827e-06, "loss": 0.4259, "step": 3072 }, { "epoch": 1.31, "learning_rate": 2.9215285880980164e-06, "loss": 0.4293, "step": 3076 }, { "epoch": 1.31, "learning_rate": 2.918611435239207e-06, "loss": 0.6366, "step": 3080 }, { "epoch": 1.31, "learning_rate": 2.9156942823803967e-06, "loss": 0.5103, "step": 3084 }, { "epoch": 1.31, "learning_rate": 2.9127771295215875e-06, "loss": 0.4994, "step": 3088 }, { "epoch": 1.31, "learning_rate": 2.9098599766627774e-06, "loss": 0.5369, "step": 3092 }, { "epoch": 1.32, "learning_rate": 2.9069428238039678e-06, "loss": 0.4632, "step": 3096 }, { "epoch": 1.32, "learning_rate": 2.9040256709451577e-06, "loss": 0.3315, "step": 3100 }, { "epoch": 1.32, "learning_rate": 2.901108518086348e-06, "loss": 0.5776, "step": 3104 }, { "epoch": 1.32, "learning_rate": 2.8981913652275384e-06, "loss": 0.4545, "step": 3108 }, { "epoch": 1.32, "learning_rate": 2.8952742123687284e-06, "loss": 0.6122, "step": 3112 }, { "epoch": 1.32, "learning_rate": 2.8923570595099187e-06, "loss": 0.2894, "step": 3116 }, { "epoch": 1.33, "learning_rate": 2.8894399066511087e-06, "loss": 0.5543, "step": 3120 }, { "epoch": 1.33, "learning_rate": 2.886522753792299e-06, "loss": 0.524, "step": 3124 }, { "epoch": 1.33, "learning_rate": 2.883605600933489e-06, "loss": 0.5279, "step": 3128 }, { "epoch": 1.33, "learning_rate": 2.8806884480746793e-06, "loss": 0.3323, "step": 3132 }, { "epoch": 1.33, "learning_rate": 2.8777712952158693e-06, "loss": 0.5169, "step": 3136 }, { "epoch": 1.34, "learning_rate": 2.8748541423570596e-06, "loss": 0.4117, "step": 3140 }, { "epoch": 1.34, "learning_rate": 2.87193698949825e-06, "loss": 0.2958, "step": 3144 }, { "epoch": 1.34, "learning_rate": 2.86901983663944e-06, "loss": 0.5434, "step": 3148 }, { "epoch": 1.34, "learning_rate": 2.8661026837806307e-06, "loss": 0.3813, "step": 3152 }, { "epoch": 1.34, "learning_rate": 2.8631855309218202e-06, "loss": 0.4985, "step": 3156 }, { "epoch": 1.34, "learning_rate": 2.860268378063011e-06, "loss": 0.5145, "step": 3160 }, { "epoch": 1.35, "learning_rate": 2.8573512252042005e-06, "loss": 0.4176, "step": 3164 }, { "epoch": 1.35, "learning_rate": 2.8544340723453913e-06, "loss": 0.4034, "step": 3168 }, { "epoch": 1.35, "learning_rate": 2.8515169194865817e-06, "loss": 0.514, "step": 3172 }, { "epoch": 1.35, "learning_rate": 2.8485997666277716e-06, "loss": 0.4951, "step": 3176 }, { "epoch": 1.35, "learning_rate": 2.845682613768962e-06, "loss": 0.3946, "step": 3180 }, { "epoch": 1.35, "learning_rate": 2.842765460910152e-06, "loss": 0.4, "step": 3184 }, { "epoch": 1.36, "learning_rate": 2.8398483080513423e-06, "loss": 0.5394, "step": 3188 }, { "epoch": 1.36, "learning_rate": 2.8369311551925322e-06, "loss": 0.6328, "step": 3192 }, { "epoch": 1.36, "learning_rate": 2.8340140023337226e-06, "loss": 0.4449, "step": 3196 }, { "epoch": 1.36, "learning_rate": 2.8310968494749125e-06, "loss": 0.4787, "step": 3200 }, { "epoch": 1.36, "learning_rate": 2.828179696616103e-06, "loss": 0.3408, "step": 3204 }, { "epoch": 1.36, "learning_rate": 2.8252625437572932e-06, "loss": 0.4688, "step": 3208 }, { "epoch": 1.37, "learning_rate": 2.822345390898483e-06, "loss": 0.557, "step": 3212 }, { "epoch": 1.37, "learning_rate": 2.8194282380396735e-06, "loss": 0.5299, "step": 3216 }, { "epoch": 1.37, "learning_rate": 2.8165110851808635e-06, "loss": 0.468, "step": 3220 }, { "epoch": 1.37, "learning_rate": 2.813593932322054e-06, "loss": 0.3655, "step": 3224 }, { "epoch": 1.37, "learning_rate": 2.8106767794632438e-06, "loss": 0.2575, "step": 3228 }, { "epoch": 1.37, "learning_rate": 2.807759626604434e-06, "loss": 0.5705, "step": 3232 }, { "epoch": 1.38, "learning_rate": 2.804842473745624e-06, "loss": 0.4812, "step": 3236 }, { "epoch": 1.38, "learning_rate": 2.801925320886815e-06, "loss": 0.5761, "step": 3240 }, { "epoch": 1.38, "learning_rate": 2.7990081680280052e-06, "loss": 0.6039, "step": 3244 }, { "epoch": 1.38, "learning_rate": 2.796091015169195e-06, "loss": 0.3454, "step": 3248 }, { "epoch": 1.38, "learning_rate": 2.7931738623103855e-06, "loss": 0.5554, "step": 3252 }, { "epoch": 1.38, "learning_rate": 2.7902567094515755e-06, "loss": 0.4531, "step": 3256 }, { "epoch": 1.39, "learning_rate": 2.787339556592766e-06, "loss": 0.3332, "step": 3260 }, { "epoch": 1.39, "learning_rate": 2.7844224037339558e-06, "loss": 0.3159, "step": 3264 }, { "epoch": 1.39, "learning_rate": 2.781505250875146e-06, "loss": 0.498, "step": 3268 }, { "epoch": 1.39, "learning_rate": 2.7785880980163365e-06, "loss": 0.4386, "step": 3272 }, { "epoch": 1.39, "learning_rate": 2.7756709451575264e-06, "loss": 0.4108, "step": 3276 }, { "epoch": 1.39, "learning_rate": 2.772753792298717e-06, "loss": 0.3739, "step": 3280 }, { "epoch": 1.4, "learning_rate": 2.7698366394399067e-06, "loss": 0.3242, "step": 3284 }, { "epoch": 1.4, "learning_rate": 2.766919486581097e-06, "loss": 0.5226, "step": 3288 }, { "epoch": 1.4, "learning_rate": 2.764002333722287e-06, "loss": 0.655, "step": 3292 }, { "epoch": 1.4, "learning_rate": 2.7610851808634774e-06, "loss": 0.3379, "step": 3296 }, { "epoch": 1.4, "learning_rate": 2.7581680280046673e-06, "loss": 0.5461, "step": 3300 }, { "epoch": 1.4, "learning_rate": 2.7552508751458577e-06, "loss": 0.4614, "step": 3304 }, { "epoch": 1.41, "learning_rate": 2.7523337222870485e-06, "loss": 0.6258, "step": 3308 }, { "epoch": 1.41, "learning_rate": 2.749416569428238e-06, "loss": 0.3237, "step": 3312 }, { "epoch": 1.41, "learning_rate": 2.7464994165694288e-06, "loss": 0.4256, "step": 3316 }, { "epoch": 1.41, "learning_rate": 2.7435822637106187e-06, "loss": 0.4114, "step": 3320 }, { "epoch": 1.41, "learning_rate": 2.740665110851809e-06, "loss": 0.7195, "step": 3324 }, { "epoch": 1.41, "learning_rate": 2.737747957992999e-06, "loss": 0.5899, "step": 3328 }, { "epoch": 1.42, "learning_rate": 2.7348308051341894e-06, "loss": 0.4853, "step": 3332 }, { "epoch": 1.42, "learning_rate": 2.7319136522753793e-06, "loss": 0.4043, "step": 3336 }, { "epoch": 1.42, "learning_rate": 2.7289964994165697e-06, "loss": 0.5738, "step": 3340 }, { "epoch": 1.42, "learning_rate": 2.72607934655776e-06, "loss": 0.5704, "step": 3344 }, { "epoch": 1.42, "learning_rate": 2.72316219369895e-06, "loss": 0.6312, "step": 3348 }, { "epoch": 1.43, "learning_rate": 2.7202450408401404e-06, "loss": 0.3233, "step": 3352 }, { "epoch": 1.43, "learning_rate": 2.7173278879813303e-06, "loss": 0.5598, "step": 3356 }, { "epoch": 1.43, "learning_rate": 2.7144107351225207e-06, "loss": 0.3501, "step": 3360 }, { "epoch": 1.43, "learning_rate": 2.7114935822637106e-06, "loss": 0.3945, "step": 3364 }, { "epoch": 1.43, "learning_rate": 2.708576429404901e-06, "loss": 0.4801, "step": 3368 }, { "epoch": 1.43, "learning_rate": 2.7056592765460913e-06, "loss": 0.5775, "step": 3372 }, { "epoch": 1.44, "learning_rate": 2.7027421236872813e-06, "loss": 0.3939, "step": 3376 }, { "epoch": 1.44, "learning_rate": 2.699824970828472e-06, "loss": 0.4818, "step": 3380 }, { "epoch": 1.44, "learning_rate": 2.6969078179696616e-06, "loss": 0.4698, "step": 3384 }, { "epoch": 1.44, "learning_rate": 2.6939906651108523e-06, "loss": 0.3852, "step": 3388 }, { "epoch": 1.44, "learning_rate": 2.691073512252042e-06, "loss": 0.4516, "step": 3392 }, { "epoch": 1.44, "learning_rate": 2.6881563593932326e-06, "loss": 0.3766, "step": 3396 }, { "epoch": 1.45, "learning_rate": 2.6852392065344226e-06, "loss": 0.315, "step": 3400 }, { "epoch": 1.45, "learning_rate": 2.682322053675613e-06, "loss": 0.4293, "step": 3404 }, { "epoch": 1.45, "learning_rate": 2.6794049008168033e-06, "loss": 0.5251, "step": 3408 }, { "epoch": 1.45, "learning_rate": 2.6764877479579932e-06, "loss": 0.4517, "step": 3412 }, { "epoch": 1.45, "learning_rate": 2.6735705950991836e-06, "loss": 0.568, "step": 3416 }, { "epoch": 1.45, "learning_rate": 2.6706534422403735e-06, "loss": 0.5349, "step": 3420 }, { "epoch": 1.46, "learning_rate": 2.667736289381564e-06, "loss": 0.4316, "step": 3424 }, { "epoch": 1.46, "learning_rate": 2.664819136522754e-06, "loss": 0.372, "step": 3428 }, { "epoch": 1.46, "learning_rate": 2.661901983663944e-06, "loss": 0.4546, "step": 3432 }, { "epoch": 1.46, "learning_rate": 2.658984830805134e-06, "loss": 0.3791, "step": 3436 }, { "epoch": 1.46, "learning_rate": 2.6560676779463245e-06, "loss": 0.4281, "step": 3440 }, { "epoch": 1.46, "learning_rate": 2.653150525087515e-06, "loss": 0.3564, "step": 3444 }, { "epoch": 1.47, "learning_rate": 2.650233372228705e-06, "loss": 0.443, "step": 3448 }, { "epoch": 1.47, "learning_rate": 2.647316219369895e-06, "loss": 0.6713, "step": 3452 }, { "epoch": 1.47, "learning_rate": 2.644399066511085e-06, "loss": 0.2832, "step": 3456 }, { "epoch": 1.47, "learning_rate": 2.641481913652276e-06, "loss": 0.5267, "step": 3460 }, { "epoch": 1.47, "learning_rate": 2.6385647607934654e-06, "loss": 0.688, "step": 3464 }, { "epoch": 1.47, "learning_rate": 2.635647607934656e-06, "loss": 0.6487, "step": 3468 }, { "epoch": 1.48, "learning_rate": 2.6327304550758466e-06, "loss": 0.6653, "step": 3472 }, { "epoch": 1.48, "learning_rate": 2.6298133022170365e-06, "loss": 0.4218, "step": 3476 }, { "epoch": 1.48, "learning_rate": 2.626896149358227e-06, "loss": 0.5679, "step": 3480 }, { "epoch": 1.48, "learning_rate": 2.623978996499417e-06, "loss": 0.3429, "step": 3484 }, { "epoch": 1.48, "learning_rate": 2.621061843640607e-06, "loss": 0.4428, "step": 3488 }, { "epoch": 1.48, "learning_rate": 2.618144690781797e-06, "loss": 0.4073, "step": 3492 }, { "epoch": 1.49, "learning_rate": 2.6152275379229875e-06, "loss": 0.476, "step": 3496 }, { "epoch": 1.49, "learning_rate": 2.6123103850641774e-06, "loss": 0.3964, "step": 3500 }, { "epoch": 1.49, "learning_rate": 2.6093932322053678e-06, "loss": 0.4971, "step": 3504 }, { "epoch": 1.49, "learning_rate": 2.606476079346558e-06, "loss": 0.5309, "step": 3508 }, { "epoch": 1.49, "learning_rate": 2.603558926487748e-06, "loss": 0.7094, "step": 3512 }, { "epoch": 1.49, "learning_rate": 2.6006417736289384e-06, "loss": 0.4345, "step": 3516 }, { "epoch": 1.5, "learning_rate": 2.5977246207701284e-06, "loss": 0.5559, "step": 3520 }, { "epoch": 1.5, "learning_rate": 2.5948074679113187e-06, "loss": 0.519, "step": 3524 }, { "epoch": 1.5, "learning_rate": 2.5918903150525087e-06, "loss": 0.4054, "step": 3528 }, { "epoch": 1.5, "learning_rate": 2.588973162193699e-06, "loss": 0.4334, "step": 3532 }, { "epoch": 1.5, "learning_rate": 2.586056009334889e-06, "loss": 0.3535, "step": 3536 }, { "epoch": 1.51, "learning_rate": 2.5831388564760793e-06, "loss": 0.6168, "step": 3540 }, { "epoch": 1.51, "learning_rate": 2.58022170361727e-06, "loss": 0.3484, "step": 3544 }, { "epoch": 1.51, "learning_rate": 2.57730455075846e-06, "loss": 0.3373, "step": 3548 }, { "epoch": 1.51, "learning_rate": 2.5743873978996504e-06, "loss": 0.5002, "step": 3552 }, { "epoch": 1.51, "learning_rate": 2.5714702450408404e-06, "loss": 0.4713, "step": 3556 }, { "epoch": 1.51, "learning_rate": 2.5685530921820307e-06, "loss": 0.4236, "step": 3560 }, { "epoch": 1.52, "learning_rate": 2.5656359393232207e-06, "loss": 0.49, "step": 3564 }, { "epoch": 1.52, "learning_rate": 2.562718786464411e-06, "loss": 0.3107, "step": 3568 }, { "epoch": 1.52, "learning_rate": 2.5598016336056014e-06, "loss": 0.6111, "step": 3572 }, { "epoch": 1.52, "learning_rate": 2.5568844807467913e-06, "loss": 0.4386, "step": 3576 }, { "epoch": 1.52, "learning_rate": 2.5539673278879817e-06, "loss": 0.434, "step": 3580 }, { "epoch": 1.52, "learning_rate": 2.5510501750291716e-06, "loss": 0.3085, "step": 3584 }, { "epoch": 1.53, "learning_rate": 2.548133022170362e-06, "loss": 0.6368, "step": 3588 }, { "epoch": 1.53, "learning_rate": 2.545215869311552e-06, "loss": 0.3797, "step": 3592 }, { "epoch": 1.53, "learning_rate": 2.5422987164527423e-06, "loss": 0.5329, "step": 3596 }, { "epoch": 1.53, "learning_rate": 2.5393815635939322e-06, "loss": 0.5015, "step": 3600 }, { "epoch": 1.53, "learning_rate": 2.5364644107351226e-06, "loss": 0.333, "step": 3604 }, { "epoch": 1.53, "learning_rate": 2.5335472578763134e-06, "loss": 0.349, "step": 3608 }, { "epoch": 1.54, "learning_rate": 2.530630105017503e-06, "loss": 0.5365, "step": 3612 }, { "epoch": 1.54, "learning_rate": 2.5277129521586937e-06, "loss": 0.6777, "step": 3616 }, { "epoch": 1.54, "learning_rate": 2.524795799299883e-06, "loss": 0.5432, "step": 3620 }, { "epoch": 1.54, "learning_rate": 2.521878646441074e-06, "loss": 0.2763, "step": 3624 }, { "epoch": 1.54, "learning_rate": 2.518961493582264e-06, "loss": 0.5183, "step": 3628 }, { "epoch": 1.54, "learning_rate": 2.5160443407234543e-06, "loss": 0.4486, "step": 3632 }, { "epoch": 1.55, "learning_rate": 2.513127187864644e-06, "loss": 0.3521, "step": 3636 }, { "epoch": 1.55, "learning_rate": 2.5102100350058346e-06, "loss": 0.5112, "step": 3640 }, { "epoch": 1.55, "learning_rate": 2.507292882147025e-06, "loss": 0.378, "step": 3644 }, { "epoch": 1.55, "learning_rate": 2.504375729288215e-06, "loss": 0.4282, "step": 3648 }, { "epoch": 1.55, "learning_rate": 2.5014585764294052e-06, "loss": 0.5283, "step": 3652 }, { "epoch": 1.55, "learning_rate": 2.4985414235705956e-06, "loss": 0.5269, "step": 3656 }, { "epoch": 1.56, "learning_rate": 2.4956242707117855e-06, "loss": 0.3568, "step": 3660 }, { "epoch": 1.56, "learning_rate": 2.492707117852976e-06, "loss": 0.3911, "step": 3664 }, { "epoch": 1.56, "learning_rate": 2.489789964994166e-06, "loss": 0.554, "step": 3668 }, { "epoch": 1.56, "learning_rate": 2.486872812135356e-06, "loss": 0.6132, "step": 3672 }, { "epoch": 1.56, "learning_rate": 2.483955659276546e-06, "loss": 0.5473, "step": 3676 }, { "epoch": 1.56, "learning_rate": 2.4810385064177365e-06, "loss": 0.472, "step": 3680 }, { "epoch": 1.57, "learning_rate": 2.4781213535589264e-06, "loss": 0.4616, "step": 3684 }, { "epoch": 1.57, "learning_rate": 2.4752042007001172e-06, "loss": 0.5357, "step": 3688 }, { "epoch": 1.57, "learning_rate": 2.472287047841307e-06, "loss": 0.5629, "step": 3692 }, { "epoch": 1.57, "learning_rate": 2.4693698949824975e-06, "loss": 0.5284, "step": 3696 }, { "epoch": 1.57, "learning_rate": 2.4664527421236875e-06, "loss": 0.3988, "step": 3700 }, { "epoch": 1.57, "learning_rate": 2.463535589264878e-06, "loss": 0.7007, "step": 3704 }, { "epoch": 1.58, "learning_rate": 2.4606184364060678e-06, "loss": 0.2642, "step": 3708 }, { "epoch": 1.58, "learning_rate": 2.457701283547258e-06, "loss": 0.6179, "step": 3712 }, { "epoch": 1.58, "learning_rate": 2.454784130688448e-06, "loss": 0.5595, "step": 3716 }, { "epoch": 1.58, "learning_rate": 2.4518669778296384e-06, "loss": 0.4074, "step": 3720 }, { "epoch": 1.58, "learning_rate": 2.448949824970829e-06, "loss": 0.3635, "step": 3724 }, { "epoch": 1.59, "learning_rate": 2.446032672112019e-06, "loss": 0.3888, "step": 3728 }, { "epoch": 1.59, "learning_rate": 2.443115519253209e-06, "loss": 0.2755, "step": 3732 }, { "epoch": 1.59, "learning_rate": 2.4401983663943995e-06, "loss": 0.4511, "step": 3736 }, { "epoch": 1.59, "learning_rate": 2.4372812135355894e-06, "loss": 0.5516, "step": 3740 }, { "epoch": 1.59, "learning_rate": 2.4343640606767798e-06, "loss": 0.4041, "step": 3744 }, { "epoch": 1.59, "learning_rate": 2.4314469078179697e-06, "loss": 0.1592, "step": 3748 }, { "epoch": 1.6, "learning_rate": 2.42852975495916e-06, "loss": 0.5583, "step": 3752 }, { "epoch": 1.6, "learning_rate": 2.4256126021003504e-06, "loss": 0.445, "step": 3756 }, { "epoch": 1.6, "learning_rate": 2.4226954492415404e-06, "loss": 0.3201, "step": 3760 }, { "epoch": 1.6, "learning_rate": 2.4197782963827307e-06, "loss": 0.521, "step": 3764 }, { "epoch": 1.6, "learning_rate": 2.416861143523921e-06, "loss": 0.4229, "step": 3768 }, { "epoch": 1.6, "learning_rate": 2.413943990665111e-06, "loss": 0.4319, "step": 3772 }, { "epoch": 1.61, "learning_rate": 2.4110268378063014e-06, "loss": 0.4708, "step": 3776 }, { "epoch": 1.61, "learning_rate": 2.4081096849474913e-06, "loss": 0.5419, "step": 3780 }, { "epoch": 1.61, "learning_rate": 2.4051925320886817e-06, "loss": 0.4454, "step": 3784 }, { "epoch": 1.61, "learning_rate": 2.402275379229872e-06, "loss": 0.4804, "step": 3788 }, { "epoch": 1.61, "learning_rate": 2.399358226371062e-06, "loss": 0.5686, "step": 3792 }, { "epoch": 1.61, "learning_rate": 2.3964410735122523e-06, "loss": 0.5354, "step": 3796 }, { "epoch": 1.62, "learning_rate": 2.3935239206534423e-06, "loss": 0.5101, "step": 3800 }, { "epoch": 1.62, "learning_rate": 2.3906067677946326e-06, "loss": 0.3946, "step": 3804 }, { "epoch": 1.62, "learning_rate": 2.387689614935823e-06, "loss": 0.4951, "step": 3808 }, { "epoch": 1.62, "learning_rate": 2.384772462077013e-06, "loss": 0.4952, "step": 3812 }, { "epoch": 1.62, "learning_rate": 2.3818553092182033e-06, "loss": 0.5854, "step": 3816 }, { "epoch": 1.62, "learning_rate": 2.3789381563593932e-06, "loss": 0.4074, "step": 3820 }, { "epoch": 1.63, "learning_rate": 2.3760210035005836e-06, "loss": 0.4058, "step": 3824 }, { "epoch": 1.63, "learning_rate": 2.373103850641774e-06, "loss": 0.4952, "step": 3828 }, { "epoch": 1.63, "learning_rate": 2.370186697782964e-06, "loss": 0.5502, "step": 3832 }, { "epoch": 1.63, "learning_rate": 2.3672695449241543e-06, "loss": 0.4379, "step": 3836 }, { "epoch": 1.63, "learning_rate": 2.364352392065344e-06, "loss": 0.4695, "step": 3840 }, { "epoch": 1.63, "learning_rate": 2.3614352392065346e-06, "loss": 0.5127, "step": 3844 }, { "epoch": 1.64, "learning_rate": 2.3585180863477245e-06, "loss": 0.4037, "step": 3848 }, { "epoch": 1.64, "learning_rate": 2.355600933488915e-06, "loss": 0.3387, "step": 3852 }, { "epoch": 1.64, "learning_rate": 2.3526837806301052e-06, "loss": 0.5302, "step": 3856 }, { "epoch": 1.64, "learning_rate": 2.3497666277712956e-06, "loss": 0.4549, "step": 3860 }, { "epoch": 1.64, "learning_rate": 2.3468494749124855e-06, "loss": 0.4871, "step": 3864 }, { "epoch": 1.64, "learning_rate": 2.343932322053676e-06, "loss": 0.4167, "step": 3868 }, { "epoch": 1.65, "learning_rate": 2.341015169194866e-06, "loss": 0.4498, "step": 3872 }, { "epoch": 1.65, "learning_rate": 2.338098016336056e-06, "loss": 0.3591, "step": 3876 }, { "epoch": 1.65, "learning_rate": 2.335180863477246e-06, "loss": 0.3389, "step": 3880 }, { "epoch": 1.65, "learning_rate": 2.3322637106184365e-06, "loss": 0.4688, "step": 3884 }, { "epoch": 1.65, "learning_rate": 2.329346557759627e-06, "loss": 0.4151, "step": 3888 }, { "epoch": 1.65, "learning_rate": 2.3264294049008172e-06, "loss": 0.5132, "step": 3892 }, { "epoch": 1.66, "learning_rate": 2.323512252042007e-06, "loss": 0.3682, "step": 3896 }, { "epoch": 1.66, "learning_rate": 2.3205950991831975e-06, "loss": 0.3668, "step": 3900 }, { "epoch": 1.66, "learning_rate": 2.3176779463243875e-06, "loss": 0.2637, "step": 3904 }, { "epoch": 1.66, "learning_rate": 2.314760793465578e-06, "loss": 0.5291, "step": 3908 }, { "epoch": 1.66, "learning_rate": 2.3118436406067678e-06, "loss": 0.5459, "step": 3912 }, { "epoch": 1.66, "learning_rate": 2.308926487747958e-06, "loss": 0.5774, "step": 3916 }, { "epoch": 1.67, "learning_rate": 2.306009334889148e-06, "loss": 0.5955, "step": 3920 }, { "epoch": 1.67, "learning_rate": 2.303092182030339e-06, "loss": 0.2941, "step": 3924 }, { "epoch": 1.67, "learning_rate": 2.300175029171529e-06, "loss": 0.4735, "step": 3928 }, { "epoch": 1.67, "learning_rate": 2.297257876312719e-06, "loss": 0.27, "step": 3932 }, { "epoch": 1.67, "learning_rate": 2.294340723453909e-06, "loss": 0.496, "step": 3936 }, { "epoch": 1.68, "learning_rate": 2.2914235705950995e-06, "loss": 0.279, "step": 3940 }, { "epoch": 1.68, "learning_rate": 2.2885064177362894e-06, "loss": 0.3848, "step": 3944 }, { "epoch": 1.68, "learning_rate": 2.2855892648774798e-06, "loss": 0.4868, "step": 3948 }, { "epoch": 1.68, "learning_rate": 2.2826721120186697e-06, "loss": 0.5328, "step": 3952 }, { "epoch": 1.68, "learning_rate": 2.2797549591598605e-06, "loss": 0.3838, "step": 3956 }, { "epoch": 1.68, "learning_rate": 2.2768378063010504e-06, "loss": 0.4603, "step": 3960 }, { "epoch": 1.69, "learning_rate": 2.2739206534422408e-06, "loss": 0.515, "step": 3964 }, { "epoch": 1.69, "learning_rate": 2.2710035005834307e-06, "loss": 0.4371, "step": 3968 }, { "epoch": 1.69, "learning_rate": 2.268086347724621e-06, "loss": 0.5561, "step": 3972 }, { "epoch": 1.69, "learning_rate": 2.265169194865811e-06, "loss": 0.3882, "step": 3976 }, { "epoch": 1.69, "learning_rate": 2.2622520420070014e-06, "loss": 0.4774, "step": 3980 }, { "epoch": 1.69, "learning_rate": 2.2593348891481913e-06, "loss": 0.6546, "step": 3984 }, { "epoch": 1.7, "learning_rate": 2.2564177362893817e-06, "loss": 0.4335, "step": 3988 }, { "epoch": 1.7, "learning_rate": 2.253500583430572e-06, "loss": 0.3352, "step": 3992 }, { "epoch": 1.7, "learning_rate": 2.2505834305717624e-06, "loss": 0.2946, "step": 3996 }, { "epoch": 1.7, "learning_rate": 2.2476662777129523e-06, "loss": 0.2657, "step": 4000 }, { "epoch": 1.7, "learning_rate": 2.2447491248541427e-06, "loss": 0.4859, "step": 4004 }, { "epoch": 1.7, "learning_rate": 2.2418319719953326e-06, "loss": 0.3047, "step": 4008 }, { "epoch": 1.71, "learning_rate": 2.238914819136523e-06, "loss": 0.5828, "step": 4012 }, { "epoch": 1.71, "learning_rate": 2.235997666277713e-06, "loss": 0.4141, "step": 4016 }, { "epoch": 1.71, "learning_rate": 2.2330805134189033e-06, "loss": 0.4643, "step": 4020 }, { "epoch": 1.71, "learning_rate": 2.2301633605600937e-06, "loss": 0.5517, "step": 4024 }, { "epoch": 1.71, "learning_rate": 2.2272462077012836e-06, "loss": 0.551, "step": 4028 }, { "epoch": 1.71, "learning_rate": 2.224329054842474e-06, "loss": 0.4237, "step": 4032 }, { "epoch": 1.72, "learning_rate": 2.2214119019836643e-06, "loss": 0.4345, "step": 4036 }, { "epoch": 1.72, "learning_rate": 2.2184947491248543e-06, "loss": 0.428, "step": 4040 }, { "epoch": 1.72, "learning_rate": 2.2155775962660446e-06, "loss": 0.3503, "step": 4044 }, { "epoch": 1.72, "learning_rate": 2.2126604434072346e-06, "loss": 0.4281, "step": 4048 }, { "epoch": 1.72, "learning_rate": 2.209743290548425e-06, "loss": 0.5451, "step": 4052 }, { "epoch": 1.72, "learning_rate": 2.2068261376896153e-06, "loss": 0.6199, "step": 4056 }, { "epoch": 1.73, "learning_rate": 2.2039089848308052e-06, "loss": 0.6851, "step": 4060 }, { "epoch": 1.73, "learning_rate": 2.2009918319719956e-06, "loss": 0.3541, "step": 4064 }, { "epoch": 1.73, "learning_rate": 2.1980746791131855e-06, "loss": 0.4333, "step": 4068 }, { "epoch": 1.73, "learning_rate": 2.195157526254376e-06, "loss": 0.413, "step": 4072 }, { "epoch": 1.73, "learning_rate": 2.1922403733955663e-06, "loss": 0.4385, "step": 4076 }, { "epoch": 1.73, "learning_rate": 2.189323220536756e-06, "loss": 0.3163, "step": 4080 }, { "epoch": 1.74, "learning_rate": 2.1864060676779466e-06, "loss": 0.4678, "step": 4084 }, { "epoch": 1.74, "learning_rate": 2.183488914819137e-06, "loss": 0.2889, "step": 4088 }, { "epoch": 1.74, "learning_rate": 2.180571761960327e-06, "loss": 0.3339, "step": 4092 }, { "epoch": 1.74, "learning_rate": 2.1776546091015172e-06, "loss": 0.4381, "step": 4096 }, { "epoch": 1.74, "learning_rate": 2.174737456242707e-06, "loss": 0.4926, "step": 4100 }, { "epoch": 1.74, "learning_rate": 2.1718203033838975e-06, "loss": 0.4147, "step": 4104 }, { "epoch": 1.75, "learning_rate": 2.1689031505250875e-06, "loss": 0.525, "step": 4108 }, { "epoch": 1.75, "learning_rate": 2.165985997666278e-06, "loss": 0.6573, "step": 4112 }, { "epoch": 1.75, "learning_rate": 2.163068844807468e-06, "loss": 0.4188, "step": 4116 }, { "epoch": 1.75, "learning_rate": 2.1601516919486586e-06, "loss": 0.2622, "step": 4120 }, { "epoch": 1.75, "learning_rate": 2.1572345390898485e-06, "loss": 0.4533, "step": 4124 }, { "epoch": 1.76, "learning_rate": 2.154317386231039e-06, "loss": 0.5007, "step": 4128 }, { "epoch": 1.76, "learning_rate": 2.151400233372229e-06, "loss": 0.5307, "step": 4132 }, { "epoch": 1.76, "learning_rate": 2.148483080513419e-06, "loss": 0.4071, "step": 4136 }, { "epoch": 1.76, "learning_rate": 2.145565927654609e-06, "loss": 0.5252, "step": 4140 }, { "epoch": 1.76, "learning_rate": 2.1426487747957995e-06, "loss": 0.3672, "step": 4144 }, { "epoch": 1.76, "learning_rate": 2.1397316219369894e-06, "loss": 0.3608, "step": 4148 }, { "epoch": 1.77, "learning_rate": 2.1368144690781798e-06, "loss": 0.4581, "step": 4152 }, { "epoch": 1.77, "learning_rate": 2.13389731621937e-06, "loss": 0.4953, "step": 4156 }, { "epoch": 1.77, "learning_rate": 2.1309801633605605e-06, "loss": 0.515, "step": 4160 }, { "epoch": 1.77, "learning_rate": 2.1280630105017504e-06, "loss": 0.4272, "step": 4164 }, { "epoch": 1.77, "learning_rate": 2.1251458576429408e-06, "loss": 0.5713, "step": 4168 }, { "epoch": 1.77, "learning_rate": 2.1222287047841307e-06, "loss": 0.3837, "step": 4172 }, { "epoch": 1.78, "learning_rate": 2.119311551925321e-06, "loss": 0.4367, "step": 4176 }, { "epoch": 1.78, "learning_rate": 2.116394399066511e-06, "loss": 0.3561, "step": 4180 }, { "epoch": 1.78, "learning_rate": 2.1134772462077014e-06, "loss": 0.2825, "step": 4184 }, { "epoch": 1.78, "learning_rate": 2.1105600933488917e-06, "loss": 0.2891, "step": 4188 }, { "epoch": 1.78, "learning_rate": 2.107642940490082e-06, "loss": 0.4617, "step": 4192 }, { "epoch": 1.78, "learning_rate": 2.104725787631272e-06, "loss": 0.4423, "step": 4196 }, { "epoch": 1.79, "learning_rate": 2.1018086347724624e-06, "loss": 0.2344, "step": 4200 }, { "epoch": 1.79, "learning_rate": 2.0988914819136523e-06, "loss": 0.5355, "step": 4204 }, { "epoch": 1.79, "learning_rate": 2.0959743290548427e-06, "loss": 0.427, "step": 4208 }, { "epoch": 1.79, "learning_rate": 2.0930571761960326e-06, "loss": 0.3997, "step": 4212 }, { "epoch": 1.79, "learning_rate": 2.090140023337223e-06, "loss": 0.3945, "step": 4216 }, { "epoch": 1.79, "learning_rate": 2.0872228704784134e-06, "loss": 0.3998, "step": 4220 }, { "epoch": 1.8, "learning_rate": 2.0843057176196037e-06, "loss": 0.4695, "step": 4224 }, { "epoch": 1.8, "learning_rate": 2.0813885647607937e-06, "loss": 0.3051, "step": 4228 }, { "epoch": 1.8, "learning_rate": 2.078471411901984e-06, "loss": 0.4195, "step": 4232 }, { "epoch": 1.8, "learning_rate": 2.075554259043174e-06, "loss": 0.4064, "step": 4236 }, { "epoch": 1.8, "learning_rate": 2.0726371061843643e-06, "loss": 0.4709, "step": 4240 }, { "epoch": 1.8, "learning_rate": 2.0697199533255543e-06, "loss": 0.6638, "step": 4244 }, { "epoch": 1.81, "learning_rate": 2.0668028004667446e-06, "loss": 0.4436, "step": 4248 }, { "epoch": 1.81, "learning_rate": 2.0638856476079346e-06, "loss": 0.3434, "step": 4252 }, { "epoch": 1.81, "learning_rate": 2.060968494749125e-06, "loss": 0.2871, "step": 4256 }, { "epoch": 1.81, "learning_rate": 2.0580513418903153e-06, "loss": 0.2696, "step": 4260 }, { "epoch": 1.81, "learning_rate": 2.0551341890315057e-06, "loss": 0.373, "step": 4264 }, { "epoch": 1.81, "learning_rate": 2.0522170361726956e-06, "loss": 0.4656, "step": 4268 }, { "epoch": 1.82, "learning_rate": 2.049299883313886e-06, "loss": 0.3027, "step": 4272 }, { "epoch": 1.82, "learning_rate": 2.046382730455076e-06, "loss": 0.7287, "step": 4276 }, { "epoch": 1.82, "learning_rate": 2.0434655775962663e-06, "loss": 0.3299, "step": 4280 }, { "epoch": 1.82, "learning_rate": 2.040548424737456e-06, "loss": 0.3556, "step": 4284 }, { "epoch": 1.82, "learning_rate": 2.0376312718786466e-06, "loss": 0.3275, "step": 4288 }, { "epoch": 1.82, "learning_rate": 2.034714119019837e-06, "loss": 0.334, "step": 4292 }, { "epoch": 1.83, "learning_rate": 2.031796966161027e-06, "loss": 0.4437, "step": 4296 }, { "epoch": 1.83, "learning_rate": 2.0288798133022172e-06, "loss": 0.4776, "step": 4300 }, { "epoch": 1.83, "learning_rate": 2.0259626604434076e-06, "loss": 0.497, "step": 4304 }, { "epoch": 1.83, "learning_rate": 2.0230455075845975e-06, "loss": 0.6054, "step": 4308 }, { "epoch": 1.83, "learning_rate": 2.020128354725788e-06, "loss": 0.3877, "step": 4312 }, { "epoch": 1.84, "learning_rate": 2.017211201866978e-06, "loss": 0.3442, "step": 4316 }, { "epoch": 1.84, "learning_rate": 2.014294049008168e-06, "loss": 0.5557, "step": 4320 }, { "epoch": 1.84, "learning_rate": 2.0113768961493586e-06, "loss": 0.5137, "step": 4324 }, { "epoch": 1.84, "learning_rate": 2.0084597432905485e-06, "loss": 0.6162, "step": 4328 }, { "epoch": 1.84, "learning_rate": 2.005542590431739e-06, "loss": 0.4181, "step": 4332 }, { "epoch": 1.84, "learning_rate": 2.002625437572929e-06, "loss": 0.3354, "step": 4336 }, { "epoch": 1.85, "learning_rate": 1.999708284714119e-06, "loss": 0.3924, "step": 4340 }, { "epoch": 1.85, "learning_rate": 1.9967911318553095e-06, "loss": 0.3781, "step": 4344 }, { "epoch": 1.85, "learning_rate": 1.9938739789964995e-06, "loss": 0.4388, "step": 4348 }, { "epoch": 1.85, "learning_rate": 1.99095682613769e-06, "loss": 0.4512, "step": 4352 }, { "epoch": 1.85, "learning_rate": 1.98803967327888e-06, "loss": 0.4171, "step": 4356 }, { "epoch": 1.85, "learning_rate": 1.98512252042007e-06, "loss": 0.3768, "step": 4360 }, { "epoch": 1.86, "learning_rate": 1.9822053675612605e-06, "loss": 0.3517, "step": 4364 }, { "epoch": 1.86, "learning_rate": 1.9792882147024504e-06, "loss": 0.412, "step": 4368 }, { "epoch": 1.86, "learning_rate": 1.9763710618436408e-06, "loss": 0.2887, "step": 4372 }, { "epoch": 1.86, "learning_rate": 1.9734539089848307e-06, "loss": 0.3032, "step": 4376 }, { "epoch": 1.86, "learning_rate": 1.970536756126021e-06, "loss": 0.3046, "step": 4380 }, { "epoch": 1.86, "learning_rate": 1.9676196032672114e-06, "loss": 0.3144, "step": 4384 }, { "epoch": 1.87, "learning_rate": 1.964702450408402e-06, "loss": 0.533, "step": 4388 }, { "epoch": 1.87, "learning_rate": 1.9617852975495917e-06, "loss": 0.3993, "step": 4392 }, { "epoch": 1.87, "learning_rate": 1.958868144690782e-06, "loss": 0.3253, "step": 4396 }, { "epoch": 1.87, "learning_rate": 1.955950991831972e-06, "loss": 0.4888, "step": 4400 }, { "epoch": 1.87, "learning_rate": 1.9530338389731624e-06, "loss": 0.5357, "step": 4404 }, { "epoch": 1.87, "learning_rate": 1.9501166861143524e-06, "loss": 0.3057, "step": 4408 }, { "epoch": 1.88, "learning_rate": 1.9471995332555427e-06, "loss": 0.3553, "step": 4412 }, { "epoch": 1.88, "learning_rate": 1.9442823803967327e-06, "loss": 0.3878, "step": 4416 }, { "epoch": 1.88, "learning_rate": 1.9413652275379234e-06, "loss": 0.315, "step": 4420 }, { "epoch": 1.88, "learning_rate": 1.9384480746791134e-06, "loss": 0.3791, "step": 4424 }, { "epoch": 1.88, "learning_rate": 1.9355309218203037e-06, "loss": 0.4092, "step": 4428 }, { "epoch": 1.88, "learning_rate": 1.9326137689614937e-06, "loss": 0.3911, "step": 4432 }, { "epoch": 1.89, "learning_rate": 1.929696616102684e-06, "loss": 0.3135, "step": 4436 }, { "epoch": 1.89, "learning_rate": 1.926779463243874e-06, "loss": 0.3574, "step": 4440 }, { "epoch": 1.89, "learning_rate": 1.9238623103850643e-06, "loss": 0.3147, "step": 4444 }, { "epoch": 1.89, "learning_rate": 1.9209451575262543e-06, "loss": 0.5802, "step": 4448 }, { "epoch": 1.89, "learning_rate": 1.9180280046674446e-06, "loss": 0.5169, "step": 4452 }, { "epoch": 1.89, "learning_rate": 1.915110851808635e-06, "loss": 0.4599, "step": 4456 }, { "epoch": 1.9, "learning_rate": 1.9121936989498254e-06, "loss": 0.3763, "step": 4460 }, { "epoch": 1.9, "learning_rate": 1.9092765460910153e-06, "loss": 0.4633, "step": 4464 }, { "epoch": 1.9, "learning_rate": 1.9063593932322055e-06, "loss": 0.462, "step": 4468 }, { "epoch": 1.9, "learning_rate": 1.9034422403733956e-06, "loss": 0.4908, "step": 4472 }, { "epoch": 1.9, "learning_rate": 1.900525087514586e-06, "loss": 0.3367, "step": 4476 }, { "epoch": 1.9, "learning_rate": 1.8976079346557761e-06, "loss": 0.4497, "step": 4480 }, { "epoch": 1.91, "learning_rate": 1.8946907817969663e-06, "loss": 0.3253, "step": 4484 }, { "epoch": 1.91, "learning_rate": 1.8917736289381566e-06, "loss": 0.5464, "step": 4488 }, { "epoch": 1.91, "learning_rate": 1.8888564760793468e-06, "loss": 0.3004, "step": 4492 }, { "epoch": 1.91, "learning_rate": 1.885939323220537e-06, "loss": 0.3876, "step": 4496 }, { "epoch": 1.91, "learning_rate": 1.883022170361727e-06, "loss": 0.4443, "step": 4500 } ], "logging_steps": 4, "max_steps": 7056, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "total_flos": 34569317253120.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }