diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,31439 @@ +{ + "best_metric": 23.848435348153973, + "best_model_checkpoint": "/content/XLMFinetune/model-bin2/test/checkpoint-26168", + "epoch": 2.0, + "eval_steps": 500, + "global_step": 26168, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 6.111535523300229e-07, + "loss": 5.3672, + "step": 5 + }, + { + "epoch": 0.0, + "learning_rate": 1.3750954927425516e-06, + "loss": 5.4479, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 2.1390374331550802e-06, + "loss": 5.3383, + "step": 15 + }, + { + "epoch": 0.0, + "learning_rate": 2.902979373567609e-06, + "loss": 4.952, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 3.666921313980138e-06, + "loss": 4.4477, + "step": 25 + }, + { + "epoch": 0.0, + "learning_rate": 4.430863254392666e-06, + "loss": 4.4078, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 5.194805194805195e-06, + "loss": 4.5465, + "step": 35 + }, + { + "epoch": 0.0, + "learning_rate": 5.958747135217724e-06, + "loss": 4.3111, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 6.722689075630252e-06, + "loss": 4.3164, + "step": 45 + }, + { + "epoch": 0.0, + "learning_rate": 7.4866310160427806e-06, + "loss": 4.1561, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 8.25057295645531e-06, + "loss": 5.8121, + "step": 55 + }, + { + "epoch": 0.0, + "learning_rate": 9.014514896867839e-06, + "loss": 5.9082, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 9.778456837280367e-06, + "loss": 5.732, + "step": 65 + }, + { + "epoch": 0.01, + "learning_rate": 1.0542398777692896e-05, + "loss": 5.7656, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 1.1306340718105425e-05, + "loss": 5.3199, + "step": 75 + }, + { + "epoch": 0.01, + "learning_rate": 1.2070282658517953e-05, + "loss": 5.1668, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 1.2834224598930484e-05, + "loss": 5.2023, + "step": 85 + }, + { + "epoch": 0.01, + "learning_rate": 1.359816653934301e-05, + "loss": 4.1394, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 1.436210847975554e-05, + "loss": 3.974, + "step": 95 + }, + { + "epoch": 0.01, + "learning_rate": 1.5126050420168067e-05, + "loss": 3.4523, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 1.5889992360580598e-05, + "loss": 5.4578, + "step": 105 + }, + { + "epoch": 0.01, + "learning_rate": 1.6653934300993127e-05, + "loss": 5.7855, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 1.7417876241405653e-05, + "loss": 5.0931, + "step": 115 + }, + { + "epoch": 0.01, + "learning_rate": 1.8181818181818182e-05, + "loss": 5.3789, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 1.894576012223071e-05, + "loss": 5.2285, + "step": 125 + }, + { + "epoch": 0.01, + "learning_rate": 1.970970206264324e-05, + "loss": 4.8857, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 2.047364400305577e-05, + "loss": 4.5129, + "step": 135 + }, + { + "epoch": 0.01, + "learning_rate": 2.1237585943468296e-05, + "loss": 4.0876, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 2.2001527883880825e-05, + "loss": 4.2938, + "step": 145 + }, + { + "epoch": 0.01, + "learning_rate": 2.2765469824293358e-05, + "loss": 3.9873, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 2.3529411764705884e-05, + "loss": 5.5092, + "step": 155 + }, + { + "epoch": 0.01, + "learning_rate": 2.4293353705118413e-05, + "loss": 5.9281, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 2.5057295645530942e-05, + "loss": 5.6781, + "step": 165 + }, + { + "epoch": 0.01, + "learning_rate": 2.5821237585943468e-05, + "loss": 4.7973, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 2.6585179526355997e-05, + "loss": 5.1039, + "step": 175 + }, + { + "epoch": 0.01, + "learning_rate": 2.734912146676853e-05, + "loss": 5.259, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 2.8113063407181056e-05, + "loss": 5.0793, + "step": 185 + }, + { + "epoch": 0.01, + "learning_rate": 2.8877005347593582e-05, + "loss": 4.8102, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 2.9640947288006115e-05, + "loss": 4.2803, + "step": 195 + }, + { + "epoch": 0.02, + "learning_rate": 3.0404889228418644e-05, + "loss": 3.758, + "step": 200 + }, + { + "epoch": 0.02, + "learning_rate": 3.1168831168831166e-05, + "loss": 5.957, + "step": 205 + }, + { + "epoch": 0.02, + "learning_rate": 3.1932773109243696e-05, + "loss": 5.9391, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 3.269671504965623e-05, + "loss": 5.4367, + "step": 215 + }, + { + "epoch": 0.02, + "learning_rate": 3.3460656990068754e-05, + "loss": 5.3895, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 3.4224598930481284e-05, + "loss": 5.2668, + "step": 225 + }, + { + "epoch": 0.02, + "learning_rate": 3.498854087089381e-05, + "loss": 4.9992, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 3.575248281130634e-05, + "loss": 4.3316, + "step": 235 + }, + { + "epoch": 0.02, + "learning_rate": 3.651642475171887e-05, + "loss": 4.4064, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 3.72803666921314e-05, + "loss": 3.9166, + "step": 245 + }, + { + "epoch": 0.02, + "learning_rate": 3.804430863254393e-05, + "loss": 3.5941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 3.880825057295646e-05, + "loss": 5.6594, + "step": 255 + }, + { + "epoch": 0.02, + "learning_rate": 3.957219251336899e-05, + "loss": 5.5082, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.033613445378152e-05, + "loss": 5.4387, + "step": 265 + }, + { + "epoch": 0.02, + "learning_rate": 4.110007639419404e-05, + "loss": 5.1539, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.186401833460657e-05, + "loss": 5.0574, + "step": 275 + }, + { + "epoch": 0.02, + "learning_rate": 4.26279602750191e-05, + "loss": 4.8289, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.339190221543163e-05, + "loss": 4.8527, + "step": 285 + }, + { + "epoch": 0.02, + "learning_rate": 4.415584415584416e-05, + "loss": 4.3766, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.491978609625669e-05, + "loss": 3.7943, + "step": 295 + }, + { + "epoch": 0.02, + "learning_rate": 4.5683728036669216e-05, + "loss": 3.2498, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.6447669977081745e-05, + "loss": 5.7168, + "step": 305 + }, + { + "epoch": 0.02, + "learning_rate": 4.7211611917494275e-05, + "loss": 5.8766, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.7975553857906804e-05, + "loss": 5.7383, + "step": 315 + }, + { + "epoch": 0.02, + "learning_rate": 4.8739495798319326e-05, + "loss": 5.4395, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.950343773873186e-05, + "loss": 5.3113, + "step": 325 + }, + { + "epoch": 0.03, + "learning_rate": 5.026737967914439e-05, + "loss": 4.9613, + "step": 330 + }, + { + "epoch": 0.03, + "learning_rate": 5.1031321619556914e-05, + "loss": 4.5619, + "step": 335 + }, + { + "epoch": 0.03, + "learning_rate": 5.1795263559969444e-05, + "loss": 4.3414, + "step": 340 + }, + { + "epoch": 0.03, + "learning_rate": 5.255920550038197e-05, + "loss": 4.4539, + "step": 345 + }, + { + "epoch": 0.03, + "learning_rate": 5.332314744079451e-05, + "loss": 3.3832, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 5.408708938120703e-05, + "loss": 5.9297, + "step": 355 + }, + { + "epoch": 0.03, + "learning_rate": 5.485103132161956e-05, + "loss": 5.4016, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 5.561497326203209e-05, + "loss": 4.8334, + "step": 365 + }, + { + "epoch": 0.03, + "learning_rate": 5.637891520244461e-05, + "loss": 5.2273, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 5.714285714285714e-05, + "loss": 4.9238, + "step": 375 + }, + { + "epoch": 0.03, + "learning_rate": 5.7754010695187164e-05, + "loss": 4.3672, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 5.851795263559969e-05, + "loss": 4.2797, + "step": 385 + }, + { + "epoch": 0.03, + "learning_rate": 5.928189457601223e-05, + "loss": 4.1217, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 6.004583651642476e-05, + "loss": 4.0648, + "step": 395 + }, + { + "epoch": 0.03, + "learning_rate": 6.080977845683729e-05, + "loss": 3.3252, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 6.157372039724982e-05, + "loss": 5.5906, + "step": 405 + }, + { + "epoch": 0.03, + "learning_rate": 6.233766233766233e-05, + "loss": 5.3297, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 6.310160427807486e-05, + "loss": 5.5746, + "step": 415 + }, + { + "epoch": 0.03, + "learning_rate": 6.386554621848739e-05, + "loss": 5.2078, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 6.462948815889993e-05, + "loss": 4.8525, + "step": 425 + }, + { + "epoch": 0.03, + "learning_rate": 6.539343009931246e-05, + "loss": 4.7914, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 6.615737203972498e-05, + "loss": 4.6715, + "step": 435 + }, + { + "epoch": 0.03, + "learning_rate": 6.692131398013751e-05, + "loss": 4.5393, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 6.753246753246754e-05, + "loss": 4.075, + "step": 445 + }, + { + "epoch": 0.03, + "learning_rate": 6.829640947288007e-05, + "loss": 3.6494, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 6.90603514132926e-05, + "loss": 5.5176, + "step": 455 + }, + { + "epoch": 0.04, + "learning_rate": 6.967150496562261e-05, + "loss": 6.3191, + "step": 460 + }, + { + "epoch": 0.04, + "learning_rate": 7.043544690603514e-05, + "loss": 4.6111, + "step": 465 + }, + { + "epoch": 0.04, + "learning_rate": 7.119938884644768e-05, + "loss": 5.2383, + "step": 470 + }, + { + "epoch": 0.04, + "learning_rate": 7.181054239877769e-05, + "loss": 5.5373, + "step": 475 + }, + { + "epoch": 0.04, + "learning_rate": 7.257448433919023e-05, + "loss": 4.9678, + "step": 480 + }, + { + "epoch": 0.04, + "learning_rate": 7.333842627960276e-05, + "loss": 4.3086, + "step": 485 + }, + { + "epoch": 0.04, + "learning_rate": 7.410236822001529e-05, + "loss": 4.2645, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 7.486631016042782e-05, + "loss": 3.6471, + "step": 495 + }, + { + "epoch": 0.04, + "learning_rate": 7.563025210084033e-05, + "loss": 2.734, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 7.639419404125286e-05, + "loss": 5.6322, + "step": 505 + }, + { + "epoch": 0.04, + "learning_rate": 7.71581359816654e-05, + "loss": 6.4516, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 7.792207792207793e-05, + "loss": 6.141, + "step": 515 + }, + { + "epoch": 0.04, + "learning_rate": 7.868601986249046e-05, + "loss": 4.4348, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 7.944996180290298e-05, + "loss": 4.6922, + "step": 525 + }, + { + "epoch": 0.04, + "learning_rate": 8.021390374331551e-05, + "loss": 4.8404, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 8.097784568372804e-05, + "loss": 5.326, + "step": 535 + }, + { + "epoch": 0.04, + "learning_rate": 8.158899923605807e-05, + "loss": 4.1762, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 8.23529411764706e-05, + "loss": 2.7068, + "step": 545 + }, + { + "epoch": 0.04, + "learning_rate": 8.311688311688312e-05, + "loss": 2.9233, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 8.388082505729565e-05, + "loss": 5.177, + "step": 555 + }, + { + "epoch": 0.04, + "learning_rate": 8.464476699770818e-05, + "loss": 5.1857, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 8.540870893812071e-05, + "loss": 4.8047, + "step": 565 + }, + { + "epoch": 0.04, + "learning_rate": 8.617265087853324e-05, + "loss": 5.1225, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 8.693659281894576e-05, + "loss": 3.5857, + "step": 575 + }, + { + "epoch": 0.04, + "learning_rate": 8.770053475935829e-05, + "loss": 5.016, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 8.846447669977083e-05, + "loss": 4.1475, + "step": 585 + }, + { + "epoch": 0.05, + "learning_rate": 8.922841864018336e-05, + "loss": 3.4462, + "step": 590 + }, + { + "epoch": 0.05, + "learning_rate": 8.999236058059587e-05, + "loss": 2.926, + "step": 595 + }, + { + "epoch": 0.05, + "learning_rate": 9.07563025210084e-05, + "loss": 3.36, + "step": 600 + }, + { + "epoch": 0.05, + "learning_rate": 9.152024446142093e-05, + "loss": 5.6203, + "step": 605 + }, + { + "epoch": 0.05, + "learning_rate": 9.228418640183346e-05, + "loss": 5.2746, + "step": 610 + }, + { + "epoch": 0.05, + "learning_rate": 9.3048128342246e-05, + "loss": 6.3246, + "step": 615 + }, + { + "epoch": 0.05, + "learning_rate": 9.381207028265852e-05, + "loss": 4.7615, + "step": 620 + }, + { + "epoch": 0.05, + "learning_rate": 9.457601222307105e-05, + "loss": 4.4709, + "step": 625 + }, + { + "epoch": 0.05, + "learning_rate": 9.533995416348358e-05, + "loss": 4.0367, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 9.610389610389611e-05, + "loss": 4.5443, + "step": 635 + }, + { + "epoch": 0.05, + "learning_rate": 9.686783804430864e-05, + "loss": 2.754, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 9.763177998472117e-05, + "loss": 2.841, + "step": 645 + }, + { + "epoch": 0.05, + "learning_rate": 9.83957219251337e-05, + "loss": 2.6588, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 9.915966386554623e-05, + "loss": 5.3617, + "step": 655 + }, + { + "epoch": 0.05, + "learning_rate": 9.992360580595875e-05, + "loss": 6.0061, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010068754774637128, + "loss": 5.8316, + "step": 665 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010145148968678381, + "loss": 5.1043, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010221543162719633, + "loss": 3.8354, + "step": 675 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010297937356760887, + "loss": 3.8439, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010374331550802139, + "loss": 5.7764, + "step": 685 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010450725744843393, + "loss": 3.8581, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010527119938884645, + "loss": 2.841, + "step": 695 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010603514132925898, + "loss": 2.4475, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010679908326967152, + "loss": 5.5125, + "step": 705 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010756302521008403, + "loss": 5.4578, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 0.00010832696715049658, + "loss": 5.5195, + "step": 715 + }, + { + "epoch": 0.06, + "learning_rate": 0.00010909090909090909, + "loss": 4.5291, + "step": 720 + }, + { + "epoch": 0.06, + "learning_rate": 0.00010985485103132162, + "loss": 4.5887, + "step": 725 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011061879297173416, + "loss": 5.2873, + "step": 730 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011138273491214668, + "loss": 3.6059, + "step": 735 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011214667685255922, + "loss": 3.4224, + "step": 740 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011291061879297174, + "loss": 3.1507, + "step": 745 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011367456073338427, + "loss": 2.7281, + "step": 750 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011443850267379678, + "loss": 4.7617, + "step": 755 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011520244461420933, + "loss": 5.1021, + "step": 760 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011596638655462187, + "loss": 4.8506, + "step": 765 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011673032849503439, + "loss": 4.4602, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011749427043544691, + "loss": 3.5974, + "step": 775 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011825821237585943, + "loss": 3.5879, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011902215431627197, + "loss": 4.2986, + "step": 785 + }, + { + "epoch": 0.06, + "learning_rate": 0.00011978609625668449, + "loss": 2.5129, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012055003819709703, + "loss": 3.3521, + "step": 795 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012131398013750956, + "loss": 1.9732, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012207792207792208, + "loss": 4.8342, + "step": 805 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001228418640183346, + "loss": 5.3076, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012360580595874714, + "loss": 4.8811, + "step": 815 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012436974789915966, + "loss": 5.3889, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001251336898395722, + "loss": 3.7623, + "step": 825 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012589763177998472, + "loss": 4.561, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012666157372039725, + "loss": 4.0161, + "step": 835 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012742551566080978, + "loss": 3.5828, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 0.0001281894576012223, + "loss": 2.6784, + "step": 845 + }, + { + "epoch": 0.06, + "learning_rate": 0.00012895339954163484, + "loss": 2.1759, + "step": 850 + }, + { + "epoch": 0.07, + "learning_rate": 0.00012971734148204737, + "loss": 5.285, + "step": 855 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001304812834224599, + "loss": 4.3326, + "step": 860 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013124522536287243, + "loss": 4.8477, + "step": 865 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013200916730328496, + "loss": 3.8988, + "step": 870 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001327731092436975, + "loss": 3.8404, + "step": 875 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013353705118411002, + "loss": 2.9827, + "step": 880 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013430099312452255, + "loss": 3.5191, + "step": 885 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013506493506493507, + "loss": 3.0866, + "step": 890 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001358288770053476, + "loss": 3.6331, + "step": 895 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013659281894576013, + "loss": 2.1087, + "step": 900 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013735676088617266, + "loss": 4.9768, + "step": 905 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001381207028265852, + "loss": 4.974, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013888464476699772, + "loss": 5.1777, + "step": 915 + }, + { + "epoch": 0.07, + "learning_rate": 0.00013964858670741022, + "loss": 5.3486, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014041252864782278, + "loss": 4.2877, + "step": 925 + }, + { + "epoch": 0.07, + "learning_rate": 0.0001411764705882353, + "loss": 3.8871, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014194041252864784, + "loss": 3.4246, + "step": 935 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014270435446906037, + "loss": 4.7645, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014346829640947287, + "loss": 2.9158, + "step": 945 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014423223834988543, + "loss": 1.6789, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014499618029029793, + "loss": 4.8162, + "step": 955 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014576012223071048, + "loss": 4.7348, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014652406417112301, + "loss": 4.8879, + "step": 965 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014728800611153552, + "loss": 6.2023, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014805194805194807, + "loss": 3.7848, + "step": 975 + }, + { + "epoch": 0.07, + "learning_rate": 0.00014881588999236057, + "loss": 3.9518, + "step": 980 + }, + { + "epoch": 0.08, + "learning_rate": 0.00014957983193277313, + "loss": 5.0244, + "step": 985 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015034377387318566, + "loss": 3.1289, + "step": 990 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015110771581359816, + "loss": 2.6522, + "step": 995 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015187165775401072, + "loss": 2.3406, + "step": 1000 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015263559969442322, + "loss": 5.4945, + "step": 1005 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015339954163483578, + "loss": 6.2398, + "step": 1010 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015416348357524828, + "loss": 4.6941, + "step": 1015 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001549274255156608, + "loss": 5.5852, + "step": 1020 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015569136745607337, + "loss": 4.1475, + "step": 1025 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015645530939648587, + "loss": 3.8572, + "step": 1030 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001572192513368984, + "loss": 3.3049, + "step": 1035 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015798319327731093, + "loss": 2.8052, + "step": 1040 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015874713521772346, + "loss": 3.4445, + "step": 1045 + }, + { + "epoch": 0.08, + "learning_rate": 0.00015951107715813598, + "loss": 1.85, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016027501909854851, + "loss": 5.0895, + "step": 1055 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016103896103896104, + "loss": 4.1975, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016180290297937357, + "loss": 4.8666, + "step": 1065 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001625668449197861, + "loss": 5.3641, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016333078686019863, + "loss": 4.3744, + "step": 1075 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016409472880061116, + "loss": 4.8926, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001648586707410237, + "loss": 3.8107, + "step": 1085 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016562261268143622, + "loss": 2.8379, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016638655462184875, + "loss": 2.4593, + "step": 1095 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016715049656226128, + "loss": 2.2865, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 0.0001679144385026738, + "loss": 5.5463, + "step": 1105 + }, + { + "epoch": 0.08, + "learning_rate": 0.00016867838044308634, + "loss": 5.9701, + "step": 1110 + }, + { + "epoch": 0.09, + "learning_rate": 0.00016944232238349887, + "loss": 4.5203, + "step": 1115 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001702062643239114, + "loss": 4.7477, + "step": 1120 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017097020626432392, + "loss": 3.3791, + "step": 1125 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017173414820473645, + "loss": 3.7066, + "step": 1130 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017249809014514898, + "loss": 4.1764, + "step": 1135 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001732620320855615, + "loss": 3.2178, + "step": 1140 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017402597402597401, + "loss": 2.541, + "step": 1145 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017478991596638657, + "loss": 3.3618, + "step": 1150 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001755538579067991, + "loss": 4.0588, + "step": 1155 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017631779984721163, + "loss": 5.4387, + "step": 1160 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017708174178762416, + "loss": 4.2572, + "step": 1165 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017784568372803666, + "loss": 5.133, + "step": 1170 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017860962566844922, + "loss": 4.4044, + "step": 1175 + }, + { + "epoch": 0.09, + "learning_rate": 0.00017937356760886172, + "loss": 3.218, + "step": 1180 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018013750954927428, + "loss": 3.2953, + "step": 1185 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001809014514896868, + "loss": 2.4731, + "step": 1190 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001816653934300993, + "loss": 3.5734, + "step": 1195 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018242933537051186, + "loss": 1.5107, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018319327731092437, + "loss": 4.7984, + "step": 1205 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018395721925133692, + "loss": 5.5371, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018472116119174942, + "loss": 3.9248, + "step": 1215 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018548510313216195, + "loss": 4.2715, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 0.0001862490450725745, + "loss": 4.9904, + "step": 1225 + }, + { + "epoch": 0.09, + "learning_rate": 0.000187012987012987, + "loss": 3.2785, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018777692895339957, + "loss": 3.5854, + "step": 1235 + }, + { + "epoch": 0.09, + "learning_rate": 0.00018854087089381207, + "loss": 3.5434, + "step": 1240 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001893048128342246, + "loss": 2.4767, + "step": 1245 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019006875477463716, + "loss": 1.1776, + "step": 1250 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019083269671504966, + "loss": 5.5906, + "step": 1255 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019159663865546221, + "loss": 5.1738, + "step": 1260 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019236058059587472, + "loss": 4.7447, + "step": 1265 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019312452253628725, + "loss": 4.5672, + "step": 1270 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019388846447669978, + "loss": 3.4076, + "step": 1275 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001946524064171123, + "loss": 3.9701, + "step": 1280 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019541634835752486, + "loss": 3.6115, + "step": 1285 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019618029029793736, + "loss": 3.1557, + "step": 1290 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001969442322383499, + "loss": 2.698, + "step": 1295 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019770817417876242, + "loss": 1.5998, + "step": 1300 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019847211611917495, + "loss": 5.1922, + "step": 1305 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019923605805958748, + "loss": 4.9844, + "step": 1310 + }, + { + "epoch": 0.1, + "learning_rate": 0.0002, + "loss": 3.5535, + "step": 1315 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019995977312039907, + "loss": 3.9018, + "step": 1320 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019991954624079813, + "loss": 4.1891, + "step": 1325 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019987931936119716, + "loss": 3.2686, + "step": 1330 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001998390924815962, + "loss": 3.498, + "step": 1335 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019979886560199525, + "loss": 1.9607, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001997586387223943, + "loss": 2.2703, + "step": 1345 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019971841184279336, + "loss": 2.7828, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019967818496319242, + "loss": 5.5039, + "step": 1355 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019963795808359148, + "loss": 5.6221, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 0.0001995977312039905, + "loss": 5.0553, + "step": 1365 + }, + { + "epoch": 0.1, + "learning_rate": 0.00019955750432438957, + "loss": 4.3732, + "step": 1370 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001995172774447886, + "loss": 4.2107, + "step": 1375 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019947705056518766, + "loss": 3.9906, + "step": 1380 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019943682368558672, + "loss": 3.2902, + "step": 1385 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019939659680598577, + "loss": 3.6535, + "step": 1390 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019935636992638483, + "loss": 3.9422, + "step": 1395 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019931614304678386, + "loss": 1.6013, + "step": 1400 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019927591616718292, + "loss": 5.3186, + "step": 1405 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019923568928758195, + "loss": 4.8236, + "step": 1410 + }, + { + "epoch": 0.11, + "learning_rate": 0.000199195462407981, + "loss": 4.2941, + "step": 1415 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019915523552838007, + "loss": 3.9504, + "step": 1420 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019911500864877913, + "loss": 3.9039, + "step": 1425 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001990747817691782, + "loss": 3.1057, + "step": 1430 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019903455488957722, + "loss": 3.3202, + "step": 1435 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019899432800997628, + "loss": 4.9787, + "step": 1440 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019895410113037533, + "loss": 2.4636, + "step": 1445 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019891387425077437, + "loss": 2.2541, + "step": 1450 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019887364737117342, + "loss": 4.0754, + "step": 1455 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019883342049157248, + "loss": 5.0287, + "step": 1460 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019879319361197154, + "loss": 4.4176, + "step": 1465 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019875296673237057, + "loss": 4.0273, + "step": 1470 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019871273985276963, + "loss": 4.6488, + "step": 1475 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001986725129731687, + "loss": 3.7554, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019863228609356772, + "loss": 4.065, + "step": 1485 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019859205921396678, + "loss": 3.813, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 0.00019855183233436584, + "loss": 2.2622, + "step": 1495 + }, + { + "epoch": 0.11, + "learning_rate": 0.0001985116054547649, + "loss": 1.2104, + "step": 1500 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019847137857516392, + "loss": 4.7545, + "step": 1505 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019843115169556298, + "loss": 5.3623, + "step": 1510 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019839092481596204, + "loss": 3.8675, + "step": 1515 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001983506979363611, + "loss": 3.4828, + "step": 1520 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019831047105676013, + "loss": 3.7339, + "step": 1525 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001982702441771592, + "loss": 3.509, + "step": 1530 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019823001729755825, + "loss": 3.3487, + "step": 1535 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019818979041795728, + "loss": 5.2811, + "step": 1540 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019814956353835634, + "loss": 2.0317, + "step": 1545 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001981093366587554, + "loss": 1.7232, + "step": 1550 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019806910977915445, + "loss": 3.9217, + "step": 1555 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019802888289955348, + "loss": 4.3631, + "step": 1560 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019798865601995254, + "loss": 6.2506, + "step": 1565 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019795647451627176, + "loss": 4.4891, + "step": 1570 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019791624763667082, + "loss": 3.79, + "step": 1575 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019787602075706988, + "loss": 4.5018, + "step": 1580 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019783579387746894, + "loss": 3.6576, + "step": 1585 + }, + { + "epoch": 0.12, + "learning_rate": 0.000197795566997868, + "loss": 3.0058, + "step": 1590 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019775534011826705, + "loss": 3.6505, + "step": 1595 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019771511323866608, + "loss": 1.5467, + "step": 1600 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019767488635906514, + "loss": 4.4375, + "step": 1605 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019763465947946417, + "loss": 4.4432, + "step": 1610 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019759443259986323, + "loss": 4.8396, + "step": 1615 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001975542057202623, + "loss": 4.9068, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019751397884066135, + "loss": 3.3321, + "step": 1625 + }, + { + "epoch": 0.12, + "learning_rate": 0.0001974737519610604, + "loss": 3.0243, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 0.00019743352508145944, + "loss": 3.6723, + "step": 1635 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001973932982018585, + "loss": 3.564, + "step": 1640 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019735307132225753, + "loss": 1.2411, + "step": 1645 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019731284444265659, + "loss": 0.7007, + "step": 1650 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019727261756305564, + "loss": 4.9119, + "step": 1655 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001972323906834547, + "loss": 6.3875, + "step": 1660 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019719216380385376, + "loss": 4.9604, + "step": 1665 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001971519369242528, + "loss": 5.1688, + "step": 1670 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019711171004465185, + "loss": 3.3456, + "step": 1675 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001970714831650509, + "loss": 4.251, + "step": 1680 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019703125628544994, + "loss": 2.4675, + "step": 1685 + }, + { + "epoch": 0.13, + "learning_rate": 0.000196991029405849, + "loss": 3.2748, + "step": 1690 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019695080252624806, + "loss": 2.7843, + "step": 1695 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019691057564664709, + "loss": 2.922, + "step": 1700 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019687034876704614, + "loss": 4.8305, + "step": 1705 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001968301218874452, + "loss": 4.2068, + "step": 1710 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019678989500784426, + "loss": 4.2574, + "step": 1715 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001967496681282433, + "loss": 3.9824, + "step": 1720 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019670944124864235, + "loss": 4.5187, + "step": 1725 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001966692143690414, + "loss": 3.1695, + "step": 1730 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019662898748944044, + "loss": 2.4495, + "step": 1735 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001965887606098395, + "loss": 3.9741, + "step": 1740 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019654853373023856, + "loss": 3.6335, + "step": 1745 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019650830685063761, + "loss": 1.5263, + "step": 1750 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019646807997103667, + "loss": 4.2363, + "step": 1755 + }, + { + "epoch": 0.13, + "learning_rate": 0.0001964278530914357, + "loss": 4.9076, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 0.00019638762621183476, + "loss": 4.7342, + "step": 1765 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001963473993322338, + "loss": 6.3031, + "step": 1770 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019630717245263285, + "loss": 4.618, + "step": 1775 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001962669455730319, + "loss": 3.8465, + "step": 1780 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019622671869343097, + "loss": 2.1621, + "step": 1785 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019618649181383003, + "loss": 2.1442, + "step": 1790 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019614626493422906, + "loss": 2.3709, + "step": 1795 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019610603805462812, + "loss": 3.6523, + "step": 1800 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019606581117502715, + "loss": 5.8377, + "step": 1805 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001960255842954262, + "loss": 4.6049, + "step": 1810 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019598535741582526, + "loss": 4.4676, + "step": 1815 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019594513053622432, + "loss": 4.2148, + "step": 1820 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019590490365662338, + "loss": 4.3605, + "step": 1825 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019586467677702244, + "loss": 3.4217, + "step": 1830 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019582444989742147, + "loss": 4.5377, + "step": 1835 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001957842230178205, + "loss": 2.128, + "step": 1840 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019574399613821956, + "loss": 2.0646, + "step": 1845 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019570376925861862, + "loss": 2.895, + "step": 1850 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019566354237901768, + "loss": 4.5703, + "step": 1855 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019562331549941673, + "loss": 4.5592, + "step": 1860 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001955830886198158, + "loss": 5.3285, + "step": 1865 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019554286174021482, + "loss": 4.4393, + "step": 1870 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019550263486061385, + "loss": 4.2725, + "step": 1875 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001954624079810129, + "loss": 2.5464, + "step": 1880 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019542218110141197, + "loss": 3.3585, + "step": 1885 + }, + { + "epoch": 0.14, + "learning_rate": 0.00019538195422181103, + "loss": 2.9919, + "step": 1890 + }, + { + "epoch": 0.14, + "learning_rate": 0.0001953417273422101, + "loss": 1.8249, + "step": 1895 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019530150046260915, + "loss": 2.5888, + "step": 1900 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019526127358300818, + "loss": 5.8953, + "step": 1905 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001952210467034072, + "loss": 5.2256, + "step": 1910 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019518081982380627, + "loss": 4.6908, + "step": 1915 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019514059294420532, + "loss": 3.9816, + "step": 1920 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019510036606460438, + "loss": 3.4984, + "step": 1925 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019506013918500344, + "loss": 2.786, + "step": 1930 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001950199123054025, + "loss": 2.0924, + "step": 1935 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019497968542580153, + "loss": 2.1778, + "step": 1940 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019493945854620056, + "loss": 3.1165, + "step": 1945 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019489923166659962, + "loss": 1.3559, + "step": 1950 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019485900478699868, + "loss": 4.5992, + "step": 1955 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019481877790739774, + "loss": 4.9736, + "step": 1960 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001947785510277968, + "loss": 5.0469, + "step": 1965 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019473832414819585, + "loss": 4.3723, + "step": 1970 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019469809726859488, + "loss": 3.779, + "step": 1975 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019465787038899394, + "loss": 4.1662, + "step": 1980 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019461764350939297, + "loss": 3.2694, + "step": 1985 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019457741662979203, + "loss": 2.2375, + "step": 1990 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001945371897501911, + "loss": 2.9865, + "step": 1995 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019449696287059015, + "loss": 1.6985, + "step": 2000 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019445673599098918, + "loss": 4.2811, + "step": 2005 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019441650911138824, + "loss": 4.726, + "step": 2010 + }, + { + "epoch": 0.15, + "learning_rate": 0.0001943762822317873, + "loss": 4.2904, + "step": 2015 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019433605535218633, + "loss": 3.7736, + "step": 2020 + }, + { + "epoch": 0.15, + "learning_rate": 0.00019429582847258538, + "loss": 3.3697, + "step": 2025 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019425560159298444, + "loss": 3.1179, + "step": 2030 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001942153747133835, + "loss": 2.9869, + "step": 2035 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019417514783378253, + "loss": 3.0633, + "step": 2040 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001941349209541816, + "loss": 3.2251, + "step": 2045 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019409469407458065, + "loss": 0.7779, + "step": 2050 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001940544671949797, + "loss": 4.3598, + "step": 2055 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019401424031537874, + "loss": 4.9221, + "step": 2060 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001939740134357778, + "loss": 4.2301, + "step": 2065 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019393378655617685, + "loss": 5.2469, + "step": 2070 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019389355967657589, + "loss": 3.3051, + "step": 2075 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019385333279697494, + "loss": 3.6274, + "step": 2080 + }, + { + "epoch": 0.16, + "learning_rate": 0.000193813105917374, + "loss": 4.6274, + "step": 2085 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019377287903777306, + "loss": 2.9797, + "step": 2090 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001937326521581721, + "loss": 2.6226, + "step": 2095 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019369242527857115, + "loss": 2.4438, + "step": 2100 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001936521983989702, + "loss": 4.2717, + "step": 2105 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019361197151936924, + "loss": 4.4154, + "step": 2110 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001935717446397683, + "loss": 5.1223, + "step": 2115 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019353151776016736, + "loss": 4.3166, + "step": 2120 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019349129088056641, + "loss": 3.5363, + "step": 2125 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019345106400096547, + "loss": 4.075, + "step": 2130 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001934108371213645, + "loss": 2.5993, + "step": 2135 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019337061024176356, + "loss": 2.8878, + "step": 2140 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001933303833621626, + "loss": 2.9542, + "step": 2145 + }, + { + "epoch": 0.16, + "learning_rate": 0.00019329015648256165, + "loss": 3.0859, + "step": 2150 + }, + { + "epoch": 0.16, + "learning_rate": 0.0001932499296029607, + "loss": 5.251, + "step": 2155 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019320970272335977, + "loss": 4.7705, + "step": 2160 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019316947584375883, + "loss": 4.5398, + "step": 2165 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019312924896415786, + "loss": 4.2553, + "step": 2170 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019308902208455691, + "loss": 5.5578, + "step": 2175 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019304879520495595, + "loss": 3.7627, + "step": 2180 + }, + { + "epoch": 0.17, + "learning_rate": 0.000193008568325355, + "loss": 3.8887, + "step": 2185 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019296834144575406, + "loss": 3.8691, + "step": 2190 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019292811456615312, + "loss": 2.8983, + "step": 2195 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019288788768655218, + "loss": 1.3964, + "step": 2200 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019284766080695124, + "loss": 5.0994, + "step": 2205 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019280743392735027, + "loss": 5.5555, + "step": 2210 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001927672070477493, + "loss": 4.6605, + "step": 2215 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019272698016814836, + "loss": 3.9756, + "step": 2220 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019268675328854742, + "loss": 4.1566, + "step": 2225 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019264652640894647, + "loss": 3.3558, + "step": 2230 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019260629952934553, + "loss": 4.5201, + "step": 2235 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001925660726497446, + "loss": 3.4633, + "step": 2240 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019252584577014362, + "loss": 3.0613, + "step": 2245 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019248561889054265, + "loss": 1.4768, + "step": 2250 + }, + { + "epoch": 0.17, + "learning_rate": 0.0001924453920109417, + "loss": 5.6229, + "step": 2255 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019240516513134077, + "loss": 4.9016, + "step": 2260 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019236493825173983, + "loss": 4.9314, + "step": 2265 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019232471137213889, + "loss": 3.6584, + "step": 2270 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019228448449253794, + "loss": 2.9309, + "step": 2275 + }, + { + "epoch": 0.17, + "learning_rate": 0.00019224425761293698, + "loss": 5.1328, + "step": 2280 + }, + { + "epoch": 0.17, + "learning_rate": 0.000192204030733336, + "loss": 1.9352, + "step": 2285 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019216380385373506, + "loss": 2.3981, + "step": 2290 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019212357697413412, + "loss": 4.707, + "step": 2295 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019208335009453318, + "loss": 0.5809, + "step": 2300 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019204312321493224, + "loss": 5.2449, + "step": 2305 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019200289633533127, + "loss": 4.7281, + "step": 2310 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019196266945573033, + "loss": 4.5684, + "step": 2315 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019192244257612936, + "loss": 4.249, + "step": 2320 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019188221569652842, + "loss": 3.5209, + "step": 2325 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019184198881692748, + "loss": 3.7265, + "step": 2330 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019180176193732653, + "loss": 3.7072, + "step": 2335 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001917615350577256, + "loss": 2.195, + "step": 2340 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019172130817812462, + "loss": 3.1261, + "step": 2345 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019168108129852368, + "loss": 1.413, + "step": 2350 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019164085441892274, + "loss": 4.6439, + "step": 2355 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019160062753932177, + "loss": 6.491, + "step": 2360 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019156040065972083, + "loss": 4.1846, + "step": 2365 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001915201737801199, + "loss": 4.8385, + "step": 2370 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019147994690051895, + "loss": 4.0875, + "step": 2375 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019143972002091798, + "loss": 3.4586, + "step": 2380 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019139949314131704, + "loss": 3.9514, + "step": 2385 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001913592662617161, + "loss": 1.9388, + "step": 2390 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019131903938211513, + "loss": 3.4899, + "step": 2395 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019127881250251418, + "loss": 0.8589, + "step": 2400 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019123858562291324, + "loss": 5.1305, + "step": 2405 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001911983587433123, + "loss": 5.6215, + "step": 2410 + }, + { + "epoch": 0.18, + "learning_rate": 0.00019115813186371133, + "loss": 5.152, + "step": 2415 + }, + { + "epoch": 0.18, + "learning_rate": 0.0001911179049841104, + "loss": 4.2031, + "step": 2420 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019107767810450945, + "loss": 3.8862, + "step": 2425 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001910374512249085, + "loss": 3.2398, + "step": 2430 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019099722434530754, + "loss": 3.3589, + "step": 2435 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001909569974657066, + "loss": 1.6699, + "step": 2440 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019091677058610565, + "loss": 2.6013, + "step": 2445 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019087654370650468, + "loss": 1.346, + "step": 2450 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019083631682690374, + "loss": 5.7719, + "step": 2455 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001907960899473028, + "loss": 4.8369, + "step": 2460 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019075586306770186, + "loss": 3.7029, + "step": 2465 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001907156361881009, + "loss": 2.6815, + "step": 2470 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019067540930849995, + "loss": 4.5318, + "step": 2475 + }, + { + "epoch": 0.19, + "learning_rate": 0.000190635182428899, + "loss": 4.4757, + "step": 2480 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019059495554929804, + "loss": 3.0317, + "step": 2485 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001905547286696971, + "loss": 3.3823, + "step": 2490 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019051450179009615, + "loss": 2.1039, + "step": 2495 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001904742749104952, + "loss": 1.3054, + "step": 2500 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019043404803089427, + "loss": 4.5176, + "step": 2505 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001903938211512933, + "loss": 5.5738, + "step": 2510 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019035359427169236, + "loss": 4.4791, + "step": 2515 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001903133673920914, + "loss": 3.3446, + "step": 2520 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019027314051249045, + "loss": 3.6072, + "step": 2525 + }, + { + "epoch": 0.19, + "learning_rate": 0.0001902329136328895, + "loss": 3.7518, + "step": 2530 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019019268675328857, + "loss": 3.9649, + "step": 2535 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019015245987368762, + "loss": 2.0974, + "step": 2540 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019011223299408666, + "loss": 1.7716, + "step": 2545 + }, + { + "epoch": 0.19, + "learning_rate": 0.00019007200611448571, + "loss": 2.7839, + "step": 2550 + }, + { + "epoch": 0.2, + "learning_rate": 0.00019003177923488474, + "loss": 5.41, + "step": 2555 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001899915523552838, + "loss": 4.9215, + "step": 2560 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018995132547568286, + "loss": 3.7002, + "step": 2565 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018991109859608192, + "loss": 4.8951, + "step": 2570 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018987087171648098, + "loss": 2.5297, + "step": 2575 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018983064483688004, + "loss": 2.9542, + "step": 2580 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018979041795727907, + "loss": 2.4865, + "step": 2585 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001897501910776781, + "loss": 2.3631, + "step": 2590 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018970996419807716, + "loss": 2.8949, + "step": 2595 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018966973731847621, + "loss": 0.6757, + "step": 2600 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018962951043887527, + "loss": 4.5512, + "step": 2605 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018958928355927433, + "loss": 5.1891, + "step": 2610 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018954905667967336, + "loss": 5.5706, + "step": 2615 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001895088298000724, + "loss": 6.3279, + "step": 2620 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018946860292047145, + "loss": 4.7213, + "step": 2625 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001894283760408705, + "loss": 3.6586, + "step": 2630 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018938814916126957, + "loss": 3.4731, + "step": 2635 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018934792228166863, + "loss": 2.9237, + "step": 2640 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018930769540206768, + "loss": 1.2635, + "step": 2645 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018926746852246672, + "loss": 2.0249, + "step": 2650 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018922724164286577, + "loss": 4.6764, + "step": 2655 + }, + { + "epoch": 0.2, + "learning_rate": 0.0001891870147632648, + "loss": 5.0914, + "step": 2660 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018914678788366386, + "loss": 4.6342, + "step": 2665 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018910656100406292, + "loss": 4.3582, + "step": 2670 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018906633412446198, + "loss": 3.7621, + "step": 2675 + }, + { + "epoch": 0.2, + "learning_rate": 0.00018902610724486104, + "loss": 3.5162, + "step": 2680 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018898588036526007, + "loss": 2.9408, + "step": 2685 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018894565348565913, + "loss": 2.3332, + "step": 2690 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018890542660605816, + "loss": 2.6293, + "step": 2695 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018886519972645722, + "loss": 0.7619, + "step": 2700 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018882497284685628, + "loss": 5.459, + "step": 2705 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018878474596725533, + "loss": 4.375, + "step": 2710 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001887445190876544, + "loss": 5.0764, + "step": 2715 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018870429220805342, + "loss": 3.6271, + "step": 2720 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018866406532845248, + "loss": 3.8543, + "step": 2725 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018862383844885154, + "loss": 2.9905, + "step": 2730 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018858361156925057, + "loss": 3.3131, + "step": 2735 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018854338468964963, + "loss": 2.6086, + "step": 2740 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001885031578100487, + "loss": 1.4445, + "step": 2745 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018846293093044775, + "loss": 1.3326, + "step": 2750 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018842270405084678, + "loss": 3.508, + "step": 2755 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018838247717124583, + "loss": 4.6805, + "step": 2760 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001883422502916449, + "loss": 5.0881, + "step": 2765 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018830202341204392, + "loss": 4.0855, + "step": 2770 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018826179653244298, + "loss": 2.9991, + "step": 2775 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018822156965284204, + "loss": 2.9354, + "step": 2780 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001881813427732411, + "loss": 1.5435, + "step": 2785 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018814111589364013, + "loss": 2.5249, + "step": 2790 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001881008890140392, + "loss": 1.2265, + "step": 2795 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018806066213443825, + "loss": 0.833, + "step": 2800 + }, + { + "epoch": 0.21, + "learning_rate": 0.0001880204352548373, + "loss": 5.0906, + "step": 2805 + }, + { + "epoch": 0.21, + "learning_rate": 0.00018798020837523634, + "loss": 5.0311, + "step": 2810 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001879399814956354, + "loss": 5.8641, + "step": 2815 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018789975461603445, + "loss": 4.1654, + "step": 2820 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018785952773643348, + "loss": 3.4861, + "step": 2825 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018781930085683254, + "loss": 3.3804, + "step": 2830 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001877790739772316, + "loss": 4.5379, + "step": 2835 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018773884709763066, + "loss": 1.8242, + "step": 2840 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001876986202180297, + "loss": 3.9177, + "step": 2845 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018765839333842875, + "loss": 0.7359, + "step": 2850 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001876181664588278, + "loss": 6.2848, + "step": 2855 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018757793957922684, + "loss": 4.1291, + "step": 2860 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001875377126996259, + "loss": 4.0441, + "step": 2865 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018749748582002495, + "loss": 3.3439, + "step": 2870 + }, + { + "epoch": 0.22, + "learning_rate": 0.000187457258940424, + "loss": 3.2429, + "step": 2875 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018741703206082307, + "loss": 3.4903, + "step": 2880 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001873768051812221, + "loss": 3.8007, + "step": 2885 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018733657830162116, + "loss": 2.9526, + "step": 2890 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001872963514220202, + "loss": 3.0772, + "step": 2895 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018725612454241925, + "loss": 0.8114, + "step": 2900 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001872158976628183, + "loss": 5.1357, + "step": 2905 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018717567078321737, + "loss": 5.2641, + "step": 2910 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018713544390361642, + "loss": 4.9559, + "step": 2915 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018709521702401545, + "loss": 4.3744, + "step": 2920 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018705499014441449, + "loss": 4.2383, + "step": 2925 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018701476326481354, + "loss": 3.8998, + "step": 2930 + }, + { + "epoch": 0.22, + "learning_rate": 0.0001869745363852126, + "loss": 2.7238, + "step": 2935 + }, + { + "epoch": 0.22, + "learning_rate": 0.00018693430950561166, + "loss": 2.415, + "step": 2940 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018689408262601072, + "loss": 2.7549, + "step": 2945 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018685385574640978, + "loss": 2.9196, + "step": 2950 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001868136288668088, + "loss": 4.4637, + "step": 2955 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018677340198720784, + "loss": 4.441, + "step": 2960 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001867331751076069, + "loss": 4.4455, + "step": 2965 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018669294822800596, + "loss": 3.5803, + "step": 2970 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018665272134840501, + "loss": 3.7873, + "step": 2975 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018661249446880407, + "loss": 4.2402, + "step": 2980 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018657226758920313, + "loss": 4.591, + "step": 2985 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018653204070960216, + "loss": 2.1384, + "step": 2990 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001864918138300012, + "loss": 1.5015, + "step": 2995 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018645158695040025, + "loss": 1.4467, + "step": 3000 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001864113600707993, + "loss": 5.4559, + "step": 3005 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018637113319119837, + "loss": 4.7643, + "step": 3010 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018633090631159743, + "loss": 5.4004, + "step": 3015 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018629067943199648, + "loss": 3.9115, + "step": 3020 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018625045255239552, + "loss": 3.2608, + "step": 3025 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018621022567279457, + "loss": 3.4641, + "step": 3030 + }, + { + "epoch": 0.23, + "learning_rate": 0.0001861699987931936, + "loss": 3.2434, + "step": 3035 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018612977191359266, + "loss": 2.1317, + "step": 3040 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018608954503399172, + "loss": 2.0085, + "step": 3045 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018604931815439078, + "loss": 0.4546, + "step": 3050 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018600909127478984, + "loss": 4.4299, + "step": 3055 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018596886439518887, + "loss": 5.439, + "step": 3060 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018592863751558793, + "loss": 5.1666, + "step": 3065 + }, + { + "epoch": 0.23, + "learning_rate": 0.00018588841063598696, + "loss": 4.3708, + "step": 3070 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018584818375638602, + "loss": 4.1246, + "step": 3075 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018580795687678507, + "loss": 4.082, + "step": 3080 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018576772999718413, + "loss": 2.4525, + "step": 3085 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001857275031175832, + "loss": 3.7855, + "step": 3090 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018568727623798222, + "loss": 1.7568, + "step": 3095 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018564704935838128, + "loss": 1.5382, + "step": 3100 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018560682247878034, + "loss": 4.3197, + "step": 3105 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018556659559917937, + "loss": 5.0633, + "step": 3110 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018552636871957843, + "loss": 4.0307, + "step": 3115 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018548614183997749, + "loss": 3.3434, + "step": 3120 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018544591496037654, + "loss": 4.3059, + "step": 3125 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018540568808077558, + "loss": 4.701, + "step": 3130 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018536546120117463, + "loss": 2.9802, + "step": 3135 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001853252343215737, + "loss": 1.449, + "step": 3140 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018528500744197272, + "loss": 3.2532, + "step": 3145 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018524478056237178, + "loss": 0.6617, + "step": 3150 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018520455368277084, + "loss": 4.2574, + "step": 3155 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001851643268031699, + "loss": 4.36, + "step": 3160 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018512409992356893, + "loss": 4.2436, + "step": 3165 + }, + { + "epoch": 0.24, + "learning_rate": 0.000185083873043968, + "loss": 4.8123, + "step": 3170 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018504364616436705, + "loss": 5.1578, + "step": 3175 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001850034192847661, + "loss": 4.4125, + "step": 3180 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018496319240516513, + "loss": 2.5215, + "step": 3185 + }, + { + "epoch": 0.24, + "learning_rate": 0.0001849229655255642, + "loss": 3.3153, + "step": 3190 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018488273864596325, + "loss": 3.2267, + "step": 3195 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018484251176636228, + "loss": 2.41, + "step": 3200 + }, + { + "epoch": 0.24, + "learning_rate": 0.00018480228488676134, + "loss": 5.0113, + "step": 3205 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001847620580071604, + "loss": 4.7652, + "step": 3210 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018472183112755946, + "loss": 4.9766, + "step": 3215 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001846816042479585, + "loss": 4.8104, + "step": 3220 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018464137736835755, + "loss": 4.0664, + "step": 3225 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018460115048875658, + "loss": 3.1479, + "step": 3230 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018456092360915564, + "loss": 3.6193, + "step": 3235 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001845206967295547, + "loss": 2.3463, + "step": 3240 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018448046984995375, + "loss": 2.5716, + "step": 3245 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001844402429703528, + "loss": 1.1213, + "step": 3250 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018440001609075187, + "loss": 5.2396, + "step": 3255 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001843597892111509, + "loss": 4.9785, + "step": 3260 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018431956233154993, + "loss": 4.757, + "step": 3265 + }, + { + "epoch": 0.25, + "learning_rate": 0.000184279335451949, + "loss": 4.6502, + "step": 3270 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018423910857234805, + "loss": 4.1502, + "step": 3275 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001841988816927471, + "loss": 3.6057, + "step": 3280 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018415865481314616, + "loss": 4.0316, + "step": 3285 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018411842793354522, + "loss": 3.9748, + "step": 3290 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018407820105394425, + "loss": 2.0564, + "step": 3295 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018403797417434328, + "loss": 1.9967, + "step": 3300 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018399774729474234, + "loss": 5.2789, + "step": 3305 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001839575204151414, + "loss": 4.1012, + "step": 3310 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018391729353554046, + "loss": 4.8074, + "step": 3315 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018387706665593952, + "loss": 2.8919, + "step": 3320 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018383683977633858, + "loss": 3.7766, + "step": 3325 + }, + { + "epoch": 0.25, + "learning_rate": 0.0001837966128967376, + "loss": 5.0438, + "step": 3330 + }, + { + "epoch": 0.25, + "learning_rate": 0.00018375638601713664, + "loss": 2.7931, + "step": 3335 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001837161591375357, + "loss": 2.0797, + "step": 3340 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018367593225793475, + "loss": 2.7992, + "step": 3345 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001836357053783338, + "loss": 0.7685, + "step": 3350 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018359547849873287, + "loss": 4.7795, + "step": 3355 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018355525161913193, + "loss": 4.2709, + "step": 3360 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018351502473953096, + "loss": 5.034, + "step": 3365 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018347479785993, + "loss": 4.5508, + "step": 3370 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018343457098032905, + "loss": 3.427, + "step": 3375 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001833943441007281, + "loss": 3.2379, + "step": 3380 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018335411722112717, + "loss": 3.8982, + "step": 3385 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018331389034152622, + "loss": 2.4782, + "step": 3390 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018327366346192528, + "loss": 3.1309, + "step": 3395 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018323343658232431, + "loss": 1.8897, + "step": 3400 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018319320970272337, + "loss": 5.2643, + "step": 3405 + }, + { + "epoch": 0.26, + "learning_rate": 0.0001831529828231224, + "loss": 4.215, + "step": 3410 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018311275594352146, + "loss": 4.032, + "step": 3415 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018307252906392052, + "loss": 3.9859, + "step": 3420 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018303230218431958, + "loss": 3.4547, + "step": 3425 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018299207530471864, + "loss": 3.0241, + "step": 3430 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018295184842511767, + "loss": 2.1052, + "step": 3435 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018291162154551673, + "loss": 1.5382, + "step": 3440 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018287139466591576, + "loss": 4.3531, + "step": 3445 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018283116778631482, + "loss": 1.5439, + "step": 3450 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018279094090671387, + "loss": 4.5682, + "step": 3455 + }, + { + "epoch": 0.26, + "learning_rate": 0.00018275071402711293, + "loss": 4.4668, + "step": 3460 + }, + { + "epoch": 0.26, + "learning_rate": 0.000182710487147512, + "loss": 5.041, + "step": 3465 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018267026026791102, + "loss": 4.1639, + "step": 3470 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018263003338831008, + "loss": 4.3855, + "step": 3475 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018258980650870914, + "loss": 3.7016, + "step": 3480 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018254957962910817, + "loss": 3.9585, + "step": 3485 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018250935274950723, + "loss": 2.0294, + "step": 3490 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018246912586990629, + "loss": 2.6052, + "step": 3495 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018242889899030534, + "loss": 1.1796, + "step": 3500 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018238867211070437, + "loss": 3.7271, + "step": 3505 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018234844523110343, + "loss": 4.1547, + "step": 3510 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001823082183515025, + "loss": 4.6098, + "step": 3515 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018226799147190152, + "loss": 5.2176, + "step": 3520 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018222776459230058, + "loss": 4.0721, + "step": 3525 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018218753771269964, + "loss": 4.3234, + "step": 3530 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018214731083309867, + "loss": 3.1592, + "step": 3535 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018210708395349773, + "loss": 3.2439, + "step": 3540 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018206685707389679, + "loss": 2.8346, + "step": 3545 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018202663019429584, + "loss": 1.0432, + "step": 3550 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001819864033146949, + "loss": 4.7693, + "step": 3555 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018194617643509393, + "loss": 3.7051, + "step": 3560 + }, + { + "epoch": 0.27, + "learning_rate": 0.000181905949555493, + "loss": 4.0299, + "step": 3565 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018186572267589202, + "loss": 4.5971, + "step": 3570 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018182549579629108, + "loss": 3.5709, + "step": 3575 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018178526891669014, + "loss": 3.5243, + "step": 3580 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001817450420370892, + "loss": 2.8105, + "step": 3585 + }, + { + "epoch": 0.27, + "learning_rate": 0.00018170481515748826, + "loss": 1.6252, + "step": 3590 + }, + { + "epoch": 0.27, + "learning_rate": 0.0001816645882778873, + "loss": 1.5084, + "step": 3595 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018162436139828635, + "loss": 1.4158, + "step": 3600 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018158413451868538, + "loss": 4.3527, + "step": 3605 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018154390763908444, + "loss": 6.2471, + "step": 3610 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001815036807594835, + "loss": 3.4996, + "step": 3615 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018146345387988255, + "loss": 3.4791, + "step": 3620 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001814232270002816, + "loss": 3.9434, + "step": 3625 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018138300012068067, + "loss": 3.8335, + "step": 3630 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001813427732410797, + "loss": 2.7479, + "step": 3635 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018130254636147873, + "loss": 3.481, + "step": 3640 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001812623194818778, + "loss": 2.2178, + "step": 3645 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018122209260227685, + "loss": 0.5455, + "step": 3650 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001811818657226759, + "loss": 4.8406, + "step": 3655 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018114163884307496, + "loss": 4.449, + "step": 3660 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018110141196347402, + "loss": 5.3301, + "step": 3665 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018106118508387305, + "loss": 5.1273, + "step": 3670 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018102095820427208, + "loss": 3.4895, + "step": 3675 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018098073132467114, + "loss": 2.6304, + "step": 3680 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001809405044450702, + "loss": 2.2214, + "step": 3685 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018090027756546926, + "loss": 2.3575, + "step": 3690 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018086005068586832, + "loss": 1.2417, + "step": 3695 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018081982380626738, + "loss": 2.804, + "step": 3700 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001807795969266664, + "loss": 4.5449, + "step": 3705 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018073937004706544, + "loss": 4.7197, + "step": 3710 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001806991431674645, + "loss": 5.1338, + "step": 3715 + }, + { + "epoch": 0.28, + "learning_rate": 0.00018065891628786355, + "loss": 4.2385, + "step": 3720 + }, + { + "epoch": 0.28, + "learning_rate": 0.0001806186894082626, + "loss": 3.7326, + "step": 3725 + }, + { + "epoch": 0.29, + "learning_rate": 0.00018057846252866167, + "loss": 3.5176, + "step": 3730 + }, + { + "epoch": 0.29, + "learning_rate": 0.00018053823564906073, + "loss": 2.6052, + "step": 3735 + }, + { + "epoch": 0.29, + "learning_rate": 0.00018049800876945976, + "loss": 2.286, + "step": 3740 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001804577818898588, + "loss": 3.0982, + "step": 3745 + }, + { + "epoch": 0.29, + "learning_rate": 0.00018041755501025785, + "loss": 2.8813, + "step": 3750 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001803773281306569, + "loss": 4.1352, + "step": 3755 + }, + { + "epoch": 0.29, + "learning_rate": 0.00018033710125105597, + "loss": 4.5643, + "step": 3760 + }, + { + "epoch": 0.29, + "learning_rate": 0.00018029687437145502, + "loss": 4.6842, + "step": 3765 + }, + { + "epoch": 0.29, + "learning_rate": 0.00018025664749185408, + "loss": 3.4552, + "step": 3770 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001802164206122531, + "loss": 3.9146, + "step": 3775 + }, + { + "epoch": 0.29, + "learning_rate": 0.00018017619373265217, + "loss": 3.2099, + "step": 3780 + }, + { + "epoch": 0.29, + "learning_rate": 0.0001801359668530512, + "loss": 4.366, + "step": 3785 + }, + { + "epoch": 0.29, + "learning_rate": 0.00018009573997345026, + "loss": 2.5956, + "step": 3790 + }, + { + "epoch": 0.29, + "learning_rate": 0.00018005551309384932, + "loss": 1.6513, + "step": 3795 + }, + { + "epoch": 0.29, + "learning_rate": 0.00018001528621424838, + "loss": 2.3943, + "step": 3800 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017997505933464744, + "loss": 4.4051, + "step": 3805 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017993483245504647, + "loss": 4.1432, + "step": 3810 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017989460557544552, + "loss": 4.6814, + "step": 3815 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017985437869584456, + "loss": 3.7793, + "step": 3820 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017981415181624361, + "loss": 3.6339, + "step": 3825 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017977392493664267, + "loss": 3.716, + "step": 3830 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017973369805704173, + "loss": 3.7518, + "step": 3835 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017969347117744076, + "loss": 3.3349, + "step": 3840 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017965324429783982, + "loss": 1.3231, + "step": 3845 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017961301741823888, + "loss": 2.5204, + "step": 3850 + }, + { + "epoch": 0.29, + "learning_rate": 0.00017957279053863794, + "loss": 5.1436, + "step": 3855 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017953256365903697, + "loss": 5.2109, + "step": 3860 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017949233677943603, + "loss": 3.9754, + "step": 3865 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017945210989983508, + "loss": 3.8529, + "step": 3870 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017941188302023412, + "loss": 4.5021, + "step": 3875 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017937165614063317, + "loss": 3.8877, + "step": 3880 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017933142926103223, + "loss": 1.8866, + "step": 3885 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001792912023814313, + "loss": 4.2512, + "step": 3890 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017925097550183032, + "loss": 2.6688, + "step": 3895 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017921074862222938, + "loss": 3.2995, + "step": 3900 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017917052174262844, + "loss": 4.2304, + "step": 3905 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017913029486302747, + "loss": 4.9676, + "step": 3910 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017909006798342653, + "loss": 4.2441, + "step": 3915 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017904984110382559, + "loss": 3.8947, + "step": 3920 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017900961422422464, + "loss": 4.1439, + "step": 3925 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001789693873446237, + "loss": 3.5044, + "step": 3930 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017892916046502273, + "loss": 2.705, + "step": 3935 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001788889335854218, + "loss": 0.9807, + "step": 3940 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017884870670582082, + "loss": 2.8294, + "step": 3945 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017880847982621988, + "loss": 0.8584, + "step": 3950 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017876825294661894, + "loss": 4.865, + "step": 3955 + }, + { + "epoch": 0.3, + "learning_rate": 0.000178728026067018, + "loss": 4.4303, + "step": 3960 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017868779918741706, + "loss": 4.9551, + "step": 3965 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017864757230781611, + "loss": 4.6406, + "step": 3970 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017860734542821514, + "loss": 3.2033, + "step": 3975 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017856711854861418, + "loss": 3.6012, + "step": 3980 + }, + { + "epoch": 0.3, + "learning_rate": 0.00017852689166901323, + "loss": 1.4554, + "step": 3985 + }, + { + "epoch": 0.3, + "learning_rate": 0.0001784866647894123, + "loss": 2.4178, + "step": 3990 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017844643790981135, + "loss": 1.7368, + "step": 3995 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001784062110302104, + "loss": 1.9992, + "step": 4000 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017836598415060947, + "loss": 3.7744, + "step": 4005 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001783257572710085, + "loss": 4.1831, + "step": 4010 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017828553039140753, + "loss": 3.9318, + "step": 4015 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001782453035118066, + "loss": 3.6878, + "step": 4020 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017820507663220565, + "loss": 4.0703, + "step": 4025 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001781648497526047, + "loss": 3.9203, + "step": 4030 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017812462287300376, + "loss": 2.88, + "step": 4035 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017808439599340282, + "loss": 2.49, + "step": 4040 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017804416911380185, + "loss": 1.8029, + "step": 4045 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017800394223420088, + "loss": 1.4607, + "step": 4050 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017796371535459994, + "loss": 4.866, + "step": 4055 + }, + { + "epoch": 0.31, + "learning_rate": 0.000177923488474999, + "loss": 4.9695, + "step": 4060 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017788326159539806, + "loss": 4.073, + "step": 4065 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017784303471579712, + "loss": 4.3596, + "step": 4070 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017780280783619617, + "loss": 3.5149, + "step": 4075 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001777625809565952, + "loss": 3.9059, + "step": 4080 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017772235407699424, + "loss": 2.6699, + "step": 4085 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001776821271973933, + "loss": 3.2956, + "step": 4090 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017764190031779235, + "loss": 2.8765, + "step": 4095 + }, + { + "epoch": 0.31, + "learning_rate": 0.0001776016734381914, + "loss": 2.6877, + "step": 4100 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017756144655859047, + "loss": 4.8078, + "step": 4105 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017752121967898953, + "loss": 4.3077, + "step": 4110 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017748099279938856, + "loss": 3.7305, + "step": 4115 + }, + { + "epoch": 0.31, + "learning_rate": 0.00017744076591978762, + "loss": 3.5543, + "step": 4120 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017740053904018665, + "loss": 4.3678, + "step": 4125 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001773603121605857, + "loss": 3.1074, + "step": 4130 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017732008528098476, + "loss": 3.485, + "step": 4135 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017727985840138382, + "loss": 2.4198, + "step": 4140 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017723963152178285, + "loss": 2.7068, + "step": 4145 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001771994046421819, + "loss": 2.6594, + "step": 4150 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017715917776258097, + "loss": 3.9896, + "step": 4155 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017711895088298, + "loss": 4.7062, + "step": 4160 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017707872400337906, + "loss": 5.0463, + "step": 4165 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017703849712377812, + "loss": 3.9547, + "step": 4170 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017699827024417718, + "loss": 4.6994, + "step": 4175 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001769580433645762, + "loss": 4.1545, + "step": 4180 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017691781648497527, + "loss": 3.0788, + "step": 4185 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017687758960537432, + "loss": 2.744, + "step": 4190 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017683736272577338, + "loss": 2.6146, + "step": 4195 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017680518122209263, + "loss": 3.9939, + "step": 4200 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017676495434249166, + "loss": 4.5344, + "step": 4205 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001767247274628907, + "loss": 4.7307, + "step": 4210 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017668450058328975, + "loss": 4.5513, + "step": 4215 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001766442737036888, + "loss": 3.2362, + "step": 4220 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017660404682408787, + "loss": 3.5146, + "step": 4225 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017656381994448692, + "loss": 2.4402, + "step": 4230 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017652359306488598, + "loss": 2.7357, + "step": 4235 + }, + { + "epoch": 0.32, + "learning_rate": 0.000176483366185285, + "loss": 2.2243, + "step": 4240 + }, + { + "epoch": 0.32, + "learning_rate": 0.00017644313930568404, + "loss": 2.6992, + "step": 4245 + }, + { + "epoch": 0.32, + "learning_rate": 0.0001764029124260831, + "loss": 2.3734, + "step": 4250 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017636268554648216, + "loss": 4.1707, + "step": 4255 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017632245866688122, + "loss": 5.4875, + "step": 4260 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017628223178728028, + "loss": 4.6299, + "step": 4265 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017624200490767934, + "loss": 3.0701, + "step": 4270 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017620177802807837, + "loss": 3.775, + "step": 4275 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001761615511484774, + "loss": 2.9104, + "step": 4280 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017612132426887646, + "loss": 2.7511, + "step": 4285 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017608109738927551, + "loss": 2.603, + "step": 4290 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017604087050967457, + "loss": 2.2994, + "step": 4295 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017600064363007363, + "loss": 2.7078, + "step": 4300 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001759604167504727, + "loss": 4.4588, + "step": 4305 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017592018987087172, + "loss": 4.8812, + "step": 4310 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017587996299127078, + "loss": 4.8076, + "step": 4315 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001758397361116698, + "loss": 3.1662, + "step": 4320 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017579950923206887, + "loss": 4.4642, + "step": 4325 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017575928235246793, + "loss": 2.0759, + "step": 4330 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017571905547286698, + "loss": 2.2219, + "step": 4335 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017567882859326604, + "loss": 1.4699, + "step": 4340 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017563860171366507, + "loss": 1.0181, + "step": 4345 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017559837483406413, + "loss": 1.4557, + "step": 4350 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017555814795446316, + "loss": 5.0143, + "step": 4355 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017551792107486222, + "loss": 4.6441, + "step": 4360 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017547769419526128, + "loss": 4.8084, + "step": 4365 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017543746731566034, + "loss": 5.1527, + "step": 4370 + }, + { + "epoch": 0.33, + "learning_rate": 0.0001753972404360594, + "loss": 4.5236, + "step": 4375 + }, + { + "epoch": 0.33, + "learning_rate": 0.00017535701355645843, + "loss": 4.1115, + "step": 4380 + }, + { + "epoch": 0.34, + "learning_rate": 0.00017531678667685749, + "loss": 3.2589, + "step": 4385 + }, + { + "epoch": 0.34, + "learning_rate": 0.00017527655979725654, + "loss": 3.387, + "step": 4390 + }, + { + "epoch": 0.34, + "learning_rate": 0.00017523633291765558, + "loss": 1.2526, + "step": 4395 + }, + { + "epoch": 0.34, + "learning_rate": 0.00017519610603805463, + "loss": 0.799, + "step": 4400 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001751558791584537, + "loss": 5.3492, + "step": 4405 + }, + { + "epoch": 0.34, + "learning_rate": 0.00017511565227885275, + "loss": 4.1848, + "step": 4410 + }, + { + "epoch": 0.34, + "learning_rate": 0.00017507542539925178, + "loss": 5.2502, + "step": 4415 + }, + { + "epoch": 0.34, + "learning_rate": 0.00017503519851965084, + "loss": 4.8549, + "step": 4420 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001749949716400499, + "loss": 4.3082, + "step": 4425 + }, + { + "epoch": 0.34, + "learning_rate": 0.00017495474476044893, + "loss": 3.2034, + "step": 4430 + }, + { + "epoch": 0.34, + "learning_rate": 0.000174914517880848, + "loss": 3.5008, + "step": 4435 + }, + { + "epoch": 0.34, + "learning_rate": 0.00017487429100124705, + "loss": 2.2585, + "step": 4440 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001748340641216461, + "loss": 1.9539, + "step": 4445 + }, + { + "epoch": 0.34, + "learning_rate": 0.00017479383724204513, + "loss": 1.5011, + "step": 4450 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001747536103624442, + "loss": 4.0939, + "step": 4455 + }, + { + "epoch": 0.34, + "learning_rate": 0.00017471338348284325, + "loss": 4.458, + "step": 4460 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001746731566032423, + "loss": 6.1611, + "step": 4465 + }, + { + "epoch": 0.34, + "learning_rate": 0.00017463292972364134, + "loss": 4.3582, + "step": 4470 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001745927028440404, + "loss": 3.2851, + "step": 4475 + }, + { + "epoch": 0.34, + "learning_rate": 0.00017455247596443946, + "loss": 2.6692, + "step": 4480 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001745122490848385, + "loss": 3.419, + "step": 4485 + }, + { + "epoch": 0.34, + "learning_rate": 0.00017447202220523755, + "loss": 3.178, + "step": 4490 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001744317953256366, + "loss": 2.9248, + "step": 4495 + }, + { + "epoch": 0.34, + "learning_rate": 0.00017439156844603566, + "loss": 2.8947, + "step": 4500 + }, + { + "epoch": 0.34, + "learning_rate": 0.0001743513415664347, + "loss": 5.7676, + "step": 4505 + }, + { + "epoch": 0.34, + "learning_rate": 0.00017431111468683375, + "loss": 4.4447, + "step": 4510 + }, + { + "epoch": 0.35, + "learning_rate": 0.00017427088780723278, + "loss": 4.7262, + "step": 4515 + }, + { + "epoch": 0.35, + "learning_rate": 0.00017423066092763184, + "loss": 3.8962, + "step": 4520 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001741904340480309, + "loss": 3.9734, + "step": 4525 + }, + { + "epoch": 0.35, + "learning_rate": 0.00017415020716842996, + "loss": 3.832, + "step": 4530 + }, + { + "epoch": 0.35, + "learning_rate": 0.00017410998028882902, + "loss": 2.866, + "step": 4535 + }, + { + "epoch": 0.35, + "learning_rate": 0.00017406975340922807, + "loss": 2.5358, + "step": 4540 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001740295265296271, + "loss": 1.7431, + "step": 4545 + }, + { + "epoch": 0.35, + "learning_rate": 0.00017398929965002614, + "loss": 2.6781, + "step": 4550 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001739490727704252, + "loss": 4.4768, + "step": 4555 + }, + { + "epoch": 0.35, + "learning_rate": 0.00017390884589082425, + "loss": 3.8754, + "step": 4560 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001738686190112233, + "loss": 3.7852, + "step": 4565 + }, + { + "epoch": 0.35, + "learning_rate": 0.00017382839213162237, + "loss": 4.2292, + "step": 4570 + }, + { + "epoch": 0.35, + "learning_rate": 0.00017378816525202143, + "loss": 3.6535, + "step": 4575 + }, + { + "epoch": 0.35, + "learning_rate": 0.00017374793837242046, + "loss": 4.4597, + "step": 4580 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001737077114928195, + "loss": 2.1313, + "step": 4585 + }, + { + "epoch": 0.35, + "learning_rate": 0.00017366748461321855, + "loss": 2.3775, + "step": 4590 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001736272577336176, + "loss": 1.5729, + "step": 4595 + }, + { + "epoch": 0.35, + "learning_rate": 0.00017358703085401666, + "loss": 1.3116, + "step": 4600 + }, + { + "epoch": 0.35, + "learning_rate": 0.00017354680397441572, + "loss": 5.5291, + "step": 4605 + }, + { + "epoch": 0.35, + "learning_rate": 0.00017350657709481478, + "loss": 4.3365, + "step": 4610 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001734663502152138, + "loss": 3.6343, + "step": 4615 + }, + { + "epoch": 0.35, + "learning_rate": 0.00017342612333561284, + "loss": 4.5385, + "step": 4620 + }, + { + "epoch": 0.35, + "learning_rate": 0.0001733858964560119, + "loss": 3.4747, + "step": 4625 + }, + { + "epoch": 0.35, + "learning_rate": 0.00017334566957641096, + "loss": 2.505, + "step": 4630 + }, + { + "epoch": 0.35, + "learning_rate": 0.00017330544269681002, + "loss": 2.2642, + "step": 4635 + }, + { + "epoch": 0.35, + "learning_rate": 0.00017326521581720908, + "loss": 1.7137, + "step": 4640 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017322498893760813, + "loss": 2.2715, + "step": 4645 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017318476205800717, + "loss": 2.6712, + "step": 4650 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001731445351784062, + "loss": 4.7334, + "step": 4655 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017310430829880526, + "loss": 3.4988, + "step": 4660 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017306408141920431, + "loss": 3.4859, + "step": 4665 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017302385453960337, + "loss": 2.9528, + "step": 4670 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017298362766000243, + "loss": 4.2112, + "step": 4675 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001729434007804015, + "loss": 4.345, + "step": 4680 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017290317390080052, + "loss": 2.7534, + "step": 4685 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017286294702119958, + "loss": 1.9547, + "step": 4690 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001728227201415986, + "loss": 2.9516, + "step": 4695 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017278249326199767, + "loss": 0.7848, + "step": 4700 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017274226638239673, + "loss": 5.2662, + "step": 4705 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017270203950279578, + "loss": 5.1359, + "step": 4710 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017266181262319484, + "loss": 4.5707, + "step": 4715 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017262158574359387, + "loss": 3.8627, + "step": 4720 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017258135886399293, + "loss": 5.6158, + "step": 4725 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017254113198439196, + "loss": 4.3295, + "step": 4730 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017250090510479102, + "loss": 2.5796, + "step": 4735 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017246067822519008, + "loss": 2.2212, + "step": 4740 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017242045134558914, + "loss": 1.7833, + "step": 4745 + }, + { + "epoch": 0.36, + "learning_rate": 0.0001723802244659882, + "loss": 1.0688, + "step": 4750 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017233999758638723, + "loss": 5.7943, + "step": 4755 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017229977070678628, + "loss": 4.3023, + "step": 4760 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017225954382718534, + "loss": 4.1447, + "step": 4765 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017221931694758437, + "loss": 4.5957, + "step": 4770 + }, + { + "epoch": 0.36, + "learning_rate": 0.00017217909006798343, + "loss": 4.5724, + "step": 4775 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001721388631883825, + "loss": 3.3936, + "step": 4780 + }, + { + "epoch": 0.37, + "learning_rate": 0.00017209863630878155, + "loss": 1.8421, + "step": 4785 + }, + { + "epoch": 0.37, + "learning_rate": 0.00017205840942918058, + "loss": 2.0833, + "step": 4790 + }, + { + "epoch": 0.37, + "learning_rate": 0.00017201818254957964, + "loss": 2.1733, + "step": 4795 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001719779556699787, + "loss": 1.4903, + "step": 4800 + }, + { + "epoch": 0.37, + "learning_rate": 0.00017193772879037773, + "loss": 5.8072, + "step": 4805 + }, + { + "epoch": 0.37, + "learning_rate": 0.00017189750191077679, + "loss": 4.2612, + "step": 4810 + }, + { + "epoch": 0.37, + "learning_rate": 0.00017185727503117584, + "loss": 3.9973, + "step": 4815 + }, + { + "epoch": 0.37, + "learning_rate": 0.00017181704815157488, + "loss": 4.2791, + "step": 4820 + }, + { + "epoch": 0.37, + "learning_rate": 0.00017177682127197393, + "loss": 4.2564, + "step": 4825 + }, + { + "epoch": 0.37, + "learning_rate": 0.000171736594392373, + "loss": 4.493, + "step": 4830 + }, + { + "epoch": 0.37, + "learning_rate": 0.00017169636751277205, + "loss": 3.7284, + "step": 4835 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001716561406331711, + "loss": 2.7577, + "step": 4840 + }, + { + "epoch": 0.37, + "learning_rate": 0.00017161591375357014, + "loss": 1.5587, + "step": 4845 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001715756868739692, + "loss": 1.974, + "step": 4850 + }, + { + "epoch": 0.37, + "learning_rate": 0.00017153545999436823, + "loss": 4.9592, + "step": 4855 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001714952331147673, + "loss": 3.9775, + "step": 4860 + }, + { + "epoch": 0.37, + "learning_rate": 0.00017145500623516635, + "loss": 4.7291, + "step": 4865 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001714147793555654, + "loss": 4.1361, + "step": 4870 + }, + { + "epoch": 0.37, + "learning_rate": 0.00017137455247596446, + "loss": 4.0131, + "step": 4875 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001713343255963635, + "loss": 4.918, + "step": 4880 + }, + { + "epoch": 0.37, + "learning_rate": 0.00017129409871676255, + "loss": 3.039, + "step": 4885 + }, + { + "epoch": 0.37, + "learning_rate": 0.00017125387183716158, + "loss": 2.5513, + "step": 4890 + }, + { + "epoch": 0.37, + "learning_rate": 0.00017121364495756064, + "loss": 1.4615, + "step": 4895 + }, + { + "epoch": 0.37, + "learning_rate": 0.0001711734180779597, + "loss": 1.9534, + "step": 4900 + }, + { + "epoch": 0.37, + "learning_rate": 0.00017113319119835876, + "loss": 4.6439, + "step": 4905 + }, + { + "epoch": 0.38, + "learning_rate": 0.00017109296431875782, + "loss": 4.8441, + "step": 4910 + }, + { + "epoch": 0.38, + "learning_rate": 0.00017105273743915687, + "loss": 3.5526, + "step": 4915 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001710125105595559, + "loss": 3.2357, + "step": 4920 + }, + { + "epoch": 0.38, + "learning_rate": 0.00017097228367995494, + "loss": 3.4221, + "step": 4925 + }, + { + "epoch": 0.38, + "learning_rate": 0.000170932056800354, + "loss": 3.1509, + "step": 4930 + }, + { + "epoch": 0.38, + "learning_rate": 0.00017089182992075305, + "loss": 1.6275, + "step": 4935 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001708516030411521, + "loss": 2.1762, + "step": 4940 + }, + { + "epoch": 0.38, + "learning_rate": 0.00017081137616155117, + "loss": 1.9439, + "step": 4945 + }, + { + "epoch": 0.38, + "learning_rate": 0.00017077114928195023, + "loss": 0.8666, + "step": 4950 + }, + { + "epoch": 0.38, + "learning_rate": 0.00017073092240234926, + "loss": 5.1297, + "step": 4955 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001706906955227483, + "loss": 4.4209, + "step": 4960 + }, + { + "epoch": 0.38, + "learning_rate": 0.00017065046864314735, + "loss": 4.9455, + "step": 4965 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001706102417635464, + "loss": 4.8725, + "step": 4970 + }, + { + "epoch": 0.38, + "learning_rate": 0.00017057001488394546, + "loss": 3.9418, + "step": 4975 + }, + { + "epoch": 0.38, + "learning_rate": 0.00017052978800434452, + "loss": 4.1444, + "step": 4980 + }, + { + "epoch": 0.38, + "learning_rate": 0.00017048956112474358, + "loss": 3.5028, + "step": 4985 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001704493342451426, + "loss": 2.5432, + "step": 4990 + }, + { + "epoch": 0.38, + "learning_rate": 0.00017040910736554164, + "loss": 1.4747, + "step": 4995 + }, + { + "epoch": 0.38, + "learning_rate": 0.0001703688804859407, + "loss": 0.9428, + "step": 5000 + }, + { + "epoch": 0.38, + "learning_rate": 0.00017032865360633976, + "loss": 5.8418, + "step": 5005 + }, + { + "epoch": 0.38, + "learning_rate": 0.00017028842672673882, + "loss": 3.3178, + "step": 5010 + }, + { + "epoch": 0.38, + "learning_rate": 0.00017024819984713788, + "loss": 4.2133, + "step": 5015 + }, + { + "epoch": 0.38, + "learning_rate": 0.00017020797296753693, + "loss": 3.9502, + "step": 5020 + }, + { + "epoch": 0.38, + "learning_rate": 0.00017016774608793597, + "loss": 3.731, + "step": 5025 + }, + { + "epoch": 0.38, + "learning_rate": 0.000170127519208335, + "loss": 4.6752, + "step": 5030 + }, + { + "epoch": 0.38, + "learning_rate": 0.00017008729232873405, + "loss": 2.7182, + "step": 5035 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001700470654491331, + "loss": 2.9108, + "step": 5040 + }, + { + "epoch": 0.39, + "learning_rate": 0.00017000683856953217, + "loss": 3.8828, + "step": 5045 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016996661168993123, + "loss": 1.53, + "step": 5050 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001699263848103303, + "loss": 5.5035, + "step": 5055 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016988615793072932, + "loss": 3.9553, + "step": 5060 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016984593105112838, + "loss": 4.5656, + "step": 5065 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001698057041715274, + "loss": 4.4553, + "step": 5070 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016976547729192647, + "loss": 4.3305, + "step": 5075 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016972525041232552, + "loss": 3.2616, + "step": 5080 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016968502353272458, + "loss": 4.177, + "step": 5085 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016964479665312364, + "loss": 3.3635, + "step": 5090 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016960456977352267, + "loss": 0.7162, + "step": 5095 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016956434289392173, + "loss": 3.3646, + "step": 5100 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016952411601432076, + "loss": 3.9, + "step": 5105 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016948388913471982, + "loss": 5.1976, + "step": 5110 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016944366225511888, + "loss": 5.4234, + "step": 5115 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016940343537551794, + "loss": 3.2449, + "step": 5120 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016936320849591697, + "loss": 3.8414, + "step": 5125 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016932298161631603, + "loss": 3.5964, + "step": 5130 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016928275473671508, + "loss": 3.0642, + "step": 5135 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016924252785711414, + "loss": 2.8784, + "step": 5140 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016920230097751317, + "loss": 2.0424, + "step": 5145 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016916207409791223, + "loss": 0.0858, + "step": 5150 + }, + { + "epoch": 0.39, + "learning_rate": 0.0001691218472183113, + "loss": 4.6318, + "step": 5155 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016908162033871032, + "loss": 5.0166, + "step": 5160 + }, + { + "epoch": 0.39, + "learning_rate": 0.00016904139345910938, + "loss": 3.9994, + "step": 5165 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016900116657950844, + "loss": 4.776, + "step": 5170 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001689609396999075, + "loss": 4.778, + "step": 5175 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016892071282030653, + "loss": 3.5281, + "step": 5180 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016888048594070558, + "loss": 2.7328, + "step": 5185 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016884025906110464, + "loss": 2.6921, + "step": 5190 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016880003218150367, + "loss": 1.5729, + "step": 5195 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016875980530190273, + "loss": 1.321, + "step": 5200 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001687195784223018, + "loss": 3.92, + "step": 5205 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016867935154270085, + "loss": 4.4383, + "step": 5210 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001686391246630999, + "loss": 5.2021, + "step": 5215 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016859889778349894, + "loss": 2.773, + "step": 5220 + }, + { + "epoch": 0.4, + "learning_rate": 0.000168558670903898, + "loss": 3.292, + "step": 5225 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016851844402429703, + "loss": 4.218, + "step": 5230 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016847821714469609, + "loss": 3.38, + "step": 5235 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016843799026509514, + "loss": 2.0585, + "step": 5240 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001683977633854942, + "loss": 3.6215, + "step": 5245 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016835753650589326, + "loss": 0.1181, + "step": 5250 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001683173096262923, + "loss": 4.9352, + "step": 5255 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016827708274669135, + "loss": 4.5021, + "step": 5260 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016823685586709038, + "loss": 4.435, + "step": 5265 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016819662898748944, + "loss": 3.6467, + "step": 5270 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001681564021078885, + "loss": 3.5488, + "step": 5275 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016811617522828756, + "loss": 3.414, + "step": 5280 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016807594834868661, + "loss": 3.1631, + "step": 5285 + }, + { + "epoch": 0.4, + "learning_rate": 0.00016803572146908567, + "loss": 2.0229, + "step": 5290 + }, + { + "epoch": 0.4, + "learning_rate": 0.0001679954945894847, + "loss": 1.2201, + "step": 5295 + }, + { + "epoch": 0.41, + "learning_rate": 0.00016795526770988373, + "loss": 3.7302, + "step": 5300 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001679150408302828, + "loss": 4.5115, + "step": 5305 + }, + { + "epoch": 0.41, + "learning_rate": 0.00016787481395068185, + "loss": 4.2893, + "step": 5310 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001678345870710809, + "loss": 3.5971, + "step": 5315 + }, + { + "epoch": 0.41, + "learning_rate": 0.00016779436019147997, + "loss": 4.424, + "step": 5320 + }, + { + "epoch": 0.41, + "learning_rate": 0.00016775413331187903, + "loss": 3.4488, + "step": 5325 + }, + { + "epoch": 0.41, + "learning_rate": 0.00016771390643227806, + "loss": 2.7201, + "step": 5330 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001676736795526771, + "loss": 2.8584, + "step": 5335 + }, + { + "epoch": 0.41, + "learning_rate": 0.00016763345267307615, + "loss": 2.0608, + "step": 5340 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001675932257934752, + "loss": 1.7905, + "step": 5345 + }, + { + "epoch": 0.41, + "learning_rate": 0.00016755299891387426, + "loss": 1.7893, + "step": 5350 + }, + { + "epoch": 0.41, + "learning_rate": 0.00016751277203427332, + "loss": 4.6043, + "step": 5355 + }, + { + "epoch": 0.41, + "learning_rate": 0.00016747254515467238, + "loss": 4.7084, + "step": 5360 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001674323182750714, + "loss": 4.0279, + "step": 5365 + }, + { + "epoch": 0.41, + "learning_rate": 0.00016739209139547044, + "loss": 4.6014, + "step": 5370 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001673518645158695, + "loss": 4.233, + "step": 5375 + }, + { + "epoch": 0.41, + "learning_rate": 0.00016731163763626856, + "loss": 4.4205, + "step": 5380 + }, + { + "epoch": 0.41, + "learning_rate": 0.00016727141075666762, + "loss": 3.7451, + "step": 5385 + }, + { + "epoch": 0.41, + "learning_rate": 0.00016723118387706667, + "loss": 1.9283, + "step": 5390 + }, + { + "epoch": 0.41, + "learning_rate": 0.00016719095699746573, + "loss": 1.5219, + "step": 5395 + }, + { + "epoch": 0.41, + "learning_rate": 0.00016715073011786476, + "loss": 1.0435, + "step": 5400 + }, + { + "epoch": 0.41, + "learning_rate": 0.00016711050323826382, + "loss": 3.7072, + "step": 5405 + }, + { + "epoch": 0.41, + "learning_rate": 0.00016707027635866285, + "loss": 4.7615, + "step": 5410 + }, + { + "epoch": 0.41, + "learning_rate": 0.0001670300494790619, + "loss": 3.8532, + "step": 5415 + }, + { + "epoch": 0.41, + "learning_rate": 0.00016698982259946097, + "loss": 4.3826, + "step": 5420 + }, + { + "epoch": 0.41, + "learning_rate": 0.00016694959571986003, + "loss": 3.766, + "step": 5425 + }, + { + "epoch": 0.42, + "learning_rate": 0.00016690936884025906, + "loss": 2.053, + "step": 5430 + }, + { + "epoch": 0.42, + "learning_rate": 0.00016686914196065812, + "loss": 3.3234, + "step": 5435 + }, + { + "epoch": 0.42, + "learning_rate": 0.00016682891508105718, + "loss": 1.802, + "step": 5440 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001667886882014562, + "loss": 2.0026, + "step": 5445 + }, + { + "epoch": 0.42, + "learning_rate": 0.00016674846132185527, + "loss": 1.087, + "step": 5450 + }, + { + "epoch": 0.42, + "learning_rate": 0.00016670823444225432, + "loss": 4.4006, + "step": 5455 + }, + { + "epoch": 0.42, + "learning_rate": 0.00016666800756265338, + "loss": 4.633, + "step": 5460 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001666277806830524, + "loss": 3.4943, + "step": 5465 + }, + { + "epoch": 0.42, + "learning_rate": 0.00016658755380345147, + "loss": 4.6764, + "step": 5470 + }, + { + "epoch": 0.42, + "learning_rate": 0.00016654732692385053, + "loss": 3.6434, + "step": 5475 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001665071000442496, + "loss": 3.2263, + "step": 5480 + }, + { + "epoch": 0.42, + "learning_rate": 0.00016646687316464862, + "loss": 2.8007, + "step": 5485 + }, + { + "epoch": 0.42, + "learning_rate": 0.00016642664628504768, + "loss": 2.9424, + "step": 5490 + }, + { + "epoch": 0.42, + "learning_rate": 0.00016638641940544674, + "loss": 1.1535, + "step": 5495 + }, + { + "epoch": 0.42, + "learning_rate": 0.00016634619252584577, + "loss": 0.2137, + "step": 5500 + }, + { + "epoch": 0.42, + "learning_rate": 0.00016630596564624482, + "loss": 4.2225, + "step": 5505 + }, + { + "epoch": 0.42, + "learning_rate": 0.00016626573876664388, + "loss": 4.5811, + "step": 5510 + }, + { + "epoch": 0.42, + "learning_rate": 0.00016622551188704294, + "loss": 5.7295, + "step": 5515 + }, + { + "epoch": 0.42, + "learning_rate": 0.00016618528500744197, + "loss": 4.3728, + "step": 5520 + }, + { + "epoch": 0.42, + "learning_rate": 0.00016614505812784103, + "loss": 3.9244, + "step": 5525 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001661048312482401, + "loss": 3.3557, + "step": 5530 + }, + { + "epoch": 0.42, + "learning_rate": 0.00016606460436863912, + "loss": 2.9875, + "step": 5535 + }, + { + "epoch": 0.42, + "learning_rate": 0.00016602437748903818, + "loss": 3.5087, + "step": 5540 + }, + { + "epoch": 0.42, + "learning_rate": 0.00016598415060943724, + "loss": 3.3396, + "step": 5545 + }, + { + "epoch": 0.42, + "learning_rate": 0.0001659439237298363, + "loss": 1.8395, + "step": 5550 + }, + { + "epoch": 0.42, + "learning_rate": 0.00016590369685023535, + "loss": 4.0984, + "step": 5555 + }, + { + "epoch": 0.42, + "learning_rate": 0.00016586346997063438, + "loss": 3.4518, + "step": 5560 + }, + { + "epoch": 0.43, + "learning_rate": 0.00016582324309103344, + "loss": 4.3408, + "step": 5565 + }, + { + "epoch": 0.43, + "learning_rate": 0.00016578301621143247, + "loss": 3.5854, + "step": 5570 + }, + { + "epoch": 0.43, + "learning_rate": 0.00016574278933183153, + "loss": 3.4309, + "step": 5575 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001657025624522306, + "loss": 3.1176, + "step": 5580 + }, + { + "epoch": 0.43, + "learning_rate": 0.00016566233557262965, + "loss": 3.166, + "step": 5585 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001656221086930287, + "loss": 2.8318, + "step": 5590 + }, + { + "epoch": 0.43, + "learning_rate": 0.00016558188181342774, + "loss": 1.8063, + "step": 5595 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001655416549338268, + "loss": 0.0932, + "step": 5600 + }, + { + "epoch": 0.43, + "learning_rate": 0.00016550142805422583, + "loss": 4.1902, + "step": 5605 + }, + { + "epoch": 0.43, + "learning_rate": 0.00016546120117462489, + "loss": 4.9719, + "step": 5610 + }, + { + "epoch": 0.43, + "learning_rate": 0.00016542097429502394, + "loss": 4.8271, + "step": 5615 + }, + { + "epoch": 0.43, + "learning_rate": 0.000165380747415423, + "loss": 4.3954, + "step": 5620 + }, + { + "epoch": 0.43, + "learning_rate": 0.00016534052053582206, + "loss": 3.5303, + "step": 5625 + }, + { + "epoch": 0.43, + "learning_rate": 0.00016530029365622112, + "loss": 2.2174, + "step": 5630 + }, + { + "epoch": 0.43, + "learning_rate": 0.00016526006677662015, + "loss": 2.1981, + "step": 5635 + }, + { + "epoch": 0.43, + "learning_rate": 0.00016521983989701918, + "loss": 2.1, + "step": 5640 + }, + { + "epoch": 0.43, + "learning_rate": 0.00016517961301741824, + "loss": 2.3089, + "step": 5645 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001651393861378173, + "loss": 3.9498, + "step": 5650 + }, + { + "epoch": 0.43, + "learning_rate": 0.00016509915925821636, + "loss": 4.8281, + "step": 5655 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001650589323786154, + "loss": 5.3375, + "step": 5660 + }, + { + "epoch": 0.43, + "learning_rate": 0.00016501870549901447, + "loss": 4.2732, + "step": 5665 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001649784786194135, + "loss": 3.8743, + "step": 5670 + }, + { + "epoch": 0.43, + "learning_rate": 0.00016493825173981253, + "loss": 2.8098, + "step": 5675 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001648980248602116, + "loss": 3.626, + "step": 5680 + }, + { + "epoch": 0.43, + "learning_rate": 0.00016485779798061065, + "loss": 3.1708, + "step": 5685 + }, + { + "epoch": 0.43, + "learning_rate": 0.0001648175711010097, + "loss": 2.9451, + "step": 5690 + }, + { + "epoch": 0.44, + "learning_rate": 0.00016477734422140877, + "loss": 2.7528, + "step": 5695 + }, + { + "epoch": 0.44, + "learning_rate": 0.00016473711734180783, + "loss": 0.7335, + "step": 5700 + }, + { + "epoch": 0.44, + "learning_rate": 0.00016469689046220686, + "loss": 4.0922, + "step": 5705 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001646566635826059, + "loss": 4.3029, + "step": 5710 + }, + { + "epoch": 0.44, + "learning_rate": 0.00016461643670300495, + "loss": 4.0666, + "step": 5715 + }, + { + "epoch": 0.44, + "learning_rate": 0.000164576209823404, + "loss": 4.3164, + "step": 5720 + }, + { + "epoch": 0.44, + "learning_rate": 0.00016453598294380306, + "loss": 3.5441, + "step": 5725 + }, + { + "epoch": 0.44, + "learning_rate": 0.00016449575606420212, + "loss": 2.875, + "step": 5730 + }, + { + "epoch": 0.44, + "learning_rate": 0.00016445552918460115, + "loss": 2.0636, + "step": 5735 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001644153023050002, + "loss": 2.9502, + "step": 5740 + }, + { + "epoch": 0.44, + "learning_rate": 0.00016437507542539924, + "loss": 0.608, + "step": 5745 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001643348485457983, + "loss": 2.9006, + "step": 5750 + }, + { + "epoch": 0.44, + "learning_rate": 0.00016429462166619736, + "loss": 5.0873, + "step": 5755 + }, + { + "epoch": 0.44, + "learning_rate": 0.00016425439478659642, + "loss": 4.1059, + "step": 5760 + }, + { + "epoch": 0.44, + "learning_rate": 0.00016421416790699547, + "loss": 3.5555, + "step": 5765 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001641739410273945, + "loss": 3.1217, + "step": 5770 + }, + { + "epoch": 0.44, + "learning_rate": 0.00016413371414779356, + "loss": 3.9523, + "step": 5775 + }, + { + "epoch": 0.44, + "learning_rate": 0.00016409348726819262, + "loss": 2.5499, + "step": 5780 + }, + { + "epoch": 0.44, + "learning_rate": 0.00016405326038859165, + "loss": 1.9861, + "step": 5785 + }, + { + "epoch": 0.44, + "learning_rate": 0.0001640130335089907, + "loss": 1.938, + "step": 5790 + }, + { + "epoch": 0.44, + "learning_rate": 0.00016397280662938977, + "loss": 1.8224, + "step": 5795 + }, + { + "epoch": 0.44, + "learning_rate": 0.00016393257974978883, + "loss": 3.7677, + "step": 5800 + }, + { + "epoch": 0.44, + "learning_rate": 0.00016389235287018786, + "loss": 4.5553, + "step": 5805 + }, + { + "epoch": 0.44, + "learning_rate": 0.00016385212599058692, + "loss": 4.0107, + "step": 5810 + }, + { + "epoch": 0.44, + "learning_rate": 0.00016381189911098597, + "loss": 4.0619, + "step": 5815 + }, + { + "epoch": 0.44, + "learning_rate": 0.000163771672231385, + "loss": 4.0109, + "step": 5820 + }, + { + "epoch": 0.45, + "learning_rate": 0.00016373144535178406, + "loss": 3.6002, + "step": 5825 + }, + { + "epoch": 0.45, + "learning_rate": 0.00016369121847218312, + "loss": 3.1553, + "step": 5830 + }, + { + "epoch": 0.45, + "learning_rate": 0.00016365099159258218, + "loss": 2.579, + "step": 5835 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001636107647129812, + "loss": 2.6138, + "step": 5840 + }, + { + "epoch": 0.45, + "learning_rate": 0.00016357053783338027, + "loss": 1.2922, + "step": 5845 + }, + { + "epoch": 0.45, + "learning_rate": 0.00016353031095377933, + "loss": 1.3788, + "step": 5850 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001634900840741784, + "loss": 3.92, + "step": 5855 + }, + { + "epoch": 0.45, + "learning_rate": 0.00016344985719457742, + "loss": 4.1422, + "step": 5860 + }, + { + "epoch": 0.45, + "learning_rate": 0.00016340963031497648, + "loss": 4.1127, + "step": 5865 + }, + { + "epoch": 0.45, + "learning_rate": 0.00016336940343537553, + "loss": 3.7213, + "step": 5870 + }, + { + "epoch": 0.45, + "learning_rate": 0.00016332917655577457, + "loss": 3.4097, + "step": 5875 + }, + { + "epoch": 0.45, + "learning_rate": 0.00016328894967617362, + "loss": 2.2418, + "step": 5880 + }, + { + "epoch": 0.45, + "learning_rate": 0.00016324872279657268, + "loss": 2.4848, + "step": 5885 + }, + { + "epoch": 0.45, + "learning_rate": 0.00016320849591697174, + "loss": 2.2139, + "step": 5890 + }, + { + "epoch": 0.45, + "learning_rate": 0.00016316826903737077, + "loss": 2.1187, + "step": 5895 + }, + { + "epoch": 0.45, + "learning_rate": 0.00016312804215776983, + "loss": 0.5452, + "step": 5900 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001630878152781689, + "loss": 5.1633, + "step": 5905 + }, + { + "epoch": 0.45, + "learning_rate": 0.00016304758839856792, + "loss": 5.2154, + "step": 5910 + }, + { + "epoch": 0.45, + "learning_rate": 0.00016300736151896698, + "loss": 4.3537, + "step": 5915 + }, + { + "epoch": 0.45, + "learning_rate": 0.00016296713463936604, + "loss": 4.3426, + "step": 5920 + }, + { + "epoch": 0.45, + "learning_rate": 0.0001629269077597651, + "loss": 3.8029, + "step": 5925 + }, + { + "epoch": 0.45, + "learning_rate": 0.00016288668088016415, + "loss": 3.3158, + "step": 5930 + }, + { + "epoch": 0.45, + "learning_rate": 0.00016284645400056318, + "loss": 2.3386, + "step": 5935 + }, + { + "epoch": 0.45, + "learning_rate": 0.00016280622712096224, + "loss": 2.2401, + "step": 5940 + }, + { + "epoch": 0.45, + "learning_rate": 0.00016276600024136127, + "loss": 3.1957, + "step": 5945 + }, + { + "epoch": 0.45, + "learning_rate": 0.00016272577336176033, + "loss": 3.0096, + "step": 5950 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001626855464821594, + "loss": 4.1182, + "step": 5955 + }, + { + "epoch": 0.46, + "learning_rate": 0.00016264531960255845, + "loss": 4.3215, + "step": 5960 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001626050927229575, + "loss": 4.4809, + "step": 5965 + }, + { + "epoch": 0.46, + "learning_rate": 0.00016256486584335654, + "loss": 4.4412, + "step": 5970 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001625246389637556, + "loss": 5.4105, + "step": 5975 + }, + { + "epoch": 0.46, + "learning_rate": 0.00016248441208415463, + "loss": 4.3814, + "step": 5980 + }, + { + "epoch": 0.46, + "learning_rate": 0.00016244418520455368, + "loss": 2.7278, + "step": 5985 + }, + { + "epoch": 0.46, + "learning_rate": 0.00016240395832495274, + "loss": 3.1769, + "step": 5990 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001623637314453518, + "loss": 0.9617, + "step": 5995 + }, + { + "epoch": 0.46, + "learning_rate": 0.00016232350456575086, + "loss": 1.1335, + "step": 6000 + }, + { + "epoch": 0.46, + "learning_rate": 0.00016228327768614992, + "loss": 4.5203, + "step": 6005 + }, + { + "epoch": 0.46, + "learning_rate": 0.00016224305080654895, + "loss": 4.9381, + "step": 6010 + }, + { + "epoch": 0.46, + "learning_rate": 0.00016220282392694798, + "loss": 4.7555, + "step": 6015 + }, + { + "epoch": 0.46, + "learning_rate": 0.00016216259704734704, + "loss": 4.7895, + "step": 6020 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001621223701677461, + "loss": 3.355, + "step": 6025 + }, + { + "epoch": 0.46, + "learning_rate": 0.00016208214328814515, + "loss": 3.2165, + "step": 6030 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001620419164085442, + "loss": 3.9241, + "step": 6035 + }, + { + "epoch": 0.46, + "learning_rate": 0.00016200168952894324, + "loss": 2.3204, + "step": 6040 + }, + { + "epoch": 0.46, + "learning_rate": 0.00016196146264934227, + "loss": 1.1676, + "step": 6045 + }, + { + "epoch": 0.46, + "learning_rate": 0.00016192123576974133, + "loss": 0.6222, + "step": 6050 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001618810088901404, + "loss": 4.2146, + "step": 6055 + }, + { + "epoch": 0.46, + "learning_rate": 0.00016184078201053945, + "loss": 5.6813, + "step": 6060 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001618005551309385, + "loss": 3.683, + "step": 6065 + }, + { + "epoch": 0.46, + "learning_rate": 0.00016176032825133757, + "loss": 3.5365, + "step": 6070 + }, + { + "epoch": 0.46, + "learning_rate": 0.0001617201013717366, + "loss": 2.232, + "step": 6075 + }, + { + "epoch": 0.46, + "learning_rate": 0.00016167987449213566, + "loss": 3.2946, + "step": 6080 + }, + { + "epoch": 0.47, + "learning_rate": 0.00016163964761253469, + "loss": 2.1471, + "step": 6085 + }, + { + "epoch": 0.47, + "learning_rate": 0.00016159942073293374, + "loss": 3.233, + "step": 6090 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001615591938533328, + "loss": 3.6617, + "step": 6095 + }, + { + "epoch": 0.47, + "learning_rate": 0.00016151896697373186, + "loss": 1.3713, + "step": 6100 + }, + { + "epoch": 0.47, + "learning_rate": 0.00016147874009413092, + "loss": 5.2553, + "step": 6105 + }, + { + "epoch": 0.47, + "learning_rate": 0.00016143851321452995, + "loss": 4.0039, + "step": 6110 + }, + { + "epoch": 0.47, + "learning_rate": 0.000161398286334929, + "loss": 4.5361, + "step": 6115 + }, + { + "epoch": 0.47, + "learning_rate": 0.00016135805945532804, + "loss": 3.3616, + "step": 6120 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001613178325757271, + "loss": 3.3566, + "step": 6125 + }, + { + "epoch": 0.47, + "learning_rate": 0.00016127760569612616, + "loss": 3.7517, + "step": 6130 + }, + { + "epoch": 0.47, + "learning_rate": 0.00016123737881652521, + "loss": 3.2124, + "step": 6135 + }, + { + "epoch": 0.47, + "learning_rate": 0.00016119715193692427, + "loss": 2.3577, + "step": 6140 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001611569250573233, + "loss": 3.5035, + "step": 6145 + }, + { + "epoch": 0.47, + "learning_rate": 0.00016111669817772236, + "loss": 1.3117, + "step": 6150 + }, + { + "epoch": 0.47, + "learning_rate": 0.00016107647129812142, + "loss": 4.2394, + "step": 6155 + }, + { + "epoch": 0.47, + "learning_rate": 0.00016103624441852045, + "loss": 4.5457, + "step": 6160 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001609960175389195, + "loss": 3.7481, + "step": 6165 + }, + { + "epoch": 0.47, + "learning_rate": 0.00016095579065931857, + "loss": 3.9391, + "step": 6170 + }, + { + "epoch": 0.47, + "learning_rate": 0.00016091556377971763, + "loss": 3.5035, + "step": 6175 + }, + { + "epoch": 0.47, + "learning_rate": 0.00016087533690011666, + "loss": 3.3803, + "step": 6180 + }, + { + "epoch": 0.47, + "learning_rate": 0.00016083511002051572, + "loss": 2.449, + "step": 6185 + }, + { + "epoch": 0.47, + "learning_rate": 0.00016079488314091477, + "loss": 1.8994, + "step": 6190 + }, + { + "epoch": 0.47, + "learning_rate": 0.0001607546562613138, + "loss": 2.9759, + "step": 6195 + }, + { + "epoch": 0.47, + "learning_rate": 0.00016071442938171286, + "loss": 2.7731, + "step": 6200 + }, + { + "epoch": 0.47, + "learning_rate": 0.00016067420250211192, + "loss": 4.325, + "step": 6205 + }, + { + "epoch": 0.47, + "learning_rate": 0.00016063397562251098, + "loss": 4.6818, + "step": 6210 + }, + { + "epoch": 0.48, + "learning_rate": 0.00016059374874291, + "loss": 3.3004, + "step": 6215 + }, + { + "epoch": 0.48, + "learning_rate": 0.00016055352186330907, + "loss": 3.7553, + "step": 6220 + }, + { + "epoch": 0.48, + "learning_rate": 0.00016051329498370813, + "loss": 3.8145, + "step": 6225 + }, + { + "epoch": 0.48, + "learning_rate": 0.00016047306810410719, + "loss": 3.1997, + "step": 6230 + }, + { + "epoch": 0.48, + "learning_rate": 0.00016043284122450622, + "loss": 3.2375, + "step": 6235 + }, + { + "epoch": 0.48, + "learning_rate": 0.00016039261434490528, + "loss": 2.0859, + "step": 6240 + }, + { + "epoch": 0.48, + "learning_rate": 0.00016035238746530433, + "loss": 3.2224, + "step": 6245 + }, + { + "epoch": 0.48, + "learning_rate": 0.00016031216058570336, + "loss": 2.5072, + "step": 6250 + }, + { + "epoch": 0.48, + "learning_rate": 0.00016027193370610242, + "loss": 4.9318, + "step": 6255 + }, + { + "epoch": 0.48, + "learning_rate": 0.00016023170682650148, + "loss": 4.1885, + "step": 6260 + }, + { + "epoch": 0.48, + "learning_rate": 0.00016019147994690054, + "loss": 4.3771, + "step": 6265 + }, + { + "epoch": 0.48, + "learning_rate": 0.00016015125306729957, + "loss": 4.267, + "step": 6270 + }, + { + "epoch": 0.48, + "learning_rate": 0.00016011102618769863, + "loss": 4.1869, + "step": 6275 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001600707993080977, + "loss": 2.5321, + "step": 6280 + }, + { + "epoch": 0.48, + "learning_rate": 0.00016003057242849672, + "loss": 2.5823, + "step": 6285 + }, + { + "epoch": 0.48, + "learning_rate": 0.00015999034554889578, + "loss": 1.3439, + "step": 6290 + }, + { + "epoch": 0.48, + "learning_rate": 0.00015995011866929483, + "loss": 2.2149, + "step": 6295 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001599098917896939, + "loss": 1.6957, + "step": 6300 + }, + { + "epoch": 0.48, + "learning_rate": 0.00015986966491009295, + "loss": 4.6896, + "step": 6305 + }, + { + "epoch": 0.48, + "learning_rate": 0.00015982943803049198, + "loss": 4.6721, + "step": 6310 + }, + { + "epoch": 0.48, + "learning_rate": 0.00015978921115089104, + "loss": 3.8354, + "step": 6315 + }, + { + "epoch": 0.48, + "learning_rate": 0.00015974898427129007, + "loss": 3.1878, + "step": 6320 + }, + { + "epoch": 0.48, + "learning_rate": 0.00015970875739168913, + "loss": 3.6049, + "step": 6325 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001596685305120882, + "loss": 3.4877, + "step": 6330 + }, + { + "epoch": 0.48, + "learning_rate": 0.00015962830363248725, + "loss": 3.7444, + "step": 6335 + }, + { + "epoch": 0.48, + "learning_rate": 0.0001595880767528863, + "loss": 2.6608, + "step": 6340 + }, + { + "epoch": 0.48, + "learning_rate": 0.00015954784987328534, + "loss": 1.8235, + "step": 6345 + }, + { + "epoch": 0.49, + "learning_rate": 0.00015950762299368437, + "loss": 1.9221, + "step": 6350 + }, + { + "epoch": 0.49, + "learning_rate": 0.00015946739611408342, + "loss": 4.5215, + "step": 6355 + }, + { + "epoch": 0.49, + "learning_rate": 0.00015942716923448248, + "loss": 5.2854, + "step": 6360 + }, + { + "epoch": 0.49, + "learning_rate": 0.00015938694235488154, + "loss": 4.3033, + "step": 6365 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001593467154752806, + "loss": 5.4211, + "step": 6370 + }, + { + "epoch": 0.49, + "learning_rate": 0.00015930648859567966, + "loss": 2.9237, + "step": 6375 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001592662617160787, + "loss": 3.6648, + "step": 6380 + }, + { + "epoch": 0.49, + "learning_rate": 0.00015922603483647772, + "loss": 2.2996, + "step": 6385 + }, + { + "epoch": 0.49, + "learning_rate": 0.00015918580795687678, + "loss": 2.3559, + "step": 6390 + }, + { + "epoch": 0.49, + "learning_rate": 0.00015914558107727584, + "loss": 1.8411, + "step": 6395 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001591053541976749, + "loss": 1.8784, + "step": 6400 + }, + { + "epoch": 0.49, + "learning_rate": 0.00015906512731807395, + "loss": 3.5871, + "step": 6405 + }, + { + "epoch": 0.49, + "learning_rate": 0.000159024900438473, + "loss": 4.8471, + "step": 6410 + }, + { + "epoch": 0.49, + "learning_rate": 0.00015898467355887204, + "loss": 3.7787, + "step": 6415 + }, + { + "epoch": 0.49, + "learning_rate": 0.00015894444667927107, + "loss": 3.7279, + "step": 6420 + }, + { + "epoch": 0.49, + "learning_rate": 0.00015890421979967013, + "loss": 4.0128, + "step": 6425 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001588639929200692, + "loss": 3.1311, + "step": 6430 + }, + { + "epoch": 0.49, + "learning_rate": 0.00015882376604046825, + "loss": 2.5843, + "step": 6435 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001587835391608673, + "loss": 4.284, + "step": 6440 + }, + { + "epoch": 0.49, + "learning_rate": 0.00015874331228126636, + "loss": 3.2978, + "step": 6445 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001587030854016654, + "loss": 0.0883, + "step": 6450 + }, + { + "epoch": 0.49, + "learning_rate": 0.00015866285852206445, + "loss": 3.7918, + "step": 6455 + }, + { + "epoch": 0.49, + "learning_rate": 0.00015862263164246349, + "loss": 4.1014, + "step": 6460 + }, + { + "epoch": 0.49, + "learning_rate": 0.00015858240476286254, + "loss": 3.642, + "step": 6465 + }, + { + "epoch": 0.49, + "learning_rate": 0.0001585421778832616, + "loss": 4.5049, + "step": 6470 + }, + { + "epoch": 0.49, + "learning_rate": 0.00015850195100366066, + "loss": 3.2287, + "step": 6475 + }, + { + "epoch": 0.5, + "learning_rate": 0.00015846172412405972, + "loss": 2.9815, + "step": 6480 + }, + { + "epoch": 0.5, + "learning_rate": 0.00015842149724445875, + "loss": 3.578, + "step": 6485 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001583812703648578, + "loss": 3.4713, + "step": 6490 + }, + { + "epoch": 0.5, + "learning_rate": 0.00015834104348525684, + "loss": 2.6449, + "step": 6495 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001583008166056559, + "loss": 3.3023, + "step": 6500 + }, + { + "epoch": 0.5, + "learning_rate": 0.00015826058972605496, + "loss": 3.7168, + "step": 6505 + }, + { + "epoch": 0.5, + "learning_rate": 0.00015822036284645401, + "loss": 3.6531, + "step": 6510 + }, + { + "epoch": 0.5, + "learning_rate": 0.00015818013596685307, + "loss": 4.1709, + "step": 6515 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001581399090872521, + "loss": 3.7805, + "step": 6520 + }, + { + "epoch": 0.5, + "learning_rate": 0.00015809968220765116, + "loss": 4.2029, + "step": 6525 + }, + { + "epoch": 0.5, + "learning_rate": 0.00015805945532805022, + "loss": 3.6784, + "step": 6530 + }, + { + "epoch": 0.5, + "learning_rate": 0.00015801922844844925, + "loss": 2.9541, + "step": 6535 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001579790015688483, + "loss": 2.3555, + "step": 6540 + }, + { + "epoch": 0.5, + "learning_rate": 0.00015793877468924737, + "loss": 2.8182, + "step": 6545 + }, + { + "epoch": 0.5, + "learning_rate": 0.00015789854780964643, + "loss": 0.6913, + "step": 6550 + }, + { + "epoch": 0.5, + "learning_rate": 0.00015785832093004546, + "loss": 3.7799, + "step": 6555 + }, + { + "epoch": 0.5, + "learning_rate": 0.00015781809405044451, + "loss": 4.8266, + "step": 6560 + }, + { + "epoch": 0.5, + "learning_rate": 0.00015777786717084357, + "loss": 4.3416, + "step": 6565 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001577376402912426, + "loss": 3.3998, + "step": 6570 + }, + { + "epoch": 0.5, + "learning_rate": 0.00015769741341164166, + "loss": 3.0765, + "step": 6575 + }, + { + "epoch": 0.5, + "learning_rate": 0.00015765718653204072, + "loss": 3.1966, + "step": 6580 + }, + { + "epoch": 0.5, + "learning_rate": 0.00015761695965243978, + "loss": 3.4283, + "step": 6585 + }, + { + "epoch": 0.5, + "learning_rate": 0.0001575767327728388, + "loss": 1.7517, + "step": 6590 + }, + { + "epoch": 0.5, + "learning_rate": 0.00015753650589323787, + "loss": 2.4111, + "step": 6595 + }, + { + "epoch": 0.5, + "learning_rate": 0.00015749627901363693, + "loss": 1.31, + "step": 6600 + }, + { + "epoch": 0.5, + "learning_rate": 0.00015745605213403598, + "loss": 5.1953, + "step": 6605 + }, + { + "epoch": 0.51, + "learning_rate": 0.00015741582525443502, + "loss": 4.7135, + "step": 6610 + }, + { + "epoch": 0.51, + "learning_rate": 0.00015737559837483407, + "loss": 3.4439, + "step": 6615 + }, + { + "epoch": 0.51, + "learning_rate": 0.00015733537149523313, + "loss": 4.5059, + "step": 6620 + }, + { + "epoch": 0.51, + "learning_rate": 0.00015729514461563216, + "loss": 4.826, + "step": 6625 + }, + { + "epoch": 0.51, + "learning_rate": 0.00015725491773603122, + "loss": 4.1916, + "step": 6630 + }, + { + "epoch": 0.51, + "learning_rate": 0.00015721469085643028, + "loss": 3.258, + "step": 6635 + }, + { + "epoch": 0.51, + "learning_rate": 0.00015717446397682934, + "loss": 2.7372, + "step": 6640 + }, + { + "epoch": 0.51, + "learning_rate": 0.00015713423709722837, + "loss": 2.4825, + "step": 6645 + }, + { + "epoch": 0.51, + "learning_rate": 0.00015709401021762743, + "loss": 0.8767, + "step": 6650 + }, + { + "epoch": 0.51, + "learning_rate": 0.00015705378333802646, + "loss": 5.0094, + "step": 6655 + }, + { + "epoch": 0.51, + "learning_rate": 0.00015701355645842552, + "loss": 5.1865, + "step": 6660 + }, + { + "epoch": 0.51, + "learning_rate": 0.00015697332957882458, + "loss": 3.6629, + "step": 6665 + }, + { + "epoch": 0.51, + "learning_rate": 0.00015693310269922363, + "loss": 4.3094, + "step": 6670 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001568928758196227, + "loss": 3.6267, + "step": 6675 + }, + { + "epoch": 0.51, + "learning_rate": 0.00015685264894002175, + "loss": 3.056, + "step": 6680 + }, + { + "epoch": 0.51, + "learning_rate": 0.00015681242206042078, + "loss": 2.7199, + "step": 6685 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001567721951808198, + "loss": 2.7694, + "step": 6690 + }, + { + "epoch": 0.51, + "learning_rate": 0.00015673196830121887, + "loss": 3.5551, + "step": 6695 + }, + { + "epoch": 0.51, + "learning_rate": 0.00015669174142161793, + "loss": 0.0749, + "step": 6700 + }, + { + "epoch": 0.51, + "learning_rate": 0.000156651514542017, + "loss": 4.768, + "step": 6705 + }, + { + "epoch": 0.51, + "learning_rate": 0.00015661128766241605, + "loss": 4.2012, + "step": 6710 + }, + { + "epoch": 0.51, + "learning_rate": 0.0001565710607828151, + "loss": 2.9939, + "step": 6715 + }, + { + "epoch": 0.51, + "learning_rate": 0.00015653083390321413, + "loss": 2.8134, + "step": 6720 + }, + { + "epoch": 0.51, + "learning_rate": 0.00015649060702361317, + "loss": 3.4247, + "step": 6725 + }, + { + "epoch": 0.51, + "learning_rate": 0.00015645038014401222, + "loss": 3.0152, + "step": 6730 + }, + { + "epoch": 0.51, + "learning_rate": 0.00015641015326441128, + "loss": 2.0896, + "step": 6735 + }, + { + "epoch": 0.52, + "learning_rate": 0.00015636992638481034, + "loss": 2.5528, + "step": 6740 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001563296995052094, + "loss": 0.7059, + "step": 6745 + }, + { + "epoch": 0.52, + "learning_rate": 0.00015628947262560846, + "loss": 1.9621, + "step": 6750 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001562492457460075, + "loss": 5.2277, + "step": 6755 + }, + { + "epoch": 0.52, + "learning_rate": 0.00015620901886640652, + "loss": 5.1182, + "step": 6760 + }, + { + "epoch": 0.52, + "learning_rate": 0.00015616879198680558, + "loss": 4.7277, + "step": 6765 + }, + { + "epoch": 0.52, + "learning_rate": 0.00015612856510720464, + "loss": 3.9717, + "step": 6770 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001560883382276037, + "loss": 3.8693, + "step": 6775 + }, + { + "epoch": 0.52, + "learning_rate": 0.00015604811134800275, + "loss": 2.4765, + "step": 6780 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001560078844684018, + "loss": 3.0296, + "step": 6785 + }, + { + "epoch": 0.52, + "learning_rate": 0.00015596765758880084, + "loss": 2.2676, + "step": 6790 + }, + { + "epoch": 0.52, + "learning_rate": 0.00015592743070919987, + "loss": 2.3231, + "step": 6795 + }, + { + "epoch": 0.52, + "learning_rate": 0.00015588720382959893, + "loss": 0.0427, + "step": 6800 + }, + { + "epoch": 0.52, + "learning_rate": 0.000155846976949998, + "loss": 4.8117, + "step": 6805 + }, + { + "epoch": 0.52, + "learning_rate": 0.00015580675007039705, + "loss": 4.2992, + "step": 6810 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001557665231907961, + "loss": 4.5337, + "step": 6815 + }, + { + "epoch": 0.52, + "learning_rate": 0.00015572629631119516, + "loss": 5.334, + "step": 6820 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001556860694315942, + "loss": 3.5176, + "step": 6825 + }, + { + "epoch": 0.52, + "learning_rate": 0.00015564584255199325, + "loss": 1.5627, + "step": 6830 + }, + { + "epoch": 0.52, + "learning_rate": 0.00015560561567239228, + "loss": 2.1864, + "step": 6835 + }, + { + "epoch": 0.52, + "learning_rate": 0.00015556538879279134, + "loss": 0.7217, + "step": 6840 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001555251619131904, + "loss": 3.6855, + "step": 6845 + }, + { + "epoch": 0.52, + "learning_rate": 0.00015548493503358946, + "loss": 1.8674, + "step": 6850 + }, + { + "epoch": 0.52, + "learning_rate": 0.00015544470815398852, + "loss": 4.4283, + "step": 6855 + }, + { + "epoch": 0.52, + "learning_rate": 0.00015540448127438755, + "loss": 4.7135, + "step": 6860 + }, + { + "epoch": 0.52, + "learning_rate": 0.0001553642543947866, + "loss": 3.5176, + "step": 6865 + }, + { + "epoch": 0.53, + "learning_rate": 0.00015532402751518564, + "loss": 5.2475, + "step": 6870 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001552838006355847, + "loss": 3.5299, + "step": 6875 + }, + { + "epoch": 0.53, + "learning_rate": 0.00015524357375598375, + "loss": 3.6828, + "step": 6880 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001552033468763828, + "loss": 5.0184, + "step": 6885 + }, + { + "epoch": 0.53, + "learning_rate": 0.00015516311999678187, + "loss": 3.1216, + "step": 6890 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001551228931171809, + "loss": 3.048, + "step": 6895 + }, + { + "epoch": 0.53, + "learning_rate": 0.00015508266623757996, + "loss": 3.0663, + "step": 6900 + }, + { + "epoch": 0.53, + "learning_rate": 0.00015504243935797902, + "loss": 5.0727, + "step": 6905 + }, + { + "epoch": 0.53, + "learning_rate": 0.00015500221247837805, + "loss": 4.3605, + "step": 6910 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001549619855987771, + "loss": 4.4473, + "step": 6915 + }, + { + "epoch": 0.53, + "learning_rate": 0.00015492175871917617, + "loss": 3.3799, + "step": 6920 + }, + { + "epoch": 0.53, + "learning_rate": 0.00015488153183957522, + "loss": 3.7008, + "step": 6925 + }, + { + "epoch": 0.53, + "learning_rate": 0.00015484130495997426, + "loss": 3.7081, + "step": 6930 + }, + { + "epoch": 0.53, + "learning_rate": 0.00015480107808037331, + "loss": 3.5293, + "step": 6935 + }, + { + "epoch": 0.53, + "learning_rate": 0.00015476085120077237, + "loss": 2.8219, + "step": 6940 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001547206243211714, + "loss": 2.5409, + "step": 6945 + }, + { + "epoch": 0.53, + "learning_rate": 0.00015468039744157046, + "loss": 1.8382, + "step": 6950 + }, + { + "epoch": 0.53, + "learning_rate": 0.00015464017056196952, + "loss": 4.9234, + "step": 6955 + }, + { + "epoch": 0.53, + "learning_rate": 0.00015459994368236855, + "loss": 5.3937, + "step": 6960 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001545597168027676, + "loss": 4.5607, + "step": 6965 + }, + { + "epoch": 0.53, + "learning_rate": 0.00015451948992316667, + "loss": 3.7303, + "step": 6970 + }, + { + "epoch": 0.53, + "learning_rate": 0.00015447926304356573, + "loss": 3.9004, + "step": 6975 + }, + { + "epoch": 0.53, + "learning_rate": 0.00015443903616396478, + "loss": 3.7896, + "step": 6980 + }, + { + "epoch": 0.53, + "learning_rate": 0.00015439880928436381, + "loss": 2.2128, + "step": 6985 + }, + { + "epoch": 0.53, + "learning_rate": 0.00015435858240476287, + "loss": 1.8755, + "step": 6990 + }, + { + "epoch": 0.53, + "learning_rate": 0.0001543183555251619, + "loss": 2.1864, + "step": 6995 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015427812864556096, + "loss": 0.8963, + "step": 7000 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015423790176596002, + "loss": 4.0704, + "step": 7005 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015419767488635908, + "loss": 4.6586, + "step": 7010 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015415744800675814, + "loss": 4.1082, + "step": 7015 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015411722112715717, + "loss": 3.795, + "step": 7020 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015407699424755623, + "loss": 4.5971, + "step": 7025 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015403676736795526, + "loss": 4.1455, + "step": 7030 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015399654048835432, + "loss": 2.7245, + "step": 7035 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015395631360875337, + "loss": 3.3336, + "step": 7040 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015391608672915243, + "loss": 3.3736, + "step": 7045 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001538758598495515, + "loss": 2.5276, + "step": 7050 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015383563296995055, + "loss": 5.3309, + "step": 7055 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015379540609034958, + "loss": 4.858, + "step": 7060 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001537551792107486, + "loss": 4.5324, + "step": 7065 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015371495233114767, + "loss": 4.2461, + "step": 7070 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015367472545154673, + "loss": 3.8908, + "step": 7075 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015363449857194579, + "loss": 3.1788, + "step": 7080 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015359427169234484, + "loss": 3.3179, + "step": 7085 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001535540448127439, + "loss": 2.7894, + "step": 7090 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015351381793314293, + "loss": 2.9542, + "step": 7095 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015347359105354196, + "loss": 0.7846, + "step": 7100 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015343336417394102, + "loss": 5.3049, + "step": 7105 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015339313729434008, + "loss": 4.2867, + "step": 7110 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015335291041473914, + "loss": 4.4562, + "step": 7115 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001533126835351382, + "loss": 3.9077, + "step": 7120 + }, + { + "epoch": 0.54, + "learning_rate": 0.00015327245665553726, + "loss": 3.4854, + "step": 7125 + }, + { + "epoch": 0.54, + "learning_rate": 0.0001532322297759363, + "loss": 2.647, + "step": 7130 + }, + { + "epoch": 0.55, + "learning_rate": 0.00015319200289633532, + "loss": 2.2412, + "step": 7135 + }, + { + "epoch": 0.55, + "learning_rate": 0.00015315177601673438, + "loss": 2.5988, + "step": 7140 + }, + { + "epoch": 0.55, + "learning_rate": 0.00015311154913713343, + "loss": 2.807, + "step": 7145 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001530713222575325, + "loss": 3.1684, + "step": 7150 + }, + { + "epoch": 0.55, + "learning_rate": 0.00015303109537793155, + "loss": 4.0076, + "step": 7155 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001529908684983306, + "loss": 3.8545, + "step": 7160 + }, + { + "epoch": 0.55, + "learning_rate": 0.00015295064161872964, + "loss": 4.7838, + "step": 7165 + }, + { + "epoch": 0.55, + "learning_rate": 0.00015291041473912867, + "loss": 4.7896, + "step": 7170 + }, + { + "epoch": 0.55, + "learning_rate": 0.00015287018785952773, + "loss": 4.5879, + "step": 7175 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001528299609799268, + "loss": 4.1828, + "step": 7180 + }, + { + "epoch": 0.55, + "learning_rate": 0.00015278973410032585, + "loss": 3.6771, + "step": 7185 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001527495072207249, + "loss": 2.4002, + "step": 7190 + }, + { + "epoch": 0.55, + "learning_rate": 0.00015270928034112396, + "loss": 2.9123, + "step": 7195 + }, + { + "epoch": 0.55, + "learning_rate": 0.000152669053461523, + "loss": 1.4825, + "step": 7200 + }, + { + "epoch": 0.55, + "learning_rate": 0.00015262882658192205, + "loss": 4.3521, + "step": 7205 + }, + { + "epoch": 0.55, + "learning_rate": 0.00015258859970232108, + "loss": 4.76, + "step": 7210 + }, + { + "epoch": 0.55, + "learning_rate": 0.00015254837282272014, + "loss": 4.7199, + "step": 7215 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001525081459431192, + "loss": 3.7285, + "step": 7220 + }, + { + "epoch": 0.55, + "learning_rate": 0.00015246791906351826, + "loss": 4.2059, + "step": 7225 + }, + { + "epoch": 0.55, + "learning_rate": 0.00015242769218391732, + "loss": 2.4594, + "step": 7230 + }, + { + "epoch": 0.55, + "learning_rate": 0.00015238746530431635, + "loss": 1.7533, + "step": 7235 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001523472384247154, + "loss": 2.0827, + "step": 7240 + }, + { + "epoch": 0.55, + "learning_rate": 0.00015230701154511444, + "loss": 3.332, + "step": 7245 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001522667846655135, + "loss": 1.1347, + "step": 7250 + }, + { + "epoch": 0.55, + "learning_rate": 0.00015222655778591255, + "loss": 3.9932, + "step": 7255 + }, + { + "epoch": 0.55, + "learning_rate": 0.0001521863309063116, + "loss": 3.5039, + "step": 7260 + }, + { + "epoch": 0.56, + "learning_rate": 0.00015214610402671064, + "loss": 3.6082, + "step": 7265 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001521058771471097, + "loss": 2.8597, + "step": 7270 + }, + { + "epoch": 0.56, + "learning_rate": 0.00015206565026750876, + "loss": 3.6561, + "step": 7275 + }, + { + "epoch": 0.56, + "learning_rate": 0.00015202542338790782, + "loss": 1.5933, + "step": 7280 + }, + { + "epoch": 0.56, + "learning_rate": 0.00015198519650830685, + "loss": 2.386, + "step": 7285 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001519449696287059, + "loss": 2.4073, + "step": 7290 + }, + { + "epoch": 0.56, + "learning_rate": 0.00015190474274910497, + "loss": 2.3714, + "step": 7295 + }, + { + "epoch": 0.56, + "learning_rate": 0.000151864515869504, + "loss": 1.211, + "step": 7300 + }, + { + "epoch": 0.56, + "learning_rate": 0.00015182428898990305, + "loss": 3.9281, + "step": 7305 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001517840621103021, + "loss": 4.6617, + "step": 7310 + }, + { + "epoch": 0.56, + "learning_rate": 0.00015174383523070117, + "loss": 4.624, + "step": 7315 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001517036083511002, + "loss": 4.2254, + "step": 7320 + }, + { + "epoch": 0.56, + "learning_rate": 0.00015166338147149926, + "loss": 3.4564, + "step": 7325 + }, + { + "epoch": 0.56, + "learning_rate": 0.00015162315459189832, + "loss": 3.0671, + "step": 7330 + }, + { + "epoch": 0.56, + "learning_rate": 0.00015158292771229735, + "loss": 2.1117, + "step": 7335 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001515427008326964, + "loss": 2.8584, + "step": 7340 + }, + { + "epoch": 0.56, + "learning_rate": 0.00015150247395309547, + "loss": 2.352, + "step": 7345 + }, + { + "epoch": 0.56, + "learning_rate": 0.00015146224707349452, + "loss": 1.9422, + "step": 7350 + }, + { + "epoch": 0.56, + "learning_rate": 0.00015142202019389358, + "loss": 4.873, + "step": 7355 + }, + { + "epoch": 0.56, + "learning_rate": 0.00015138179331429261, + "loss": 4.7299, + "step": 7360 + }, + { + "epoch": 0.56, + "learning_rate": 0.00015134156643469167, + "loss": 3.7564, + "step": 7365 + }, + { + "epoch": 0.56, + "learning_rate": 0.0001513013395550907, + "loss": 5.5928, + "step": 7370 + }, + { + "epoch": 0.56, + "learning_rate": 0.00015126111267548976, + "loss": 4.1801, + "step": 7375 + }, + { + "epoch": 0.56, + "learning_rate": 0.00015122088579588882, + "loss": 3.195, + "step": 7380 + }, + { + "epoch": 0.56, + "learning_rate": 0.00015118065891628788, + "loss": 2.6585, + "step": 7385 + }, + { + "epoch": 0.56, + "learning_rate": 0.00015114043203668694, + "loss": 2.2449, + "step": 7390 + }, + { + "epoch": 0.57, + "learning_rate": 0.00015110020515708597, + "loss": 3.2964, + "step": 7395 + }, + { + "epoch": 0.57, + "learning_rate": 0.00015105997827748503, + "loss": 1.8592, + "step": 7400 + }, + { + "epoch": 0.57, + "learning_rate": 0.00015101975139788406, + "loss": 5.1701, + "step": 7405 + }, + { + "epoch": 0.57, + "learning_rate": 0.00015097952451828312, + "loss": 3.9732, + "step": 7410 + }, + { + "epoch": 0.57, + "learning_rate": 0.00015093929763868217, + "loss": 3.7973, + "step": 7415 + }, + { + "epoch": 0.57, + "learning_rate": 0.00015089907075908123, + "loss": 4.9865, + "step": 7420 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001508588438794803, + "loss": 3.4799, + "step": 7425 + }, + { + "epoch": 0.57, + "learning_rate": 0.00015081861699987935, + "loss": 2.9448, + "step": 7430 + }, + { + "epoch": 0.57, + "learning_rate": 0.00015077839012027838, + "loss": 2.2613, + "step": 7435 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001507381632406774, + "loss": 3.6979, + "step": 7440 + }, + { + "epoch": 0.57, + "learning_rate": 0.00015069793636107647, + "loss": 1.1705, + "step": 7445 + }, + { + "epoch": 0.57, + "learning_rate": 0.00015065770948147553, + "loss": 1.0137, + "step": 7450 + }, + { + "epoch": 0.57, + "learning_rate": 0.00015061748260187459, + "loss": 4.4939, + "step": 7455 + }, + { + "epoch": 0.57, + "learning_rate": 0.00015057725572227364, + "loss": 4.7318, + "step": 7460 + }, + { + "epoch": 0.57, + "learning_rate": 0.0001505370288426727, + "loss": 4.4439, + "step": 7465 + }, + { + "epoch": 0.57, + "learning_rate": 0.00015049680196307173, + "loss": 3.9766, + "step": 7470 + }, + { + "epoch": 0.57, + "learning_rate": 0.00015045657508347076, + "loss": 4.8434, + "step": 7475 + }, + { + "epoch": 0.57, + "learning_rate": 0.00015041634820386982, + "loss": 3.7546, + "step": 7480 + }, + { + "epoch": 0.57, + "learning_rate": 0.00015037612132426888, + "loss": 4.257, + "step": 7485 + }, + { + "epoch": 0.57, + "learning_rate": 0.00015033589444466794, + "loss": 2.0724, + "step": 7490 + }, + { + "epoch": 0.57, + "learning_rate": 0.000150295667565067, + "loss": 2.3552, + "step": 7495 + }, + { + "epoch": 0.57, + "learning_rate": 0.00015025544068546606, + "loss": 2.8831, + "step": 7500 + }, + { + "epoch": 0.57, + "learning_rate": 0.00015021521380586509, + "loss": 4.2111, + "step": 7505 + }, + { + "epoch": 0.57, + "learning_rate": 0.00015017498692626412, + "loss": 3.5881, + "step": 7510 + }, + { + "epoch": 0.57, + "learning_rate": 0.00015013476004666318, + "loss": 3.7628, + "step": 7515 + }, + { + "epoch": 0.57, + "learning_rate": 0.00015009453316706223, + "loss": 4.3022, + "step": 7520 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001500543062874613, + "loss": 3.9914, + "step": 7525 + }, + { + "epoch": 0.58, + "learning_rate": 0.00015001407940786035, + "loss": 3.0919, + "step": 7530 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001499738525282594, + "loss": 3.287, + "step": 7535 + }, + { + "epoch": 0.58, + "learning_rate": 0.00014993362564865844, + "loss": 3.2431, + "step": 7540 + }, + { + "epoch": 0.58, + "learning_rate": 0.00014989339876905747, + "loss": 1.7876, + "step": 7545 + }, + { + "epoch": 0.58, + "learning_rate": 0.00014985317188945653, + "loss": 3.392, + "step": 7550 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001498129450098556, + "loss": 4.3033, + "step": 7555 + }, + { + "epoch": 0.58, + "learning_rate": 0.00014977271813025465, + "loss": 4.7418, + "step": 7560 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001497324912506537, + "loss": 3.6264, + "step": 7565 + }, + { + "epoch": 0.58, + "learning_rate": 0.00014969226437105273, + "loss": 3.9139, + "step": 7570 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001496520374914518, + "loss": 4.6238, + "step": 7575 + }, + { + "epoch": 0.58, + "learning_rate": 0.00014961181061185085, + "loss": 3.354, + "step": 7580 + }, + { + "epoch": 0.58, + "learning_rate": 0.00014957158373224988, + "loss": 2.212, + "step": 7585 + }, + { + "epoch": 0.58, + "learning_rate": 0.00014953135685264894, + "loss": 2.9092, + "step": 7590 + }, + { + "epoch": 0.58, + "learning_rate": 0.000149491129973048, + "loss": 1.8592, + "step": 7595 + }, + { + "epoch": 0.58, + "learning_rate": 0.00014945090309344706, + "loss": 0.1596, + "step": 7600 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001494106762138461, + "loss": 3.8275, + "step": 7605 + }, + { + "epoch": 0.58, + "learning_rate": 0.00014937044933424515, + "loss": 3.526, + "step": 7610 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001493302224546442, + "loss": 4.7551, + "step": 7615 + }, + { + "epoch": 0.58, + "learning_rate": 0.00014928999557504324, + "loss": 2.7891, + "step": 7620 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001492497686954423, + "loss": 3.836, + "step": 7625 + }, + { + "epoch": 0.58, + "learning_rate": 0.00014920954181584135, + "loss": 4.1405, + "step": 7630 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001491693149362404, + "loss": 3.3979, + "step": 7635 + }, + { + "epoch": 0.58, + "learning_rate": 0.00014912908805663944, + "loss": 2.051, + "step": 7640 + }, + { + "epoch": 0.58, + "learning_rate": 0.0001490888611770385, + "loss": 2.0498, + "step": 7645 + }, + { + "epoch": 0.58, + "learning_rate": 0.00014904863429743756, + "loss": 1.2102, + "step": 7650 + }, + { + "epoch": 0.59, + "learning_rate": 0.00014900840741783662, + "loss": 4.4957, + "step": 7655 + }, + { + "epoch": 0.59, + "learning_rate": 0.00014896818053823565, + "loss": 4.0664, + "step": 7660 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001489279536586347, + "loss": 4.7891, + "step": 7665 + }, + { + "epoch": 0.59, + "learning_rate": 0.00014888772677903376, + "loss": 2.6803, + "step": 7670 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001488474998994328, + "loss": 3.4743, + "step": 7675 + }, + { + "epoch": 0.59, + "learning_rate": 0.00014880727301983185, + "loss": 3.2174, + "step": 7680 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001487670461402309, + "loss": 3.109, + "step": 7685 + }, + { + "epoch": 0.59, + "learning_rate": 0.00014872681926062997, + "loss": 1.0869, + "step": 7690 + }, + { + "epoch": 0.59, + "learning_rate": 0.00014868659238102903, + "loss": 1.1583, + "step": 7695 + }, + { + "epoch": 0.59, + "learning_rate": 0.00014864636550142806, + "loss": 3.8975, + "step": 7700 + }, + { + "epoch": 0.59, + "learning_rate": 0.00014860613862182712, + "loss": 5.1785, + "step": 7705 + }, + { + "epoch": 0.59, + "learning_rate": 0.00014856591174222615, + "loss": 3.8462, + "step": 7710 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001485256848626252, + "loss": 3.6188, + "step": 7715 + }, + { + "epoch": 0.59, + "learning_rate": 0.00014848545798302427, + "loss": 3.4301, + "step": 7720 + }, + { + "epoch": 0.59, + "learning_rate": 0.00014844523110342332, + "loss": 4.3648, + "step": 7725 + }, + { + "epoch": 0.59, + "learning_rate": 0.00014840500422382238, + "loss": 1.6885, + "step": 7730 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001483647773442214, + "loss": 2.2321, + "step": 7735 + }, + { + "epoch": 0.59, + "learning_rate": 0.00014832455046462047, + "loss": 2.4661, + "step": 7740 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001482843235850195, + "loss": 3.3984, + "step": 7745 + }, + { + "epoch": 0.59, + "learning_rate": 0.00014824409670541856, + "loss": 1.2363, + "step": 7750 + }, + { + "epoch": 0.59, + "learning_rate": 0.00014820386982581762, + "loss": 5.4285, + "step": 7755 + }, + { + "epoch": 0.59, + "learning_rate": 0.00014816364294621668, + "loss": 3.917, + "step": 7760 + }, + { + "epoch": 0.59, + "learning_rate": 0.00014812341606661574, + "loss": 4.5963, + "step": 7765 + }, + { + "epoch": 0.59, + "learning_rate": 0.0001480831891870148, + "loss": 3.875, + "step": 7770 + }, + { + "epoch": 0.59, + "learning_rate": 0.00014804296230741382, + "loss": 3.8596, + "step": 7775 + }, + { + "epoch": 0.59, + "learning_rate": 0.00014800273542781286, + "loss": 3.2287, + "step": 7780 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014796250854821191, + "loss": 3.3803, + "step": 7785 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014792228166861097, + "loss": 2.5698, + "step": 7790 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014788205478901003, + "loss": 1.9961, + "step": 7795 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001478418279094091, + "loss": 1.7948, + "step": 7800 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014780160102980815, + "loss": 4.4584, + "step": 7805 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014776137415020718, + "loss": 4.4805, + "step": 7810 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001477211472706062, + "loss": 4.1227, + "step": 7815 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014768092039100527, + "loss": 4.8541, + "step": 7820 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014764069351140433, + "loss": 3.4299, + "step": 7825 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014760046663180338, + "loss": 4.4625, + "step": 7830 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014756023975220244, + "loss": 2.7753, + "step": 7835 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001475200128726015, + "loss": 2.1818, + "step": 7840 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014747978599300053, + "loss": 0.9003, + "step": 7845 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014743955911339956, + "loss": 2.6693, + "step": 7850 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014739933223379862, + "loss": 4.8398, + "step": 7855 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014735910535419768, + "loss": 4.6248, + "step": 7860 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014731887847459674, + "loss": 4.7467, + "step": 7865 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001472786515949958, + "loss": 4.7848, + "step": 7870 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014723842471539483, + "loss": 5.0921, + "step": 7875 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014719819783579389, + "loss": 3.9907, + "step": 7880 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014715797095619292, + "loss": 4.1201, + "step": 7885 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014711774407659197, + "loss": 1.9702, + "step": 7890 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014707751719699103, + "loss": 2.9985, + "step": 7895 + }, + { + "epoch": 0.6, + "learning_rate": 0.0001470372903173901, + "loss": 1.4111, + "step": 7900 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014699706343778915, + "loss": 4.8258, + "step": 7905 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014695683655818818, + "loss": 4.6049, + "step": 7910 + }, + { + "epoch": 0.6, + "learning_rate": 0.00014691660967858724, + "loss": 4.159, + "step": 7915 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001468763827989863, + "loss": 3.5428, + "step": 7920 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014683615591938533, + "loss": 3.757, + "step": 7925 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014679592903978439, + "loss": 3.7508, + "step": 7930 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014675570216018344, + "loss": 3.3074, + "step": 7935 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001467154752805825, + "loss": 3.104, + "step": 7940 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014667524840098153, + "loss": 2.4619, + "step": 7945 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001466350215213806, + "loss": 2.2569, + "step": 7950 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014659479464177965, + "loss": 3.9715, + "step": 7955 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014655456776217868, + "loss": 3.9365, + "step": 7960 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014651434088257774, + "loss": 4.6123, + "step": 7965 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001464741140029768, + "loss": 4.1201, + "step": 7970 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014643388712337586, + "loss": 2.651, + "step": 7975 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001463936602437749, + "loss": 3.2354, + "step": 7980 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014635343336417395, + "loss": 2.7716, + "step": 7985 + }, + { + "epoch": 0.61, + "learning_rate": 0.000146313206484573, + "loss": 1.5226, + "step": 7990 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014627297960497206, + "loss": 1.8045, + "step": 7995 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001462327527253711, + "loss": 2.6607, + "step": 8000 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014619252584577015, + "loss": 5.0256, + "step": 8005 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001461522989661692, + "loss": 4.3643, + "step": 8010 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014611207208656824, + "loss": 4.3105, + "step": 8015 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001460718452069673, + "loss": 4.2754, + "step": 8020 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014603161832736636, + "loss": 4.1685, + "step": 8025 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014599139144776542, + "loss": 4.0061, + "step": 8030 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014595116456816445, + "loss": 2.8045, + "step": 8035 + }, + { + "epoch": 0.61, + "learning_rate": 0.0001459109376885635, + "loss": 3.6893, + "step": 8040 + }, + { + "epoch": 0.61, + "learning_rate": 0.00014587071080896256, + "loss": 3.1893, + "step": 8045 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001458304839293616, + "loss": 2.4559, + "step": 8050 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014579025704976065, + "loss": 4.7047, + "step": 8055 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001457500301701597, + "loss": 5.4594, + "step": 8060 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014570980329055877, + "loss": 3.6199, + "step": 8065 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014566957641095783, + "loss": 3.1002, + "step": 8070 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014562934953135686, + "loss": 3.0232, + "step": 8075 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014558912265175592, + "loss": 4.4474, + "step": 8080 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014554889577215495, + "loss": 2.7882, + "step": 8085 + }, + { + "epoch": 0.62, + "learning_rate": 0.000145508668892554, + "loss": 2.3826, + "step": 8090 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014546844201295306, + "loss": 1.9198, + "step": 8095 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014542821513335212, + "loss": 1.3647, + "step": 8100 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014538798825375118, + "loss": 5.1926, + "step": 8105 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001453477613741502, + "loss": 3.6435, + "step": 8110 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014530753449454927, + "loss": 4.091, + "step": 8115 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001452673076149483, + "loss": 4.3703, + "step": 8120 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014522708073534736, + "loss": 4.308, + "step": 8125 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014518685385574642, + "loss": 2.2132, + "step": 8130 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014514662697614548, + "loss": 2.4709, + "step": 8135 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014510640009654453, + "loss": 2.6757, + "step": 8140 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001450661732169436, + "loss": 2.0093, + "step": 8145 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014502594633734262, + "loss": 1.4906, + "step": 8150 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014498571945774166, + "loss": 4.9254, + "step": 8155 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001449454925781407, + "loss": 4.8832, + "step": 8160 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014490526569853977, + "loss": 4.3543, + "step": 8165 + }, + { + "epoch": 0.62, + "learning_rate": 0.00014486503881893883, + "loss": 4.5822, + "step": 8170 + }, + { + "epoch": 0.62, + "learning_rate": 0.0001448248119393379, + "loss": 3.6982, + "step": 8175 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014478458505973692, + "loss": 2.7762, + "step": 8180 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014474435818013598, + "loss": 3.1775, + "step": 8185 + }, + { + "epoch": 0.63, + "learning_rate": 0.000144704131300535, + "loss": 1.7506, + "step": 8190 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014466390442093407, + "loss": 2.6996, + "step": 8195 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014462367754133312, + "loss": 1.8007, + "step": 8200 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014458345066173218, + "loss": 5.0187, + "step": 8205 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014454322378213124, + "loss": 3.435, + "step": 8210 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014450299690253027, + "loss": 3.5404, + "step": 8215 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014446277002292933, + "loss": 5.1195, + "step": 8220 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014442254314332836, + "loss": 3.7687, + "step": 8225 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014438231626372742, + "loss": 4.91, + "step": 8230 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014434208938412648, + "loss": 2.3242, + "step": 8235 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014430186250452554, + "loss": 1.712, + "step": 8240 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001442616356249246, + "loss": 1.66, + "step": 8245 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014422140874532363, + "loss": 1.6371, + "step": 8250 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014418118186572268, + "loss": 4.1268, + "step": 8255 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014414095498612172, + "loss": 4.1861, + "step": 8260 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014410072810652077, + "loss": 3.8643, + "step": 8265 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014406050122691983, + "loss": 3.8318, + "step": 8270 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001440202743473189, + "loss": 3.3503, + "step": 8275 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014398004746771795, + "loss": 3.3456, + "step": 8280 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014393982058811698, + "loss": 4.1158, + "step": 8285 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014389959370851604, + "loss": 3.0195, + "step": 8290 + }, + { + "epoch": 0.63, + "learning_rate": 0.0001438593668289151, + "loss": 0.8465, + "step": 8295 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014381913994931413, + "loss": 1.5685, + "step": 8300 + }, + { + "epoch": 0.63, + "learning_rate": 0.00014377891306971319, + "loss": 4.1578, + "step": 8305 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014373868619011224, + "loss": 4.6133, + "step": 8310 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001436984593105113, + "loss": 5.6063, + "step": 8315 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014365823243091033, + "loss": 4.8566, + "step": 8320 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001436180055513094, + "loss": 3.8412, + "step": 8325 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014357777867170845, + "loss": 2.9457, + "step": 8330 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014353755179210748, + "loss": 3.2986, + "step": 8335 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014349732491250654, + "loss": 3.7619, + "step": 8340 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001434570980329056, + "loss": 2.0469, + "step": 8345 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014341687115330466, + "loss": 1.2048, + "step": 8350 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001433766442737037, + "loss": 4.6137, + "step": 8355 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014333641739410274, + "loss": 4.2262, + "step": 8360 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001432961905145018, + "loss": 3.5321, + "step": 8365 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014325596363490086, + "loss": 3.1123, + "step": 8370 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001432157367552999, + "loss": 4.0621, + "step": 8375 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014317550987569895, + "loss": 3.2757, + "step": 8380 + }, + { + "epoch": 0.64, + "learning_rate": 0.000143135282996098, + "loss": 3.4299, + "step": 8385 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014309505611649704, + "loss": 2.9666, + "step": 8390 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001430548292368961, + "loss": 3.7125, + "step": 8395 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014301460235729516, + "loss": 3.3482, + "step": 8400 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014297437547769421, + "loss": 3.2589, + "step": 8405 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014293414859809325, + "loss": 4.1152, + "step": 8410 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001428939217184923, + "loss": 4.1553, + "step": 8415 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014285369483889136, + "loss": 2.563, + "step": 8420 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001428134679592904, + "loss": 3.1831, + "step": 8425 + }, + { + "epoch": 0.64, + "learning_rate": 0.00014277324107968945, + "loss": 3.0423, + "step": 8430 + }, + { + "epoch": 0.64, + "learning_rate": 0.0001427330142000885, + "loss": 2.415, + "step": 8435 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014269278732048757, + "loss": 3.3074, + "step": 8440 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014265256044088663, + "loss": 2.9256, + "step": 8445 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014261233356128566, + "loss": 0.7464, + "step": 8450 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014257210668168472, + "loss": 3.7371, + "step": 8455 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014253187980208375, + "loss": 3.3926, + "step": 8460 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001424916529224828, + "loss": 3.8283, + "step": 8465 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014245142604288186, + "loss": 2.7588, + "step": 8470 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014241119916328092, + "loss": 3.9244, + "step": 8475 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014237097228367998, + "loss": 3.9608, + "step": 8480 + }, + { + "epoch": 0.65, + "learning_rate": 0.000142330745404079, + "loss": 3.4815, + "step": 8485 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014229051852447804, + "loss": 2.2937, + "step": 8490 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001422502916448771, + "loss": 1.4673, + "step": 8495 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014221811014119635, + "loss": 2.0945, + "step": 8500 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001421778832615954, + "loss": 5.1734, + "step": 8505 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014213765638199446, + "loss": 4.7025, + "step": 8510 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014209742950239352, + "loss": 3.0898, + "step": 8515 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014205720262279255, + "loss": 3.6896, + "step": 8520 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001420169757431916, + "loss": 3.1475, + "step": 8525 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014197674886359064, + "loss": 1.9613, + "step": 8530 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001419365219839897, + "loss": 2.6826, + "step": 8535 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014189629510438876, + "loss": 1.0485, + "step": 8540 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014185606822478782, + "loss": 2.8809, + "step": 8545 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014181584134518685, + "loss": 0.0693, + "step": 8550 + }, + { + "epoch": 0.65, + "learning_rate": 0.0001417756144655859, + "loss": 4.4512, + "step": 8555 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014173538758598496, + "loss": 4.483, + "step": 8560 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014169516070638402, + "loss": 4.1109, + "step": 8565 + }, + { + "epoch": 0.65, + "learning_rate": 0.00014165493382678305, + "loss": 3.7912, + "step": 8570 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001416147069471821, + "loss": 4.1405, + "step": 8575 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014157448006758117, + "loss": 2.772, + "step": 8580 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001415342531879802, + "loss": 2.3251, + "step": 8585 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014149402630837926, + "loss": 3.733, + "step": 8590 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014145379942877832, + "loss": 1.8271, + "step": 8595 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014141357254917738, + "loss": 1.507, + "step": 8600 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001413733456695764, + "loss": 5.0523, + "step": 8605 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014133311878997547, + "loss": 4.0752, + "step": 8610 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014129289191037452, + "loss": 4.1002, + "step": 8615 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014125266503077356, + "loss": 3.0717, + "step": 8620 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001412124381511726, + "loss": 2.2346, + "step": 8625 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014117221127157167, + "loss": 2.3687, + "step": 8630 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014113198439197073, + "loss": 2.5631, + "step": 8635 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001410917575123698, + "loss": 1.2662, + "step": 8640 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014105153063276882, + "loss": 3.2849, + "step": 8645 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014101130375316788, + "loss": 1.8528, + "step": 8650 + }, + { + "epoch": 0.66, + "learning_rate": 0.0001409710768735669, + "loss": 4.3492, + "step": 8655 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014093084999396597, + "loss": 4.6064, + "step": 8660 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014089062311436503, + "loss": 4.0658, + "step": 8665 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014085039623476408, + "loss": 3.9941, + "step": 8670 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014081016935516314, + "loss": 3.4873, + "step": 8675 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014076994247556217, + "loss": 1.615, + "step": 8680 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014072971559596123, + "loss": 1.9473, + "step": 8685 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014068948871636026, + "loss": 3.5422, + "step": 8690 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014064926183675932, + "loss": 1.6255, + "step": 8695 + }, + { + "epoch": 0.66, + "learning_rate": 0.00014060903495715838, + "loss": 0.6512, + "step": 8700 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014056880807755744, + "loss": 4.1771, + "step": 8705 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001405285811979565, + "loss": 4.2812, + "step": 8710 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014048835431835555, + "loss": 3.3185, + "step": 8715 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014044812743875458, + "loss": 3.8672, + "step": 8720 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014040790055915362, + "loss": 4.4287, + "step": 8725 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014036767367955267, + "loss": 3.4561, + "step": 8730 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014032744679995173, + "loss": 3.5, + "step": 8735 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001402872199203508, + "loss": 2.2681, + "step": 8740 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014024699304074985, + "loss": 1.6788, + "step": 8745 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001402067661611489, + "loss": 4.0663, + "step": 8750 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014016653928154794, + "loss": 4.5193, + "step": 8755 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014012631240194697, + "loss": 4.5533, + "step": 8760 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014008608552234603, + "loss": 3.6523, + "step": 8765 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014004585864274509, + "loss": 4.5703, + "step": 8770 + }, + { + "epoch": 0.67, + "learning_rate": 0.00014000563176314414, + "loss": 4.8623, + "step": 8775 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001399654048835432, + "loss": 3.0412, + "step": 8780 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013992517800394226, + "loss": 2.1579, + "step": 8785 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001398849511243413, + "loss": 3.1091, + "step": 8790 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013984472424474032, + "loss": 2.0995, + "step": 8795 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013980449736513938, + "loss": 2.2463, + "step": 8800 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013976427048553844, + "loss": 5.2713, + "step": 8805 + }, + { + "epoch": 0.67, + "learning_rate": 0.0001397240436059375, + "loss": 3.9492, + "step": 8810 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013968381672633656, + "loss": 3.9627, + "step": 8815 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013964358984673561, + "loss": 4.1762, + "step": 8820 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013960336296713465, + "loss": 3.9076, + "step": 8825 + }, + { + "epoch": 0.67, + "learning_rate": 0.00013956313608753368, + "loss": 2.9175, + "step": 8830 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013952290920793273, + "loss": 1.738, + "step": 8835 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001394826823283318, + "loss": 1.9772, + "step": 8840 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013944245544873085, + "loss": 1.0877, + "step": 8845 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001394022285691299, + "loss": 1.9826, + "step": 8850 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013936200168952894, + "loss": 5.7988, + "step": 8855 + }, + { + "epoch": 0.68, + "learning_rate": 0.000139321774809928, + "loss": 5.0049, + "step": 8860 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013928154793032706, + "loss": 4.1568, + "step": 8865 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001392413210507261, + "loss": 3.3445, + "step": 8870 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013920109417112515, + "loss": 4.5633, + "step": 8875 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001391608672915242, + "loss": 2.7033, + "step": 8880 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013912064041192326, + "loss": 3.8346, + "step": 8885 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001390804135323223, + "loss": 2.3505, + "step": 8890 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013904018665272135, + "loss": 1.7089, + "step": 8895 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001389999597731204, + "loss": 1.9751, + "step": 8900 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013895973289351944, + "loss": 3.9482, + "step": 8905 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001389195060139185, + "loss": 5.2367, + "step": 8910 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013887927913431756, + "loss": 4.2158, + "step": 8915 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013883905225471662, + "loss": 4.0254, + "step": 8920 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013879882537511565, + "loss": 4.4863, + "step": 8925 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001387585984955147, + "loss": 3.8158, + "step": 8930 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013871837161591376, + "loss": 2.6618, + "step": 8935 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013867814473631282, + "loss": 2.6897, + "step": 8940 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013863791785671185, + "loss": 1.2268, + "step": 8945 + }, + { + "epoch": 0.68, + "learning_rate": 0.0001385976909771109, + "loss": 2.3218, + "step": 8950 + }, + { + "epoch": 0.68, + "learning_rate": 0.00013855746409750997, + "loss": 3.4869, + "step": 8955 + }, + { + "epoch": 0.68, + "learning_rate": 0.000138517237217909, + "loss": 4.4197, + "step": 8960 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013847701033830806, + "loss": 3.4383, + "step": 8965 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013843678345870712, + "loss": 3.1334, + "step": 8970 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013839655657910618, + "loss": 3.8105, + "step": 8975 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001383563296995052, + "loss": 2.6163, + "step": 8980 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013831610281990426, + "loss": 2.1495, + "step": 8985 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013827587594030332, + "loss": 3.512, + "step": 8990 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013823564906070235, + "loss": 4.8705, + "step": 8995 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001381954221811014, + "loss": 1.2528, + "step": 9000 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013815519530150047, + "loss": 4.6674, + "step": 9005 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013811496842189953, + "loss": 3.8508, + "step": 9010 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001380747415422986, + "loss": 3.9963, + "step": 9015 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013803451466269762, + "loss": 4.5911, + "step": 9020 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013799428778309668, + "loss": 4.2135, + "step": 9025 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001379540609034957, + "loss": 3.3, + "step": 9030 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013791383402389477, + "loss": 3.1516, + "step": 9035 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013787360714429382, + "loss": 2.2726, + "step": 9040 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013783338026469288, + "loss": 1.3781, + "step": 9045 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013779315338509194, + "loss": 1.5878, + "step": 9050 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013775292650549097, + "loss": 3.8682, + "step": 9055 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013771269962589003, + "loss": 5.8621, + "step": 9060 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013767247274628906, + "loss": 4.1668, + "step": 9065 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013763224586668812, + "loss": 3.3687, + "step": 9070 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013759201898708718, + "loss": 3.5014, + "step": 9075 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013755179210748624, + "loss": 2.693, + "step": 9080 + }, + { + "epoch": 0.69, + "learning_rate": 0.0001375115652278853, + "loss": 3.4979, + "step": 9085 + }, + { + "epoch": 0.69, + "learning_rate": 0.00013747133834828435, + "loss": 2.3423, + "step": 9090 + }, + { + "epoch": 0.7, + "learning_rate": 0.00013743111146868338, + "loss": 3.0961, + "step": 9095 + }, + { + "epoch": 0.7, + "learning_rate": 0.00013739088458908241, + "loss": 2.2662, + "step": 9100 + }, + { + "epoch": 0.7, + "learning_rate": 0.00013735065770948147, + "loss": 4.8318, + "step": 9105 + }, + { + "epoch": 0.7, + "learning_rate": 0.00013731043082988053, + "loss": 4.9508, + "step": 9110 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001372702039502796, + "loss": 3.5961, + "step": 9115 + }, + { + "epoch": 0.7, + "learning_rate": 0.00013722997707067865, + "loss": 4.1072, + "step": 9120 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001371897501910777, + "loss": 3.7145, + "step": 9125 + }, + { + "epoch": 0.7, + "learning_rate": 0.00013714952331147674, + "loss": 3.7985, + "step": 9130 + }, + { + "epoch": 0.7, + "learning_rate": 0.00013710929643187577, + "loss": 3.2468, + "step": 9135 + }, + { + "epoch": 0.7, + "learning_rate": 0.00013706906955227483, + "loss": 2.0802, + "step": 9140 + }, + { + "epoch": 0.7, + "learning_rate": 0.00013702884267267388, + "loss": 1.4174, + "step": 9145 + }, + { + "epoch": 0.7, + "learning_rate": 0.00013698861579307294, + "loss": 0.3039, + "step": 9150 + }, + { + "epoch": 0.7, + "learning_rate": 0.000136948388913472, + "loss": 4.1092, + "step": 9155 + }, + { + "epoch": 0.7, + "learning_rate": 0.00013690816203387103, + "loss": 4.8393, + "step": 9160 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001368679351542701, + "loss": 4.3215, + "step": 9165 + }, + { + "epoch": 0.7, + "learning_rate": 0.00013682770827466912, + "loss": 4.8441, + "step": 9170 + }, + { + "epoch": 0.7, + "learning_rate": 0.00013678748139506818, + "loss": 3.8361, + "step": 9175 + }, + { + "epoch": 0.7, + "learning_rate": 0.00013674725451546724, + "loss": 3.663, + "step": 9180 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001367070276358663, + "loss": 3.2722, + "step": 9185 + }, + { + "epoch": 0.7, + "learning_rate": 0.00013666680075626535, + "loss": 3.3886, + "step": 9190 + }, + { + "epoch": 0.7, + "learning_rate": 0.00013662657387666439, + "loss": 3.5016, + "step": 9195 + }, + { + "epoch": 0.7, + "learning_rate": 0.00013658634699706344, + "loss": 2.5376, + "step": 9200 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001365461201174625, + "loss": 4.8418, + "step": 9205 + }, + { + "epoch": 0.7, + "learning_rate": 0.00013650589323786153, + "loss": 4.2836, + "step": 9210 + }, + { + "epoch": 0.7, + "learning_rate": 0.0001364656663582606, + "loss": 4.0514, + "step": 9215 + }, + { + "epoch": 0.7, + "learning_rate": 0.00013642543947865965, + "loss": 3.6088, + "step": 9220 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001363852125990587, + "loss": 3.5259, + "step": 9225 + }, + { + "epoch": 0.71, + "learning_rate": 0.00013634498571945774, + "loss": 3.9555, + "step": 9230 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001363047588398568, + "loss": 2.5013, + "step": 9235 + }, + { + "epoch": 0.71, + "learning_rate": 0.00013626453196025586, + "loss": 1.8686, + "step": 9240 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001362243050806549, + "loss": 2.7858, + "step": 9245 + }, + { + "epoch": 0.71, + "learning_rate": 0.00013618407820105395, + "loss": 2.5493, + "step": 9250 + }, + { + "epoch": 0.71, + "learning_rate": 0.000136143851321453, + "loss": 4.5645, + "step": 9255 + }, + { + "epoch": 0.71, + "learning_rate": 0.00013610362444185206, + "loss": 4.4248, + "step": 9260 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001360633975622511, + "loss": 4.1404, + "step": 9265 + }, + { + "epoch": 0.71, + "learning_rate": 0.00013602317068265015, + "loss": 4.19, + "step": 9270 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001359829438030492, + "loss": 3.1199, + "step": 9275 + }, + { + "epoch": 0.71, + "learning_rate": 0.00013594271692344827, + "loss": 3.3325, + "step": 9280 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001359024900438473, + "loss": 1.8506, + "step": 9285 + }, + { + "epoch": 0.71, + "learning_rate": 0.00013586226316424636, + "loss": 2.0855, + "step": 9290 + }, + { + "epoch": 0.71, + "learning_rate": 0.00013582203628464542, + "loss": 2.7468, + "step": 9295 + }, + { + "epoch": 0.71, + "learning_rate": 0.00013578180940504445, + "loss": 3.2028, + "step": 9300 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001357415825254435, + "loss": 4.6888, + "step": 9305 + }, + { + "epoch": 0.71, + "learning_rate": 0.00013570135564584256, + "loss": 4.9221, + "step": 9310 + }, + { + "epoch": 0.71, + "learning_rate": 0.00013566112876624162, + "loss": 4.0654, + "step": 9315 + }, + { + "epoch": 0.71, + "learning_rate": 0.00013562090188664065, + "loss": 3.5768, + "step": 9320 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001355806750070397, + "loss": 4.2205, + "step": 9325 + }, + { + "epoch": 0.71, + "learning_rate": 0.00013554044812743877, + "loss": 2.9903, + "step": 9330 + }, + { + "epoch": 0.71, + "learning_rate": 0.0001355002212478378, + "loss": 3.491, + "step": 9335 + }, + { + "epoch": 0.71, + "learning_rate": 0.00013545999436823686, + "loss": 2.4792, + "step": 9340 + }, + { + "epoch": 0.71, + "learning_rate": 0.00013541976748863592, + "loss": 0.8765, + "step": 9345 + }, + { + "epoch": 0.71, + "learning_rate": 0.00013537954060903497, + "loss": 1.7416, + "step": 9350 + }, + { + "epoch": 0.71, + "learning_rate": 0.00013533931372943403, + "loss": 4.7154, + "step": 9355 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013529908684983306, + "loss": 3.5176, + "step": 9360 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013525885997023212, + "loss": 3.8848, + "step": 9365 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013521863309063115, + "loss": 2.9452, + "step": 9370 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001351784062110302, + "loss": 3.007, + "step": 9375 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013513817933142927, + "loss": 3.152, + "step": 9380 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013509795245182833, + "loss": 3.2887, + "step": 9385 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013505772557222739, + "loss": 2.8927, + "step": 9390 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013501749869262642, + "loss": 2.6005, + "step": 9395 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013497727181302548, + "loss": 3.1346, + "step": 9400 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001349370449334245, + "loss": 4.4902, + "step": 9405 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013489681805382357, + "loss": 4.3199, + "step": 9410 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013485659117422262, + "loss": 4.7805, + "step": 9415 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013481636429462168, + "loss": 3.8885, + "step": 9420 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013477613741502074, + "loss": 2.4693, + "step": 9425 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001347359105354198, + "loss": 2.7459, + "step": 9430 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013469568365581883, + "loss": 3.4011, + "step": 9435 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013465545677621786, + "loss": 3.0815, + "step": 9440 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013461522989661692, + "loss": 1.5173, + "step": 9445 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013457500301701598, + "loss": 2.4051, + "step": 9450 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013453477613741504, + "loss": 4.7314, + "step": 9455 + }, + { + "epoch": 0.72, + "learning_rate": 0.0001344945492578141, + "loss": 4.0549, + "step": 9460 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013445432237821312, + "loss": 3.774, + "step": 9465 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013441409549861218, + "loss": 3.5994, + "step": 9470 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013437386861901121, + "loss": 3.5275, + "step": 9475 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013433364173941027, + "loss": 3.8396, + "step": 9480 + }, + { + "epoch": 0.72, + "learning_rate": 0.00013429341485980933, + "loss": 2.5812, + "step": 9485 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001342531879802084, + "loss": 3.3454, + "step": 9490 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013421296110060745, + "loss": 1.5726, + "step": 9495 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013417273422100648, + "loss": 0.6411, + "step": 9500 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013413250734140554, + "loss": 4.6568, + "step": 9505 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013409228046180457, + "loss": 4.666, + "step": 9510 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013405205358220363, + "loss": 3.4013, + "step": 9515 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013401182670260268, + "loss": 3.9559, + "step": 9520 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013397159982300174, + "loss": 3.7091, + "step": 9525 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001339313729434008, + "loss": 3.2853, + "step": 9530 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013389114606379983, + "loss": 2.6608, + "step": 9535 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001338509191841989, + "loss": 1.7764, + "step": 9540 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013381069230459792, + "loss": 2.1373, + "step": 9545 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013377046542499698, + "loss": 2.9499, + "step": 9550 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013373023854539604, + "loss": 4.3037, + "step": 9555 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001336900116657951, + "loss": 3.468, + "step": 9560 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013364978478619415, + "loss": 4.5646, + "step": 9565 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013360955790659318, + "loss": 3.7449, + "step": 9570 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013356933102699224, + "loss": 3.6267, + "step": 9575 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001335291041473913, + "loss": 3.0011, + "step": 9580 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013348887726779033, + "loss": 3.9645, + "step": 9585 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001334486503881894, + "loss": 2.6243, + "step": 9590 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013340842350858845, + "loss": 3.0731, + "step": 9595 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001333681966289875, + "loss": 1.45, + "step": 9600 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013332796974938654, + "loss": 3.6967, + "step": 9605 + }, + { + "epoch": 0.73, + "learning_rate": 0.0001332877428697856, + "loss": 4.2301, + "step": 9610 + }, + { + "epoch": 0.73, + "learning_rate": 0.00013324751599018465, + "loss": 3.9514, + "step": 9615 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013320728911058369, + "loss": 3.9246, + "step": 9620 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013316706223098274, + "loss": 3.4285, + "step": 9625 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001331268353513818, + "loss": 3.0667, + "step": 9630 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013308660847178086, + "loss": 2.9836, + "step": 9635 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001330463815921799, + "loss": 2.4699, + "step": 9640 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013300615471257895, + "loss": 2.3858, + "step": 9645 + }, + { + "epoch": 0.74, + "learning_rate": 0.000132965927832978, + "loss": 1.9694, + "step": 9650 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013292570095337707, + "loss": 4.6979, + "step": 9655 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001328854740737761, + "loss": 4.1387, + "step": 9660 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013284524719417516, + "loss": 4.4168, + "step": 9665 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013280502031457421, + "loss": 4.8387, + "step": 9670 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013276479343497325, + "loss": 3.6943, + "step": 9675 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001327245665553723, + "loss": 3.0823, + "step": 9680 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013268433967577136, + "loss": 2.7571, + "step": 9685 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013264411279617042, + "loss": 3.0168, + "step": 9690 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013260388591656945, + "loss": 2.7996, + "step": 9695 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001325636590369685, + "loss": 2.6553, + "step": 9700 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013252343215736757, + "loss": 4.643, + "step": 9705 + }, + { + "epoch": 0.74, + "learning_rate": 0.0001324832052777666, + "loss": 4.3184, + "step": 9710 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013244297839816566, + "loss": 3.1918, + "step": 9715 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013240275151856472, + "loss": 2.6649, + "step": 9720 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013236252463896377, + "loss": 3.9285, + "step": 9725 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013232229775936283, + "loss": 2.9876, + "step": 9730 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013228207087976186, + "loss": 2.318, + "step": 9735 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013224184400016092, + "loss": 2.6796, + "step": 9740 + }, + { + "epoch": 0.74, + "learning_rate": 0.00013220161712055995, + "loss": 1.675, + "step": 9745 + }, + { + "epoch": 0.75, + "learning_rate": 0.000132161390240959, + "loss": 1.1842, + "step": 9750 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013212116336135807, + "loss": 3.9574, + "step": 9755 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013208093648175713, + "loss": 5.1955, + "step": 9760 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013204070960215619, + "loss": 3.4871, + "step": 9765 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013200048272255522, + "loss": 4.4609, + "step": 9770 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013196025584295427, + "loss": 5.2233, + "step": 9775 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001319200289633533, + "loss": 4.0359, + "step": 9780 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013187980208375236, + "loss": 2.6144, + "step": 9785 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013183957520415142, + "loss": 1.9424, + "step": 9790 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013179934832455048, + "loss": 3.2209, + "step": 9795 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013175912144494954, + "loss": 2.7711, + "step": 9800 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013171889456534857, + "loss": 3.8485, + "step": 9805 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001316786676857476, + "loss": 3.1569, + "step": 9810 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013163844080614666, + "loss": 4.6219, + "step": 9815 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013159821392654572, + "loss": 3.0187, + "step": 9820 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013155798704694478, + "loss": 3.7212, + "step": 9825 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013151776016734383, + "loss": 2.5, + "step": 9830 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001314775332877429, + "loss": 2.1953, + "step": 9835 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013143730640814192, + "loss": 1.6314, + "step": 9840 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013139707952854095, + "loss": 2.1152, + "step": 9845 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013135685264894, + "loss": 3.3051, + "step": 9850 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013131662576933907, + "loss": 4.8615, + "step": 9855 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013127639888973813, + "loss": 4.4182, + "step": 9860 + }, + { + "epoch": 0.75, + "learning_rate": 0.0001312361720101372, + "loss": 4.64, + "step": 9865 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013119594513053625, + "loss": 3.8684, + "step": 9870 + }, + { + "epoch": 0.75, + "learning_rate": 0.00013115571825093528, + "loss": 3.9086, + "step": 9875 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013111549137133434, + "loss": 2.4209, + "step": 9880 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013107526449173337, + "loss": 1.358, + "step": 9885 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013103503761213242, + "loss": 1.3452, + "step": 9890 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013099481073253148, + "loss": 2.1419, + "step": 9895 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013095458385293054, + "loss": 2.2326, + "step": 9900 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001309143569733296, + "loss": 4.982, + "step": 9905 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013087413009372863, + "loss": 4.2432, + "step": 9910 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001308339032141277, + "loss": 4.2863, + "step": 9915 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013079367633452672, + "loss": 3.5053, + "step": 9920 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013075344945492578, + "loss": 4.0033, + "step": 9925 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013071322257532484, + "loss": 3.812, + "step": 9930 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001306729956957239, + "loss": 3.1587, + "step": 9935 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013063276881612295, + "loss": 3.0073, + "step": 9940 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013059254193652198, + "loss": 3.4743, + "step": 9945 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013055231505692104, + "loss": 1.0452, + "step": 9950 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001305120881773201, + "loss": 3.9975, + "step": 9955 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013047186129771913, + "loss": 3.7115, + "step": 9960 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001304316344181182, + "loss": 3.8148, + "step": 9965 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013039140753851725, + "loss": 3.2805, + "step": 9970 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001303511806589163, + "loss": 3.5577, + "step": 9975 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013031095377931534, + "loss": 3.2957, + "step": 9980 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001302707268997144, + "loss": 1.8262, + "step": 9985 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013023050002011345, + "loss": 3.8027, + "step": 9990 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013019027314051249, + "loss": 1.8096, + "step": 9995 + }, + { + "epoch": 0.76, + "learning_rate": 0.00013015004626091154, + "loss": 1.4512, + "step": 10000 + }, + { + "epoch": 0.76, + "learning_rate": 0.0001301098193813106, + "loss": 3.9459, + "step": 10005 + }, + { + "epoch": 0.77, + "learning_rate": 0.00013006959250170966, + "loss": 4.8336, + "step": 10010 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001300293656221087, + "loss": 4.3008, + "step": 10015 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012998913874250775, + "loss": 3.5926, + "step": 10020 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001299489118629068, + "loss": 3.3028, + "step": 10025 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012990868498330587, + "loss": 3.0276, + "step": 10030 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001298684581037049, + "loss": 1.6533, + "step": 10035 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012982823122410396, + "loss": 2.652, + "step": 10040 + }, + { + "epoch": 0.77, + "learning_rate": 0.000129788004344503, + "loss": 1.1642, + "step": 10045 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012974777746490204, + "loss": 2.6869, + "step": 10050 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001297075505853011, + "loss": 4.8092, + "step": 10055 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012966732370570016, + "loss": 5.8297, + "step": 10060 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012962709682609922, + "loss": 3.7469, + "step": 10065 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012958686994649825, + "loss": 4.5357, + "step": 10070 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001295466430668973, + "loss": 3.599, + "step": 10075 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012950641618729634, + "loss": 2.9362, + "step": 10080 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001294661893076954, + "loss": 2.9291, + "step": 10085 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012942596242809446, + "loss": 0.7292, + "step": 10090 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012938573554849351, + "loss": 2.6529, + "step": 10095 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012934550866889257, + "loss": 1.4879, + "step": 10100 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012930528178929163, + "loss": 4.5059, + "step": 10105 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012926505490969066, + "loss": 4.3889, + "step": 10110 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001292248280300897, + "loss": 5.0908, + "step": 10115 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012918460115048875, + "loss": 4.3072, + "step": 10120 + }, + { + "epoch": 0.77, + "learning_rate": 0.0001291443742708878, + "loss": 3.3316, + "step": 10125 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012910414739128687, + "loss": 3.8155, + "step": 10130 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012906392051168593, + "loss": 2.2591, + "step": 10135 + }, + { + "epoch": 0.77, + "learning_rate": 0.00012902369363208498, + "loss": 3.473, + "step": 10140 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012898346675248402, + "loss": 1.5188, + "step": 10145 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012894323987288305, + "loss": 2.933, + "step": 10150 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001289030129932821, + "loss": 4.2186, + "step": 10155 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012886278611368116, + "loss": 4.9096, + "step": 10160 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012882255923408022, + "loss": 4.5613, + "step": 10165 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012878233235447928, + "loss": 3.0895, + "step": 10170 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012874210547487834, + "loss": 3.5822, + "step": 10175 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012870187859527737, + "loss": 2.599, + "step": 10180 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001286616517156764, + "loss": 3.6651, + "step": 10185 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012862142483607546, + "loss": 2.747, + "step": 10190 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012858119795647452, + "loss": 2.2399, + "step": 10195 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012854097107687357, + "loss": 0.2028, + "step": 10200 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012850074419727263, + "loss": 3.8363, + "step": 10205 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001284605173176717, + "loss": 3.6502, + "step": 10210 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012842029043807072, + "loss": 4.1717, + "step": 10215 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012838006355846975, + "loss": 3.6252, + "step": 10220 + }, + { + "epoch": 0.78, + "learning_rate": 0.0001283398366788688, + "loss": 3.8029, + "step": 10225 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012829960979926787, + "loss": 2.7107, + "step": 10230 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012825938291966693, + "loss": 3.614, + "step": 10235 + }, + { + "epoch": 0.78, + "learning_rate": 0.000128219156040066, + "loss": 2.4932, + "step": 10240 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012817892916046504, + "loss": 3.0111, + "step": 10245 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012813870228086408, + "loss": 2.2005, + "step": 10250 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012809847540126313, + "loss": 4.8871, + "step": 10255 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012805824852166217, + "loss": 4.4473, + "step": 10260 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012801802164206122, + "loss": 4.5402, + "step": 10265 + }, + { + "epoch": 0.78, + "learning_rate": 0.00012797779476246028, + "loss": 3.0773, + "step": 10270 + }, + { + "epoch": 0.79, + "learning_rate": 0.00012793756788285934, + "loss": 2.9697, + "step": 10275 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001278973410032584, + "loss": 1.9987, + "step": 10280 + }, + { + "epoch": 0.79, + "learning_rate": 0.00012785711412365743, + "loss": 3.0007, + "step": 10285 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001278168872440565, + "loss": 2.0372, + "step": 10290 + }, + { + "epoch": 0.79, + "learning_rate": 0.00012777666036445552, + "loss": 2.4596, + "step": 10295 + }, + { + "epoch": 0.79, + "learning_rate": 0.00012773643348485458, + "loss": 1.395, + "step": 10300 + }, + { + "epoch": 0.79, + "learning_rate": 0.00012769620660525364, + "loss": 4.6918, + "step": 10305 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001276559797256527, + "loss": 3.618, + "step": 10310 + }, + { + "epoch": 0.79, + "learning_rate": 0.00012761575284605175, + "loss": 3.8582, + "step": 10315 + }, + { + "epoch": 0.79, + "learning_rate": 0.00012757552596645078, + "loss": 4.6336, + "step": 10320 + }, + { + "epoch": 0.79, + "learning_rate": 0.00012753529908684984, + "loss": 2.6338, + "step": 10325 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001274950722072489, + "loss": 3.15, + "step": 10330 + }, + { + "epoch": 0.79, + "learning_rate": 0.00012745484532764793, + "loss": 1.2446, + "step": 10335 + }, + { + "epoch": 0.79, + "learning_rate": 0.000127414618448047, + "loss": 2.2504, + "step": 10340 + }, + { + "epoch": 0.79, + "learning_rate": 0.00012737439156844605, + "loss": 3.426, + "step": 10345 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001273341646888451, + "loss": 0.0367, + "step": 10350 + }, + { + "epoch": 0.79, + "learning_rate": 0.00012729393780924414, + "loss": 4.5221, + "step": 10355 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001272537109296432, + "loss": 4.7127, + "step": 10360 + }, + { + "epoch": 0.79, + "learning_rate": 0.00012721348405004225, + "loss": 3.3664, + "step": 10365 + }, + { + "epoch": 0.79, + "learning_rate": 0.00012717325717044128, + "loss": 3.905, + "step": 10370 + }, + { + "epoch": 0.79, + "learning_rate": 0.00012713303029084034, + "loss": 3.6577, + "step": 10375 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001270928034112394, + "loss": 2.1809, + "step": 10380 + }, + { + "epoch": 0.79, + "learning_rate": 0.00012705257653163843, + "loss": 2.0813, + "step": 10385 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001270123496520375, + "loss": 3.2018, + "step": 10390 + }, + { + "epoch": 0.79, + "learning_rate": 0.00012697212277243655, + "loss": 2.3257, + "step": 10395 + }, + { + "epoch": 0.79, + "learning_rate": 0.0001269318958928356, + "loss": 1.8804, + "step": 10400 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012689166901323466, + "loss": 4.5512, + "step": 10405 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001268514421336337, + "loss": 3.9561, + "step": 10410 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012681121525403275, + "loss": 3.5211, + "step": 10415 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012677098837443179, + "loss": 3.8664, + "step": 10420 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012673076149483084, + "loss": 3.3927, + "step": 10425 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001266905346152299, + "loss": 3.9993, + "step": 10430 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012665030773562896, + "loss": 3.6185, + "step": 10435 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012661008085602802, + "loss": 2.7498, + "step": 10440 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012656985397642705, + "loss": 1.6683, + "step": 10445 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001265296270968261, + "loss": 0.0475, + "step": 10450 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012648940021722514, + "loss": 4.1752, + "step": 10455 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001264491733376242, + "loss": 4.8688, + "step": 10460 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012640894645802326, + "loss": 4.3963, + "step": 10465 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001263687195784223, + "loss": 4.7291, + "step": 10470 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012632849269882137, + "loss": 3.6086, + "step": 10475 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012628826581922043, + "loss": 2.9757, + "step": 10480 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012624803893961946, + "loss": 3.5504, + "step": 10485 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001262078120600185, + "loss": 3.5773, + "step": 10490 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012616758518041755, + "loss": 1.6866, + "step": 10495 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001261273583008166, + "loss": 2.5525, + "step": 10500 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012608713142121567, + "loss": 4.3145, + "step": 10505 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012604690454161473, + "loss": 4.4189, + "step": 10510 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012600667766201378, + "loss": 4.108, + "step": 10515 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012596645078241281, + "loss": 4.3092, + "step": 10520 + }, + { + "epoch": 0.8, + "learning_rate": 0.00012592622390281185, + "loss": 4.0993, + "step": 10525 + }, + { + "epoch": 0.8, + "learning_rate": 0.0001258859970232109, + "loss": 3.2504, + "step": 10530 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012584577014360996, + "loss": 3.6479, + "step": 10535 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012580554326400902, + "loss": 4.1616, + "step": 10540 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012577336176032827, + "loss": 3.0753, + "step": 10545 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001257331348807273, + "loss": 1.1912, + "step": 10550 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012569290800112636, + "loss": 4.1457, + "step": 10555 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012565268112152541, + "loss": 4.3012, + "step": 10560 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012561245424192447, + "loss": 3.3361, + "step": 10565 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001255722273623235, + "loss": 3.3066, + "step": 10570 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012553200048272256, + "loss": 3.2586, + "step": 10575 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012549177360312162, + "loss": 3.1351, + "step": 10580 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012545154672352065, + "loss": 3.3842, + "step": 10585 + }, + { + "epoch": 0.81, + "learning_rate": 0.0001254113198439197, + "loss": 2.4289, + "step": 10590 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012537109296431877, + "loss": 2.9674, + "step": 10595 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012533086608471783, + "loss": 1.956, + "step": 10600 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012529063920511686, + "loss": 3.8779, + "step": 10605 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012525041232551592, + "loss": 3.518, + "step": 10610 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012521018544591497, + "loss": 4.851, + "step": 10615 + }, + { + "epoch": 0.81, + "learning_rate": 0.000125169958566314, + "loss": 3.1552, + "step": 10620 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012512973168671306, + "loss": 2.9098, + "step": 10625 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012508950480711212, + "loss": 2.8324, + "step": 10630 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012504927792751118, + "loss": 2.5265, + "step": 10635 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012500905104791024, + "loss": 2.6673, + "step": 10640 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012496882416830927, + "loss": 2.8797, + "step": 10645 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012492859728870833, + "loss": 2.8138, + "step": 10650 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012488837040910736, + "loss": 4.6326, + "step": 10655 + }, + { + "epoch": 0.81, + "learning_rate": 0.00012484814352950642, + "loss": 3.6939, + "step": 10660 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012480791664990548, + "loss": 3.8156, + "step": 10665 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012476768977030453, + "loss": 3.8127, + "step": 10670 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001247274628907036, + "loss": 3.8121, + "step": 10675 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012468723601110262, + "loss": 4.3749, + "step": 10680 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012464700913150168, + "loss": 2.2904, + "step": 10685 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001246067822519007, + "loss": 1.6336, + "step": 10690 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012456655537229977, + "loss": 1.492, + "step": 10695 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012452632849269883, + "loss": 1.4875, + "step": 10700 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001244861016130979, + "loss": 4.3963, + "step": 10705 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012444587473349695, + "loss": 3.8414, + "step": 10710 + }, + { + "epoch": 0.82, + "learning_rate": 0.000124405647853896, + "loss": 4.0334, + "step": 10715 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012436542097429503, + "loss": 4.182, + "step": 10720 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012432519409469407, + "loss": 3.2245, + "step": 10725 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012428496721509312, + "loss": 2.7608, + "step": 10730 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012424474033549218, + "loss": 2.8074, + "step": 10735 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012420451345589124, + "loss": 3.0531, + "step": 10740 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001241642865762903, + "loss": 1.4641, + "step": 10745 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012412405969668933, + "loss": 2.0748, + "step": 10750 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001240838328170884, + "loss": 4.573, + "step": 10755 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012404360593748742, + "loss": 3.651, + "step": 10760 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012400337905788648, + "loss": 3.5417, + "step": 10765 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012396315217828554, + "loss": 4.6764, + "step": 10770 + }, + { + "epoch": 0.82, + "learning_rate": 0.0001239229252986846, + "loss": 4.1459, + "step": 10775 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012388269841908365, + "loss": 3.444, + "step": 10780 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012384247153948268, + "loss": 3.3812, + "step": 10785 + }, + { + "epoch": 0.82, + "learning_rate": 0.00012380224465988174, + "loss": 1.5143, + "step": 10790 + }, + { + "epoch": 0.83, + "learning_rate": 0.00012376201778028077, + "loss": 2.626, + "step": 10795 + }, + { + "epoch": 0.83, + "learning_rate": 0.00012372179090067983, + "loss": 1.1325, + "step": 10800 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001236815640210789, + "loss": 5.0693, + "step": 10805 + }, + { + "epoch": 0.83, + "learning_rate": 0.00012364133714147795, + "loss": 4.8133, + "step": 10810 + }, + { + "epoch": 0.83, + "learning_rate": 0.000123601110261877, + "loss": 3.3014, + "step": 10815 + }, + { + "epoch": 0.83, + "learning_rate": 0.00012356088338227604, + "loss": 4.2463, + "step": 10820 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001235206565026751, + "loss": 4.0414, + "step": 10825 + }, + { + "epoch": 0.83, + "learning_rate": 0.00012348042962307413, + "loss": 4.8287, + "step": 10830 + }, + { + "epoch": 0.83, + "learning_rate": 0.00012344020274347318, + "loss": 1.6694, + "step": 10835 + }, + { + "epoch": 0.83, + "learning_rate": 0.00012339997586387224, + "loss": 1.8969, + "step": 10840 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001233597489842713, + "loss": 2.2986, + "step": 10845 + }, + { + "epoch": 0.83, + "learning_rate": 0.00012331952210467036, + "loss": 1.7887, + "step": 10850 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001232792952250694, + "loss": 4.7313, + "step": 10855 + }, + { + "epoch": 0.83, + "learning_rate": 0.00012323906834546845, + "loss": 3.7457, + "step": 10860 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001231988414658675, + "loss": 5.1592, + "step": 10865 + }, + { + "epoch": 0.83, + "learning_rate": 0.00012315861458626654, + "loss": 3.3604, + "step": 10870 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001231183877066656, + "loss": 3.1216, + "step": 10875 + }, + { + "epoch": 0.83, + "learning_rate": 0.00012307816082706465, + "loss": 4.282, + "step": 10880 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001230379339474637, + "loss": 2.4792, + "step": 10885 + }, + { + "epoch": 0.83, + "learning_rate": 0.00012299770706786274, + "loss": 1.8746, + "step": 10890 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001229574801882618, + "loss": 1.6147, + "step": 10895 + }, + { + "epoch": 0.83, + "learning_rate": 0.00012291725330866086, + "loss": 2.9426, + "step": 10900 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001228770264290599, + "loss": 4.616, + "step": 10905 + }, + { + "epoch": 0.83, + "learning_rate": 0.00012283679954945895, + "loss": 3.8428, + "step": 10910 + }, + { + "epoch": 0.83, + "learning_rate": 0.000122796572669858, + "loss": 4.851, + "step": 10915 + }, + { + "epoch": 0.83, + "learning_rate": 0.00012275634579025707, + "loss": 3.6221, + "step": 10920 + }, + { + "epoch": 0.83, + "learning_rate": 0.0001227161189106561, + "loss": 4.0699, + "step": 10925 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012267589203105516, + "loss": 4.5023, + "step": 10930 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012263566515145421, + "loss": 3.5705, + "step": 10935 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012259543827185327, + "loss": 2.7856, + "step": 10940 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001225552113922523, + "loss": 1.8233, + "step": 10945 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012251498451265136, + "loss": 2.3532, + "step": 10950 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012247475763305042, + "loss": 3.4496, + "step": 10955 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012243453075344945, + "loss": 3.6041, + "step": 10960 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001223943038738485, + "loss": 4.3311, + "step": 10965 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012235407699424757, + "loss": 4.1073, + "step": 10970 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012231385011464663, + "loss": 4.1563, + "step": 10975 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012227362323504566, + "loss": 2.2184, + "step": 10980 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012223339635544471, + "loss": 2.3406, + "step": 10985 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012219316947584377, + "loss": 1.6087, + "step": 10990 + }, + { + "epoch": 0.84, + "learning_rate": 0.0001221529425962428, + "loss": 1.7286, + "step": 10995 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012211271571664186, + "loss": 0.5842, + "step": 11000 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012207248883704092, + "loss": 4.0867, + "step": 11005 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012203226195743998, + "loss": 3.6726, + "step": 11010 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012199203507783902, + "loss": 4.5297, + "step": 11015 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012195180819823805, + "loss": 3.8204, + "step": 11020 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012191158131863711, + "loss": 3.8697, + "step": 11025 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012187135443903617, + "loss": 3.5434, + "step": 11030 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012183112755943522, + "loss": 2.4551, + "step": 11035 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012179090067983427, + "loss": 2.9122, + "step": 11040 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012175067380023333, + "loss": 2.9427, + "step": 11045 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012171044692063238, + "loss": 0.0149, + "step": 11050 + }, + { + "epoch": 0.84, + "learning_rate": 0.00012167022004103141, + "loss": 3.976, + "step": 11055 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012162999316143047, + "loss": 4.292, + "step": 11060 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012158976628182952, + "loss": 3.5534, + "step": 11065 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012154953940222857, + "loss": 3.6272, + "step": 11070 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012150931252262763, + "loss": 3.5693, + "step": 11075 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012146908564302669, + "loss": 3.2917, + "step": 11080 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012142885876342573, + "loss": 3.7476, + "step": 11085 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012138863188382479, + "loss": 1.9143, + "step": 11090 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012134840500422382, + "loss": 1.1044, + "step": 11095 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012130817812462288, + "loss": 2.3956, + "step": 11100 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012126795124502192, + "loss": 4.0947, + "step": 11105 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012122772436542098, + "loss": 3.8414, + "step": 11110 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012118749748582004, + "loss": 3.4203, + "step": 11115 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012114727060621908, + "loss": 4.1299, + "step": 11120 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012110704372661814, + "loss": 1.8336, + "step": 11125 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012106681684701717, + "loss": 3.1511, + "step": 11130 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012102658996741623, + "loss": 1.5535, + "step": 11135 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012098636308781528, + "loss": 2.8977, + "step": 11140 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012094613620821433, + "loss": 1.0519, + "step": 11145 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012090590932861339, + "loss": 2.3733, + "step": 11150 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012086568244901244, + "loss": 6.4816, + "step": 11155 + }, + { + "epoch": 0.85, + "learning_rate": 0.0001208254555694115, + "loss": 4.9588, + "step": 11160 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012078522868981055, + "loss": 3.2568, + "step": 11165 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012074500181020959, + "loss": 3.6896, + "step": 11170 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012070477493060863, + "loss": 4.2188, + "step": 11175 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012066454805100769, + "loss": 3.8143, + "step": 11180 + }, + { + "epoch": 0.85, + "learning_rate": 0.00012062432117140675, + "loss": 2.433, + "step": 11185 + }, + { + "epoch": 0.86, + "learning_rate": 0.00012058409429180579, + "loss": 2.9424, + "step": 11190 + }, + { + "epoch": 0.86, + "learning_rate": 0.00012054386741220485, + "loss": 1.7503, + "step": 11195 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001205036405326039, + "loss": 1.471, + "step": 11200 + }, + { + "epoch": 0.86, + "learning_rate": 0.00012046341365300294, + "loss": 4.4582, + "step": 11205 + }, + { + "epoch": 0.86, + "learning_rate": 0.00012042318677340198, + "loss": 4.9609, + "step": 11210 + }, + { + "epoch": 0.86, + "learning_rate": 0.00012038295989380104, + "loss": 4.7346, + "step": 11215 + }, + { + "epoch": 0.86, + "learning_rate": 0.00012034273301420009, + "loss": 3.2718, + "step": 11220 + }, + { + "epoch": 0.86, + "learning_rate": 0.00012030250613459914, + "loss": 3.5242, + "step": 11225 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001202622792549982, + "loss": 3.4757, + "step": 11230 + }, + { + "epoch": 0.86, + "learning_rate": 0.00012022205237539725, + "loss": 1.8217, + "step": 11235 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001201818254957963, + "loss": 2.7196, + "step": 11240 + }, + { + "epoch": 0.86, + "learning_rate": 0.00012014159861619534, + "loss": 1.9612, + "step": 11245 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001201013717365944, + "loss": 1.22, + "step": 11250 + }, + { + "epoch": 0.86, + "learning_rate": 0.00012006114485699344, + "loss": 3.6163, + "step": 11255 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001200209179773925, + "loss": 3.748, + "step": 11260 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011998069109779156, + "loss": 3.8621, + "step": 11265 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001199404642181906, + "loss": 3.7291, + "step": 11270 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011990023733858966, + "loss": 3.5688, + "step": 11275 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011986001045898869, + "loss": 3.9327, + "step": 11280 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011981978357938775, + "loss": 2.0496, + "step": 11285 + }, + { + "epoch": 0.86, + "learning_rate": 0.0001197795566997868, + "loss": 3.2875, + "step": 11290 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011973932982018585, + "loss": 3.1575, + "step": 11295 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011969910294058491, + "loss": 1.5065, + "step": 11300 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011965887606098395, + "loss": 5.9072, + "step": 11305 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011961864918138301, + "loss": 4.5477, + "step": 11310 + }, + { + "epoch": 0.86, + "learning_rate": 0.00011957842230178207, + "loss": 4.2107, + "step": 11315 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001195381954221811, + "loss": 4.1152, + "step": 11320 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011949796854258015, + "loss": 3.9164, + "step": 11325 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001194577416629792, + "loss": 3.1883, + "step": 11330 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011941751478337826, + "loss": 3.7471, + "step": 11335 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011937728790377731, + "loss": 2.6846, + "step": 11340 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011933706102417637, + "loss": 2.8391, + "step": 11345 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001193048795204956, + "loss": 2.3922, + "step": 11350 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011926465264089466, + "loss": 4.3721, + "step": 11355 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011922442576129372, + "loss": 4.3652, + "step": 11360 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011918419888169275, + "loss": 3.515, + "step": 11365 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011914397200209179, + "loss": 4.5871, + "step": 11370 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011910374512249085, + "loss": 3.9205, + "step": 11375 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011906351824288991, + "loss": 3.4631, + "step": 11380 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011902329136328895, + "loss": 3.6807, + "step": 11385 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011898306448368801, + "loss": 2.6828, + "step": 11390 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011894283760408707, + "loss": 2.0819, + "step": 11395 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001189026107244861, + "loss": 2.5945, + "step": 11400 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011886238384488515, + "loss": 3.8611, + "step": 11405 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001188221569652842, + "loss": 4.0271, + "step": 11410 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011878193008568326, + "loss": 5.5275, + "step": 11415 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001187417032060823, + "loss": 4.565, + "step": 11420 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011870147632648136, + "loss": 3.5147, + "step": 11425 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011866124944688042, + "loss": 3.1143, + "step": 11430 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011862102256727947, + "loss": 1.3822, + "step": 11435 + }, + { + "epoch": 0.87, + "learning_rate": 0.0001185807956876785, + "loss": 2.3071, + "step": 11440 + }, + { + "epoch": 0.87, + "learning_rate": 0.00011854056880807756, + "loss": 1.8316, + "step": 11445 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011850034192847662, + "loss": 1.9333, + "step": 11450 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011846011504887566, + "loss": 4.7771, + "step": 11455 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011841988816927472, + "loss": 4.8422, + "step": 11460 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011837966128967378, + "loss": 4.3121, + "step": 11465 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011833943441007282, + "loss": 3.5387, + "step": 11470 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011829920753047185, + "loss": 2.7474, + "step": 11475 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011825898065087091, + "loss": 3.3034, + "step": 11480 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011821875377126997, + "loss": 3.3162, + "step": 11485 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011817852689166901, + "loss": 1.4119, + "step": 11490 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011813830001206807, + "loss": 2.9623, + "step": 11495 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011809807313246713, + "loss": 1.924, + "step": 11500 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011805784625286617, + "loss": 4.39, + "step": 11505 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011801761937326523, + "loss": 4.1092, + "step": 11510 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011797739249366426, + "loss": 4.3584, + "step": 11515 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011793716561406332, + "loss": 4.9307, + "step": 11520 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011789693873446237, + "loss": 3.7303, + "step": 11525 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011785671185486143, + "loss": 2.9764, + "step": 11530 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011781648497526048, + "loss": 2.5169, + "step": 11535 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011777625809565953, + "loss": 2.986, + "step": 11540 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011773603121605859, + "loss": 2.9573, + "step": 11545 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011769580433645762, + "loss": 4.3488, + "step": 11550 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011765557745685668, + "loss": 3.9223, + "step": 11555 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011761535057725572, + "loss": 4.3891, + "step": 11560 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011757512369765478, + "loss": 4.6504, + "step": 11565 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011753489681805384, + "loss": 2.1885, + "step": 11570 + }, + { + "epoch": 0.88, + "learning_rate": 0.00011749466993845288, + "loss": 4.1299, + "step": 11575 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011745444305885194, + "loss": 2.6887, + "step": 11580 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011741421617925098, + "loss": 4.266, + "step": 11585 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011737398929965002, + "loss": 2.6325, + "step": 11590 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011733376242004907, + "loss": 2.8226, + "step": 11595 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011729353554044813, + "loss": 0.0985, + "step": 11600 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011725330866084718, + "loss": 4.3924, + "step": 11605 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011721308178124624, + "loss": 4.9402, + "step": 11610 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001171728549016453, + "loss": 4.0807, + "step": 11615 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011713262802204434, + "loss": 3.7963, + "step": 11620 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011709240114244337, + "loss": 3.3341, + "step": 11625 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011705217426284243, + "loss": 2.103, + "step": 11630 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011701194738324149, + "loss": 2.1604, + "step": 11635 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011697172050364053, + "loss": 3.5096, + "step": 11640 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011693149362403959, + "loss": 4.5993, + "step": 11645 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011689126674443865, + "loss": 3.1042, + "step": 11650 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011685103986483769, + "loss": 4.4627, + "step": 11655 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011681081298523675, + "loss": 4.0707, + "step": 11660 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011677058610563578, + "loss": 4.1977, + "step": 11665 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011673035922603484, + "loss": 3.0867, + "step": 11670 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011669013234643388, + "loss": 3.8916, + "step": 11675 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011664990546683294, + "loss": 2.4693, + "step": 11680 + }, + { + "epoch": 0.89, + "learning_rate": 0.000116609678587232, + "loss": 3.8117, + "step": 11685 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011656945170763104, + "loss": 2.3956, + "step": 11690 + }, + { + "epoch": 0.89, + "learning_rate": 0.0001165292248280301, + "loss": 2.6879, + "step": 11695 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011648899794842913, + "loss": 1.1392, + "step": 11700 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011644877106882819, + "loss": 4.1566, + "step": 11705 + }, + { + "epoch": 0.89, + "learning_rate": 0.00011640854418922724, + "loss": 4.0279, + "step": 11710 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001163683173096263, + "loss": 3.9164, + "step": 11715 + }, + { + "epoch": 0.9, + "learning_rate": 0.00011632809043002535, + "loss": 2.289, + "step": 11720 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001162878635504244, + "loss": 4.4148, + "step": 11725 + }, + { + "epoch": 0.9, + "learning_rate": 0.00011624763667082346, + "loss": 2.4125, + "step": 11730 + }, + { + "epoch": 0.9, + "learning_rate": 0.00011620740979122251, + "loss": 1.2408, + "step": 11735 + }, + { + "epoch": 0.9, + "learning_rate": 0.00011616718291162155, + "loss": 1.2736, + "step": 11740 + }, + { + "epoch": 0.9, + "learning_rate": 0.00011612695603202059, + "loss": 1.0584, + "step": 11745 + }, + { + "epoch": 0.9, + "learning_rate": 0.00011608672915241965, + "loss": 2.8411, + "step": 11750 + }, + { + "epoch": 0.9, + "learning_rate": 0.00011604650227281871, + "loss": 3.5629, + "step": 11755 + }, + { + "epoch": 0.9, + "learning_rate": 0.00011600627539321775, + "loss": 4.7592, + "step": 11760 + }, + { + "epoch": 0.9, + "learning_rate": 0.00011596604851361681, + "loss": 4.0055, + "step": 11765 + }, + { + "epoch": 0.9, + "learning_rate": 0.00011592582163401587, + "loss": 4.5269, + "step": 11770 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001158855947544149, + "loss": 4.535, + "step": 11775 + }, + { + "epoch": 0.9, + "learning_rate": 0.00011584536787481394, + "loss": 1.9481, + "step": 11780 + }, + { + "epoch": 0.9, + "learning_rate": 0.000115805140995213, + "loss": 2.4671, + "step": 11785 + }, + { + "epoch": 0.9, + "learning_rate": 0.00011576491411561206, + "loss": 3.4428, + "step": 11790 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001157246872360111, + "loss": 2.0589, + "step": 11795 + }, + { + "epoch": 0.9, + "learning_rate": 0.00011568446035641016, + "loss": 3.3335, + "step": 11800 + }, + { + "epoch": 0.9, + "learning_rate": 0.00011564423347680922, + "loss": 4.425, + "step": 11805 + }, + { + "epoch": 0.9, + "learning_rate": 0.00011560400659720827, + "loss": 4.8104, + "step": 11810 + }, + { + "epoch": 0.9, + "learning_rate": 0.0001155637797176073, + "loss": 4.1982, + "step": 11815 + }, + { + "epoch": 0.9, + "learning_rate": 0.00011552355283800636, + "loss": 4.6156, + "step": 11820 + }, + { + "epoch": 0.9, + "learning_rate": 0.00011548332595840541, + "loss": 3.4463, + "step": 11825 + }, + { + "epoch": 0.9, + "learning_rate": 0.00011544309907880446, + "loss": 1.7727, + "step": 11830 + }, + { + "epoch": 0.9, + "learning_rate": 0.00011540287219920352, + "loss": 2.8218, + "step": 11835 + }, + { + "epoch": 0.9, + "learning_rate": 0.00011536264531960258, + "loss": 2.5171, + "step": 11840 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011532241844000162, + "loss": 3.8074, + "step": 11845 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011528219156040065, + "loss": 1.3458, + "step": 11850 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011524196468079971, + "loss": 4.7215, + "step": 11855 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011520173780119877, + "loss": 4.6002, + "step": 11860 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011516151092159781, + "loss": 4.7979, + "step": 11865 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011512128404199687, + "loss": 4.3117, + "step": 11870 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011508105716239593, + "loss": 2.6911, + "step": 11875 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011504083028279497, + "loss": 3.0803, + "step": 11880 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011500060340319403, + "loss": 2.7019, + "step": 11885 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011496037652359306, + "loss": 2.1551, + "step": 11890 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011492014964399211, + "loss": 2.693, + "step": 11895 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011487992276439117, + "loss": 0.6029, + "step": 11900 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011483969588479022, + "loss": 4.9437, + "step": 11905 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011479946900518927, + "loss": 4.216, + "step": 11910 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011475924212558833, + "loss": 3.3949, + "step": 11915 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011471901524598739, + "loss": 3.6592, + "step": 11920 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011467878836638643, + "loss": 2.7439, + "step": 11925 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011463856148678546, + "loss": 2.8549, + "step": 11930 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011459833460718452, + "loss": 2.6736, + "step": 11935 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011455810772758358, + "loss": 3.3242, + "step": 11940 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011451788084798262, + "loss": 1.7755, + "step": 11945 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011447765396838168, + "loss": 2.2492, + "step": 11950 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011443742708878074, + "loss": 4.5637, + "step": 11955 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011439720020917978, + "loss": 4.16, + "step": 11960 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011435697332957881, + "loss": 4.0373, + "step": 11965 + }, + { + "epoch": 0.91, + "learning_rate": 0.00011431674644997787, + "loss": 3.9465, + "step": 11970 + }, + { + "epoch": 0.92, + "learning_rate": 0.00011427651957037693, + "loss": 4.5678, + "step": 11975 + }, + { + "epoch": 0.92, + "learning_rate": 0.00011423629269077598, + "loss": 4.6479, + "step": 11980 + }, + { + "epoch": 0.92, + "learning_rate": 0.00011419606581117503, + "loss": 3.3561, + "step": 11985 + }, + { + "epoch": 0.92, + "learning_rate": 0.00011415583893157409, + "loss": 2.7581, + "step": 11990 + }, + { + "epoch": 0.92, + "learning_rate": 0.00011411561205197314, + "loss": 1.4079, + "step": 11995 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001140753851723722, + "loss": 3.3122, + "step": 12000 + }, + { + "epoch": 0.92, + "learning_rate": 0.00011403515829277123, + "loss": 5.2654, + "step": 12005 + }, + { + "epoch": 0.92, + "learning_rate": 0.00011399493141317028, + "loss": 4.2666, + "step": 12010 + }, + { + "epoch": 0.92, + "learning_rate": 0.00011395470453356933, + "loss": 3.2514, + "step": 12015 + }, + { + "epoch": 0.92, + "learning_rate": 0.00011391447765396839, + "loss": 3.2791, + "step": 12020 + }, + { + "epoch": 0.92, + "learning_rate": 0.00011387425077436745, + "loss": 2.7815, + "step": 12025 + }, + { + "epoch": 0.92, + "learning_rate": 0.00011383402389476649, + "loss": 3.698, + "step": 12030 + }, + { + "epoch": 0.92, + "learning_rate": 0.00011379379701516555, + "loss": 3.4422, + "step": 12035 + }, + { + "epoch": 0.92, + "learning_rate": 0.00011375357013556458, + "loss": 0.9724, + "step": 12040 + }, + { + "epoch": 0.92, + "learning_rate": 0.00011371334325596364, + "loss": 1.8559, + "step": 12045 + }, + { + "epoch": 0.92, + "learning_rate": 0.00011367311637636268, + "loss": 1.9751, + "step": 12050 + }, + { + "epoch": 0.92, + "learning_rate": 0.00011363288949676174, + "loss": 3.3685, + "step": 12055 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001135926626171608, + "loss": 5.3922, + "step": 12060 + }, + { + "epoch": 0.92, + "learning_rate": 0.00011355243573755984, + "loss": 3.7494, + "step": 12065 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001135122088579589, + "loss": 5.3334, + "step": 12070 + }, + { + "epoch": 0.92, + "learning_rate": 0.00011347198197835796, + "loss": 3.8536, + "step": 12075 + }, + { + "epoch": 0.92, + "learning_rate": 0.00011343175509875699, + "loss": 3.947, + "step": 12080 + }, + { + "epoch": 0.92, + "learning_rate": 0.00011339152821915604, + "loss": 2.8934, + "step": 12085 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001133513013395551, + "loss": 2.3866, + "step": 12090 + }, + { + "epoch": 0.92, + "learning_rate": 0.00011331107445995415, + "loss": 1.5607, + "step": 12095 + }, + { + "epoch": 0.92, + "learning_rate": 0.0001132708475803532, + "loss": 1.2598, + "step": 12100 + }, + { + "epoch": 0.93, + "learning_rate": 0.00011323062070075226, + "loss": 3.7342, + "step": 12105 + }, + { + "epoch": 0.93, + "learning_rate": 0.00011319039382115131, + "loss": 4.8529, + "step": 12110 + }, + { + "epoch": 0.93, + "learning_rate": 0.00011315016694155035, + "loss": 4.0742, + "step": 12115 + }, + { + "epoch": 0.93, + "learning_rate": 0.00011310994006194939, + "loss": 5.4617, + "step": 12120 + }, + { + "epoch": 0.93, + "learning_rate": 0.00011306971318234845, + "loss": 3.8857, + "step": 12125 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001130294863027475, + "loss": 2.9613, + "step": 12130 + }, + { + "epoch": 0.93, + "learning_rate": 0.00011298925942314655, + "loss": 1.5732, + "step": 12135 + }, + { + "epoch": 0.93, + "learning_rate": 0.00011294903254354561, + "loss": 1.9693, + "step": 12140 + }, + { + "epoch": 0.93, + "learning_rate": 0.00011290880566394467, + "loss": 1.1998, + "step": 12145 + }, + { + "epoch": 0.93, + "learning_rate": 0.00011286857878434371, + "loss": 3.2726, + "step": 12150 + }, + { + "epoch": 0.93, + "learning_rate": 0.00011282835190474274, + "loss": 4.6418, + "step": 12155 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001127881250251418, + "loss": 4.0848, + "step": 12160 + }, + { + "epoch": 0.93, + "learning_rate": 0.00011274789814554086, + "loss": 4.4574, + "step": 12165 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001127076712659399, + "loss": 4.4372, + "step": 12170 + }, + { + "epoch": 0.93, + "learning_rate": 0.00011266744438633896, + "loss": 4.7805, + "step": 12175 + }, + { + "epoch": 0.93, + "learning_rate": 0.00011262721750673802, + "loss": 2.6393, + "step": 12180 + }, + { + "epoch": 0.93, + "learning_rate": 0.00011258699062713707, + "loss": 3.8619, + "step": 12185 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001125467637475361, + "loss": 3.5495, + "step": 12190 + }, + { + "epoch": 0.93, + "learning_rate": 0.00011250653686793516, + "loss": 3.6609, + "step": 12195 + }, + { + "epoch": 0.93, + "learning_rate": 0.0001124663099883342, + "loss": 2.3156, + "step": 12200 + }, + { + "epoch": 0.93, + "learning_rate": 0.00011242608310873326, + "loss": 4.6609, + "step": 12205 + }, + { + "epoch": 0.93, + "learning_rate": 0.00011238585622913232, + "loss": 4.3674, + "step": 12210 + }, + { + "epoch": 0.93, + "learning_rate": 0.00011234562934953136, + "loss": 3.684, + "step": 12215 + }, + { + "epoch": 0.93, + "learning_rate": 0.00011230540246993042, + "loss": 4.9451, + "step": 12220 + }, + { + "epoch": 0.93, + "learning_rate": 0.00011226517559032948, + "loss": 2.7038, + "step": 12225 + }, + { + "epoch": 0.93, + "learning_rate": 0.00011222494871072851, + "loss": 3.1783, + "step": 12230 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011218472183112755, + "loss": 2.1691, + "step": 12235 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011214449495152661, + "loss": 2.7589, + "step": 12240 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011210426807192567, + "loss": 1.9916, + "step": 12245 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011206404119232471, + "loss": 2.93, + "step": 12250 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011202381431272377, + "loss": 3.9955, + "step": 12255 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011198358743312283, + "loss": 4.1354, + "step": 12260 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011194336055352186, + "loss": 3.9764, + "step": 12265 + }, + { + "epoch": 0.94, + "learning_rate": 0.0001119031336739209, + "loss": 3.9035, + "step": 12270 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011186290679431996, + "loss": 3.8275, + "step": 12275 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011182267991471902, + "loss": 3.5661, + "step": 12280 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011178245303511807, + "loss": 3.2651, + "step": 12285 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011174222615551713, + "loss": 1.1449, + "step": 12290 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011170199927591618, + "loss": 1.265, + "step": 12295 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011166177239631523, + "loss": 0.3177, + "step": 12300 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011162154551671426, + "loss": 5.1195, + "step": 12305 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011158131863711332, + "loss": 4.2492, + "step": 12310 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011154109175751238, + "loss": 4.85, + "step": 12315 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011150086487791142, + "loss": 3.5096, + "step": 12320 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011146063799831048, + "loss": 3.4133, + "step": 12325 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011142041111870954, + "loss": 2.644, + "step": 12330 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011138018423910858, + "loss": 1.4073, + "step": 12335 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011133995735950761, + "loss": 2.6904, + "step": 12340 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011129973047990667, + "loss": 4.1987, + "step": 12345 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011125950360030573, + "loss": 4.0315, + "step": 12350 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011121927672070477, + "loss": 4.21, + "step": 12355 + }, + { + "epoch": 0.94, + "learning_rate": 0.00011117904984110383, + "loss": 4.2699, + "step": 12360 + }, + { + "epoch": 0.95, + "learning_rate": 0.00011113882296150289, + "loss": 5.3631, + "step": 12365 + }, + { + "epoch": 0.95, + "learning_rate": 0.00011109859608190194, + "loss": 4.8795, + "step": 12370 + }, + { + "epoch": 0.95, + "learning_rate": 0.000111058369202301, + "loss": 4.0154, + "step": 12375 + }, + { + "epoch": 0.95, + "learning_rate": 0.00011101814232270003, + "loss": 4.0755, + "step": 12380 + }, + { + "epoch": 0.95, + "learning_rate": 0.00011097791544309908, + "loss": 4.2021, + "step": 12385 + }, + { + "epoch": 0.95, + "learning_rate": 0.00011093768856349813, + "loss": 2.2228, + "step": 12390 + }, + { + "epoch": 0.95, + "learning_rate": 0.00011089746168389719, + "loss": 2.6391, + "step": 12395 + }, + { + "epoch": 0.95, + "learning_rate": 0.00011085723480429624, + "loss": 2.3025, + "step": 12400 + }, + { + "epoch": 0.95, + "learning_rate": 0.00011081700792469529, + "loss": 5.06, + "step": 12405 + }, + { + "epoch": 0.95, + "learning_rate": 0.00011077678104509435, + "loss": 4.7277, + "step": 12410 + }, + { + "epoch": 0.95, + "learning_rate": 0.00011073655416549338, + "loss": 4.8125, + "step": 12415 + }, + { + "epoch": 0.95, + "learning_rate": 0.00011069632728589244, + "loss": 3.7311, + "step": 12420 + }, + { + "epoch": 0.95, + "learning_rate": 0.00011065610040629148, + "loss": 3.3934, + "step": 12425 + }, + { + "epoch": 0.95, + "learning_rate": 0.00011061587352669054, + "loss": 4.0301, + "step": 12430 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001105756466470896, + "loss": 3.5967, + "step": 12435 + }, + { + "epoch": 0.95, + "learning_rate": 0.00011053541976748864, + "loss": 3.6605, + "step": 12440 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001104951928878877, + "loss": 2.1137, + "step": 12445 + }, + { + "epoch": 0.95, + "learning_rate": 0.00011045496600828676, + "loss": 1.5456, + "step": 12450 + }, + { + "epoch": 0.95, + "learning_rate": 0.00011041473912868579, + "loss": 4.5984, + "step": 12455 + }, + { + "epoch": 0.95, + "learning_rate": 0.00011037451224908484, + "loss": 4.9756, + "step": 12460 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001103342853694839, + "loss": 4.725, + "step": 12465 + }, + { + "epoch": 0.95, + "learning_rate": 0.00011029405848988295, + "loss": 5.0465, + "step": 12470 + }, + { + "epoch": 0.95, + "learning_rate": 0.000110253831610282, + "loss": 3.6682, + "step": 12475 + }, + { + "epoch": 0.95, + "learning_rate": 0.00011021360473068105, + "loss": 4.3359, + "step": 12480 + }, + { + "epoch": 0.95, + "learning_rate": 0.0001101733778510801, + "loss": 2.6403, + "step": 12485 + }, + { + "epoch": 0.95, + "learning_rate": 0.00011013315097147914, + "loss": 3.0468, + "step": 12490 + }, + { + "epoch": 0.95, + "learning_rate": 0.00011009292409187819, + "loss": 3.0222, + "step": 12495 + }, + { + "epoch": 0.96, + "learning_rate": 0.00011005269721227725, + "loss": 0.6001, + "step": 12500 + }, + { + "epoch": 0.96, + "learning_rate": 0.00011001247033267629, + "loss": 4.099, + "step": 12505 + }, + { + "epoch": 0.96, + "learning_rate": 0.00010997224345307535, + "loss": 3.7836, + "step": 12510 + }, + { + "epoch": 0.96, + "learning_rate": 0.00010993201657347441, + "loss": 4.1847, + "step": 12515 + }, + { + "epoch": 0.96, + "learning_rate": 0.00010989178969387345, + "loss": 4.2145, + "step": 12520 + }, + { + "epoch": 0.96, + "learning_rate": 0.00010985156281427251, + "loss": 3.6762, + "step": 12525 + }, + { + "epoch": 0.96, + "learning_rate": 0.00010981133593467154, + "loss": 3.2025, + "step": 12530 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001097711090550706, + "loss": 2.674, + "step": 12535 + }, + { + "epoch": 0.96, + "learning_rate": 0.00010973088217546965, + "loss": 2.7232, + "step": 12540 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001096906552958687, + "loss": 0.4914, + "step": 12545 + }, + { + "epoch": 0.96, + "learning_rate": 0.00010965042841626776, + "loss": 2.267, + "step": 12550 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001096102015366668, + "loss": 3.6879, + "step": 12555 + }, + { + "epoch": 0.96, + "learning_rate": 0.00010956997465706586, + "loss": 4.2727, + "step": 12560 + }, + { + "epoch": 0.96, + "learning_rate": 0.0001095297477774649, + "loss": 3.6961, + "step": 12565 + }, + { + "epoch": 0.96, + "learning_rate": 0.00010948952089786395, + "loss": 2.9307, + "step": 12570 + }, + { + "epoch": 0.96, + "learning_rate": 0.000109449294018263, + "loss": 2.4069, + "step": 12575 + }, + { + "epoch": 0.96, + "learning_rate": 0.00010940906713866206, + "loss": 2.6994, + "step": 12580 + }, + { + "epoch": 0.96, + "learning_rate": 0.00010936884025906112, + "loss": 1.9843, + "step": 12585 + }, + { + "epoch": 0.96, + "learning_rate": 0.00010932861337946016, + "loss": 2.8084, + "step": 12590 + }, + { + "epoch": 0.96, + "learning_rate": 0.00010928838649985922, + "loss": 2.6188, + "step": 12595 + }, + { + "epoch": 0.96, + "learning_rate": 0.00010924815962025828, + "loss": 0.5502, + "step": 12600 + }, + { + "epoch": 0.96, + "learning_rate": 0.00010920793274065731, + "loss": 5.0705, + "step": 12605 + }, + { + "epoch": 0.96, + "learning_rate": 0.00010916770586105635, + "loss": 3.4141, + "step": 12610 + }, + { + "epoch": 0.96, + "learning_rate": 0.00010912747898145541, + "loss": 3.7998, + "step": 12615 + }, + { + "epoch": 0.96, + "learning_rate": 0.00010908725210185447, + "loss": 3.3628, + "step": 12620 + }, + { + "epoch": 0.96, + "learning_rate": 0.00010904702522225351, + "loss": 3.8773, + "step": 12625 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010900679834265257, + "loss": 3.2954, + "step": 12630 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010896657146305163, + "loss": 3.2307, + "step": 12635 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010892634458345066, + "loss": 2.1269, + "step": 12640 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001088861177038497, + "loss": 2.2615, + "step": 12645 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010884589082424876, + "loss": 4.0116, + "step": 12650 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010880566394464782, + "loss": 4.5145, + "step": 12655 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010876543706504687, + "loss": 4.0908, + "step": 12660 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010872521018544593, + "loss": 3.6911, + "step": 12665 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010868498330584498, + "loss": 3.7748, + "step": 12670 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010864475642624403, + "loss": 3.6313, + "step": 12675 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010860452954664306, + "loss": 1.8294, + "step": 12680 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010856430266704212, + "loss": 3.2862, + "step": 12685 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010852407578744118, + "loss": 2.1133, + "step": 12690 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010848384890784022, + "loss": 2.6551, + "step": 12695 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010844362202823928, + "loss": 1.0988, + "step": 12700 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010840339514863834, + "loss": 4.7625, + "step": 12705 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010836316826903738, + "loss": 3.9336, + "step": 12710 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010832294138943641, + "loss": 4.7963, + "step": 12715 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010828271450983547, + "loss": 3.2798, + "step": 12720 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010824248763023453, + "loss": 2.7068, + "step": 12725 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010820226075063357, + "loss": 3.1443, + "step": 12730 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010816203387103263, + "loss": 3.0755, + "step": 12735 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010812180699143169, + "loss": 1.3621, + "step": 12740 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010808158011183074, + "loss": 2.0039, + "step": 12745 + }, + { + "epoch": 0.97, + "learning_rate": 0.0001080413532322298, + "loss": 1.3085, + "step": 12750 + }, + { + "epoch": 0.97, + "learning_rate": 0.00010800112635262882, + "loss": 5.2609, + "step": 12755 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010796089947302788, + "loss": 4.3895, + "step": 12760 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010792067259342693, + "loss": 3.3505, + "step": 12765 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010788044571382599, + "loss": 4.2628, + "step": 12770 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010784021883422504, + "loss": 5.0826, + "step": 12775 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010779999195462409, + "loss": 2.538, + "step": 12780 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010775976507502315, + "loss": 3.901, + "step": 12785 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010771953819542218, + "loss": 1.9004, + "step": 12790 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010767931131582124, + "loss": 0.5085, + "step": 12795 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010763908443622028, + "loss": 3.4105, + "step": 12800 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010759885755661934, + "loss": 3.8672, + "step": 12805 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010755863067701838, + "loss": 4.4018, + "step": 12810 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010751840379741744, + "loss": 3.9307, + "step": 12815 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001074781769178165, + "loss": 4.3627, + "step": 12820 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010743795003821555, + "loss": 3.4223, + "step": 12825 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010739772315861458, + "loss": 2.4649, + "step": 12830 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010735749627901363, + "loss": 1.3693, + "step": 12835 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010731726939941269, + "loss": 1.899, + "step": 12840 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010727704251981174, + "loss": 0.5113, + "step": 12845 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001072368156402108, + "loss": 2.3541, + "step": 12850 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010719658876060985, + "loss": 5.3012, + "step": 12855 + }, + { + "epoch": 0.98, + "learning_rate": 0.0001071563618810089, + "loss": 3.8258, + "step": 12860 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010711613500140793, + "loss": 4.4615, + "step": 12865 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010707590812180699, + "loss": 4.375, + "step": 12870 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010703568124220605, + "loss": 3.2504, + "step": 12875 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010699545436260509, + "loss": 2.7794, + "step": 12880 + }, + { + "epoch": 0.98, + "learning_rate": 0.00010695522748300415, + "loss": 2.613, + "step": 12885 + }, + { + "epoch": 0.99, + "learning_rate": 0.00010691500060340321, + "loss": 2.0133, + "step": 12890 + }, + { + "epoch": 0.99, + "learning_rate": 0.00010687477372380225, + "loss": 3.0668, + "step": 12895 + }, + { + "epoch": 0.99, + "learning_rate": 0.00010683454684420131, + "loss": 2.141, + "step": 12900 + }, + { + "epoch": 0.99, + "learning_rate": 0.00010679431996460034, + "loss": 3.9652, + "step": 12905 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001067540930849994, + "loss": 5.0807, + "step": 12910 + }, + { + "epoch": 0.99, + "learning_rate": 0.00010671386620539844, + "loss": 4.0959, + "step": 12915 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001066736393257975, + "loss": 2.3016, + "step": 12920 + }, + { + "epoch": 0.99, + "learning_rate": 0.00010663341244619656, + "loss": 3.6994, + "step": 12925 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001065931855665956, + "loss": 3.4188, + "step": 12930 + }, + { + "epoch": 0.99, + "learning_rate": 0.00010655295868699466, + "loss": 2.1822, + "step": 12935 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001065127318073937, + "loss": 2.6569, + "step": 12940 + }, + { + "epoch": 0.99, + "learning_rate": 0.00010647250492779275, + "loss": 2.9767, + "step": 12945 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001064322780481918, + "loss": 0.6595, + "step": 12950 + }, + { + "epoch": 0.99, + "learning_rate": 0.00010639205116859086, + "loss": 4.4453, + "step": 12955 + }, + { + "epoch": 0.99, + "learning_rate": 0.00010635182428898991, + "loss": 4.6098, + "step": 12960 + }, + { + "epoch": 0.99, + "learning_rate": 0.00010631159740938896, + "loss": 4.4469, + "step": 12965 + }, + { + "epoch": 0.99, + "learning_rate": 0.00010627137052978802, + "loss": 4.1836, + "step": 12970 + }, + { + "epoch": 0.99, + "learning_rate": 0.00010623114365018708, + "loss": 3.3398, + "step": 12975 + }, + { + "epoch": 0.99, + "learning_rate": 0.0001061909167705861, + "loss": 3.8299, + "step": 12980 + }, + { + "epoch": 0.99, + "learning_rate": 0.00010615068989098515, + "loss": 2.2403, + "step": 12985 + }, + { + "epoch": 0.99, + "learning_rate": 0.00010611046301138421, + "loss": 1.9197, + "step": 12990 + }, + { + "epoch": 0.99, + "learning_rate": 0.00010607023613178327, + "loss": 1.7764, + "step": 12995 + }, + { + "epoch": 0.99, + "learning_rate": 0.00010603000925218231, + "loss": 3.4165, + "step": 13000 + }, + { + "epoch": 0.99, + "learning_rate": 0.00010598978237258137, + "loss": 5.0324, + "step": 13005 + }, + { + "epoch": 0.99, + "learning_rate": 0.00010594955549298043, + "loss": 4.4018, + "step": 13010 + }, + { + "epoch": 0.99, + "learning_rate": 0.00010590932861337946, + "loss": 4.1213, + "step": 13015 + }, + { + "epoch": 1.0, + "learning_rate": 0.0001058691017337785, + "loss": 4.3871, + "step": 13020 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010582887485417756, + "loss": 2.7148, + "step": 13025 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010578864797457662, + "loss": 2.8373, + "step": 13030 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010574842109497567, + "loss": 2.7298, + "step": 13035 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010570819421537472, + "loss": 0.7858, + "step": 13040 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010566796733577378, + "loss": 1.4459, + "step": 13045 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010562774045617283, + "loss": 2.2835, + "step": 13050 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010558751357657186, + "loss": 4.6979, + "step": 13055 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010554728669697092, + "loss": 3.1915, + "step": 13060 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010550705981736997, + "loss": 2.701, + "step": 13065 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010546683293776902, + "loss": 3.3332, + "step": 13070 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010542660605816808, + "loss": 3.1688, + "step": 13075 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010538637917856714, + "loss": 2.2415, + "step": 13080 + }, + { + "epoch": 1.0, + "eval_exact_match": 19.315068493150687, + "eval_f1": 23.273953174088238, + "eval_loss": 3.1703224182128906, + "eval_runtime": 132.4755, + "eval_samples_per_second": 11.021, + "eval_steps_per_second": 11.021, + "step": 13084 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010534615229896618, + "loss": 5.7792, + "step": 13085 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010530592541936521, + "loss": 3.9447, + "step": 13090 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010526569853976427, + "loss": 3.901, + "step": 13095 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010522547166016333, + "loss": 4.2629, + "step": 13100 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010518524478056237, + "loss": 3.0552, + "step": 13105 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010514501790096143, + "loss": 2.5984, + "step": 13110 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010510479102136048, + "loss": 2.634, + "step": 13115 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010506456414175953, + "loss": 2.7914, + "step": 13120 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010502433726215859, + "loss": 2.0628, + "step": 13125 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010498411038255762, + "loss": 3.1447, + "step": 13130 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010494388350295667, + "loss": 2.7328, + "step": 13135 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010490365662335573, + "loss": 3.76, + "step": 13140 + }, + { + "epoch": 1.0, + "learning_rate": 0.00010486342974375478, + "loss": 4.6025, + "step": 13145 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010482320286415383, + "loss": 3.5472, + "step": 13150 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010478297598455289, + "loss": 3.857, + "step": 13155 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010474274910495195, + "loss": 2.5723, + "step": 13160 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010470252222535098, + "loss": 2.8614, + "step": 13165 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010466229534575002, + "loss": 2.782, + "step": 13170 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010462206846614908, + "loss": 2.5527, + "step": 13175 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010458184158654814, + "loss": 2.8946, + "step": 13180 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010454161470694718, + "loss": 2.8931, + "step": 13185 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010450138782734624, + "loss": 4.4236, + "step": 13190 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001044611609477453, + "loss": 4.7928, + "step": 13195 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010442093406814434, + "loss": 3.9273, + "step": 13200 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010438070718854338, + "loss": 3.9992, + "step": 13205 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010434048030894243, + "loss": 4.2393, + "step": 13210 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010430025342934149, + "loss": 3.4548, + "step": 13215 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010426002654974054, + "loss": 3.0828, + "step": 13220 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001042197996701396, + "loss": 2.7957, + "step": 13225 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010417957279053865, + "loss": 2.4393, + "step": 13230 + }, + { + "epoch": 1.01, + "learning_rate": 0.0001041393459109377, + "loss": 3.4124, + "step": 13235 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010409911903133673, + "loss": 4.693, + "step": 13240 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010405889215173579, + "loss": 3.4697, + "step": 13245 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010401866527213485, + "loss": 2.767, + "step": 13250 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010397843839253389, + "loss": 3.817, + "step": 13255 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010393821151293295, + "loss": 1.5805, + "step": 13260 + }, + { + "epoch": 1.01, + "learning_rate": 0.000103897984633332, + "loss": 2.659, + "step": 13265 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010385775775373105, + "loss": 1.4231, + "step": 13270 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010381753087413011, + "loss": 1.9977, + "step": 13275 + }, + { + "epoch": 1.01, + "learning_rate": 0.00010377730399452914, + "loss": 1.0283, + "step": 13280 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001037370771149282, + "loss": 4.241, + "step": 13285 + }, + { + "epoch": 1.02, + "learning_rate": 0.00010369685023532724, + "loss": 3.7126, + "step": 13290 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001036566233557263, + "loss": 3.5291, + "step": 13295 + }, + { + "epoch": 1.02, + "learning_rate": 0.00010361639647612536, + "loss": 4.2404, + "step": 13300 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001035761695965244, + "loss": 3.9266, + "step": 13305 + }, + { + "epoch": 1.02, + "learning_rate": 0.00010353594271692346, + "loss": 3.1577, + "step": 13310 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001034957158373225, + "loss": 4.3291, + "step": 13315 + }, + { + "epoch": 1.02, + "learning_rate": 0.00010345548895772155, + "loss": 2.2583, + "step": 13320 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001034152620781206, + "loss": 2.4731, + "step": 13325 + }, + { + "epoch": 1.02, + "learning_rate": 0.00010337503519851966, + "loss": 2.9766, + "step": 13330 + }, + { + "epoch": 1.02, + "learning_rate": 0.00010333480831891871, + "loss": 3.4053, + "step": 13335 + }, + { + "epoch": 1.02, + "learning_rate": 0.00010329458143931776, + "loss": 5.1129, + "step": 13340 + }, + { + "epoch": 1.02, + "learning_rate": 0.00010325435455971682, + "loss": 4.6836, + "step": 13345 + }, + { + "epoch": 1.02, + "learning_rate": 0.00010321412768011587, + "loss": 3.6654, + "step": 13350 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001031739008005149, + "loss": 4.1396, + "step": 13355 + }, + { + "epoch": 1.02, + "learning_rate": 0.00010313367392091395, + "loss": 3.5745, + "step": 13360 + }, + { + "epoch": 1.02, + "learning_rate": 0.00010309344704131301, + "loss": 2.6486, + "step": 13365 + }, + { + "epoch": 1.02, + "learning_rate": 0.00010305322016171207, + "loss": 2.9922, + "step": 13370 + }, + { + "epoch": 1.02, + "learning_rate": 0.00010301299328211111, + "loss": 2.7838, + "step": 13375 + }, + { + "epoch": 1.02, + "learning_rate": 0.00010297276640251017, + "loss": 1.5658, + "step": 13380 + }, + { + "epoch": 1.02, + "learning_rate": 0.00010293253952290923, + "loss": 1.1819, + "step": 13385 + }, + { + "epoch": 1.02, + "learning_rate": 0.00010289231264330826, + "loss": 3.4799, + "step": 13390 + }, + { + "epoch": 1.02, + "learning_rate": 0.0001028520857637073, + "loss": 4.5635, + "step": 13395 + }, + { + "epoch": 1.02, + "learning_rate": 0.00010281185888410636, + "loss": 3.1668, + "step": 13400 + }, + { + "epoch": 1.02, + "learning_rate": 0.00010277163200450542, + "loss": 3.4604, + "step": 13405 + }, + { + "epoch": 1.02, + "learning_rate": 0.00010273140512490447, + "loss": 2.7437, + "step": 13410 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010269117824530352, + "loss": 1.6988, + "step": 13415 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010265095136570257, + "loss": 3.8053, + "step": 13420 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010261072448610163, + "loss": 1.8165, + "step": 13425 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010257049760650066, + "loss": 1.5206, + "step": 13430 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010253027072689972, + "loss": 1.837, + "step": 13435 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010249004384729876, + "loss": 3.6209, + "step": 13440 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010244981696769782, + "loss": 5.1863, + "step": 13445 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010240959008809688, + "loss": 3.5703, + "step": 13450 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010236936320849592, + "loss": 4.4045, + "step": 13455 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010232913632889498, + "loss": 2.1611, + "step": 13460 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010228890944929401, + "loss": 1.9873, + "step": 13465 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010224868256969307, + "loss": 2.744, + "step": 13470 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010220845569009211, + "loss": 3.7924, + "step": 13475 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010216822881049117, + "loss": 1.564, + "step": 13480 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010212800193089023, + "loss": 1.5397, + "step": 13485 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010208777505128927, + "loss": 4.3014, + "step": 13490 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010204754817168833, + "loss": 4.2459, + "step": 13495 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010200732129208739, + "loss": 3.9824, + "step": 13500 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010196709441248642, + "loss": 3.8135, + "step": 13505 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010192686753288547, + "loss": 2.5058, + "step": 13510 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010188664065328453, + "loss": 3.7534, + "step": 13515 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010184641377368358, + "loss": 3.8482, + "step": 13520 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010180618689408263, + "loss": 2.4387, + "step": 13525 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010176596001448169, + "loss": 1.9385, + "step": 13530 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010172573313488074, + "loss": 2.6273, + "step": 13535 + }, + { + "epoch": 1.03, + "learning_rate": 0.00010168550625527978, + "loss": 4.2504, + "step": 13540 + }, + { + "epoch": 1.04, + "learning_rate": 0.00010164527937567882, + "loss": 4.3891, + "step": 13545 + }, + { + "epoch": 1.04, + "learning_rate": 0.00010160505249607788, + "loss": 4.7129, + "step": 13550 + }, + { + "epoch": 1.04, + "learning_rate": 0.00010156482561647694, + "loss": 4.8672, + "step": 13555 + }, + { + "epoch": 1.04, + "learning_rate": 0.00010152459873687598, + "loss": 4.241, + "step": 13560 + }, + { + "epoch": 1.04, + "learning_rate": 0.00010148437185727504, + "loss": 3.9742, + "step": 13565 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001014441449776741, + "loss": 2.0136, + "step": 13570 + }, + { + "epoch": 1.04, + "learning_rate": 0.00010140391809807314, + "loss": 1.6322, + "step": 13575 + }, + { + "epoch": 1.04, + "learning_rate": 0.00010136369121847217, + "loss": 1.4756, + "step": 13580 + }, + { + "epoch": 1.04, + "learning_rate": 0.00010132346433887123, + "loss": 4.3438, + "step": 13585 + }, + { + "epoch": 1.04, + "learning_rate": 0.00010128323745927029, + "loss": 4.408, + "step": 13590 + }, + { + "epoch": 1.04, + "learning_rate": 0.00010124301057966934, + "loss": 3.9469, + "step": 13595 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001012027837000684, + "loss": 3.242, + "step": 13600 + }, + { + "epoch": 1.04, + "learning_rate": 0.00010116255682046745, + "loss": 3.3846, + "step": 13605 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001011223299408665, + "loss": 3.5865, + "step": 13610 + }, + { + "epoch": 1.04, + "learning_rate": 0.00010108210306126553, + "loss": 0.9825, + "step": 13615 + }, + { + "epoch": 1.04, + "learning_rate": 0.00010104187618166459, + "loss": 1.6402, + "step": 13620 + }, + { + "epoch": 1.04, + "learning_rate": 0.00010100164930206364, + "loss": 1.2693, + "step": 13625 + }, + { + "epoch": 1.04, + "learning_rate": 0.00010096142242246269, + "loss": 2.1932, + "step": 13630 + }, + { + "epoch": 1.04, + "learning_rate": 0.00010092119554286175, + "loss": 3.0698, + "step": 13635 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001008809686632608, + "loss": 4.5355, + "step": 13640 + }, + { + "epoch": 1.04, + "learning_rate": 0.00010084074178365985, + "loss": 4.377, + "step": 13645 + }, + { + "epoch": 1.04, + "learning_rate": 0.00010080051490405891, + "loss": 2.7748, + "step": 13650 + }, + { + "epoch": 1.04, + "learning_rate": 0.00010076028802445794, + "loss": 2.7549, + "step": 13655 + }, + { + "epoch": 1.04, + "learning_rate": 0.000100720061144857, + "loss": 2.8492, + "step": 13660 + }, + { + "epoch": 1.04, + "learning_rate": 0.00010067983426525604, + "loss": 2.1549, + "step": 13665 + }, + { + "epoch": 1.04, + "learning_rate": 0.0001006396073856551, + "loss": 4.6738, + "step": 13670 + }, + { + "epoch": 1.05, + "learning_rate": 0.00010059938050605416, + "loss": 3.3374, + "step": 13675 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001005591536264532, + "loss": 1.8652, + "step": 13680 + }, + { + "epoch": 1.05, + "learning_rate": 0.00010051892674685226, + "loss": 1.9717, + "step": 13685 + }, + { + "epoch": 1.05, + "learning_rate": 0.00010047869986725129, + "loss": 4.8148, + "step": 13690 + }, + { + "epoch": 1.05, + "learning_rate": 0.00010043847298765035, + "loss": 2.9995, + "step": 13695 + }, + { + "epoch": 1.05, + "learning_rate": 0.0001003982461080494, + "loss": 3.6135, + "step": 13700 + }, + { + "epoch": 1.05, + "learning_rate": 0.00010035801922844845, + "loss": 3.8224, + "step": 13705 + }, + { + "epoch": 1.05, + "learning_rate": 0.00010031779234884751, + "loss": 3.3087, + "step": 13710 + }, + { + "epoch": 1.05, + "learning_rate": 0.00010027756546924656, + "loss": 3.1735, + "step": 13715 + }, + { + "epoch": 1.05, + "learning_rate": 0.00010023733858964562, + "loss": 2.4742, + "step": 13720 + }, + { + "epoch": 1.05, + "learning_rate": 0.00010019711171004466, + "loss": 1.4694, + "step": 13725 + }, + { + "epoch": 1.05, + "learning_rate": 0.00010015688483044369, + "loss": 0.4333, + "step": 13730 + }, + { + "epoch": 1.05, + "learning_rate": 0.00010011665795084275, + "loss": 1.2572, + "step": 13735 + }, + { + "epoch": 1.05, + "learning_rate": 0.00010007643107124181, + "loss": 3.6918, + "step": 13740 + }, + { + "epoch": 1.05, + "learning_rate": 0.00010003620419164085, + "loss": 3.8943, + "step": 13745 + }, + { + "epoch": 1.05, + "learning_rate": 9.999597731203991e-05, + "loss": 4.2775, + "step": 13750 + }, + { + "epoch": 1.05, + "learning_rate": 9.995575043243897e-05, + "loss": 3.6002, + "step": 13755 + }, + { + "epoch": 1.05, + "learning_rate": 9.991552355283801e-05, + "loss": 2.8794, + "step": 13760 + }, + { + "epoch": 1.05, + "learning_rate": 9.987529667323706e-05, + "loss": 3.1105, + "step": 13765 + }, + { + "epoch": 1.05, + "learning_rate": 9.983506979363612e-05, + "loss": 4.7389, + "step": 13770 + }, + { + "epoch": 1.05, + "learning_rate": 9.979484291403516e-05, + "loss": 2.5333, + "step": 13775 + }, + { + "epoch": 1.05, + "learning_rate": 9.97546160344342e-05, + "loss": 2.6911, + "step": 13780 + }, + { + "epoch": 1.05, + "learning_rate": 9.971438915483326e-05, + "loss": 0.6666, + "step": 13785 + }, + { + "epoch": 1.05, + "learning_rate": 9.967416227523232e-05, + "loss": 4.4268, + "step": 13790 + }, + { + "epoch": 1.05, + "learning_rate": 9.963393539563137e-05, + "loss": 4.1672, + "step": 13795 + }, + { + "epoch": 1.05, + "learning_rate": 9.959370851603041e-05, + "loss": 3.8434, + "step": 13800 + }, + { + "epoch": 1.06, + "learning_rate": 9.955348163642947e-05, + "loss": 4.1248, + "step": 13805 + }, + { + "epoch": 1.06, + "learning_rate": 9.951325475682853e-05, + "loss": 3.2066, + "step": 13810 + }, + { + "epoch": 1.06, + "learning_rate": 9.947302787722756e-05, + "loss": 3.5237, + "step": 13815 + }, + { + "epoch": 1.06, + "learning_rate": 9.943280099762662e-05, + "loss": 3.5643, + "step": 13820 + }, + { + "epoch": 1.06, + "learning_rate": 9.939257411802568e-05, + "loss": 3.0314, + "step": 13825 + }, + { + "epoch": 1.06, + "learning_rate": 9.935234723842472e-05, + "loss": 1.1349, + "step": 13830 + }, + { + "epoch": 1.06, + "learning_rate": 9.931212035882377e-05, + "loss": 3.0678, + "step": 13835 + }, + { + "epoch": 1.06, + "learning_rate": 9.927189347922282e-05, + "loss": 3.6559, + "step": 13840 + }, + { + "epoch": 1.06, + "learning_rate": 9.923166659962188e-05, + "loss": 4.8559, + "step": 13845 + }, + { + "epoch": 1.06, + "learning_rate": 9.919143972002091e-05, + "loss": 3.8783, + "step": 13850 + }, + { + "epoch": 1.06, + "learning_rate": 9.915121284041997e-05, + "loss": 4.1764, + "step": 13855 + }, + { + "epoch": 1.06, + "learning_rate": 9.911098596081903e-05, + "loss": 3.7188, + "step": 13860 + }, + { + "epoch": 1.06, + "learning_rate": 9.907075908121807e-05, + "loss": 2.8204, + "step": 13865 + }, + { + "epoch": 1.06, + "learning_rate": 9.903053220161712e-05, + "loss": 3.4747, + "step": 13870 + }, + { + "epoch": 1.06, + "learning_rate": 9.899030532201618e-05, + "loss": 2.0576, + "step": 13875 + }, + { + "epoch": 1.06, + "learning_rate": 9.895007844241522e-05, + "loss": 1.8002, + "step": 13880 + }, + { + "epoch": 1.06, + "learning_rate": 9.890985156281428e-05, + "loss": 1.943, + "step": 13885 + }, + { + "epoch": 1.06, + "learning_rate": 9.886962468321332e-05, + "loss": 5.2752, + "step": 13890 + }, + { + "epoch": 1.06, + "learning_rate": 9.882939780361238e-05, + "loss": 3.9357, + "step": 13895 + }, + { + "epoch": 1.06, + "learning_rate": 9.878917092401143e-05, + "loss": 3.7773, + "step": 13900 + }, + { + "epoch": 1.06, + "learning_rate": 9.874894404441049e-05, + "loss": 5.2211, + "step": 13905 + }, + { + "epoch": 1.06, + "learning_rate": 9.870871716480953e-05, + "loss": 3.1906, + "step": 13910 + }, + { + "epoch": 1.06, + "learning_rate": 9.866849028520858e-05, + "loss": 2.1916, + "step": 13915 + }, + { + "epoch": 1.06, + "learning_rate": 9.862826340560763e-05, + "loss": 3.045, + "step": 13920 + }, + { + "epoch": 1.06, + "learning_rate": 9.858803652600668e-05, + "loss": 3.3197, + "step": 13925 + }, + { + "epoch": 1.06, + "learning_rate": 9.854780964640574e-05, + "loss": 1.3265, + "step": 13930 + }, + { + "epoch": 1.07, + "learning_rate": 9.850758276680478e-05, + "loss": 2.4261, + "step": 13935 + }, + { + "epoch": 1.07, + "learning_rate": 9.846735588720384e-05, + "loss": 4.1789, + "step": 13940 + }, + { + "epoch": 1.07, + "learning_rate": 9.842712900760288e-05, + "loss": 4.1479, + "step": 13945 + }, + { + "epoch": 1.07, + "learning_rate": 9.838690212800193e-05, + "loss": 3.1523, + "step": 13950 + }, + { + "epoch": 1.07, + "learning_rate": 9.834667524840099e-05, + "loss": 3.0566, + "step": 13955 + }, + { + "epoch": 1.07, + "learning_rate": 9.830644836880005e-05, + "loss": 3.6089, + "step": 13960 + }, + { + "epoch": 1.07, + "learning_rate": 9.826622148919909e-05, + "loss": 2.8464, + "step": 13965 + }, + { + "epoch": 1.07, + "learning_rate": 9.822599460959813e-05, + "loss": 1.619, + "step": 13970 + }, + { + "epoch": 1.07, + "learning_rate": 9.818576772999719e-05, + "loss": 2.2897, + "step": 13975 + }, + { + "epoch": 1.07, + "learning_rate": 9.814554085039625e-05, + "loss": 1.4078, + "step": 13980 + }, + { + "epoch": 1.07, + "learning_rate": 9.810531397079528e-05, + "loss": 2.4326, + "step": 13985 + }, + { + "epoch": 1.07, + "learning_rate": 9.806508709119434e-05, + "loss": 4.1578, + "step": 13990 + }, + { + "epoch": 1.07, + "learning_rate": 9.80248602115934e-05, + "loss": 4.7398, + "step": 13995 + }, + { + "epoch": 1.07, + "learning_rate": 9.798463333199244e-05, + "loss": 3.6618, + "step": 14000 + }, + { + "epoch": 1.07, + "learning_rate": 9.794440645239149e-05, + "loss": 3.7291, + "step": 14005 + }, + { + "epoch": 1.07, + "learning_rate": 9.790417957279055e-05, + "loss": 2.9765, + "step": 14010 + }, + { + "epoch": 1.07, + "learning_rate": 9.78639526931896e-05, + "loss": 3.6363, + "step": 14015 + }, + { + "epoch": 1.07, + "learning_rate": 9.782372581358864e-05, + "loss": 1.3663, + "step": 14020 + }, + { + "epoch": 1.07, + "learning_rate": 9.77834989339877e-05, + "loss": 3.0463, + "step": 14025 + }, + { + "epoch": 1.07, + "learning_rate": 9.774327205438675e-05, + "loss": 5.0727, + "step": 14030 + }, + { + "epoch": 1.07, + "learning_rate": 9.77030451747858e-05, + "loss": 1.6521, + "step": 14035 + }, + { + "epoch": 1.07, + "learning_rate": 9.766281829518484e-05, + "loss": 4.0453, + "step": 14040 + }, + { + "epoch": 1.07, + "learning_rate": 9.76225914155839e-05, + "loss": 4.0281, + "step": 14045 + }, + { + "epoch": 1.07, + "learning_rate": 9.758236453598294e-05, + "loss": 4.8402, + "step": 14050 + }, + { + "epoch": 1.07, + "learning_rate": 9.7542137656382e-05, + "loss": 4.2115, + "step": 14055 + }, + { + "epoch": 1.07, + "learning_rate": 9.750191077678105e-05, + "loss": 2.9648, + "step": 14060 + }, + { + "epoch": 1.07, + "learning_rate": 9.74616838971801e-05, + "loss": 2.7151, + "step": 14065 + }, + { + "epoch": 1.08, + "learning_rate": 9.742145701757915e-05, + "loss": 2.5117, + "step": 14070 + }, + { + "epoch": 1.08, + "learning_rate": 9.73812301379782e-05, + "loss": 1.9299, + "step": 14075 + }, + { + "epoch": 1.08, + "learning_rate": 9.734100325837725e-05, + "loss": 2.7252, + "step": 14080 + }, + { + "epoch": 1.08, + "learning_rate": 9.73007763787763e-05, + "loss": 2.3402, + "step": 14085 + }, + { + "epoch": 1.08, + "learning_rate": 9.726054949917536e-05, + "loss": 4.2039, + "step": 14090 + }, + { + "epoch": 1.08, + "learning_rate": 9.72203226195744e-05, + "loss": 4.5305, + "step": 14095 + }, + { + "epoch": 1.08, + "learning_rate": 9.718009573997346e-05, + "loss": 3.88, + "step": 14100 + }, + { + "epoch": 1.08, + "learning_rate": 9.71398688603725e-05, + "loss": 3.9152, + "step": 14105 + }, + { + "epoch": 1.08, + "learning_rate": 9.709964198077156e-05, + "loss": 2.9659, + "step": 14110 + }, + { + "epoch": 1.08, + "learning_rate": 9.70594151011706e-05, + "loss": 1.5826, + "step": 14115 + }, + { + "epoch": 1.08, + "learning_rate": 9.701918822156965e-05, + "loss": 2.4249, + "step": 14120 + }, + { + "epoch": 1.08, + "learning_rate": 9.697896134196871e-05, + "loss": 1.3763, + "step": 14125 + }, + { + "epoch": 1.08, + "learning_rate": 9.693873446236777e-05, + "loss": 3.5167, + "step": 14130 + }, + { + "epoch": 1.08, + "learning_rate": 9.689850758276681e-05, + "loss": 1.3236, + "step": 14135 + }, + { + "epoch": 1.08, + "learning_rate": 9.685828070316586e-05, + "loss": 3.5751, + "step": 14140 + }, + { + "epoch": 1.08, + "learning_rate": 9.681805382356492e-05, + "loss": 5.0203, + "step": 14145 + }, + { + "epoch": 1.08, + "learning_rate": 9.677782694396396e-05, + "loss": 4.1728, + "step": 14150 + }, + { + "epoch": 1.08, + "learning_rate": 9.6737600064363e-05, + "loss": 3.4594, + "step": 14155 + }, + { + "epoch": 1.08, + "learning_rate": 9.669737318476206e-05, + "loss": 3.3553, + "step": 14160 + }, + { + "epoch": 1.08, + "learning_rate": 9.665714630516112e-05, + "loss": 2.2479, + "step": 14165 + }, + { + "epoch": 1.08, + "learning_rate": 9.661691942556017e-05, + "loss": 1.877, + "step": 14170 + }, + { + "epoch": 1.08, + "learning_rate": 9.657669254595921e-05, + "loss": 2.6314, + "step": 14175 + }, + { + "epoch": 1.08, + "learning_rate": 9.653646566635827e-05, + "loss": 2.0204, + "step": 14180 + }, + { + "epoch": 1.08, + "learning_rate": 9.649623878675731e-05, + "loss": 0.9242, + "step": 14185 + }, + { + "epoch": 1.08, + "learning_rate": 9.645601190715636e-05, + "loss": 4.19, + "step": 14190 + }, + { + "epoch": 1.08, + "learning_rate": 9.641578502755542e-05, + "loss": 3.0912, + "step": 14195 + }, + { + "epoch": 1.09, + "learning_rate": 9.637555814795447e-05, + "loss": 4.9994, + "step": 14200 + }, + { + "epoch": 1.09, + "learning_rate": 9.633533126835352e-05, + "loss": 3.509, + "step": 14205 + }, + { + "epoch": 1.09, + "learning_rate": 9.629510438875256e-05, + "loss": 2.7194, + "step": 14210 + }, + { + "epoch": 1.09, + "learning_rate": 9.625487750915162e-05, + "loss": 2.4299, + "step": 14215 + }, + { + "epoch": 1.09, + "learning_rate": 9.621465062955067e-05, + "loss": 2.4398, + "step": 14220 + }, + { + "epoch": 1.09, + "learning_rate": 9.617442374994971e-05, + "loss": 2.8874, + "step": 14225 + }, + { + "epoch": 1.09, + "learning_rate": 9.613419687034877e-05, + "loss": 2.0132, + "step": 14230 + }, + { + "epoch": 1.09, + "learning_rate": 9.609396999074783e-05, + "loss": 2.243, + "step": 14235 + }, + { + "epoch": 1.09, + "learning_rate": 9.605374311114687e-05, + "loss": 4.0598, + "step": 14240 + }, + { + "epoch": 1.09, + "learning_rate": 9.601351623154592e-05, + "loss": 5.1547, + "step": 14245 + }, + { + "epoch": 1.09, + "learning_rate": 9.597328935194498e-05, + "loss": 3.9782, + "step": 14250 + }, + { + "epoch": 1.09, + "learning_rate": 9.593306247234402e-05, + "loss": 2.596, + "step": 14255 + }, + { + "epoch": 1.09, + "learning_rate": 9.589283559274308e-05, + "loss": 1.8018, + "step": 14260 + }, + { + "epoch": 1.09, + "learning_rate": 9.585260871314212e-05, + "loss": 3.1381, + "step": 14265 + }, + { + "epoch": 1.09, + "learning_rate": 9.581238183354118e-05, + "loss": 3.5054, + "step": 14270 + }, + { + "epoch": 1.09, + "learning_rate": 9.577215495394023e-05, + "loss": 2.9537, + "step": 14275 + }, + { + "epoch": 1.09, + "learning_rate": 9.573192807433928e-05, + "loss": 1.9567, + "step": 14280 + }, + { + "epoch": 1.09, + "learning_rate": 9.569170119473833e-05, + "loss": 2.3284, + "step": 14285 + }, + { + "epoch": 1.09, + "learning_rate": 9.565147431513737e-05, + "loss": 3.6228, + "step": 14290 + }, + { + "epoch": 1.09, + "learning_rate": 9.561124743553643e-05, + "loss": 3.4064, + "step": 14295 + }, + { + "epoch": 1.09, + "learning_rate": 9.557102055593548e-05, + "loss": 4.2512, + "step": 14300 + }, + { + "epoch": 1.09, + "learning_rate": 9.553079367633454e-05, + "loss": 3.4361, + "step": 14305 + }, + { + "epoch": 1.09, + "learning_rate": 9.549056679673358e-05, + "loss": 3.2916, + "step": 14310 + }, + { + "epoch": 1.09, + "learning_rate": 9.545033991713264e-05, + "loss": 4.3375, + "step": 14315 + }, + { + "epoch": 1.09, + "learning_rate": 9.541011303753168e-05, + "loss": 1.8709, + "step": 14320 + }, + { + "epoch": 1.09, + "learning_rate": 9.536988615793073e-05, + "loss": 1.8777, + "step": 14325 + }, + { + "epoch": 1.1, + "learning_rate": 9.532965927832979e-05, + "loss": 3.9809, + "step": 14330 + }, + { + "epoch": 1.1, + "learning_rate": 9.528943239872884e-05, + "loss": 4.1262, + "step": 14335 + }, + { + "epoch": 1.1, + "learning_rate": 9.524920551912788e-05, + "loss": 4.7285, + "step": 14340 + }, + { + "epoch": 1.1, + "learning_rate": 9.520897863952693e-05, + "loss": 4.2479, + "step": 14345 + }, + { + "epoch": 1.1, + "learning_rate": 9.516875175992599e-05, + "loss": 3.2966, + "step": 14350 + }, + { + "epoch": 1.1, + "learning_rate": 9.512852488032504e-05, + "loss": 3.9391, + "step": 14355 + }, + { + "epoch": 1.1, + "learning_rate": 9.508829800072408e-05, + "loss": 3.4237, + "step": 14360 + }, + { + "epoch": 1.1, + "learning_rate": 9.504807112112314e-05, + "loss": 2.8419, + "step": 14365 + }, + { + "epoch": 1.1, + "learning_rate": 9.50078442415222e-05, + "loss": 3.4014, + "step": 14370 + }, + { + "epoch": 1.1, + "learning_rate": 9.496761736192123e-05, + "loss": 2.3402, + "step": 14375 + }, + { + "epoch": 1.1, + "learning_rate": 9.492739048232029e-05, + "loss": 2.5134, + "step": 14380 + }, + { + "epoch": 1.1, + "learning_rate": 9.488716360271935e-05, + "loss": 2.5162, + "step": 14385 + }, + { + "epoch": 1.1, + "learning_rate": 9.484693672311839e-05, + "loss": 4.0307, + "step": 14390 + }, + { + "epoch": 1.1, + "learning_rate": 9.480670984351743e-05, + "loss": 4.8123, + "step": 14395 + }, + { + "epoch": 1.1, + "learning_rate": 9.476648296391649e-05, + "loss": 3.1288, + "step": 14400 + }, + { + "epoch": 1.1, + "learning_rate": 9.472625608431555e-05, + "loss": 3.1115, + "step": 14405 + }, + { + "epoch": 1.1, + "learning_rate": 9.46860292047146e-05, + "loss": 2.6729, + "step": 14410 + }, + { + "epoch": 1.1, + "learning_rate": 9.464580232511364e-05, + "loss": 1.9552, + "step": 14415 + }, + { + "epoch": 1.1, + "learning_rate": 9.46055754455127e-05, + "loss": 3.342, + "step": 14420 + }, + { + "epoch": 1.1, + "learning_rate": 9.456534856591174e-05, + "loss": 0.694, + "step": 14425 + }, + { + "epoch": 1.1, + "learning_rate": 9.45251216863108e-05, + "loss": 1.77, + "step": 14430 + }, + { + "epoch": 1.1, + "learning_rate": 9.448489480670985e-05, + "loss": 1.6854, + "step": 14435 + }, + { + "epoch": 1.1, + "learning_rate": 9.44446679271089e-05, + "loss": 4.6209, + "step": 14440 + }, + { + "epoch": 1.1, + "learning_rate": 9.440444104750795e-05, + "loss": 4.9201, + "step": 14445 + }, + { + "epoch": 1.1, + "learning_rate": 9.4364214167907e-05, + "loss": 4.5273, + "step": 14450 + }, + { + "epoch": 1.1, + "learning_rate": 9.432398728830605e-05, + "loss": 3.6521, + "step": 14455 + }, + { + "epoch": 1.11, + "learning_rate": 9.42837604087051e-05, + "loss": 3.382, + "step": 14460 + }, + { + "epoch": 1.11, + "learning_rate": 9.424353352910416e-05, + "loss": 2.8238, + "step": 14465 + }, + { + "epoch": 1.11, + "learning_rate": 9.42033066495032e-05, + "loss": 3.1938, + "step": 14470 + }, + { + "epoch": 1.11, + "learning_rate": 9.416307976990226e-05, + "loss": 1.7498, + "step": 14475 + }, + { + "epoch": 1.11, + "learning_rate": 9.41228528903013e-05, + "loss": 2.284, + "step": 14480 + }, + { + "epoch": 1.11, + "learning_rate": 9.408262601070036e-05, + "loss": 2.6607, + "step": 14485 + }, + { + "epoch": 1.11, + "learning_rate": 9.40423991310994e-05, + "loss": 3.5379, + "step": 14490 + }, + { + "epoch": 1.11, + "learning_rate": 9.400217225149845e-05, + "loss": 4.8506, + "step": 14495 + }, + { + "epoch": 1.11, + "learning_rate": 9.396194537189751e-05, + "loss": 4.3186, + "step": 14500 + }, + { + "epoch": 1.11, + "learning_rate": 9.392171849229657e-05, + "loss": 4.0508, + "step": 14505 + }, + { + "epoch": 1.11, + "learning_rate": 9.38814916126956e-05, + "loss": 3.2298, + "step": 14510 + }, + { + "epoch": 1.11, + "learning_rate": 9.384126473309466e-05, + "loss": 2.3462, + "step": 14515 + }, + { + "epoch": 1.11, + "learning_rate": 9.380103785349371e-05, + "loss": 1.9607, + "step": 14520 + }, + { + "epoch": 1.11, + "learning_rate": 9.376081097389276e-05, + "loss": 1.1034, + "step": 14525 + }, + { + "epoch": 1.11, + "learning_rate": 9.37205840942918e-05, + "loss": 2.063, + "step": 14530 + }, + { + "epoch": 1.11, + "learning_rate": 9.368035721469086e-05, + "loss": 2.0346, + "step": 14535 + }, + { + "epoch": 1.11, + "learning_rate": 9.364013033508992e-05, + "loss": 4.0316, + "step": 14540 + }, + { + "epoch": 1.11, + "learning_rate": 9.359990345548895e-05, + "loss": 4.7727, + "step": 14545 + }, + { + "epoch": 1.11, + "learning_rate": 9.355967657588801e-05, + "loss": 4.61, + "step": 14550 + }, + { + "epoch": 1.11, + "learning_rate": 9.351944969628707e-05, + "loss": 3.921, + "step": 14555 + }, + { + "epoch": 1.11, + "learning_rate": 9.347922281668611e-05, + "loss": 3.5495, + "step": 14560 + }, + { + "epoch": 1.11, + "learning_rate": 9.343899593708516e-05, + "loss": 3.8674, + "step": 14565 + }, + { + "epoch": 1.11, + "learning_rate": 9.339876905748422e-05, + "loss": 2.3444, + "step": 14570 + }, + { + "epoch": 1.11, + "learning_rate": 9.335854217788327e-05, + "loss": 2.3662, + "step": 14575 + }, + { + "epoch": 1.11, + "learning_rate": 9.331831529828232e-05, + "loss": 1.6259, + "step": 14580 + }, + { + "epoch": 1.11, + "learning_rate": 9.327808841868136e-05, + "loss": 2.9731, + "step": 14585 + }, + { + "epoch": 1.12, + "learning_rate": 9.323786153908042e-05, + "loss": 5.4543, + "step": 14590 + }, + { + "epoch": 1.12, + "learning_rate": 9.319763465947947e-05, + "loss": 3.7518, + "step": 14595 + }, + { + "epoch": 1.12, + "learning_rate": 9.315740777987851e-05, + "loss": 2.8011, + "step": 14600 + }, + { + "epoch": 1.12, + "learning_rate": 9.311718090027757e-05, + "loss": 2.6506, + "step": 14605 + }, + { + "epoch": 1.12, + "learning_rate": 9.307695402067663e-05, + "loss": 2.9476, + "step": 14610 + }, + { + "epoch": 1.12, + "learning_rate": 9.303672714107567e-05, + "loss": 2.7561, + "step": 14615 + }, + { + "epoch": 1.12, + "learning_rate": 9.299650026147472e-05, + "loss": 1.8628, + "step": 14620 + }, + { + "epoch": 1.12, + "learning_rate": 9.295627338187378e-05, + "loss": 3.3484, + "step": 14625 + }, + { + "epoch": 1.12, + "learning_rate": 9.291604650227282e-05, + "loss": 1.8501, + "step": 14630 + }, + { + "epoch": 1.12, + "learning_rate": 9.287581962267188e-05, + "loss": 1.1852, + "step": 14635 + }, + { + "epoch": 1.12, + "learning_rate": 9.283559274307092e-05, + "loss": 3.9387, + "step": 14640 + }, + { + "epoch": 1.12, + "learning_rate": 9.279536586346997e-05, + "loss": 5.0062, + "step": 14645 + }, + { + "epoch": 1.12, + "learning_rate": 9.275513898386903e-05, + "loss": 3.9559, + "step": 14650 + }, + { + "epoch": 1.12, + "learning_rate": 9.271491210426808e-05, + "loss": 3.1137, + "step": 14655 + }, + { + "epoch": 1.12, + "learning_rate": 9.267468522466713e-05, + "loss": 2.2882, + "step": 14660 + }, + { + "epoch": 1.12, + "learning_rate": 9.263445834506617e-05, + "loss": 3.7979, + "step": 14665 + }, + { + "epoch": 1.12, + "learning_rate": 9.259423146546523e-05, + "loss": 1.49, + "step": 14670 + }, + { + "epoch": 1.12, + "learning_rate": 9.255400458586428e-05, + "loss": 2.9072, + "step": 14675 + }, + { + "epoch": 1.12, + "learning_rate": 9.251377770626332e-05, + "loss": 0.0377, + "step": 14680 + }, + { + "epoch": 1.12, + "learning_rate": 9.247355082666238e-05, + "loss": 0.8162, + "step": 14685 + }, + { + "epoch": 1.12, + "learning_rate": 9.243332394706144e-05, + "loss": 4.1197, + "step": 14690 + }, + { + "epoch": 1.12, + "learning_rate": 9.239309706746048e-05, + "loss": 3.5349, + "step": 14695 + }, + { + "epoch": 1.12, + "learning_rate": 9.235287018785953e-05, + "loss": 3.0641, + "step": 14700 + }, + { + "epoch": 1.12, + "learning_rate": 9.231264330825858e-05, + "loss": 3.3049, + "step": 14705 + }, + { + "epoch": 1.12, + "learning_rate": 9.227241642865764e-05, + "loss": 2.9953, + "step": 14710 + }, + { + "epoch": 1.12, + "learning_rate": 9.223218954905667e-05, + "loss": 4.6273, + "step": 14715 + }, + { + "epoch": 1.13, + "learning_rate": 9.219196266945573e-05, + "loss": 3.2683, + "step": 14720 + }, + { + "epoch": 1.13, + "learning_rate": 9.215173578985479e-05, + "loss": 1.9925, + "step": 14725 + }, + { + "epoch": 1.13, + "learning_rate": 9.211150891025384e-05, + "loss": 4.6559, + "step": 14730 + }, + { + "epoch": 1.13, + "learning_rate": 9.207128203065288e-05, + "loss": 2.2516, + "step": 14735 + }, + { + "epoch": 1.13, + "learning_rate": 9.203105515105194e-05, + "loss": 5.6625, + "step": 14740 + }, + { + "epoch": 1.13, + "learning_rate": 9.1990828271451e-05, + "loss": 3.9943, + "step": 14745 + }, + { + "epoch": 1.13, + "learning_rate": 9.195060139185003e-05, + "loss": 3.9695, + "step": 14750 + }, + { + "epoch": 1.13, + "learning_rate": 9.191037451224909e-05, + "loss": 4.2107, + "step": 14755 + }, + { + "epoch": 1.13, + "learning_rate": 9.187014763264814e-05, + "loss": 3.3329, + "step": 14760 + }, + { + "epoch": 1.13, + "learning_rate": 9.182992075304719e-05, + "loss": 3.2871, + "step": 14765 + }, + { + "epoch": 1.13, + "learning_rate": 9.178969387344623e-05, + "loss": 4.1098, + "step": 14770 + }, + { + "epoch": 1.13, + "learning_rate": 9.174946699384529e-05, + "loss": 1.0645, + "step": 14775 + }, + { + "epoch": 1.13, + "learning_rate": 9.170924011424435e-05, + "loss": 0.74, + "step": 14780 + }, + { + "epoch": 1.13, + "learning_rate": 9.16690132346434e-05, + "loss": 4.3225, + "step": 14785 + }, + { + "epoch": 1.13, + "learning_rate": 9.162878635504244e-05, + "loss": 4.441, + "step": 14790 + }, + { + "epoch": 1.13, + "learning_rate": 9.15885594754415e-05, + "loss": 3.9297, + "step": 14795 + }, + { + "epoch": 1.13, + "learning_rate": 9.154833259584054e-05, + "loss": 3.2412, + "step": 14800 + }, + { + "epoch": 1.13, + "learning_rate": 9.15081057162396e-05, + "loss": 3.4386, + "step": 14805 + }, + { + "epoch": 1.13, + "learning_rate": 9.146787883663865e-05, + "loss": 3.839, + "step": 14810 + }, + { + "epoch": 1.13, + "learning_rate": 9.142765195703769e-05, + "loss": 4.0252, + "step": 14815 + }, + { + "epoch": 1.13, + "learning_rate": 9.138742507743675e-05, + "loss": 2.962, + "step": 14820 + }, + { + "epoch": 1.13, + "learning_rate": 9.13471981978358e-05, + "loss": 2.9706, + "step": 14825 + }, + { + "epoch": 1.13, + "learning_rate": 9.130697131823485e-05, + "loss": 3.4539, + "step": 14830 + }, + { + "epoch": 1.13, + "learning_rate": 9.12667444386339e-05, + "loss": 3.3337, + "step": 14835 + }, + { + "epoch": 1.13, + "learning_rate": 9.122651755903295e-05, + "loss": 4.6289, + "step": 14840 + }, + { + "epoch": 1.13, + "learning_rate": 9.1186290679432e-05, + "loss": 4.4992, + "step": 14845 + }, + { + "epoch": 1.13, + "learning_rate": 9.114606379983104e-05, + "loss": 4.495, + "step": 14850 + }, + { + "epoch": 1.14, + "learning_rate": 9.11058369202301e-05, + "loss": 3.5725, + "step": 14855 + }, + { + "epoch": 1.14, + "learning_rate": 9.106561004062916e-05, + "loss": 2.8839, + "step": 14860 + }, + { + "epoch": 1.14, + "learning_rate": 9.10253831610282e-05, + "loss": 2.2224, + "step": 14865 + }, + { + "epoch": 1.14, + "learning_rate": 9.098515628142725e-05, + "loss": 2.9777, + "step": 14870 + }, + { + "epoch": 1.14, + "learning_rate": 9.094492940182631e-05, + "loss": 0.4112, + "step": 14875 + }, + { + "epoch": 1.14, + "learning_rate": 9.090470252222537e-05, + "loss": 3.0038, + "step": 14880 + }, + { + "epoch": 1.14, + "learning_rate": 9.08644756426244e-05, + "loss": 2.8895, + "step": 14885 + }, + { + "epoch": 1.14, + "learning_rate": 9.082424876302346e-05, + "loss": 4.4019, + "step": 14890 + }, + { + "epoch": 1.14, + "learning_rate": 9.078402188342251e-05, + "loss": 4.5066, + "step": 14895 + }, + { + "epoch": 1.14, + "learning_rate": 9.074379500382156e-05, + "loss": 4.3509, + "step": 14900 + }, + { + "epoch": 1.14, + "learning_rate": 9.07035681242206e-05, + "loss": 3.8162, + "step": 14905 + }, + { + "epoch": 1.14, + "learning_rate": 9.066334124461966e-05, + "loss": 2.8266, + "step": 14910 + }, + { + "epoch": 1.14, + "learning_rate": 9.062311436501872e-05, + "loss": 1.7993, + "step": 14915 + }, + { + "epoch": 1.14, + "learning_rate": 9.058288748541775e-05, + "loss": 3.0204, + "step": 14920 + }, + { + "epoch": 1.14, + "learning_rate": 9.054266060581681e-05, + "loss": 2.9019, + "step": 14925 + }, + { + "epoch": 1.14, + "learning_rate": 9.050243372621587e-05, + "loss": 1.8711, + "step": 14930 + }, + { + "epoch": 1.14, + "learning_rate": 9.046220684661491e-05, + "loss": 0.6589, + "step": 14935 + }, + { + "epoch": 1.14, + "learning_rate": 9.042197996701396e-05, + "loss": 4.4053, + "step": 14940 + }, + { + "epoch": 1.14, + "learning_rate": 9.038175308741301e-05, + "loss": 4.7037, + "step": 14945 + }, + { + "epoch": 1.14, + "learning_rate": 9.034152620781206e-05, + "loss": 3.6162, + "step": 14950 + }, + { + "epoch": 1.14, + "learning_rate": 9.030129932821112e-05, + "loss": 3.1366, + "step": 14955 + }, + { + "epoch": 1.14, + "learning_rate": 9.026107244861016e-05, + "loss": 4.0396, + "step": 14960 + }, + { + "epoch": 1.14, + "learning_rate": 9.022084556900922e-05, + "loss": 2.0449, + "step": 14965 + }, + { + "epoch": 1.14, + "learning_rate": 9.018061868940827e-05, + "loss": 2.2303, + "step": 14970 + }, + { + "epoch": 1.14, + "learning_rate": 9.014039180980732e-05, + "loss": 2.6239, + "step": 14975 + }, + { + "epoch": 1.14, + "learning_rate": 9.010016493020637e-05, + "loss": 2.1086, + "step": 14980 + }, + { + "epoch": 1.15, + "learning_rate": 9.005993805060541e-05, + "loss": 5.5885, + "step": 14985 + }, + { + "epoch": 1.15, + "learning_rate": 9.001971117100447e-05, + "loss": 4.2605, + "step": 14990 + }, + { + "epoch": 1.15, + "learning_rate": 8.997948429140352e-05, + "loss": 4.6322, + "step": 14995 + }, + { + "epoch": 1.15, + "learning_rate": 8.993925741180257e-05, + "loss": 3.768, + "step": 15000 + }, + { + "epoch": 1.15, + "learning_rate": 8.989903053220162e-05, + "loss": 3.6373, + "step": 15005 + }, + { + "epoch": 1.15, + "learning_rate": 8.985880365260068e-05, + "loss": 3.0281, + "step": 15010 + }, + { + "epoch": 1.15, + "learning_rate": 8.981857677299972e-05, + "loss": 3.4137, + "step": 15015 + }, + { + "epoch": 1.15, + "learning_rate": 8.977834989339877e-05, + "loss": 2.8749, + "step": 15020 + }, + { + "epoch": 1.15, + "learning_rate": 8.973812301379782e-05, + "loss": 0.759, + "step": 15025 + }, + { + "epoch": 1.15, + "learning_rate": 8.969789613419688e-05, + "loss": 2.8243, + "step": 15030 + }, + { + "epoch": 1.15, + "learning_rate": 8.965766925459593e-05, + "loss": 2.4736, + "step": 15035 + }, + { + "epoch": 1.15, + "learning_rate": 8.961744237499497e-05, + "loss": 4.4713, + "step": 15040 + }, + { + "epoch": 1.15, + "learning_rate": 8.957721549539403e-05, + "loss": 4.3701, + "step": 15045 + }, + { + "epoch": 1.15, + "learning_rate": 8.953698861579309e-05, + "loss": 4.4148, + "step": 15050 + }, + { + "epoch": 1.15, + "learning_rate": 8.949676173619212e-05, + "loss": 3.6678, + "step": 15055 + }, + { + "epoch": 1.15, + "learning_rate": 8.945653485659118e-05, + "loss": 3.7158, + "step": 15060 + }, + { + "epoch": 1.15, + "learning_rate": 8.941630797699024e-05, + "loss": 3.458, + "step": 15065 + }, + { + "epoch": 1.15, + "learning_rate": 8.937608109738928e-05, + "loss": 3.1514, + "step": 15070 + }, + { + "epoch": 1.15, + "learning_rate": 8.933585421778833e-05, + "loss": 1.927, + "step": 15075 + }, + { + "epoch": 1.15, + "learning_rate": 8.929562733818738e-05, + "loss": 2.2447, + "step": 15080 + }, + { + "epoch": 1.15, + "learning_rate": 8.925540045858644e-05, + "loss": 2.924, + "step": 15085 + }, + { + "epoch": 1.15, + "learning_rate": 8.921517357898547e-05, + "loss": 3.8148, + "step": 15090 + }, + { + "epoch": 1.15, + "learning_rate": 8.917494669938453e-05, + "loss": 3.6268, + "step": 15095 + }, + { + "epoch": 1.15, + "learning_rate": 8.913471981978359e-05, + "loss": 3.8186, + "step": 15100 + }, + { + "epoch": 1.15, + "learning_rate": 8.909449294018263e-05, + "loss": 4.7416, + "step": 15105 + }, + { + "epoch": 1.15, + "learning_rate": 8.905426606058168e-05, + "loss": 3.0722, + "step": 15110 + }, + { + "epoch": 1.16, + "learning_rate": 8.901403918098074e-05, + "loss": 2.4213, + "step": 15115 + }, + { + "epoch": 1.16, + "learning_rate": 8.897381230137978e-05, + "loss": 1.8222, + "step": 15120 + }, + { + "epoch": 1.16, + "learning_rate": 8.893358542177884e-05, + "loss": 2.871, + "step": 15125 + }, + { + "epoch": 1.16, + "learning_rate": 8.889335854217789e-05, + "loss": 1.8803, + "step": 15130 + }, + { + "epoch": 1.16, + "learning_rate": 8.885313166257694e-05, + "loss": 2.8122, + "step": 15135 + }, + { + "epoch": 1.16, + "learning_rate": 8.881290478297599e-05, + "loss": 3.7818, + "step": 15140 + }, + { + "epoch": 1.16, + "learning_rate": 8.877267790337503e-05, + "loss": 3.7229, + "step": 15145 + }, + { + "epoch": 1.16, + "learning_rate": 8.873245102377409e-05, + "loss": 4.0561, + "step": 15150 + }, + { + "epoch": 1.16, + "learning_rate": 8.869222414417314e-05, + "loss": 3.7717, + "step": 15155 + }, + { + "epoch": 1.16, + "learning_rate": 8.86519972645722e-05, + "loss": 3.7115, + "step": 15160 + }, + { + "epoch": 1.16, + "learning_rate": 8.861177038497124e-05, + "loss": 3.7303, + "step": 15165 + }, + { + "epoch": 1.16, + "learning_rate": 8.85715435053703e-05, + "loss": 2.5779, + "step": 15170 + }, + { + "epoch": 1.16, + "learning_rate": 8.853131662576934e-05, + "loss": 3.3491, + "step": 15175 + }, + { + "epoch": 1.16, + "learning_rate": 8.84910897461684e-05, + "loss": 1.5736, + "step": 15180 + }, + { + "epoch": 1.16, + "learning_rate": 8.845086286656744e-05, + "loss": 2.1361, + "step": 15185 + }, + { + "epoch": 1.16, + "learning_rate": 8.841063598696649e-05, + "loss": 4.5369, + "step": 15190 + }, + { + "epoch": 1.16, + "learning_rate": 8.837040910736555e-05, + "loss": 4.5656, + "step": 15195 + }, + { + "epoch": 1.16, + "learning_rate": 8.83301822277646e-05, + "loss": 2.8299, + "step": 15200 + }, + { + "epoch": 1.16, + "learning_rate": 8.828995534816365e-05, + "loss": 4.0764, + "step": 15205 + }, + { + "epoch": 1.16, + "learning_rate": 8.82497284685627e-05, + "loss": 3.6823, + "step": 15210 + }, + { + "epoch": 1.16, + "learning_rate": 8.820950158896175e-05, + "loss": 2.6001, + "step": 15215 + }, + { + "epoch": 1.16, + "learning_rate": 8.81692747093608e-05, + "loss": 2.5453, + "step": 15220 + }, + { + "epoch": 1.16, + "learning_rate": 8.812904782975984e-05, + "loss": 3.1553, + "step": 15225 + }, + { + "epoch": 1.16, + "learning_rate": 8.80888209501589e-05, + "loss": 2.2298, + "step": 15230 + }, + { + "epoch": 1.16, + "learning_rate": 8.804859407055796e-05, + "loss": 1.4315, + "step": 15235 + }, + { + "epoch": 1.16, + "learning_rate": 8.8008367190957e-05, + "loss": 4.1703, + "step": 15240 + }, + { + "epoch": 1.17, + "learning_rate": 8.796814031135605e-05, + "loss": 4.1615, + "step": 15245 + }, + { + "epoch": 1.17, + "learning_rate": 8.792791343175511e-05, + "loss": 4.0225, + "step": 15250 + }, + { + "epoch": 1.17, + "learning_rate": 8.788768655215415e-05, + "loss": 3.3277, + "step": 15255 + }, + { + "epoch": 1.17, + "learning_rate": 8.78474596725532e-05, + "loss": 4.7938, + "step": 15260 + }, + { + "epoch": 1.17, + "learning_rate": 8.780723279295225e-05, + "loss": 3.795, + "step": 15265 + }, + { + "epoch": 1.17, + "learning_rate": 8.776700591335131e-05, + "loss": 3.2113, + "step": 15270 + }, + { + "epoch": 1.17, + "learning_rate": 8.772677903375036e-05, + "loss": 1.447, + "step": 15275 + }, + { + "epoch": 1.17, + "learning_rate": 8.76865521541494e-05, + "loss": 1.6464, + "step": 15280 + }, + { + "epoch": 1.17, + "learning_rate": 8.764632527454846e-05, + "loss": 2.8649, + "step": 15285 + }, + { + "epoch": 1.17, + "learning_rate": 8.76060983949475e-05, + "loss": 4.4762, + "step": 15290 + }, + { + "epoch": 1.17, + "learning_rate": 8.756587151534655e-05, + "loss": 4.0934, + "step": 15295 + }, + { + "epoch": 1.17, + "learning_rate": 8.752564463574561e-05, + "loss": 4.1266, + "step": 15300 + }, + { + "epoch": 1.17, + "learning_rate": 8.748541775614467e-05, + "loss": 3.9455, + "step": 15305 + }, + { + "epoch": 1.17, + "learning_rate": 8.744519087654371e-05, + "loss": 2.9204, + "step": 15310 + }, + { + "epoch": 1.17, + "learning_rate": 8.740496399694276e-05, + "loss": 3.201, + "step": 15315 + }, + { + "epoch": 1.17, + "learning_rate": 8.736473711734181e-05, + "loss": 3.3204, + "step": 15320 + }, + { + "epoch": 1.17, + "learning_rate": 8.732451023774086e-05, + "loss": 1.0861, + "step": 15325 + }, + { + "epoch": 1.17, + "learning_rate": 8.728428335813992e-05, + "loss": 2.0387, + "step": 15330 + }, + { + "epoch": 1.17, + "learning_rate": 8.724405647853896e-05, + "loss": 2.5072, + "step": 15335 + }, + { + "epoch": 1.17, + "learning_rate": 8.720382959893802e-05, + "loss": 3.6967, + "step": 15340 + }, + { + "epoch": 1.17, + "learning_rate": 8.716360271933706e-05, + "loss": 4.2148, + "step": 15345 + }, + { + "epoch": 1.17, + "learning_rate": 8.712337583973612e-05, + "loss": 2.8939, + "step": 15350 + }, + { + "epoch": 1.17, + "learning_rate": 8.708314896013517e-05, + "loss": 3.6441, + "step": 15355 + }, + { + "epoch": 1.17, + "learning_rate": 8.704292208053421e-05, + "loss": 2.4613, + "step": 15360 + }, + { + "epoch": 1.17, + "learning_rate": 8.700269520093327e-05, + "loss": 1.0514, + "step": 15365 + }, + { + "epoch": 1.17, + "learning_rate": 8.696246832133231e-05, + "loss": 4.2469, + "step": 15370 + }, + { + "epoch": 1.18, + "learning_rate": 8.692224144173137e-05, + "loss": 3.054, + "step": 15375 + }, + { + "epoch": 1.18, + "learning_rate": 8.688201456213042e-05, + "loss": 1.4101, + "step": 15380 + }, + { + "epoch": 1.18, + "learning_rate": 8.684178768252948e-05, + "loss": 2.7099, + "step": 15385 + }, + { + "epoch": 1.18, + "learning_rate": 8.680156080292852e-05, + "loss": 4.5326, + "step": 15390 + }, + { + "epoch": 1.18, + "learning_rate": 8.676133392332757e-05, + "loss": 3.8502, + "step": 15395 + }, + { + "epoch": 1.18, + "learning_rate": 8.672110704372662e-05, + "loss": 3.8027, + "step": 15400 + }, + { + "epoch": 1.18, + "learning_rate": 8.668088016412568e-05, + "loss": 3.708, + "step": 15405 + }, + { + "epoch": 1.18, + "learning_rate": 8.664065328452471e-05, + "loss": 3.4665, + "step": 15410 + }, + { + "epoch": 1.18, + "learning_rate": 8.660042640492377e-05, + "loss": 2.2829, + "step": 15415 + }, + { + "epoch": 1.18, + "learning_rate": 8.656019952532283e-05, + "loss": 2.9721, + "step": 15420 + }, + { + "epoch": 1.18, + "learning_rate": 8.651997264572187e-05, + "loss": 2.7123, + "step": 15425 + }, + { + "epoch": 1.18, + "learning_rate": 8.647974576612092e-05, + "loss": 1.2022, + "step": 15430 + }, + { + "epoch": 1.18, + "learning_rate": 8.643951888651998e-05, + "loss": 1.9478, + "step": 15435 + }, + { + "epoch": 1.18, + "learning_rate": 8.639929200691904e-05, + "loss": 4.2762, + "step": 15440 + }, + { + "epoch": 1.18, + "learning_rate": 8.635906512731807e-05, + "loss": 3.9773, + "step": 15445 + }, + { + "epoch": 1.18, + "learning_rate": 8.631883824771712e-05, + "loss": 4.3887, + "step": 15450 + }, + { + "epoch": 1.18, + "learning_rate": 8.627861136811618e-05, + "loss": 4.1759, + "step": 15455 + }, + { + "epoch": 1.18, + "learning_rate": 8.623838448851523e-05, + "loss": 2.006, + "step": 15460 + }, + { + "epoch": 1.18, + "learning_rate": 8.619815760891427e-05, + "loss": 3.5844, + "step": 15465 + }, + { + "epoch": 1.18, + "learning_rate": 8.615793072931333e-05, + "loss": 4.083, + "step": 15470 + }, + { + "epoch": 1.18, + "learning_rate": 8.611770384971239e-05, + "loss": 2.3228, + "step": 15475 + }, + { + "epoch": 1.18, + "learning_rate": 8.607747697011143e-05, + "loss": 3.9055, + "step": 15480 + }, + { + "epoch": 1.18, + "learning_rate": 8.603725009051048e-05, + "loss": 1.9998, + "step": 15485 + }, + { + "epoch": 1.18, + "learning_rate": 8.599702321090954e-05, + "loss": 4.3775, + "step": 15490 + }, + { + "epoch": 1.18, + "learning_rate": 8.595679633130858e-05, + "loss": 5.6619, + "step": 15495 + }, + { + "epoch": 1.18, + "learning_rate": 8.591656945170764e-05, + "loss": 3.8799, + "step": 15500 + }, + { + "epoch": 1.19, + "learning_rate": 8.587634257210668e-05, + "loss": 4.2123, + "step": 15505 + }, + { + "epoch": 1.19, + "learning_rate": 8.583611569250574e-05, + "loss": 3.1585, + "step": 15510 + }, + { + "epoch": 1.19, + "learning_rate": 8.579588881290479e-05, + "loss": 3.1555, + "step": 15515 + }, + { + "epoch": 1.19, + "learning_rate": 8.575566193330383e-05, + "loss": 2.6523, + "step": 15520 + }, + { + "epoch": 1.19, + "learning_rate": 8.571543505370289e-05, + "loss": 1.6222, + "step": 15525 + }, + { + "epoch": 1.19, + "learning_rate": 8.567520817410193e-05, + "loss": 1.5623, + "step": 15530 + }, + { + "epoch": 1.19, + "learning_rate": 8.563498129450099e-05, + "loss": 2.1904, + "step": 15535 + }, + { + "epoch": 1.19, + "learning_rate": 8.559475441490004e-05, + "loss": 4.3115, + "step": 15540 + }, + { + "epoch": 1.19, + "learning_rate": 8.55545275352991e-05, + "loss": 4.4436, + "step": 15545 + }, + { + "epoch": 1.19, + "learning_rate": 8.551430065569814e-05, + "loss": 4.1313, + "step": 15550 + }, + { + "epoch": 1.19, + "learning_rate": 8.54740737760972e-05, + "loss": 3.7244, + "step": 15555 + }, + { + "epoch": 1.19, + "learning_rate": 8.543384689649624e-05, + "loss": 1.7071, + "step": 15560 + }, + { + "epoch": 1.19, + "learning_rate": 8.539362001689529e-05, + "loss": 2.7745, + "step": 15565 + }, + { + "epoch": 1.19, + "learning_rate": 8.536143851321453e-05, + "loss": 3.1961, + "step": 15570 + }, + { + "epoch": 1.19, + "learning_rate": 8.532121163361358e-05, + "loss": 1.68, + "step": 15575 + }, + { + "epoch": 1.19, + "learning_rate": 8.528098475401264e-05, + "loss": 2.2349, + "step": 15580 + }, + { + "epoch": 1.19, + "learning_rate": 8.524075787441168e-05, + "loss": 3.1614, + "step": 15585 + }, + { + "epoch": 1.19, + "learning_rate": 8.520053099481074e-05, + "loss": 5.0854, + "step": 15590 + }, + { + "epoch": 1.19, + "learning_rate": 8.516030411520979e-05, + "loss": 4.314, + "step": 15595 + }, + { + "epoch": 1.19, + "learning_rate": 8.512007723560884e-05, + "loss": 3.1324, + "step": 15600 + }, + { + "epoch": 1.19, + "learning_rate": 8.507985035600789e-05, + "loss": 4.0867, + "step": 15605 + }, + { + "epoch": 1.19, + "learning_rate": 8.503962347640693e-05, + "loss": 2.0064, + "step": 15610 + }, + { + "epoch": 1.19, + "learning_rate": 8.499939659680599e-05, + "loss": 2.5377, + "step": 15615 + }, + { + "epoch": 1.19, + "learning_rate": 8.495916971720505e-05, + "loss": 3.2949, + "step": 15620 + }, + { + "epoch": 1.19, + "learning_rate": 8.491894283760408e-05, + "loss": 1.9116, + "step": 15625 + }, + { + "epoch": 1.19, + "learning_rate": 8.487871595800314e-05, + "loss": 1.14, + "step": 15630 + }, + { + "epoch": 1.19, + "learning_rate": 8.48384890784022e-05, + "loss": 4.8432, + "step": 15635 + }, + { + "epoch": 1.2, + "learning_rate": 8.479826219880124e-05, + "loss": 4.5416, + "step": 15640 + }, + { + "epoch": 1.2, + "learning_rate": 8.475803531920029e-05, + "loss": 4.0746, + "step": 15645 + }, + { + "epoch": 1.2, + "learning_rate": 8.471780843959934e-05, + "loss": 3.4386, + "step": 15650 + }, + { + "epoch": 1.2, + "learning_rate": 8.46775815599984e-05, + "loss": 3.0038, + "step": 15655 + }, + { + "epoch": 1.2, + "learning_rate": 8.463735468039743e-05, + "loss": 3.2941, + "step": 15660 + }, + { + "epoch": 1.2, + "learning_rate": 8.459712780079649e-05, + "loss": 3.2194, + "step": 15665 + }, + { + "epoch": 1.2, + "learning_rate": 8.455690092119555e-05, + "loss": 1.8783, + "step": 15670 + }, + { + "epoch": 1.2, + "learning_rate": 8.45166740415946e-05, + "loss": 2.5379, + "step": 15675 + }, + { + "epoch": 1.2, + "learning_rate": 8.447644716199364e-05, + "loss": 2.8854, + "step": 15680 + }, + { + "epoch": 1.2, + "learning_rate": 8.44362202823927e-05, + "loss": 2.5177, + "step": 15685 + }, + { + "epoch": 1.2, + "learning_rate": 8.439599340279176e-05, + "loss": 4.4383, + "step": 15690 + }, + { + "epoch": 1.2, + "learning_rate": 8.43557665231908e-05, + "loss": 3.7588, + "step": 15695 + }, + { + "epoch": 1.2, + "learning_rate": 8.431553964358985e-05, + "loss": 4.1584, + "step": 15700 + }, + { + "epoch": 1.2, + "learning_rate": 8.42753127639889e-05, + "loss": 4.5521, + "step": 15705 + }, + { + "epoch": 1.2, + "learning_rate": 8.423508588438795e-05, + "loss": 3.7143, + "step": 15710 + }, + { + "epoch": 1.2, + "learning_rate": 8.419485900478701e-05, + "loss": 3.5352, + "step": 15715 + }, + { + "epoch": 1.2, + "learning_rate": 8.415463212518605e-05, + "loss": 2.5749, + "step": 15720 + }, + { + "epoch": 1.2, + "learning_rate": 8.411440524558511e-05, + "loss": 2.019, + "step": 15725 + }, + { + "epoch": 1.2, + "learning_rate": 8.407417836598415e-05, + "loss": 1.2621, + "step": 15730 + }, + { + "epoch": 1.2, + "learning_rate": 8.40339514863832e-05, + "loss": 1.5193, + "step": 15735 + }, + { + "epoch": 1.2, + "learning_rate": 8.399372460678226e-05, + "loss": 4.5795, + "step": 15740 + }, + { + "epoch": 1.2, + "learning_rate": 8.39534977271813e-05, + "loss": 4.4549, + "step": 15745 + }, + { + "epoch": 1.2, + "learning_rate": 8.391327084758036e-05, + "loss": 3.8432, + "step": 15750 + }, + { + "epoch": 1.2, + "learning_rate": 8.38730439679794e-05, + "loss": 4.1367, + "step": 15755 + }, + { + "epoch": 1.2, + "learning_rate": 8.383281708837846e-05, + "loss": 2.8366, + "step": 15760 + }, + { + "epoch": 1.2, + "learning_rate": 8.379259020877751e-05, + "loss": 3.5475, + "step": 15765 + }, + { + "epoch": 1.21, + "learning_rate": 8.375236332917657e-05, + "loss": 3.802, + "step": 15770 + }, + { + "epoch": 1.21, + "learning_rate": 8.371213644957561e-05, + "loss": 2.1158, + "step": 15775 + }, + { + "epoch": 1.21, + "learning_rate": 8.367190956997466e-05, + "loss": 2.9005, + "step": 15780 + }, + { + "epoch": 1.21, + "learning_rate": 8.363168269037371e-05, + "loss": 4.0268, + "step": 15785 + }, + { + "epoch": 1.21, + "learning_rate": 8.359145581077277e-05, + "loss": 3.5148, + "step": 15790 + }, + { + "epoch": 1.21, + "learning_rate": 8.35512289311718e-05, + "loss": 3.9832, + "step": 15795 + }, + { + "epoch": 1.21, + "learning_rate": 8.351100205157086e-05, + "loss": 4.1332, + "step": 15800 + }, + { + "epoch": 1.21, + "learning_rate": 8.347077517196992e-05, + "loss": 2.8575, + "step": 15805 + }, + { + "epoch": 1.21, + "learning_rate": 8.343054829236896e-05, + "loss": 2.4339, + "step": 15810 + }, + { + "epoch": 1.21, + "learning_rate": 8.339032141276801e-05, + "loss": 2.5007, + "step": 15815 + }, + { + "epoch": 1.21, + "learning_rate": 8.335009453316707e-05, + "loss": 2.438, + "step": 15820 + }, + { + "epoch": 1.21, + "learning_rate": 8.330986765356613e-05, + "loss": 2.3291, + "step": 15825 + }, + { + "epoch": 1.21, + "learning_rate": 8.326964077396516e-05, + "loss": 1.1762, + "step": 15830 + }, + { + "epoch": 1.21, + "learning_rate": 8.322941389436422e-05, + "loss": 1.7898, + "step": 15835 + }, + { + "epoch": 1.21, + "learning_rate": 8.318918701476327e-05, + "loss": 4.6023, + "step": 15840 + }, + { + "epoch": 1.21, + "learning_rate": 8.314896013516232e-05, + "loss": 4.2715, + "step": 15845 + }, + { + "epoch": 1.21, + "learning_rate": 8.310873325556136e-05, + "loss": 4.5984, + "step": 15850 + }, + { + "epoch": 1.21, + "learning_rate": 8.306850637596042e-05, + "loss": 3.9041, + "step": 15855 + }, + { + "epoch": 1.21, + "learning_rate": 8.302827949635948e-05, + "loss": 3.0089, + "step": 15860 + }, + { + "epoch": 1.21, + "learning_rate": 8.298805261675852e-05, + "loss": 3.0388, + "step": 15865 + }, + { + "epoch": 1.21, + "learning_rate": 8.294782573715757e-05, + "loss": 3.5033, + "step": 15870 + }, + { + "epoch": 1.21, + "learning_rate": 8.290759885755663e-05, + "loss": 3.0061, + "step": 15875 + }, + { + "epoch": 1.21, + "learning_rate": 8.286737197795567e-05, + "loss": 2.2896, + "step": 15880 + }, + { + "epoch": 1.21, + "learning_rate": 8.282714509835472e-05, + "loss": 2.4662, + "step": 15885 + }, + { + "epoch": 1.21, + "learning_rate": 8.278691821875377e-05, + "loss": 4.1061, + "step": 15890 + }, + { + "epoch": 1.21, + "learning_rate": 8.274669133915283e-05, + "loss": 4.6795, + "step": 15895 + }, + { + "epoch": 1.22, + "learning_rate": 8.270646445955188e-05, + "loss": 4.7134, + "step": 15900 + }, + { + "epoch": 1.22, + "learning_rate": 8.266623757995092e-05, + "loss": 3.7715, + "step": 15905 + }, + { + "epoch": 1.22, + "learning_rate": 8.262601070034998e-05, + "loss": 3.1307, + "step": 15910 + }, + { + "epoch": 1.22, + "learning_rate": 8.258578382074903e-05, + "loss": 1.9352, + "step": 15915 + }, + { + "epoch": 1.22, + "learning_rate": 8.254555694114808e-05, + "loss": 1.595, + "step": 15920 + }, + { + "epoch": 1.22, + "learning_rate": 8.250533006154713e-05, + "loss": 1.327, + "step": 15925 + }, + { + "epoch": 1.22, + "learning_rate": 8.246510318194617e-05, + "loss": 1.5466, + "step": 15930 + }, + { + "epoch": 1.22, + "learning_rate": 8.242487630234523e-05, + "loss": 2.0442, + "step": 15935 + }, + { + "epoch": 1.22, + "learning_rate": 8.238464942274429e-05, + "loss": 5.4271, + "step": 15940 + }, + { + "epoch": 1.22, + "learning_rate": 8.234442254314333e-05, + "loss": 4.067, + "step": 15945 + }, + { + "epoch": 1.22, + "learning_rate": 8.230419566354238e-05, + "loss": 3.5479, + "step": 15950 + }, + { + "epoch": 1.22, + "learning_rate": 8.226396878394144e-05, + "loss": 3.7012, + "step": 15955 + }, + { + "epoch": 1.22, + "learning_rate": 8.222374190434048e-05, + "loss": 3.7094, + "step": 15960 + }, + { + "epoch": 1.22, + "learning_rate": 8.218351502473953e-05, + "loss": 3.0054, + "step": 15965 + }, + { + "epoch": 1.22, + "learning_rate": 8.214328814513858e-05, + "loss": 2.5629, + "step": 15970 + }, + { + "epoch": 1.22, + "learning_rate": 8.210306126553764e-05, + "loss": 1.7412, + "step": 15975 + }, + { + "epoch": 1.22, + "learning_rate": 8.206283438593669e-05, + "loss": 2.4695, + "step": 15980 + }, + { + "epoch": 1.22, + "learning_rate": 8.202260750633573e-05, + "loss": 3.7052, + "step": 15985 + }, + { + "epoch": 1.22, + "learning_rate": 8.198238062673479e-05, + "loss": 4.3586, + "step": 15990 + }, + { + "epoch": 1.22, + "learning_rate": 8.194215374713385e-05, + "loss": 4.7576, + "step": 15995 + }, + { + "epoch": 1.22, + "learning_rate": 8.190192686753288e-05, + "loss": 4.1148, + "step": 16000 + }, + { + "epoch": 1.22, + "learning_rate": 8.186169998793194e-05, + "loss": 4.3701, + "step": 16005 + }, + { + "epoch": 1.22, + "learning_rate": 8.1821473108331e-05, + "loss": 2.6996, + "step": 16010 + }, + { + "epoch": 1.22, + "learning_rate": 8.178124622873004e-05, + "loss": 3.8242, + "step": 16015 + }, + { + "epoch": 1.22, + "learning_rate": 8.174101934912909e-05, + "loss": 1.7617, + "step": 16020 + }, + { + "epoch": 1.22, + "learning_rate": 8.170079246952814e-05, + "loss": 3.2437, + "step": 16025 + }, + { + "epoch": 1.23, + "learning_rate": 8.16605655899272e-05, + "loss": 2.0556, + "step": 16030 + }, + { + "epoch": 1.23, + "learning_rate": 8.162033871032623e-05, + "loss": 1.7406, + "step": 16035 + }, + { + "epoch": 1.23, + "learning_rate": 8.158011183072529e-05, + "loss": 3.6297, + "step": 16040 + }, + { + "epoch": 1.23, + "learning_rate": 8.153988495112435e-05, + "loss": 3.8234, + "step": 16045 + }, + { + "epoch": 1.23, + "learning_rate": 8.14996580715234e-05, + "loss": 3.1287, + "step": 16050 + }, + { + "epoch": 1.23, + "learning_rate": 8.145943119192244e-05, + "loss": 3.4059, + "step": 16055 + }, + { + "epoch": 1.23, + "learning_rate": 8.14192043123215e-05, + "loss": 3.0411, + "step": 16060 + }, + { + "epoch": 1.23, + "learning_rate": 8.137897743272056e-05, + "loss": 1.7471, + "step": 16065 + }, + { + "epoch": 1.23, + "learning_rate": 8.13387505531196e-05, + "loss": 2.7685, + "step": 16070 + }, + { + "epoch": 1.23, + "learning_rate": 8.129852367351864e-05, + "loss": 1.8943, + "step": 16075 + }, + { + "epoch": 1.23, + "learning_rate": 8.12582967939177e-05, + "loss": 2.1345, + "step": 16080 + }, + { + "epoch": 1.23, + "learning_rate": 8.121806991431675e-05, + "loss": 0.4706, + "step": 16085 + }, + { + "epoch": 1.23, + "learning_rate": 8.11778430347158e-05, + "loss": 4.9555, + "step": 16090 + }, + { + "epoch": 1.23, + "learning_rate": 8.113761615511485e-05, + "loss": 3.7021, + "step": 16095 + }, + { + "epoch": 1.23, + "learning_rate": 8.10973892755139e-05, + "loss": 3.6622, + "step": 16100 + }, + { + "epoch": 1.23, + "learning_rate": 8.105716239591295e-05, + "loss": 3.2216, + "step": 16105 + }, + { + "epoch": 1.23, + "learning_rate": 8.1016935516312e-05, + "loss": 3.7122, + "step": 16110 + }, + { + "epoch": 1.23, + "learning_rate": 8.097670863671106e-05, + "loss": 3.2248, + "step": 16115 + }, + { + "epoch": 1.23, + "learning_rate": 8.09364817571101e-05, + "loss": 2.1737, + "step": 16120 + }, + { + "epoch": 1.23, + "learning_rate": 8.089625487750916e-05, + "loss": 3.0768, + "step": 16125 + }, + { + "epoch": 1.23, + "learning_rate": 8.08560279979082e-05, + "loss": 1.7508, + "step": 16130 + }, + { + "epoch": 1.23, + "learning_rate": 8.081580111830725e-05, + "loss": 1.544, + "step": 16135 + }, + { + "epoch": 1.23, + "learning_rate": 8.077557423870631e-05, + "loss": 4.6045, + "step": 16140 + }, + { + "epoch": 1.23, + "learning_rate": 8.073534735910537e-05, + "loss": 4.5873, + "step": 16145 + }, + { + "epoch": 1.23, + "learning_rate": 8.069512047950441e-05, + "loss": 4.3631, + "step": 16150 + }, + { + "epoch": 1.23, + "learning_rate": 8.065489359990345e-05, + "loss": 4.2217, + "step": 16155 + }, + { + "epoch": 1.24, + "learning_rate": 8.061466672030251e-05, + "loss": 2.9564, + "step": 16160 + }, + { + "epoch": 1.24, + "learning_rate": 8.057443984070157e-05, + "loss": 3.6113, + "step": 16165 + }, + { + "epoch": 1.24, + "learning_rate": 8.05342129611006e-05, + "loss": 2.8933, + "step": 16170 + }, + { + "epoch": 1.24, + "learning_rate": 8.049398608149966e-05, + "loss": 3.0604, + "step": 16175 + }, + { + "epoch": 1.24, + "learning_rate": 8.045375920189872e-05, + "loss": 3.6262, + "step": 16180 + }, + { + "epoch": 1.24, + "learning_rate": 8.041353232229776e-05, + "loss": 3.0539, + "step": 16185 + }, + { + "epoch": 1.24, + "learning_rate": 8.037330544269681e-05, + "loss": 4.0107, + "step": 16190 + }, + { + "epoch": 1.24, + "learning_rate": 8.033307856309587e-05, + "loss": 4.325, + "step": 16195 + }, + { + "epoch": 1.24, + "learning_rate": 8.029285168349492e-05, + "loss": 3.9152, + "step": 16200 + }, + { + "epoch": 1.24, + "learning_rate": 8.025262480389396e-05, + "loss": 2.3825, + "step": 16205 + }, + { + "epoch": 1.24, + "learning_rate": 8.021239792429301e-05, + "loss": 3.0158, + "step": 16210 + }, + { + "epoch": 1.24, + "learning_rate": 8.017217104469207e-05, + "loss": 3.0349, + "step": 16215 + }, + { + "epoch": 1.24, + "learning_rate": 8.013194416509112e-05, + "loss": 2.9871, + "step": 16220 + }, + { + "epoch": 1.24, + "learning_rate": 8.009171728549016e-05, + "loss": 3.3361, + "step": 16225 + }, + { + "epoch": 1.24, + "learning_rate": 8.005149040588922e-05, + "loss": 2.4099, + "step": 16230 + }, + { + "epoch": 1.24, + "learning_rate": 8.001126352628826e-05, + "loss": 4.984, + "step": 16235 + }, + { + "epoch": 1.24, + "learning_rate": 7.997103664668732e-05, + "loss": 4.5197, + "step": 16240 + }, + { + "epoch": 1.24, + "learning_rate": 7.993080976708637e-05, + "loss": 4.6932, + "step": 16245 + }, + { + "epoch": 1.24, + "learning_rate": 7.989058288748543e-05, + "loss": 3.5482, + "step": 16250 + }, + { + "epoch": 1.24, + "learning_rate": 7.985035600788447e-05, + "loss": 4.3166, + "step": 16255 + }, + { + "epoch": 1.24, + "learning_rate": 7.981012912828353e-05, + "loss": 3.2842, + "step": 16260 + }, + { + "epoch": 1.24, + "learning_rate": 7.976990224868257e-05, + "loss": 3.2132, + "step": 16265 + }, + { + "epoch": 1.24, + "learning_rate": 7.972967536908162e-05, + "loss": 3.3531, + "step": 16270 + }, + { + "epoch": 1.24, + "learning_rate": 7.968944848948068e-05, + "loss": 3.0101, + "step": 16275 + }, + { + "epoch": 1.24, + "learning_rate": 7.964922160987972e-05, + "loss": 3.3757, + "step": 16280 + }, + { + "epoch": 1.24, + "learning_rate": 7.960899473027878e-05, + "loss": 2.4891, + "step": 16285 + }, + { + "epoch": 1.25, + "learning_rate": 7.956876785067782e-05, + "loss": 4.3678, + "step": 16290 + }, + { + "epoch": 1.25, + "learning_rate": 7.952854097107688e-05, + "loss": 4.4967, + "step": 16295 + }, + { + "epoch": 1.25, + "learning_rate": 7.948831409147593e-05, + "loss": 4.7766, + "step": 16300 + }, + { + "epoch": 1.25, + "learning_rate": 7.944808721187497e-05, + "loss": 4.4363, + "step": 16305 + }, + { + "epoch": 1.25, + "learning_rate": 7.940786033227403e-05, + "loss": 3.5326, + "step": 16310 + }, + { + "epoch": 1.25, + "learning_rate": 7.936763345267309e-05, + "loss": 2.0882, + "step": 16315 + }, + { + "epoch": 1.25, + "learning_rate": 7.932740657307213e-05, + "loss": 2.1091, + "step": 16320 + }, + { + "epoch": 1.25, + "learning_rate": 7.928717969347118e-05, + "loss": 2.316, + "step": 16325 + }, + { + "epoch": 1.25, + "learning_rate": 7.924695281387024e-05, + "loss": 3.159, + "step": 16330 + }, + { + "epoch": 1.25, + "learning_rate": 7.92067259342693e-05, + "loss": 1.6889, + "step": 16335 + }, + { + "epoch": 1.25, + "learning_rate": 7.916649905466833e-05, + "loss": 3.8652, + "step": 16340 + }, + { + "epoch": 1.25, + "learning_rate": 7.912627217506738e-05, + "loss": 4.3504, + "step": 16345 + }, + { + "epoch": 1.25, + "learning_rate": 7.908604529546644e-05, + "loss": 4.0221, + "step": 16350 + }, + { + "epoch": 1.25, + "learning_rate": 7.904581841586549e-05, + "loss": 4.374, + "step": 16355 + }, + { + "epoch": 1.25, + "learning_rate": 7.900559153626453e-05, + "loss": 4.5057, + "step": 16360 + }, + { + "epoch": 1.25, + "learning_rate": 7.896536465666359e-05, + "loss": 2.6971, + "step": 16365 + }, + { + "epoch": 1.25, + "learning_rate": 7.892513777706265e-05, + "loss": 1.9066, + "step": 16370 + }, + { + "epoch": 1.25, + "learning_rate": 7.888491089746168e-05, + "loss": 2.0673, + "step": 16375 + }, + { + "epoch": 1.25, + "learning_rate": 7.884468401786074e-05, + "loss": 2.7272, + "step": 16380 + }, + { + "epoch": 1.25, + "learning_rate": 7.88044571382598e-05, + "loss": 3.6661, + "step": 16385 + }, + { + "epoch": 1.25, + "learning_rate": 7.876423025865884e-05, + "loss": 4.982, + "step": 16390 + }, + { + "epoch": 1.25, + "learning_rate": 7.872400337905788e-05, + "loss": 3.9939, + "step": 16395 + }, + { + "epoch": 1.25, + "learning_rate": 7.868377649945694e-05, + "loss": 4.1607, + "step": 16400 + }, + { + "epoch": 1.25, + "learning_rate": 7.864354961985599e-05, + "loss": 3.9865, + "step": 16405 + }, + { + "epoch": 1.25, + "learning_rate": 7.860332274025505e-05, + "loss": 4.0355, + "step": 16410 + }, + { + "epoch": 1.25, + "learning_rate": 7.856309586065409e-05, + "loss": 3.2419, + "step": 16415 + }, + { + "epoch": 1.25, + "learning_rate": 7.852286898105315e-05, + "loss": 2.5692, + "step": 16420 + }, + { + "epoch": 1.26, + "learning_rate": 7.84826421014522e-05, + "loss": 3.0755, + "step": 16425 + }, + { + "epoch": 1.26, + "learning_rate": 7.844241522185124e-05, + "loss": 1.1237, + "step": 16430 + }, + { + "epoch": 1.26, + "learning_rate": 7.84021883422503e-05, + "loss": 2.8103, + "step": 16435 + }, + { + "epoch": 1.26, + "learning_rate": 7.836196146264934e-05, + "loss": 4.2614, + "step": 16440 + }, + { + "epoch": 1.26, + "learning_rate": 7.83217345830484e-05, + "loss": 4.7865, + "step": 16445 + }, + { + "epoch": 1.26, + "learning_rate": 7.828150770344744e-05, + "loss": 4.392, + "step": 16450 + }, + { + "epoch": 1.26, + "learning_rate": 7.82412808238465e-05, + "loss": 2.8524, + "step": 16455 + }, + { + "epoch": 1.26, + "learning_rate": 7.820105394424555e-05, + "loss": 2.9417, + "step": 16460 + }, + { + "epoch": 1.26, + "learning_rate": 7.81608270646446e-05, + "loss": 3.8519, + "step": 16465 + }, + { + "epoch": 1.26, + "learning_rate": 7.812060018504365e-05, + "loss": 3.798, + "step": 16470 + }, + { + "epoch": 1.26, + "learning_rate": 7.80803733054427e-05, + "loss": 3.2817, + "step": 16475 + }, + { + "epoch": 1.26, + "learning_rate": 7.804014642584175e-05, + "loss": 0.6551, + "step": 16480 + }, + { + "epoch": 1.26, + "learning_rate": 7.799991954624081e-05, + "loss": 0.9354, + "step": 16485 + }, + { + "epoch": 1.26, + "learning_rate": 7.795969266663986e-05, + "loss": 4.2775, + "step": 16490 + }, + { + "epoch": 1.26, + "learning_rate": 7.79194657870389e-05, + "loss": 4.2949, + "step": 16495 + }, + { + "epoch": 1.26, + "learning_rate": 7.787923890743796e-05, + "loss": 4.0965, + "step": 16500 + }, + { + "epoch": 1.26, + "learning_rate": 7.7839012027837e-05, + "loss": 3.2883, + "step": 16505 + }, + { + "epoch": 1.26, + "learning_rate": 7.779878514823605e-05, + "loss": 3.9234, + "step": 16510 + }, + { + "epoch": 1.26, + "learning_rate": 7.77585582686351e-05, + "loss": 2.299, + "step": 16515 + }, + { + "epoch": 1.26, + "learning_rate": 7.771833138903416e-05, + "loss": 1.2933, + "step": 16520 + }, + { + "epoch": 1.26, + "learning_rate": 7.767810450943321e-05, + "loss": 0.9545, + "step": 16525 + }, + { + "epoch": 1.26, + "learning_rate": 7.763787762983225e-05, + "loss": 2.7195, + "step": 16530 + }, + { + "epoch": 1.26, + "learning_rate": 7.759765075023131e-05, + "loss": 3.0816, + "step": 16535 + }, + { + "epoch": 1.26, + "learning_rate": 7.755742387063036e-05, + "loss": 3.9545, + "step": 16540 + }, + { + "epoch": 1.26, + "learning_rate": 7.75171969910294e-05, + "loss": 4.1322, + "step": 16545 + }, + { + "epoch": 1.26, + "learning_rate": 7.747697011142846e-05, + "loss": 4.1234, + "step": 16550 + }, + { + "epoch": 1.27, + "learning_rate": 7.743674323182752e-05, + "loss": 3.8441, + "step": 16555 + }, + { + "epoch": 1.27, + "learning_rate": 7.739651635222656e-05, + "loss": 4.11, + "step": 16560 + }, + { + "epoch": 1.27, + "learning_rate": 7.735628947262561e-05, + "loss": 1.9523, + "step": 16565 + }, + { + "epoch": 1.27, + "learning_rate": 7.731606259302467e-05, + "loss": 2.1999, + "step": 16570 + }, + { + "epoch": 1.27, + "learning_rate": 7.727583571342371e-05, + "loss": 2.8916, + "step": 16575 + }, + { + "epoch": 1.27, + "learning_rate": 7.723560883382276e-05, + "loss": 0.9832, + "step": 16580 + }, + { + "epoch": 1.27, + "learning_rate": 7.719538195422181e-05, + "loss": 2.7123, + "step": 16585 + }, + { + "epoch": 1.27, + "learning_rate": 7.715515507462087e-05, + "loss": 4.9367, + "step": 16590 + }, + { + "epoch": 1.27, + "learning_rate": 7.711492819501992e-05, + "loss": 3.8705, + "step": 16595 + }, + { + "epoch": 1.27, + "learning_rate": 7.707470131541896e-05, + "loss": 3.7537, + "step": 16600 + }, + { + "epoch": 1.27, + "learning_rate": 7.703447443581802e-05, + "loss": 3.0968, + "step": 16605 + }, + { + "epoch": 1.27, + "learning_rate": 7.699424755621706e-05, + "loss": 2.827, + "step": 16610 + }, + { + "epoch": 1.27, + "learning_rate": 7.695402067661612e-05, + "loss": 3.0993, + "step": 16615 + }, + { + "epoch": 1.27, + "learning_rate": 7.691379379701517e-05, + "loss": 0.7201, + "step": 16620 + }, + { + "epoch": 1.27, + "learning_rate": 7.687356691741423e-05, + "loss": 2.1911, + "step": 16625 + }, + { + "epoch": 1.27, + "learning_rate": 7.683334003781327e-05, + "loss": 0.3554, + "step": 16630 + }, + { + "epoch": 1.27, + "learning_rate": 7.679311315821233e-05, + "loss": 1.0917, + "step": 16635 + }, + { + "epoch": 1.27, + "learning_rate": 7.675288627861137e-05, + "loss": 4.0449, + "step": 16640 + }, + { + "epoch": 1.27, + "learning_rate": 7.671265939901042e-05, + "loss": 4.4773, + "step": 16645 + }, + { + "epoch": 1.27, + "learning_rate": 7.667243251940948e-05, + "loss": 4.5842, + "step": 16650 + }, + { + "epoch": 1.27, + "learning_rate": 7.663220563980852e-05, + "loss": 4.2728, + "step": 16655 + }, + { + "epoch": 1.27, + "learning_rate": 7.659197876020758e-05, + "loss": 3.2278, + "step": 16660 + }, + { + "epoch": 1.27, + "learning_rate": 7.655175188060662e-05, + "loss": 3.1791, + "step": 16665 + }, + { + "epoch": 1.27, + "learning_rate": 7.651152500100568e-05, + "loss": 2.4688, + "step": 16670 + }, + { + "epoch": 1.27, + "learning_rate": 7.647129812140473e-05, + "loss": 3.6795, + "step": 16675 + }, + { + "epoch": 1.27, + "learning_rate": 7.643107124180377e-05, + "loss": 1.2943, + "step": 16680 + }, + { + "epoch": 1.28, + "learning_rate": 7.639084436220283e-05, + "loss": 3.7789, + "step": 16685 + }, + { + "epoch": 1.28, + "learning_rate": 7.635061748260189e-05, + "loss": 5.2229, + "step": 16690 + }, + { + "epoch": 1.28, + "learning_rate": 7.631039060300092e-05, + "loss": 4.4244, + "step": 16695 + }, + { + "epoch": 1.28, + "learning_rate": 7.627016372339998e-05, + "loss": 2.974, + "step": 16700 + }, + { + "epoch": 1.28, + "learning_rate": 7.622993684379903e-05, + "loss": 3.6898, + "step": 16705 + }, + { + "epoch": 1.28, + "learning_rate": 7.618970996419808e-05, + "loss": 2.8105, + "step": 16710 + }, + { + "epoch": 1.28, + "learning_rate": 7.614948308459712e-05, + "loss": 1.7367, + "step": 16715 + }, + { + "epoch": 1.28, + "learning_rate": 7.610925620499618e-05, + "loss": 2.6505, + "step": 16720 + }, + { + "epoch": 1.28, + "learning_rate": 7.606902932539524e-05, + "loss": 1.5005, + "step": 16725 + }, + { + "epoch": 1.28, + "learning_rate": 7.602880244579427e-05, + "loss": 1.0236, + "step": 16730 + }, + { + "epoch": 1.28, + "learning_rate": 7.598857556619333e-05, + "loss": 3.0022, + "step": 16735 + }, + { + "epoch": 1.28, + "learning_rate": 7.594834868659239e-05, + "loss": 4.4209, + "step": 16740 + }, + { + "epoch": 1.28, + "learning_rate": 7.590812180699143e-05, + "loss": 4.0451, + "step": 16745 + }, + { + "epoch": 1.28, + "learning_rate": 7.586789492739048e-05, + "loss": 4.0252, + "step": 16750 + }, + { + "epoch": 1.28, + "learning_rate": 7.582766804778954e-05, + "loss": 2.3807, + "step": 16755 + }, + { + "epoch": 1.28, + "learning_rate": 7.57874411681886e-05, + "loss": 3.1335, + "step": 16760 + }, + { + "epoch": 1.28, + "learning_rate": 7.574721428858764e-05, + "loss": 3.6875, + "step": 16765 + }, + { + "epoch": 1.28, + "learning_rate": 7.570698740898668e-05, + "loss": 2.3758, + "step": 16770 + }, + { + "epoch": 1.28, + "learning_rate": 7.566676052938574e-05, + "loss": 1.6693, + "step": 16775 + }, + { + "epoch": 1.28, + "learning_rate": 7.562653364978479e-05, + "loss": 2.396, + "step": 16780 + }, + { + "epoch": 1.28, + "learning_rate": 7.558630677018384e-05, + "loss": 1.7757, + "step": 16785 + }, + { + "epoch": 1.28, + "learning_rate": 7.554607989058289e-05, + "loss": 4.6365, + "step": 16790 + }, + { + "epoch": 1.28, + "learning_rate": 7.550585301098195e-05, + "loss": 3.7377, + "step": 16795 + }, + { + "epoch": 1.28, + "learning_rate": 7.546562613138099e-05, + "loss": 3.5774, + "step": 16800 + }, + { + "epoch": 1.28, + "learning_rate": 7.542539925178004e-05, + "loss": 3.6809, + "step": 16805 + }, + { + "epoch": 1.28, + "learning_rate": 7.53851723721791e-05, + "loss": 3.406, + "step": 16810 + }, + { + "epoch": 1.29, + "learning_rate": 7.534494549257814e-05, + "loss": 2.5013, + "step": 16815 + }, + { + "epoch": 1.29, + "learning_rate": 7.53047186129772e-05, + "loss": 3.9091, + "step": 16820 + }, + { + "epoch": 1.29, + "learning_rate": 7.526449173337624e-05, + "loss": 3.2679, + "step": 16825 + }, + { + "epoch": 1.29, + "learning_rate": 7.52242648537753e-05, + "loss": 3.0857, + "step": 16830 + }, + { + "epoch": 1.29, + "learning_rate": 7.518403797417435e-05, + "loss": 3.6681, + "step": 16835 + }, + { + "epoch": 1.29, + "learning_rate": 7.51438110945734e-05, + "loss": 6.1348, + "step": 16840 + }, + { + "epoch": 1.29, + "learning_rate": 7.510358421497245e-05, + "loss": 3.2572, + "step": 16845 + }, + { + "epoch": 1.29, + "learning_rate": 7.50633573353715e-05, + "loss": 4.0573, + "step": 16850 + }, + { + "epoch": 1.29, + "learning_rate": 7.502313045577055e-05, + "loss": 3.3355, + "step": 16855 + }, + { + "epoch": 1.29, + "learning_rate": 7.498290357616961e-05, + "loss": 3.1018, + "step": 16860 + }, + { + "epoch": 1.29, + "learning_rate": 7.494267669656864e-05, + "loss": 2.3866, + "step": 16865 + }, + { + "epoch": 1.29, + "learning_rate": 7.49024498169677e-05, + "loss": 4.0219, + "step": 16870 + }, + { + "epoch": 1.29, + "learning_rate": 7.486222293736676e-05, + "loss": 1.12, + "step": 16875 + }, + { + "epoch": 1.29, + "learning_rate": 7.48219960577658e-05, + "loss": 1.5368, + "step": 16880 + }, + { + "epoch": 1.29, + "learning_rate": 7.478176917816485e-05, + "loss": 3.0138, + "step": 16885 + }, + { + "epoch": 1.29, + "learning_rate": 7.47415422985639e-05, + "loss": 3.6557, + "step": 16890 + }, + { + "epoch": 1.29, + "learning_rate": 7.470131541896296e-05, + "loss": 3.2788, + "step": 16895 + }, + { + "epoch": 1.29, + "learning_rate": 7.4661088539362e-05, + "loss": 3.7785, + "step": 16900 + }, + { + "epoch": 1.29, + "learning_rate": 7.462086165976105e-05, + "loss": 3.4871, + "step": 16905 + }, + { + "epoch": 1.29, + "learning_rate": 7.458063478016011e-05, + "loss": 4.8828, + "step": 16910 + }, + { + "epoch": 1.29, + "learning_rate": 7.454040790055916e-05, + "loss": 3.833, + "step": 16915 + }, + { + "epoch": 1.29, + "learning_rate": 7.45001810209582e-05, + "loss": 1.6004, + "step": 16920 + }, + { + "epoch": 1.29, + "learning_rate": 7.445995414135726e-05, + "loss": 2.4341, + "step": 16925 + }, + { + "epoch": 1.29, + "learning_rate": 7.441972726175632e-05, + "loss": 2.7167, + "step": 16930 + }, + { + "epoch": 1.29, + "learning_rate": 7.437950038215536e-05, + "loss": 3.296, + "step": 16935 + }, + { + "epoch": 1.29, + "learning_rate": 7.43392735025544e-05, + "loss": 4.0896, + "step": 16940 + }, + { + "epoch": 1.3, + "learning_rate": 7.429904662295346e-05, + "loss": 2.8058, + "step": 16945 + }, + { + "epoch": 1.3, + "learning_rate": 7.425881974335251e-05, + "loss": 2.2906, + "step": 16950 + }, + { + "epoch": 1.3, + "learning_rate": 7.421859286375155e-05, + "loss": 3.6217, + "step": 16955 + }, + { + "epoch": 1.3, + "learning_rate": 7.417836598415061e-05, + "loss": 2.6921, + "step": 16960 + }, + { + "epoch": 1.3, + "learning_rate": 7.413813910454967e-05, + "loss": 3.0229, + "step": 16965 + }, + { + "epoch": 1.3, + "learning_rate": 7.409791222494872e-05, + "loss": 2.7896, + "step": 16970 + }, + { + "epoch": 1.3, + "learning_rate": 7.405768534534776e-05, + "loss": 2.8448, + "step": 16975 + }, + { + "epoch": 1.3, + "learning_rate": 7.401745846574682e-05, + "loss": 2.2765, + "step": 16980 + }, + { + "epoch": 1.3, + "learning_rate": 7.397723158614586e-05, + "loss": 3.1928, + "step": 16985 + }, + { + "epoch": 1.3, + "learning_rate": 7.393700470654492e-05, + "loss": 4.5197, + "step": 16990 + }, + { + "epoch": 1.3, + "learning_rate": 7.389677782694397e-05, + "loss": 4.3378, + "step": 16995 + }, + { + "epoch": 1.3, + "learning_rate": 7.385655094734301e-05, + "loss": 3.5922, + "step": 17000 + }, + { + "epoch": 1.3, + "learning_rate": 7.381632406774207e-05, + "loss": 3.5587, + "step": 17005 + }, + { + "epoch": 1.3, + "learning_rate": 7.377609718814113e-05, + "loss": 3.9139, + "step": 17010 + }, + { + "epoch": 1.3, + "learning_rate": 7.373587030854017e-05, + "loss": 2.5823, + "step": 17015 + }, + { + "epoch": 1.3, + "learning_rate": 7.369564342893922e-05, + "loss": 2.5079, + "step": 17020 + }, + { + "epoch": 1.3, + "learning_rate": 7.365541654933827e-05, + "loss": 0.9608, + "step": 17025 + }, + { + "epoch": 1.3, + "learning_rate": 7.362323504565751e-05, + "loss": 2.5257, + "step": 17030 + }, + { + "epoch": 1.3, + "learning_rate": 7.358300816605657e-05, + "loss": 3.4347, + "step": 17035 + }, + { + "epoch": 1.3, + "learning_rate": 7.354278128645561e-05, + "loss": 3.7679, + "step": 17040 + }, + { + "epoch": 1.3, + "learning_rate": 7.350255440685467e-05, + "loss": 3.6029, + "step": 17045 + }, + { + "epoch": 1.3, + "learning_rate": 7.346232752725371e-05, + "loss": 3.6551, + "step": 17050 + }, + { + "epoch": 1.3, + "learning_rate": 7.342210064765277e-05, + "loss": 4.3367, + "step": 17055 + }, + { + "epoch": 1.3, + "learning_rate": 7.338187376805182e-05, + "loss": 3.6209, + "step": 17060 + }, + { + "epoch": 1.3, + "learning_rate": 7.334164688845086e-05, + "loss": 4.4252, + "step": 17065 + }, + { + "epoch": 1.3, + "learning_rate": 7.330142000884992e-05, + "loss": 3.3807, + "step": 17070 + }, + { + "epoch": 1.31, + "learning_rate": 7.326119312924898e-05, + "loss": 3.8533, + "step": 17075 + }, + { + "epoch": 1.31, + "learning_rate": 7.322096624964801e-05, + "loss": 2.6486, + "step": 17080 + }, + { + "epoch": 1.31, + "learning_rate": 7.318073937004707e-05, + "loss": 1.0389, + "step": 17085 + }, + { + "epoch": 1.31, + "learning_rate": 7.314051249044613e-05, + "loss": 4.8477, + "step": 17090 + }, + { + "epoch": 1.31, + "learning_rate": 7.310028561084517e-05, + "loss": 4.098, + "step": 17095 + }, + { + "epoch": 1.31, + "learning_rate": 7.306005873124421e-05, + "loss": 3.5662, + "step": 17100 + }, + { + "epoch": 1.31, + "learning_rate": 7.301983185164327e-05, + "loss": 4.3957, + "step": 17105 + }, + { + "epoch": 1.31, + "learning_rate": 7.297960497204233e-05, + "loss": 1.8946, + "step": 17110 + }, + { + "epoch": 1.31, + "learning_rate": 7.293937809244136e-05, + "loss": 4.3447, + "step": 17115 + }, + { + "epoch": 1.31, + "learning_rate": 7.289915121284042e-05, + "loss": 3.3158, + "step": 17120 + }, + { + "epoch": 1.31, + "learning_rate": 7.285892433323948e-05, + "loss": 1.9162, + "step": 17125 + }, + { + "epoch": 1.31, + "learning_rate": 7.281869745363852e-05, + "loss": 2.8082, + "step": 17130 + }, + { + "epoch": 1.31, + "learning_rate": 7.277847057403757e-05, + "loss": 2.7045, + "step": 17135 + }, + { + "epoch": 1.31, + "learning_rate": 7.273824369443663e-05, + "loss": 4.3832, + "step": 17140 + }, + { + "epoch": 1.31, + "learning_rate": 7.269801681483568e-05, + "loss": 4.2967, + "step": 17145 + }, + { + "epoch": 1.31, + "learning_rate": 7.265778993523473e-05, + "loss": 4.1742, + "step": 17150 + }, + { + "epoch": 1.31, + "learning_rate": 7.261756305563377e-05, + "loss": 4.3498, + "step": 17155 + }, + { + "epoch": 1.31, + "learning_rate": 7.257733617603283e-05, + "loss": 3.0135, + "step": 17160 + }, + { + "epoch": 1.31, + "learning_rate": 7.253710929643188e-05, + "loss": 3.2709, + "step": 17165 + }, + { + "epoch": 1.31, + "learning_rate": 7.249688241683092e-05, + "loss": 2.5106, + "step": 17170 + }, + { + "epoch": 1.31, + "learning_rate": 7.245665553722998e-05, + "loss": 1.6757, + "step": 17175 + }, + { + "epoch": 1.31, + "learning_rate": 7.241642865762904e-05, + "loss": 2.2027, + "step": 17180 + }, + { + "epoch": 1.31, + "learning_rate": 7.237620177802808e-05, + "loss": 3.2238, + "step": 17185 + }, + { + "epoch": 1.31, + "learning_rate": 7.233597489842713e-05, + "loss": 4.7036, + "step": 17190 + }, + { + "epoch": 1.31, + "learning_rate": 7.229574801882619e-05, + "loss": 4.2068, + "step": 17195 + }, + { + "epoch": 1.31, + "learning_rate": 7.225552113922523e-05, + "loss": 3.9623, + "step": 17200 + }, + { + "epoch": 1.31, + "learning_rate": 7.221529425962429e-05, + "loss": 3.6021, + "step": 17205 + }, + { + "epoch": 1.32, + "learning_rate": 7.217506738002333e-05, + "loss": 2.7974, + "step": 17210 + }, + { + "epoch": 1.32, + "learning_rate": 7.213484050042238e-05, + "loss": 3.2512, + "step": 17215 + }, + { + "epoch": 1.32, + "learning_rate": 7.209461362082144e-05, + "loss": 2.4599, + "step": 17220 + }, + { + "epoch": 1.32, + "learning_rate": 7.20543867412205e-05, + "loss": 3.6352, + "step": 17225 + }, + { + "epoch": 1.32, + "learning_rate": 7.201415986161954e-05, + "loss": 1.6141, + "step": 17230 + }, + { + "epoch": 1.32, + "learning_rate": 7.197393298201858e-05, + "loss": 1.3339, + "step": 17235 + }, + { + "epoch": 1.32, + "learning_rate": 7.193370610241764e-05, + "loss": 4.0625, + "step": 17240 + }, + { + "epoch": 1.32, + "learning_rate": 7.189347922281669e-05, + "loss": 3.9266, + "step": 17245 + }, + { + "epoch": 1.32, + "learning_rate": 7.185325234321573e-05, + "loss": 4.1445, + "step": 17250 + }, + { + "epoch": 1.32, + "learning_rate": 7.181302546361479e-05, + "loss": 4.4178, + "step": 17255 + }, + { + "epoch": 1.32, + "learning_rate": 7.177279858401385e-05, + "loss": 5.1043, + "step": 17260 + }, + { + "epoch": 1.32, + "learning_rate": 7.173257170441289e-05, + "loss": 1.8865, + "step": 17265 + }, + { + "epoch": 1.32, + "learning_rate": 7.169234482481194e-05, + "loss": 3.1843, + "step": 17270 + }, + { + "epoch": 1.32, + "learning_rate": 7.1652117945211e-05, + "loss": 3.2057, + "step": 17275 + }, + { + "epoch": 1.32, + "learning_rate": 7.161189106561005e-05, + "loss": 2.154, + "step": 17280 + }, + { + "epoch": 1.32, + "learning_rate": 7.157166418600909e-05, + "loss": 4.2513, + "step": 17285 + }, + { + "epoch": 1.32, + "learning_rate": 7.153143730640814e-05, + "loss": 4.2359, + "step": 17290 + }, + { + "epoch": 1.32, + "learning_rate": 7.14912104268072e-05, + "loss": 3.6398, + "step": 17295 + }, + { + "epoch": 1.32, + "learning_rate": 7.145098354720625e-05, + "loss": 3.352, + "step": 17300 + }, + { + "epoch": 1.32, + "learning_rate": 7.141075666760529e-05, + "loss": 2.9969, + "step": 17305 + }, + { + "epoch": 1.32, + "learning_rate": 7.137052978800435e-05, + "loss": 2.6333, + "step": 17310 + }, + { + "epoch": 1.32, + "learning_rate": 7.133030290840341e-05, + "loss": 3.7852, + "step": 17315 + }, + { + "epoch": 1.32, + "learning_rate": 7.129007602880244e-05, + "loss": 1.6063, + "step": 17320 + }, + { + "epoch": 1.32, + "learning_rate": 7.12498491492015e-05, + "loss": 3.288, + "step": 17325 + }, + { + "epoch": 1.32, + "learning_rate": 7.120962226960056e-05, + "loss": 1.2184, + "step": 17330 + }, + { + "epoch": 1.32, + "learning_rate": 7.11693953899996e-05, + "loss": 2.4909, + "step": 17335 + }, + { + "epoch": 1.33, + "learning_rate": 7.112916851039864e-05, + "loss": 4.035, + "step": 17340 + }, + { + "epoch": 1.33, + "learning_rate": 7.10889416307977e-05, + "loss": 4.6348, + "step": 17345 + }, + { + "epoch": 1.33, + "learning_rate": 7.104871475119676e-05, + "loss": 3.6294, + "step": 17350 + }, + { + "epoch": 1.33, + "learning_rate": 7.10084878715958e-05, + "loss": 4.0395, + "step": 17355 + }, + { + "epoch": 1.33, + "learning_rate": 7.096826099199485e-05, + "loss": 4.607, + "step": 17360 + }, + { + "epoch": 1.33, + "learning_rate": 7.092803411239391e-05, + "loss": 3.9953, + "step": 17365 + }, + { + "epoch": 1.33, + "learning_rate": 7.088780723279295e-05, + "loss": 2.5543, + "step": 17370 + }, + { + "epoch": 1.33, + "learning_rate": 7.084758035319201e-05, + "loss": 2.2387, + "step": 17375 + }, + { + "epoch": 1.33, + "learning_rate": 7.080735347359106e-05, + "loss": 1.8694, + "step": 17380 + }, + { + "epoch": 1.33, + "learning_rate": 7.07671265939901e-05, + "loss": 2.5382, + "step": 17385 + }, + { + "epoch": 1.33, + "learning_rate": 7.072689971438916e-05, + "loss": 5.0727, + "step": 17390 + }, + { + "epoch": 1.33, + "learning_rate": 7.06866728347882e-05, + "loss": 3.9621, + "step": 17395 + }, + { + "epoch": 1.33, + "learning_rate": 7.064644595518726e-05, + "loss": 4.0389, + "step": 17400 + }, + { + "epoch": 1.33, + "learning_rate": 7.06062190755863e-05, + "loss": 3.7287, + "step": 17405 + }, + { + "epoch": 1.33, + "learning_rate": 7.056599219598536e-05, + "loss": 3.1371, + "step": 17410 + }, + { + "epoch": 1.33, + "learning_rate": 7.052576531638441e-05, + "loss": 2.7134, + "step": 17415 + }, + { + "epoch": 1.33, + "learning_rate": 7.048553843678345e-05, + "loss": 3.0964, + "step": 17420 + }, + { + "epoch": 1.33, + "learning_rate": 7.044531155718251e-05, + "loss": 1.5899, + "step": 17425 + }, + { + "epoch": 1.33, + "learning_rate": 7.040508467758157e-05, + "loss": 1.634, + "step": 17430 + }, + { + "epoch": 1.33, + "learning_rate": 7.036485779798062e-05, + "loss": 3.0479, + "step": 17435 + }, + { + "epoch": 1.33, + "learning_rate": 7.032463091837966e-05, + "loss": 4.1072, + "step": 17440 + }, + { + "epoch": 1.33, + "learning_rate": 7.028440403877872e-05, + "loss": 3.6551, + "step": 17445 + }, + { + "epoch": 1.33, + "learning_rate": 7.024417715917778e-05, + "loss": 4.5043, + "step": 17450 + }, + { + "epoch": 1.33, + "learning_rate": 7.020395027957681e-05, + "loss": 3.7246, + "step": 17455 + }, + { + "epoch": 1.33, + "learning_rate": 7.016372339997587e-05, + "loss": 3.2265, + "step": 17460 + }, + { + "epoch": 1.33, + "learning_rate": 7.012349652037492e-05, + "loss": 3.3408, + "step": 17465 + }, + { + "epoch": 1.34, + "learning_rate": 7.008326964077397e-05, + "loss": 3.5267, + "step": 17470 + }, + { + "epoch": 1.34, + "learning_rate": 7.004304276117301e-05, + "loss": 1.8221, + "step": 17475 + }, + { + "epoch": 1.34, + "learning_rate": 7.000281588157207e-05, + "loss": 1.5264, + "step": 17480 + }, + { + "epoch": 1.34, + "learning_rate": 6.996258900197113e-05, + "loss": 2.7208, + "step": 17485 + }, + { + "epoch": 1.34, + "learning_rate": 6.992236212237016e-05, + "loss": 3.7732, + "step": 17490 + }, + { + "epoch": 1.34, + "learning_rate": 6.988213524276922e-05, + "loss": 4.3205, + "step": 17495 + }, + { + "epoch": 1.34, + "learning_rate": 6.984190836316828e-05, + "loss": 4.0115, + "step": 17500 + }, + { + "epoch": 1.34, + "learning_rate": 6.980168148356732e-05, + "loss": 3.7114, + "step": 17505 + }, + { + "epoch": 1.34, + "learning_rate": 6.976145460396637e-05, + "loss": 2.9249, + "step": 17510 + }, + { + "epoch": 1.34, + "learning_rate": 6.972122772436543e-05, + "loss": 1.8061, + "step": 17515 + }, + { + "epoch": 1.34, + "learning_rate": 6.968100084476447e-05, + "loss": 1.9254, + "step": 17520 + }, + { + "epoch": 1.34, + "learning_rate": 6.964077396516353e-05, + "loss": 0.8883, + "step": 17525 + }, + { + "epoch": 1.34, + "learning_rate": 6.960054708556257e-05, + "loss": 0.7865, + "step": 17530 + }, + { + "epoch": 1.34, + "learning_rate": 6.956032020596163e-05, + "loss": 1.5326, + "step": 17535 + }, + { + "epoch": 1.34, + "learning_rate": 6.952009332636068e-05, + "loss": 3.7009, + "step": 17540 + }, + { + "epoch": 1.34, + "learning_rate": 6.947986644675972e-05, + "loss": 3.5918, + "step": 17545 + }, + { + "epoch": 1.34, + "learning_rate": 6.943963956715878e-05, + "loss": 3.55, + "step": 17550 + }, + { + "epoch": 1.34, + "learning_rate": 6.939941268755782e-05, + "loss": 2.8442, + "step": 17555 + }, + { + "epoch": 1.34, + "learning_rate": 6.935918580795688e-05, + "loss": 3.8076, + "step": 17560 + }, + { + "epoch": 1.34, + "learning_rate": 6.931895892835593e-05, + "loss": 2.268, + "step": 17565 + }, + { + "epoch": 1.34, + "learning_rate": 6.927873204875498e-05, + "loss": 1.8671, + "step": 17570 + }, + { + "epoch": 1.34, + "learning_rate": 6.923850516915403e-05, + "loss": 3.1274, + "step": 17575 + }, + { + "epoch": 1.34, + "learning_rate": 6.919827828955309e-05, + "loss": 1.8582, + "step": 17580 + }, + { + "epoch": 1.34, + "learning_rate": 6.915805140995213e-05, + "loss": 1.7748, + "step": 17585 + }, + { + "epoch": 1.34, + "learning_rate": 6.911782453035118e-05, + "loss": 3.6909, + "step": 17590 + }, + { + "epoch": 1.34, + "learning_rate": 6.907759765075024e-05, + "loss": 4.9395, + "step": 17595 + }, + { + "epoch": 1.35, + "learning_rate": 6.90373707711493e-05, + "loss": 5.6717, + "step": 17600 + }, + { + "epoch": 1.35, + "learning_rate": 6.899714389154834e-05, + "loss": 3.5836, + "step": 17605 + }, + { + "epoch": 1.35, + "learning_rate": 6.895691701194738e-05, + "loss": 4.4979, + "step": 17610 + }, + { + "epoch": 1.35, + "learning_rate": 6.891669013234644e-05, + "loss": 2.4373, + "step": 17615 + }, + { + "epoch": 1.35, + "learning_rate": 6.887646325274549e-05, + "loss": 2.756, + "step": 17620 + }, + { + "epoch": 1.35, + "learning_rate": 6.883623637314453e-05, + "loss": 1.4593, + "step": 17625 + }, + { + "epoch": 1.35, + "learning_rate": 6.879600949354359e-05, + "loss": 1.1477, + "step": 17630 + }, + { + "epoch": 1.35, + "learning_rate": 6.875578261394265e-05, + "loss": 1.9229, + "step": 17635 + }, + { + "epoch": 1.35, + "learning_rate": 6.871555573434169e-05, + "loss": 4.017, + "step": 17640 + }, + { + "epoch": 1.35, + "learning_rate": 6.867532885474074e-05, + "loss": 5.0738, + "step": 17645 + }, + { + "epoch": 1.35, + "learning_rate": 6.86351019751398e-05, + "loss": 3.6027, + "step": 17650 + }, + { + "epoch": 1.35, + "learning_rate": 6.859487509553885e-05, + "loss": 3.1089, + "step": 17655 + }, + { + "epoch": 1.35, + "learning_rate": 6.855464821593788e-05, + "loss": 3.3415, + "step": 17660 + }, + { + "epoch": 1.35, + "learning_rate": 6.851442133633694e-05, + "loss": 3.0649, + "step": 17665 + }, + { + "epoch": 1.35, + "learning_rate": 6.8474194456736e-05, + "loss": 1.925, + "step": 17670 + }, + { + "epoch": 1.35, + "learning_rate": 6.843396757713505e-05, + "loss": 2.909, + "step": 17675 + }, + { + "epoch": 1.35, + "learning_rate": 6.839374069753409e-05, + "loss": 3.0972, + "step": 17680 + }, + { + "epoch": 1.35, + "learning_rate": 6.835351381793315e-05, + "loss": 1.8968, + "step": 17685 + }, + { + "epoch": 1.35, + "learning_rate": 6.831328693833219e-05, + "loss": 4.1662, + "step": 17690 + }, + { + "epoch": 1.35, + "learning_rate": 6.827306005873125e-05, + "loss": 3.5768, + "step": 17695 + }, + { + "epoch": 1.35, + "learning_rate": 6.82328331791303e-05, + "loss": 3.7977, + "step": 17700 + }, + { + "epoch": 1.35, + "learning_rate": 6.819260629952935e-05, + "loss": 2.5532, + "step": 17705 + }, + { + "epoch": 1.35, + "learning_rate": 6.81523794199284e-05, + "loss": 2.4193, + "step": 17710 + }, + { + "epoch": 1.35, + "learning_rate": 6.811215254032744e-05, + "loss": 2.7911, + "step": 17715 + }, + { + "epoch": 1.35, + "learning_rate": 6.80719256607265e-05, + "loss": 1.1723, + "step": 17720 + }, + { + "epoch": 1.35, + "learning_rate": 6.803169878112555e-05, + "loss": 2.1002, + "step": 17725 + }, + { + "epoch": 1.36, + "learning_rate": 6.79914719015246e-05, + "loss": 1.357, + "step": 17730 + }, + { + "epoch": 1.36, + "learning_rate": 6.795124502192365e-05, + "loss": 2.7562, + "step": 17735 + }, + { + "epoch": 1.36, + "learning_rate": 6.791101814232271e-05, + "loss": 3.9273, + "step": 17740 + }, + { + "epoch": 1.36, + "learning_rate": 6.787079126272175e-05, + "loss": 4.8758, + "step": 17745 + }, + { + "epoch": 1.36, + "learning_rate": 6.783056438312081e-05, + "loss": 3.9227, + "step": 17750 + }, + { + "epoch": 1.36, + "learning_rate": 6.779033750351986e-05, + "loss": 3.1739, + "step": 17755 + }, + { + "epoch": 1.36, + "learning_rate": 6.77501106239189e-05, + "loss": 3.0794, + "step": 17760 + }, + { + "epoch": 1.36, + "learning_rate": 6.770988374431796e-05, + "loss": 2.0279, + "step": 17765 + }, + { + "epoch": 1.36, + "learning_rate": 6.766965686471702e-05, + "loss": 2.2346, + "step": 17770 + }, + { + "epoch": 1.36, + "learning_rate": 6.762942998511606e-05, + "loss": 2.6593, + "step": 17775 + }, + { + "epoch": 1.36, + "learning_rate": 6.75892031055151e-05, + "loss": 0.953, + "step": 17780 + }, + { + "epoch": 1.36, + "learning_rate": 6.754897622591416e-05, + "loss": 2.3692, + "step": 17785 + }, + { + "epoch": 1.36, + "learning_rate": 6.750874934631321e-05, + "loss": 4.9299, + "step": 17790 + }, + { + "epoch": 1.36, + "learning_rate": 6.746852246671225e-05, + "loss": 4.4605, + "step": 17795 + }, + { + "epoch": 1.36, + "learning_rate": 6.742829558711131e-05, + "loss": 4.1517, + "step": 17800 + }, + { + "epoch": 1.36, + "learning_rate": 6.738806870751037e-05, + "loss": 2.3445, + "step": 17805 + }, + { + "epoch": 1.36, + "learning_rate": 6.734784182790941e-05, + "loss": 4.102, + "step": 17810 + }, + { + "epoch": 1.36, + "learning_rate": 6.730761494830846e-05, + "loss": 2.3169, + "step": 17815 + }, + { + "epoch": 1.36, + "learning_rate": 6.726738806870752e-05, + "loss": 3.781, + "step": 17820 + }, + { + "epoch": 1.36, + "learning_rate": 6.722716118910656e-05, + "loss": 1.5209, + "step": 17825 + }, + { + "epoch": 1.36, + "learning_rate": 6.718693430950561e-05, + "loss": 2.827, + "step": 17830 + }, + { + "epoch": 1.36, + "learning_rate": 6.714670742990467e-05, + "loss": 1.0628, + "step": 17835 + }, + { + "epoch": 1.36, + "learning_rate": 6.710648055030372e-05, + "loss": 4.5979, + "step": 17840 + }, + { + "epoch": 1.36, + "learning_rate": 6.706625367070277e-05, + "loss": 3.8928, + "step": 17845 + }, + { + "epoch": 1.36, + "learning_rate": 6.702602679110181e-05, + "loss": 4.7121, + "step": 17850 + }, + { + "epoch": 1.36, + "learning_rate": 6.698579991150087e-05, + "loss": 3.5897, + "step": 17855 + }, + { + "epoch": 1.37, + "learning_rate": 6.694557303189992e-05, + "loss": 2.4365, + "step": 17860 + }, + { + "epoch": 1.37, + "learning_rate": 6.690534615229896e-05, + "loss": 3.8054, + "step": 17865 + }, + { + "epoch": 1.37, + "learning_rate": 6.686511927269802e-05, + "loss": 3.1852, + "step": 17870 + }, + { + "epoch": 1.37, + "learning_rate": 6.682489239309708e-05, + "loss": 1.203, + "step": 17875 + }, + { + "epoch": 1.37, + "learning_rate": 6.678466551349612e-05, + "loss": 0.7864, + "step": 17880 + }, + { + "epoch": 1.37, + "learning_rate": 6.674443863389517e-05, + "loss": 1.6607, + "step": 17885 + }, + { + "epoch": 1.37, + "learning_rate": 6.670421175429422e-05, + "loss": 4.7062, + "step": 17890 + }, + { + "epoch": 1.37, + "learning_rate": 6.666398487469327e-05, + "loss": 4.7527, + "step": 17895 + }, + { + "epoch": 1.37, + "learning_rate": 6.662375799509233e-05, + "loss": 3.8579, + "step": 17900 + }, + { + "epoch": 1.37, + "learning_rate": 6.658353111549137e-05, + "loss": 3.7981, + "step": 17905 + }, + { + "epoch": 1.37, + "learning_rate": 6.654330423589043e-05, + "loss": 2.9869, + "step": 17910 + }, + { + "epoch": 1.37, + "learning_rate": 6.650307735628948e-05, + "loss": 1.7792, + "step": 17915 + }, + { + "epoch": 1.37, + "learning_rate": 6.646285047668853e-05, + "loss": 4.4921, + "step": 17920 + }, + { + "epoch": 1.37, + "learning_rate": 6.642262359708758e-05, + "loss": 3.5635, + "step": 17925 + }, + { + "epoch": 1.37, + "learning_rate": 6.638239671748662e-05, + "loss": 2.6001, + "step": 17930 + }, + { + "epoch": 1.37, + "learning_rate": 6.634216983788568e-05, + "loss": 1.0408, + "step": 17935 + }, + { + "epoch": 1.37, + "learning_rate": 6.630194295828473e-05, + "loss": 4.8291, + "step": 17940 + }, + { + "epoch": 1.37, + "learning_rate": 6.626171607868378e-05, + "loss": 4.6105, + "step": 17945 + }, + { + "epoch": 1.37, + "learning_rate": 6.622148919908283e-05, + "loss": 3.6875, + "step": 17950 + }, + { + "epoch": 1.37, + "learning_rate": 6.618126231948189e-05, + "loss": 3.7762, + "step": 17955 + }, + { + "epoch": 1.37, + "learning_rate": 6.614103543988093e-05, + "loss": 3.6926, + "step": 17960 + }, + { + "epoch": 1.37, + "learning_rate": 6.610080856027998e-05, + "loss": 4.0579, + "step": 17965 + }, + { + "epoch": 1.37, + "learning_rate": 6.606058168067903e-05, + "loss": 2.8567, + "step": 17970 + }, + { + "epoch": 1.37, + "learning_rate": 6.602035480107809e-05, + "loss": 2.1443, + "step": 17975 + }, + { + "epoch": 1.37, + "learning_rate": 6.598012792147714e-05, + "loss": 3.152, + "step": 17980 + }, + { + "epoch": 1.37, + "learning_rate": 6.593990104187618e-05, + "loss": 1.3468, + "step": 17985 + }, + { + "epoch": 1.37, + "learning_rate": 6.589967416227524e-05, + "loss": 4.0677, + "step": 17990 + }, + { + "epoch": 1.38, + "learning_rate": 6.585944728267429e-05, + "loss": 4.3828, + "step": 17995 + }, + { + "epoch": 1.38, + "learning_rate": 6.581922040307333e-05, + "loss": 4.085, + "step": 18000 + }, + { + "epoch": 1.38, + "learning_rate": 6.577899352347239e-05, + "loss": 3.9857, + "step": 18005 + }, + { + "epoch": 1.38, + "learning_rate": 6.573876664387145e-05, + "loss": 4.3557, + "step": 18010 + }, + { + "epoch": 1.38, + "learning_rate": 6.569853976427048e-05, + "loss": 3.3637, + "step": 18015 + }, + { + "epoch": 1.38, + "learning_rate": 6.565831288466954e-05, + "loss": 3.1275, + "step": 18020 + }, + { + "epoch": 1.38, + "learning_rate": 6.56180860050686e-05, + "loss": 1.9914, + "step": 18025 + }, + { + "epoch": 1.38, + "learning_rate": 6.557785912546764e-05, + "loss": 1.9206, + "step": 18030 + }, + { + "epoch": 1.38, + "learning_rate": 6.553763224586668e-05, + "loss": 2.7614, + "step": 18035 + }, + { + "epoch": 1.38, + "learning_rate": 6.549740536626574e-05, + "loss": 3.8361, + "step": 18040 + }, + { + "epoch": 1.38, + "learning_rate": 6.54571784866648e-05, + "loss": 4.5418, + "step": 18045 + }, + { + "epoch": 1.38, + "learning_rate": 6.541695160706384e-05, + "loss": 3.9312, + "step": 18050 + }, + { + "epoch": 1.38, + "learning_rate": 6.537672472746289e-05, + "loss": 2.5614, + "step": 18055 + }, + { + "epoch": 1.38, + "learning_rate": 6.533649784786195e-05, + "loss": 2.4895, + "step": 18060 + }, + { + "epoch": 1.38, + "learning_rate": 6.529627096826099e-05, + "loss": 1.8234, + "step": 18065 + }, + { + "epoch": 1.38, + "learning_rate": 6.525604408866005e-05, + "loss": 2.198, + "step": 18070 + }, + { + "epoch": 1.38, + "learning_rate": 6.52158172090591e-05, + "loss": 1.9346, + "step": 18075 + }, + { + "epoch": 1.38, + "learning_rate": 6.517559032945815e-05, + "loss": 1.823, + "step": 18080 + }, + { + "epoch": 1.38, + "learning_rate": 6.51353634498572e-05, + "loss": 1.2123, + "step": 18085 + }, + { + "epoch": 1.38, + "learning_rate": 6.509513657025624e-05, + "loss": 3.9965, + "step": 18090 + }, + { + "epoch": 1.38, + "learning_rate": 6.50549096906553e-05, + "loss": 4.3736, + "step": 18095 + }, + { + "epoch": 1.38, + "learning_rate": 6.501468281105435e-05, + "loss": 3.9881, + "step": 18100 + }, + { + "epoch": 1.38, + "learning_rate": 6.49744559314534e-05, + "loss": 3.8488, + "step": 18105 + }, + { + "epoch": 1.38, + "learning_rate": 6.493422905185245e-05, + "loss": 4.2275, + "step": 18110 + }, + { + "epoch": 1.38, + "learning_rate": 6.48940021722515e-05, + "loss": 4.3555, + "step": 18115 + }, + { + "epoch": 1.38, + "learning_rate": 6.485377529265055e-05, + "loss": 3.4964, + "step": 18120 + }, + { + "epoch": 1.39, + "learning_rate": 6.481354841304961e-05, + "loss": 2.727, + "step": 18125 + }, + { + "epoch": 1.39, + "learning_rate": 6.477332153344865e-05, + "loss": 3.9642, + "step": 18130 + }, + { + "epoch": 1.39, + "learning_rate": 6.47330946538477e-05, + "loss": 1.054, + "step": 18135 + }, + { + "epoch": 1.39, + "learning_rate": 6.469286777424676e-05, + "loss": 4.3533, + "step": 18140 + }, + { + "epoch": 1.39, + "learning_rate": 6.465264089464582e-05, + "loss": 3.6725, + "step": 18145 + }, + { + "epoch": 1.39, + "learning_rate": 6.461241401504485e-05, + "loss": 3.9557, + "step": 18150 + }, + { + "epoch": 1.39, + "learning_rate": 6.45721871354439e-05, + "loss": 4.5719, + "step": 18155 + }, + { + "epoch": 1.39, + "learning_rate": 6.453196025584296e-05, + "loss": 3.3895, + "step": 18160 + }, + { + "epoch": 1.39, + "learning_rate": 6.449173337624201e-05, + "loss": 2.3612, + "step": 18165 + }, + { + "epoch": 1.39, + "learning_rate": 6.445150649664105e-05, + "loss": 3.5728, + "step": 18170 + }, + { + "epoch": 1.39, + "learning_rate": 6.441127961704011e-05, + "loss": 1.809, + "step": 18175 + }, + { + "epoch": 1.39, + "learning_rate": 6.437105273743917e-05, + "loss": 2.2907, + "step": 18180 + }, + { + "epoch": 1.39, + "learning_rate": 6.43308258578382e-05, + "loss": 3.9573, + "step": 18185 + }, + { + "epoch": 1.39, + "learning_rate": 6.429059897823726e-05, + "loss": 3.91, + "step": 18190 + }, + { + "epoch": 1.39, + "learning_rate": 6.425037209863632e-05, + "loss": 3.9791, + "step": 18195 + }, + { + "epoch": 1.39, + "learning_rate": 6.421014521903536e-05, + "loss": 4.3547, + "step": 18200 + }, + { + "epoch": 1.39, + "learning_rate": 6.41699183394344e-05, + "loss": 4.3502, + "step": 18205 + }, + { + "epoch": 1.39, + "learning_rate": 6.412969145983346e-05, + "loss": 3.3553, + "step": 18210 + }, + { + "epoch": 1.39, + "learning_rate": 6.408946458023252e-05, + "loss": 2.7739, + "step": 18215 + }, + { + "epoch": 1.39, + "learning_rate": 6.404923770063157e-05, + "loss": 2.2082, + "step": 18220 + }, + { + "epoch": 1.39, + "learning_rate": 6.400901082103061e-05, + "loss": 1.1458, + "step": 18225 + }, + { + "epoch": 1.39, + "learning_rate": 6.396878394142967e-05, + "loss": 2.6004, + "step": 18230 + }, + { + "epoch": 1.39, + "learning_rate": 6.392855706182871e-05, + "loss": 2.9328, + "step": 18235 + }, + { + "epoch": 1.39, + "learning_rate": 6.388833018222776e-05, + "loss": 4.5781, + "step": 18240 + }, + { + "epoch": 1.39, + "learning_rate": 6.384810330262682e-05, + "loss": 4.5582, + "step": 18245 + }, + { + "epoch": 1.39, + "learning_rate": 6.380787642302588e-05, + "loss": 3.7113, + "step": 18250 + }, + { + "epoch": 1.4, + "learning_rate": 6.376764954342492e-05, + "loss": 3.8949, + "step": 18255 + }, + { + "epoch": 1.4, + "learning_rate": 6.372742266382397e-05, + "loss": 4.1025, + "step": 18260 + }, + { + "epoch": 1.4, + "learning_rate": 6.368719578422302e-05, + "loss": 4.8066, + "step": 18265 + }, + { + "epoch": 1.4, + "learning_rate": 6.364696890462207e-05, + "loss": 3.1322, + "step": 18270 + }, + { + "epoch": 1.4, + "learning_rate": 6.360674202502113e-05, + "loss": 2.281, + "step": 18275 + }, + { + "epoch": 1.4, + "learning_rate": 6.356651514542017e-05, + "loss": 1.8965, + "step": 18280 + }, + { + "epoch": 1.4, + "learning_rate": 6.352628826581922e-05, + "loss": 2.0165, + "step": 18285 + }, + { + "epoch": 1.4, + "learning_rate": 6.348606138621827e-05, + "loss": 4.3139, + "step": 18290 + }, + { + "epoch": 1.4, + "learning_rate": 6.344583450661733e-05, + "loss": 3.5159, + "step": 18295 + }, + { + "epoch": 1.4, + "learning_rate": 6.340560762701638e-05, + "loss": 3.6641, + "step": 18300 + }, + { + "epoch": 1.4, + "learning_rate": 6.336538074741542e-05, + "loss": 4.3941, + "step": 18305 + }, + { + "epoch": 1.4, + "learning_rate": 6.332515386781448e-05, + "loss": 4.0872, + "step": 18310 + }, + { + "epoch": 1.4, + "learning_rate": 6.328492698821352e-05, + "loss": 2.6069, + "step": 18315 + }, + { + "epoch": 1.4, + "learning_rate": 6.324470010861257e-05, + "loss": 2.309, + "step": 18320 + }, + { + "epoch": 1.4, + "learning_rate": 6.320447322901163e-05, + "loss": 2.1683, + "step": 18325 + }, + { + "epoch": 1.4, + "learning_rate": 6.316424634941069e-05, + "loss": 1.5804, + "step": 18330 + }, + { + "epoch": 1.4, + "learning_rate": 6.312401946980973e-05, + "loss": 2.2497, + "step": 18335 + }, + { + "epoch": 1.4, + "learning_rate": 6.308379259020878e-05, + "loss": 3.6252, + "step": 18340 + }, + { + "epoch": 1.4, + "learning_rate": 6.304356571060783e-05, + "loss": 4.0513, + "step": 18345 + }, + { + "epoch": 1.4, + "learning_rate": 6.300333883100689e-05, + "loss": 3.3787, + "step": 18350 + }, + { + "epoch": 1.4, + "learning_rate": 6.296311195140592e-05, + "loss": 3.8106, + "step": 18355 + }, + { + "epoch": 1.4, + "learning_rate": 6.292288507180498e-05, + "loss": 4.0287, + "step": 18360 + }, + { + "epoch": 1.4, + "learning_rate": 6.288265819220404e-05, + "loss": 2.5771, + "step": 18365 + }, + { + "epoch": 1.4, + "learning_rate": 6.284243131260308e-05, + "loss": 3.1564, + "step": 18370 + }, + { + "epoch": 1.4, + "learning_rate": 6.280220443300213e-05, + "loss": 0.9893, + "step": 18375 + }, + { + "epoch": 1.4, + "learning_rate": 6.276197755340119e-05, + "loss": 1.7055, + "step": 18380 + }, + { + "epoch": 1.41, + "learning_rate": 6.272175067380025e-05, + "loss": 2.0058, + "step": 18385 + }, + { + "epoch": 1.41, + "learning_rate": 6.268152379419928e-05, + "loss": 4.6082, + "step": 18390 + }, + { + "epoch": 1.41, + "learning_rate": 6.264129691459833e-05, + "loss": 4.1299, + "step": 18395 + }, + { + "epoch": 1.41, + "learning_rate": 6.260107003499739e-05, + "loss": 2.9067, + "step": 18400 + }, + { + "epoch": 1.41, + "learning_rate": 6.256084315539644e-05, + "loss": 4.0816, + "step": 18405 + }, + { + "epoch": 1.41, + "learning_rate": 6.252061627579548e-05, + "loss": 4.8357, + "step": 18410 + }, + { + "epoch": 1.41, + "learning_rate": 6.248038939619454e-05, + "loss": 3.3007, + "step": 18415 + }, + { + "epoch": 1.41, + "learning_rate": 6.24401625165936e-05, + "loss": 2.1316, + "step": 18420 + }, + { + "epoch": 1.41, + "learning_rate": 6.239993563699264e-05, + "loss": 3.2057, + "step": 18425 + }, + { + "epoch": 1.41, + "learning_rate": 6.235970875739169e-05, + "loss": 1.6077, + "step": 18430 + }, + { + "epoch": 1.41, + "learning_rate": 6.231948187779075e-05, + "loss": 1.3006, + "step": 18435 + }, + { + "epoch": 1.41, + "learning_rate": 6.227925499818979e-05, + "loss": 4.4477, + "step": 18440 + }, + { + "epoch": 1.41, + "learning_rate": 6.223902811858885e-05, + "loss": 3.8949, + "step": 18445 + }, + { + "epoch": 1.41, + "learning_rate": 6.21988012389879e-05, + "loss": 4.133, + "step": 18450 + }, + { + "epoch": 1.41, + "learning_rate": 6.215857435938694e-05, + "loss": 4.193, + "step": 18455 + }, + { + "epoch": 1.41, + "learning_rate": 6.2118347479786e-05, + "loss": 4.1554, + "step": 18460 + }, + { + "epoch": 1.41, + "learning_rate": 6.207812060018504e-05, + "loss": 3.3928, + "step": 18465 + }, + { + "epoch": 1.41, + "learning_rate": 6.20378937205841e-05, + "loss": 2.6005, + "step": 18470 + }, + { + "epoch": 1.41, + "learning_rate": 6.199766684098314e-05, + "loss": 1.3332, + "step": 18475 + }, + { + "epoch": 1.41, + "learning_rate": 6.19574399613822e-05, + "loss": 1.9054, + "step": 18480 + }, + { + "epoch": 1.41, + "learning_rate": 6.191721308178125e-05, + "loss": 1.7774, + "step": 18485 + }, + { + "epoch": 1.41, + "learning_rate": 6.187698620218029e-05, + "loss": 4.46, + "step": 18490 + }, + { + "epoch": 1.41, + "learning_rate": 6.183675932257935e-05, + "loss": 4.0219, + "step": 18495 + }, + { + "epoch": 1.41, + "learning_rate": 6.179653244297841e-05, + "loss": 2.6053, + "step": 18500 + }, + { + "epoch": 1.41, + "learning_rate": 6.175630556337745e-05, + "loss": 3.7852, + "step": 18505 + }, + { + "epoch": 1.41, + "learning_rate": 6.17160786837765e-05, + "loss": 3.012, + "step": 18510 + }, + { + "epoch": 1.42, + "learning_rate": 6.167585180417556e-05, + "loss": 4.4118, + "step": 18515 + }, + { + "epoch": 1.42, + "learning_rate": 6.163562492457461e-05, + "loss": 3.0597, + "step": 18520 + }, + { + "epoch": 1.42, + "learning_rate": 6.159539804497365e-05, + "loss": 2.179, + "step": 18525 + }, + { + "epoch": 1.42, + "learning_rate": 6.15551711653727e-05, + "loss": 1.4966, + "step": 18530 + }, + { + "epoch": 1.42, + "learning_rate": 6.151494428577176e-05, + "loss": 1.3322, + "step": 18535 + }, + { + "epoch": 1.42, + "learning_rate": 6.147471740617081e-05, + "loss": 3.75, + "step": 18540 + }, + { + "epoch": 1.42, + "learning_rate": 6.143449052656985e-05, + "loss": 4.7051, + "step": 18545 + }, + { + "epoch": 1.42, + "learning_rate": 6.139426364696891e-05, + "loss": 5.2428, + "step": 18550 + }, + { + "epoch": 1.42, + "learning_rate": 6.135403676736797e-05, + "loss": 3.5576, + "step": 18555 + }, + { + "epoch": 1.42, + "learning_rate": 6.1313809887767e-05, + "loss": 3.1026, + "step": 18560 + }, + { + "epoch": 1.42, + "learning_rate": 6.127358300816606e-05, + "loss": 4.2556, + "step": 18565 + }, + { + "epoch": 1.42, + "learning_rate": 6.123335612856512e-05, + "loss": 1.9716, + "step": 18570 + }, + { + "epoch": 1.42, + "learning_rate": 6.119312924896416e-05, + "loss": 1.7601, + "step": 18575 + }, + { + "epoch": 1.42, + "learning_rate": 6.11529023693632e-05, + "loss": 3.0333, + "step": 18580 + }, + { + "epoch": 1.42, + "learning_rate": 6.111267548976226e-05, + "loss": 1.5847, + "step": 18585 + }, + { + "epoch": 1.42, + "learning_rate": 6.107244861016131e-05, + "loss": 3.682, + "step": 18590 + }, + { + "epoch": 1.42, + "learning_rate": 6.1032221730560366e-05, + "loss": 4.1221, + "step": 18595 + }, + { + "epoch": 1.42, + "learning_rate": 6.099199485095941e-05, + "loss": 4.0278, + "step": 18600 + }, + { + "epoch": 1.42, + "learning_rate": 6.095176797135846e-05, + "loss": 2.6338, + "step": 18605 + }, + { + "epoch": 1.42, + "learning_rate": 6.091154109175752e-05, + "loss": 3.3207, + "step": 18610 + }, + { + "epoch": 1.42, + "learning_rate": 6.087131421215656e-05, + "loss": 3.9458, + "step": 18615 + }, + { + "epoch": 1.42, + "learning_rate": 6.083108733255562e-05, + "loss": 2.4551, + "step": 18620 + }, + { + "epoch": 1.42, + "learning_rate": 6.079086045295467e-05, + "loss": 1.0928, + "step": 18625 + }, + { + "epoch": 1.42, + "learning_rate": 6.075063357335372e-05, + "loss": 1.2878, + "step": 18630 + }, + { + "epoch": 1.42, + "learning_rate": 6.0710406693752764e-05, + "loss": 3.1286, + "step": 18635 + }, + { + "epoch": 1.42, + "learning_rate": 6.0670179814151816e-05, + "loss": 4.3492, + "step": 18640 + }, + { + "epoch": 1.43, + "learning_rate": 6.0629952934550874e-05, + "loss": 4.4941, + "step": 18645 + }, + { + "epoch": 1.43, + "learning_rate": 6.0589726054949926e-05, + "loss": 3.3197, + "step": 18650 + }, + { + "epoch": 1.43, + "learning_rate": 6.054949917534897e-05, + "loss": 2.8205, + "step": 18655 + }, + { + "epoch": 1.43, + "learning_rate": 6.050927229574802e-05, + "loss": 2.2681, + "step": 18660 + }, + { + "epoch": 1.43, + "learning_rate": 6.046904541614707e-05, + "loss": 2.6547, + "step": 18665 + }, + { + "epoch": 1.43, + "learning_rate": 6.0428818536546125e-05, + "loss": 3.5173, + "step": 18670 + }, + { + "epoch": 1.43, + "learning_rate": 6.038859165694517e-05, + "loss": 1.8866, + "step": 18675 + }, + { + "epoch": 1.43, + "learning_rate": 6.034836477734422e-05, + "loss": 2.6423, + "step": 18680 + }, + { + "epoch": 1.43, + "learning_rate": 6.030813789774328e-05, + "loss": 1.6941, + "step": 18685 + }, + { + "epoch": 1.43, + "learning_rate": 6.026791101814232e-05, + "loss": 3.9561, + "step": 18690 + }, + { + "epoch": 1.43, + "learning_rate": 6.0227684138541375e-05, + "loss": 4.3443, + "step": 18695 + }, + { + "epoch": 1.43, + "learning_rate": 6.018745725894043e-05, + "loss": 3.6342, + "step": 18700 + }, + { + "epoch": 1.43, + "learning_rate": 6.014723037933948e-05, + "loss": 3.3798, + "step": 18705 + }, + { + "epoch": 1.43, + "learning_rate": 6.010700349973852e-05, + "loss": 2.8606, + "step": 18710 + }, + { + "epoch": 1.43, + "learning_rate": 6.0066776620137574e-05, + "loss": 2.683, + "step": 18715 + }, + { + "epoch": 1.43, + "learning_rate": 6.002654974053663e-05, + "loss": 1.8671, + "step": 18720 + }, + { + "epoch": 1.43, + "learning_rate": 5.9986322860935684e-05, + "loss": 4.4889, + "step": 18725 + }, + { + "epoch": 1.43, + "learning_rate": 5.994609598133473e-05, + "loss": 2.7197, + "step": 18730 + }, + { + "epoch": 1.43, + "learning_rate": 5.990586910173378e-05, + "loss": 3.2302, + "step": 18735 + }, + { + "epoch": 1.43, + "learning_rate": 5.986564222213283e-05, + "loss": 3.9961, + "step": 18740 + }, + { + "epoch": 1.43, + "learning_rate": 5.982541534253189e-05, + "loss": 3.467, + "step": 18745 + }, + { + "epoch": 1.43, + "learning_rate": 5.978518846293093e-05, + "loss": 4.3445, + "step": 18750 + }, + { + "epoch": 1.43, + "learning_rate": 5.9744961583329986e-05, + "loss": 4.9615, + "step": 18755 + }, + { + "epoch": 1.43, + "learning_rate": 5.970473470372904e-05, + "loss": 3.1598, + "step": 18760 + }, + { + "epoch": 1.43, + "learning_rate": 5.966450782412808e-05, + "loss": 3.2324, + "step": 18765 + }, + { + "epoch": 1.43, + "learning_rate": 5.9624280944527134e-05, + "loss": 3.9107, + "step": 18770 + }, + { + "epoch": 1.43, + "learning_rate": 5.9584054064926185e-05, + "loss": 1.1585, + "step": 18775 + }, + { + "epoch": 1.44, + "learning_rate": 5.9543827185325243e-05, + "loss": 0.6935, + "step": 18780 + }, + { + "epoch": 1.44, + "learning_rate": 5.950360030572428e-05, + "loss": 1.9886, + "step": 18785 + }, + { + "epoch": 1.44, + "learning_rate": 5.946337342612334e-05, + "loss": 6.2145, + "step": 18790 + }, + { + "epoch": 1.44, + "learning_rate": 5.942314654652239e-05, + "loss": 4.3086, + "step": 18795 + }, + { + "epoch": 1.44, + "learning_rate": 5.938291966692144e-05, + "loss": 3.2976, + "step": 18800 + }, + { + "epoch": 1.44, + "learning_rate": 5.934269278732049e-05, + "loss": 3.9161, + "step": 18805 + }, + { + "epoch": 1.44, + "learning_rate": 5.930246590771954e-05, + "loss": 3.1264, + "step": 18810 + }, + { + "epoch": 1.44, + "learning_rate": 5.92622390281186e-05, + "loss": 4.4698, + "step": 18815 + }, + { + "epoch": 1.44, + "learning_rate": 5.922201214851765e-05, + "loss": 3.101, + "step": 18820 + }, + { + "epoch": 1.44, + "learning_rate": 5.9181785268916686e-05, + "loss": 1.7798, + "step": 18825 + }, + { + "epoch": 1.44, + "learning_rate": 5.9141558389315745e-05, + "loss": 2.7562, + "step": 18830 + }, + { + "epoch": 1.44, + "learning_rate": 5.9101331509714796e-05, + "loss": 2.8743, + "step": 18835 + }, + { + "epoch": 1.44, + "learning_rate": 5.906110463011385e-05, + "loss": 4.3965, + "step": 18840 + }, + { + "epoch": 1.44, + "learning_rate": 5.902087775051289e-05, + "loss": 3.4023, + "step": 18845 + }, + { + "epoch": 1.44, + "learning_rate": 5.8980650870911944e-05, + "loss": 4.3494, + "step": 18850 + }, + { + "epoch": 1.44, + "learning_rate": 5.8940423991311e-05, + "loss": 2.9739, + "step": 18855 + }, + { + "epoch": 1.44, + "learning_rate": 5.890019711171004e-05, + "loss": 4.0516, + "step": 18860 + }, + { + "epoch": 1.44, + "learning_rate": 5.88599702321091e-05, + "loss": 2.3647, + "step": 18865 + }, + { + "epoch": 1.44, + "learning_rate": 5.881974335250815e-05, + "loss": 2.4891, + "step": 18870 + }, + { + "epoch": 1.44, + "learning_rate": 5.87795164729072e-05, + "loss": 2.7286, + "step": 18875 + }, + { + "epoch": 1.44, + "learning_rate": 5.8739289593306246e-05, + "loss": 2.3304, + "step": 18880 + }, + { + "epoch": 1.44, + "learning_rate": 5.86990627137053e-05, + "loss": 1.6823, + "step": 18885 + }, + { + "epoch": 1.44, + "learning_rate": 5.8658835834104355e-05, + "loss": 4.1017, + "step": 18890 + }, + { + "epoch": 1.44, + "learning_rate": 5.861860895450341e-05, + "loss": 4.4844, + "step": 18895 + }, + { + "epoch": 1.44, + "learning_rate": 5.857838207490245e-05, + "loss": 3.8736, + "step": 18900 + }, + { + "epoch": 1.44, + "learning_rate": 5.85381551953015e-05, + "loss": 3.4389, + "step": 18905 + }, + { + "epoch": 1.45, + "learning_rate": 5.8497928315700554e-05, + "loss": 4.6137, + "step": 18910 + }, + { + "epoch": 1.45, + "learning_rate": 5.845770143609961e-05, + "loss": 3.7057, + "step": 18915 + }, + { + "epoch": 1.45, + "learning_rate": 5.841747455649865e-05, + "loss": 1.9062, + "step": 18920 + }, + { + "epoch": 1.45, + "learning_rate": 5.837724767689771e-05, + "loss": 2.6992, + "step": 18925 + }, + { + "epoch": 1.45, + "learning_rate": 5.833702079729676e-05, + "loss": 2.4897, + "step": 18930 + }, + { + "epoch": 1.45, + "learning_rate": 5.8296793917695805e-05, + "loss": 2.3581, + "step": 18935 + }, + { + "epoch": 1.45, + "learning_rate": 5.8256567038094856e-05, + "loss": 4.217, + "step": 18940 + }, + { + "epoch": 1.45, + "learning_rate": 5.821634015849391e-05, + "loss": 3.4085, + "step": 18945 + }, + { + "epoch": 1.45, + "learning_rate": 5.8176113278892966e-05, + "loss": 3.5184, + "step": 18950 + }, + { + "epoch": 1.45, + "learning_rate": 5.8135886399292004e-05, + "loss": 3.6127, + "step": 18955 + }, + { + "epoch": 1.45, + "learning_rate": 5.809565951969106e-05, + "loss": 3.4811, + "step": 18960 + }, + { + "epoch": 1.45, + "learning_rate": 5.8055432640090114e-05, + "loss": 3.3875, + "step": 18965 + }, + { + "epoch": 1.45, + "learning_rate": 5.8015205760489165e-05, + "loss": 2.1214, + "step": 18970 + }, + { + "epoch": 1.45, + "learning_rate": 5.797497888088821e-05, + "loss": 3.5039, + "step": 18975 + }, + { + "epoch": 1.45, + "learning_rate": 5.793475200128726e-05, + "loss": 1.3013, + "step": 18980 + }, + { + "epoch": 1.45, + "learning_rate": 5.789452512168631e-05, + "loss": 1.1169, + "step": 18985 + }, + { + "epoch": 1.45, + "learning_rate": 5.785429824208537e-05, + "loss": 3.883, + "step": 18990 + }, + { + "epoch": 1.45, + "learning_rate": 5.781407136248441e-05, + "loss": 4.4398, + "step": 18995 + }, + { + "epoch": 1.45, + "learning_rate": 5.777384448288347e-05, + "loss": 2.2548, + "step": 19000 + }, + { + "epoch": 1.45, + "learning_rate": 5.773361760328252e-05, + "loss": 4.5357, + "step": 19005 + }, + { + "epoch": 1.45, + "learning_rate": 5.7693390723681563e-05, + "loss": 3.7588, + "step": 19010 + }, + { + "epoch": 1.45, + "learning_rate": 5.7653163844080615e-05, + "loss": 1.6498, + "step": 19015 + }, + { + "epoch": 1.45, + "learning_rate": 5.7612936964479666e-05, + "loss": 3.7896, + "step": 19020 + }, + { + "epoch": 1.45, + "learning_rate": 5.7572710084878725e-05, + "loss": 1.6262, + "step": 19025 + }, + { + "epoch": 1.45, + "learning_rate": 5.753248320527776e-05, + "loss": 0.6316, + "step": 19030 + }, + { + "epoch": 1.45, + "learning_rate": 5.749225632567682e-05, + "loss": 0.6665, + "step": 19035 + }, + { + "epoch": 1.46, + "learning_rate": 5.745202944607587e-05, + "loss": 4.3615, + "step": 19040 + }, + { + "epoch": 1.46, + "learning_rate": 5.7411802566474924e-05, + "loss": 4.8541, + "step": 19045 + }, + { + "epoch": 1.46, + "learning_rate": 5.737157568687397e-05, + "loss": 3.6393, + "step": 19050 + }, + { + "epoch": 1.46, + "learning_rate": 5.733134880727302e-05, + "loss": 4.5691, + "step": 19055 + }, + { + "epoch": 1.46, + "learning_rate": 5.729112192767208e-05, + "loss": 3.1455, + "step": 19060 + }, + { + "epoch": 1.46, + "learning_rate": 5.725089504807113e-05, + "loss": 3.8037, + "step": 19065 + }, + { + "epoch": 1.46, + "learning_rate": 5.7210668168470174e-05, + "loss": 2.758, + "step": 19070 + }, + { + "epoch": 1.46, + "learning_rate": 5.7170441288869226e-05, + "loss": 2.8246, + "step": 19075 + }, + { + "epoch": 1.46, + "learning_rate": 5.713021440926828e-05, + "loss": 2.058, + "step": 19080 + }, + { + "epoch": 1.46, + "learning_rate": 5.708998752966732e-05, + "loss": 1.0103, + "step": 19085 + }, + { + "epoch": 1.46, + "learning_rate": 5.704976065006637e-05, + "loss": 3.3998, + "step": 19090 + }, + { + "epoch": 1.46, + "learning_rate": 5.700953377046543e-05, + "loss": 3.6197, + "step": 19095 + }, + { + "epoch": 1.46, + "learning_rate": 5.696930689086448e-05, + "loss": 3.9162, + "step": 19100 + }, + { + "epoch": 1.46, + "learning_rate": 5.692908001126353e-05, + "loss": 4.3734, + "step": 19105 + }, + { + "epoch": 1.46, + "learning_rate": 5.688885313166258e-05, + "loss": 3.8322, + "step": 19110 + }, + { + "epoch": 1.46, + "learning_rate": 5.684862625206163e-05, + "loss": 2.6953, + "step": 19115 + }, + { + "epoch": 1.46, + "learning_rate": 5.680839937246068e-05, + "loss": 2.9081, + "step": 19120 + }, + { + "epoch": 1.46, + "learning_rate": 5.676817249285973e-05, + "loss": 2.9252, + "step": 19125 + }, + { + "epoch": 1.46, + "learning_rate": 5.672794561325878e-05, + "loss": 2.051, + "step": 19130 + }, + { + "epoch": 1.46, + "learning_rate": 5.6687718733657837e-05, + "loss": 4.5108, + "step": 19135 + }, + { + "epoch": 1.46, + "learning_rate": 5.664749185405689e-05, + "loss": 4.5414, + "step": 19140 + }, + { + "epoch": 1.46, + "learning_rate": 5.660726497445593e-05, + "loss": 4.4689, + "step": 19145 + }, + { + "epoch": 1.46, + "learning_rate": 5.6567038094854984e-05, + "loss": 3.826, + "step": 19150 + }, + { + "epoch": 1.46, + "learning_rate": 5.6526811215254036e-05, + "loss": 2.8979, + "step": 19155 + }, + { + "epoch": 1.46, + "learning_rate": 5.648658433565308e-05, + "loss": 3.5128, + "step": 19160 + }, + { + "epoch": 1.46, + "learning_rate": 5.644635745605213e-05, + "loss": 2.2047, + "step": 19165 + }, + { + "epoch": 1.47, + "learning_rate": 5.640613057645119e-05, + "loss": 2.9307, + "step": 19170 + }, + { + "epoch": 1.47, + "learning_rate": 5.636590369685024e-05, + "loss": 0.9308, + "step": 19175 + }, + { + "epoch": 1.47, + "learning_rate": 5.6325676817249286e-05, + "loss": 1.3805, + "step": 19180 + }, + { + "epoch": 1.47, + "learning_rate": 5.628544993764834e-05, + "loss": 3.8648, + "step": 19185 + }, + { + "epoch": 1.47, + "learning_rate": 5.624522305804739e-05, + "loss": 4.8469, + "step": 19190 + }, + { + "epoch": 1.47, + "learning_rate": 5.620499617844645e-05, + "loss": 4.4084, + "step": 19195 + }, + { + "epoch": 1.47, + "learning_rate": 5.6164769298845485e-05, + "loss": 4.0066, + "step": 19200 + }, + { + "epoch": 1.47, + "learning_rate": 5.6124542419244544e-05, + "loss": 2.9517, + "step": 19205 + }, + { + "epoch": 1.47, + "learning_rate": 5.6084315539643595e-05, + "loss": 3.6738, + "step": 19210 + }, + { + "epoch": 1.47, + "learning_rate": 5.6044088660042647e-05, + "loss": 2.7926, + "step": 19215 + }, + { + "epoch": 1.47, + "learning_rate": 5.600386178044169e-05, + "loss": 1.8658, + "step": 19220 + }, + { + "epoch": 1.47, + "learning_rate": 5.596363490084074e-05, + "loss": 2.3674, + "step": 19225 + }, + { + "epoch": 1.47, + "learning_rate": 5.59234080212398e-05, + "loss": 3.3319, + "step": 19230 + }, + { + "epoch": 1.47, + "learning_rate": 5.588318114163884e-05, + "loss": 2.3487, + "step": 19235 + }, + { + "epoch": 1.47, + "learning_rate": 5.58429542620379e-05, + "loss": 4.4271, + "step": 19240 + }, + { + "epoch": 1.47, + "learning_rate": 5.580272738243695e-05, + "loss": 5.4869, + "step": 19245 + }, + { + "epoch": 1.47, + "learning_rate": 5.5762500502836e-05, + "loss": 3.9018, + "step": 19250 + }, + { + "epoch": 1.47, + "learning_rate": 5.5722273623235045e-05, + "loss": 4.5006, + "step": 19255 + }, + { + "epoch": 1.47, + "learning_rate": 5.5682046743634096e-05, + "loss": 4.2176, + "step": 19260 + }, + { + "epoch": 1.47, + "learning_rate": 5.5641819864033154e-05, + "loss": 2.4257, + "step": 19265 + }, + { + "epoch": 1.47, + "learning_rate": 5.5601592984432206e-05, + "loss": 2.4731, + "step": 19270 + }, + { + "epoch": 1.47, + "learning_rate": 5.556136610483125e-05, + "loss": 3.5372, + "step": 19275 + }, + { + "epoch": 1.47, + "learning_rate": 5.55211392252303e-05, + "loss": 1.971, + "step": 19280 + }, + { + "epoch": 1.47, + "learning_rate": 5.5480912345629353e-05, + "loss": 2.1551, + "step": 19285 + }, + { + "epoch": 1.47, + "learning_rate": 5.5440685466028405e-05, + "loss": 3.9453, + "step": 19290 + }, + { + "epoch": 1.47, + "learning_rate": 5.540045858642745e-05, + "loss": 4.4363, + "step": 19295 + }, + { + "epoch": 1.48, + "learning_rate": 5.53602317068265e-05, + "loss": 3.8611, + "step": 19300 + }, + { + "epoch": 1.48, + "learning_rate": 5.532000482722556e-05, + "loss": 3.0589, + "step": 19305 + }, + { + "epoch": 1.48, + "learning_rate": 5.52797779476246e-05, + "loss": 3.3494, + "step": 19310 + }, + { + "epoch": 1.48, + "learning_rate": 5.5239551068023656e-05, + "loss": 2.2144, + "step": 19315 + }, + { + "epoch": 1.48, + "learning_rate": 5.519932418842271e-05, + "loss": 4.4766, + "step": 19320 + }, + { + "epoch": 1.48, + "learning_rate": 5.515909730882176e-05, + "loss": 3.267, + "step": 19325 + }, + { + "epoch": 1.48, + "learning_rate": 5.51188704292208e-05, + "loss": 2.9234, + "step": 19330 + }, + { + "epoch": 1.48, + "learning_rate": 5.5078643549619855e-05, + "loss": 3.8816, + "step": 19335 + }, + { + "epoch": 1.48, + "learning_rate": 5.503841667001891e-05, + "loss": 4.1756, + "step": 19340 + }, + { + "epoch": 1.48, + "learning_rate": 5.4998189790417964e-05, + "loss": 4.3277, + "step": 19345 + }, + { + "epoch": 1.48, + "learning_rate": 5.495796291081701e-05, + "loss": 4.1113, + "step": 19350 + }, + { + "epoch": 1.48, + "learning_rate": 5.491773603121606e-05, + "loss": 3.8789, + "step": 19355 + }, + { + "epoch": 1.48, + "learning_rate": 5.487750915161511e-05, + "loss": 2.3336, + "step": 19360 + }, + { + "epoch": 1.48, + "learning_rate": 5.483728227201417e-05, + "loss": 3.0759, + "step": 19365 + }, + { + "epoch": 1.48, + "learning_rate": 5.479705539241321e-05, + "loss": 2.9986, + "step": 19370 + }, + { + "epoch": 1.48, + "learning_rate": 5.4756828512812266e-05, + "loss": 2.5309, + "step": 19375 + }, + { + "epoch": 1.48, + "learning_rate": 5.471660163321132e-05, + "loss": 2.5339, + "step": 19380 + }, + { + "epoch": 1.48, + "learning_rate": 5.467637475361036e-05, + "loss": 2.0254, + "step": 19385 + }, + { + "epoch": 1.48, + "learning_rate": 5.4636147874009414e-05, + "loss": 4.4572, + "step": 19390 + }, + { + "epoch": 1.48, + "learning_rate": 5.4595920994408465e-05, + "loss": 4.1877, + "step": 19395 + }, + { + "epoch": 1.48, + "learning_rate": 5.4555694114807524e-05, + "loss": 3.7068, + "step": 19400 + }, + { + "epoch": 1.48, + "learning_rate": 5.451546723520656e-05, + "loss": 3.5297, + "step": 19405 + }, + { + "epoch": 1.48, + "learning_rate": 5.447524035560562e-05, + "loss": 3.059, + "step": 19410 + }, + { + "epoch": 1.48, + "learning_rate": 5.443501347600467e-05, + "loss": 3.3376, + "step": 19415 + }, + { + "epoch": 1.48, + "learning_rate": 5.439478659640372e-05, + "loss": 2.6208, + "step": 19420 + }, + { + "epoch": 1.48, + "learning_rate": 5.435455971680277e-05, + "loss": 0.5573, + "step": 19425 + }, + { + "epoch": 1.49, + "learning_rate": 5.431433283720182e-05, + "loss": 0.388, + "step": 19430 + }, + { + "epoch": 1.49, + "learning_rate": 5.427410595760087e-05, + "loss": 2.8777, + "step": 19435 + }, + { + "epoch": 1.49, + "learning_rate": 5.423387907799993e-05, + "loss": 4.8811, + "step": 19440 + }, + { + "epoch": 1.49, + "learning_rate": 5.4193652198398967e-05, + "loss": 3.5342, + "step": 19445 + }, + { + "epoch": 1.49, + "learning_rate": 5.4153425318798025e-05, + "loss": 3.8281, + "step": 19450 + }, + { + "epoch": 1.49, + "learning_rate": 5.4113198439197076e-05, + "loss": 4.1465, + "step": 19455 + }, + { + "epoch": 1.49, + "learning_rate": 5.407297155959612e-05, + "loss": 3.8576, + "step": 19460 + }, + { + "epoch": 1.49, + "learning_rate": 5.403274467999517e-05, + "loss": 3.6264, + "step": 19465 + }, + { + "epoch": 1.49, + "learning_rate": 5.3992517800394224e-05, + "loss": 3.2877, + "step": 19470 + }, + { + "epoch": 1.49, + "learning_rate": 5.395229092079328e-05, + "loss": 2.1062, + "step": 19475 + }, + { + "epoch": 1.49, + "learning_rate": 5.391206404119232e-05, + "loss": 1.655, + "step": 19480 + }, + { + "epoch": 1.49, + "learning_rate": 5.387183716159138e-05, + "loss": 1.7521, + "step": 19485 + }, + { + "epoch": 1.49, + "learning_rate": 5.383161028199043e-05, + "loss": 4.1643, + "step": 19490 + }, + { + "epoch": 1.49, + "learning_rate": 5.379138340238948e-05, + "loss": 4.5741, + "step": 19495 + }, + { + "epoch": 1.49, + "learning_rate": 5.3751156522788526e-05, + "loss": 4.5176, + "step": 19500 + }, + { + "epoch": 1.49, + "learning_rate": 5.371092964318758e-05, + "loss": 2.9813, + "step": 19505 + }, + { + "epoch": 1.49, + "learning_rate": 5.3670702763586636e-05, + "loss": 3.9217, + "step": 19510 + }, + { + "epoch": 1.49, + "learning_rate": 5.363047588398569e-05, + "loss": 2.6086, + "step": 19515 + }, + { + "epoch": 1.49, + "learning_rate": 5.359024900438473e-05, + "loss": 3.6834, + "step": 19520 + }, + { + "epoch": 1.49, + "learning_rate": 5.355002212478378e-05, + "loss": 3.0846, + "step": 19525 + }, + { + "epoch": 1.49, + "learning_rate": 5.3509795245182835e-05, + "loss": 2.2924, + "step": 19530 + }, + { + "epoch": 1.49, + "learning_rate": 5.346956836558188e-05, + "loss": 3.0072, + "step": 19535 + }, + { + "epoch": 1.49, + "learning_rate": 5.342934148598093e-05, + "loss": 3.6783, + "step": 19540 + }, + { + "epoch": 1.49, + "learning_rate": 5.338911460637999e-05, + "loss": 3.4266, + "step": 19545 + }, + { + "epoch": 1.49, + "learning_rate": 5.334888772677904e-05, + "loss": 4.5687, + "step": 19550 + }, + { + "epoch": 1.49, + "learning_rate": 5.3308660847178085e-05, + "loss": 3.6299, + "step": 19555 + }, + { + "epoch": 1.49, + "learning_rate": 5.326843396757714e-05, + "loss": 4.2918, + "step": 19560 + }, + { + "epoch": 1.5, + "learning_rate": 5.322820708797619e-05, + "loss": 3.1556, + "step": 19565 + }, + { + "epoch": 1.5, + "learning_rate": 5.3187980208375246e-05, + "loss": 1.7684, + "step": 19570 + }, + { + "epoch": 1.5, + "learning_rate": 5.3147753328774284e-05, + "loss": 1.7001, + "step": 19575 + }, + { + "epoch": 1.5, + "learning_rate": 5.310752644917334e-05, + "loss": 2.9054, + "step": 19580 + }, + { + "epoch": 1.5, + "learning_rate": 5.3067299569572394e-05, + "loss": 3.3056, + "step": 19585 + }, + { + "epoch": 1.5, + "learning_rate": 5.3027072689971446e-05, + "loss": 4.6641, + "step": 19590 + }, + { + "epoch": 1.5, + "learning_rate": 5.298684581037049e-05, + "loss": 4.5357, + "step": 19595 + }, + { + "epoch": 1.5, + "learning_rate": 5.294661893076954e-05, + "loss": 4.2678, + "step": 19600 + }, + { + "epoch": 1.5, + "learning_rate": 5.290639205116859e-05, + "loss": 3.8678, + "step": 19605 + }, + { + "epoch": 1.5, + "learning_rate": 5.286616517156764e-05, + "loss": 2.2252, + "step": 19610 + }, + { + "epoch": 1.5, + "learning_rate": 5.282593829196669e-05, + "loss": 2.7532, + "step": 19615 + }, + { + "epoch": 1.5, + "learning_rate": 5.278571141236575e-05, + "loss": 3.1894, + "step": 19620 + }, + { + "epoch": 1.5, + "learning_rate": 5.27454845327648e-05, + "loss": 1.7801, + "step": 19625 + }, + { + "epoch": 1.5, + "learning_rate": 5.2705257653163844e-05, + "loss": 0.9882, + "step": 19630 + }, + { + "epoch": 1.5, + "learning_rate": 5.2665030773562895e-05, + "loss": 1.3804, + "step": 19635 + }, + { + "epoch": 1.5, + "learning_rate": 5.262480389396195e-05, + "loss": 4.2154, + "step": 19640 + }, + { + "epoch": 1.5, + "learning_rate": 5.2584577014361005e-05, + "loss": 4.8629, + "step": 19645 + }, + { + "epoch": 1.5, + "learning_rate": 5.254435013476004e-05, + "loss": 2.9258, + "step": 19650 + }, + { + "epoch": 1.5, + "learning_rate": 5.25041232551591e-05, + "loss": 2.9222, + "step": 19655 + }, + { + "epoch": 1.5, + "learning_rate": 5.246389637555815e-05, + "loss": 3.0917, + "step": 19660 + }, + { + "epoch": 1.5, + "learning_rate": 5.2423669495957204e-05, + "loss": 2.4604, + "step": 19665 + }, + { + "epoch": 1.5, + "learning_rate": 5.238344261635625e-05, + "loss": 1.6897, + "step": 19670 + }, + { + "epoch": 1.5, + "learning_rate": 5.23432157367553e-05, + "loss": 1.0687, + "step": 19675 + }, + { + "epoch": 1.5, + "learning_rate": 5.230298885715436e-05, + "loss": 1.2178, + "step": 19680 + }, + { + "epoch": 1.5, + "learning_rate": 5.2262761977553396e-05, + "loss": 2.0848, + "step": 19685 + }, + { + "epoch": 1.5, + "learning_rate": 5.2222535097952455e-05, + "loss": 4.2998, + "step": 19690 + }, + { + "epoch": 1.51, + "learning_rate": 5.2182308218351506e-05, + "loss": 5.0777, + "step": 19695 + }, + { + "epoch": 1.51, + "learning_rate": 5.214208133875056e-05, + "loss": 3.448, + "step": 19700 + }, + { + "epoch": 1.51, + "learning_rate": 5.21018544591496e-05, + "loss": 3.3208, + "step": 19705 + }, + { + "epoch": 1.51, + "learning_rate": 5.2061627579548654e-05, + "loss": 2.9604, + "step": 19710 + }, + { + "epoch": 1.51, + "learning_rate": 5.202140069994771e-05, + "loss": 2.4406, + "step": 19715 + }, + { + "epoch": 1.51, + "learning_rate": 5.198117382034676e-05, + "loss": 1.6549, + "step": 19720 + }, + { + "epoch": 1.51, + "learning_rate": 5.194094694074581e-05, + "loss": 1.4544, + "step": 19725 + }, + { + "epoch": 1.51, + "learning_rate": 5.190072006114486e-05, + "loss": 2.5774, + "step": 19730 + }, + { + "epoch": 1.51, + "learning_rate": 5.186049318154391e-05, + "loss": 2.1335, + "step": 19735 + }, + { + "epoch": 1.51, + "learning_rate": 5.182026630194296e-05, + "loss": 4.3416, + "step": 19740 + }, + { + "epoch": 1.51, + "learning_rate": 5.178003942234201e-05, + "loss": 3.4145, + "step": 19745 + }, + { + "epoch": 1.51, + "learning_rate": 5.173981254274106e-05, + "loss": 3.9055, + "step": 19750 + }, + { + "epoch": 1.51, + "learning_rate": 5.169958566314012e-05, + "loss": 3.7634, + "step": 19755 + }, + { + "epoch": 1.51, + "learning_rate": 5.1659358783539155e-05, + "loss": 3.4955, + "step": 19760 + }, + { + "epoch": 1.51, + "learning_rate": 5.161913190393821e-05, + "loss": 1.8993, + "step": 19765 + }, + { + "epoch": 1.51, + "learning_rate": 5.1578905024337264e-05, + "loss": 3.337, + "step": 19770 + }, + { + "epoch": 1.51, + "learning_rate": 5.1538678144736316e-05, + "loss": 1.2481, + "step": 19775 + }, + { + "epoch": 1.51, + "learning_rate": 5.149845126513536e-05, + "loss": 2.3209, + "step": 19780 + }, + { + "epoch": 1.51, + "learning_rate": 5.145822438553441e-05, + "loss": 2.9454, + "step": 19785 + }, + { + "epoch": 1.51, + "learning_rate": 5.141799750593347e-05, + "loss": 4.0148, + "step": 19790 + }, + { + "epoch": 1.51, + "learning_rate": 5.137777062633252e-05, + "loss": 4.0611, + "step": 19795 + }, + { + "epoch": 1.51, + "learning_rate": 5.1337543746731567e-05, + "loss": 3.9623, + "step": 19800 + }, + { + "epoch": 1.51, + "learning_rate": 5.129731686713062e-05, + "loss": 2.918, + "step": 19805 + }, + { + "epoch": 1.51, + "learning_rate": 5.125708998752967e-05, + "loss": 3.5767, + "step": 19810 + }, + { + "epoch": 1.51, + "learning_rate": 5.121686310792873e-05, + "loss": 3.4847, + "step": 19815 + }, + { + "epoch": 1.51, + "learning_rate": 5.1176636228327766e-05, + "loss": 1.9963, + "step": 19820 + }, + { + "epoch": 1.52, + "learning_rate": 5.1136409348726824e-05, + "loss": 1.0946, + "step": 19825 + }, + { + "epoch": 1.52, + "learning_rate": 5.110422784504606e-05, + "loss": 1.6789, + "step": 19830 + }, + { + "epoch": 1.52, + "learning_rate": 5.1064000965445115e-05, + "loss": 1.926, + "step": 19835 + }, + { + "epoch": 1.52, + "learning_rate": 5.1023774085844167e-05, + "loss": 4.7094, + "step": 19840 + }, + { + "epoch": 1.52, + "learning_rate": 5.098354720624321e-05, + "loss": 4.5109, + "step": 19845 + }, + { + "epoch": 1.52, + "learning_rate": 5.094332032664226e-05, + "loss": 4.4045, + "step": 19850 + }, + { + "epoch": 1.52, + "learning_rate": 5.0903093447041314e-05, + "loss": 3.6195, + "step": 19855 + }, + { + "epoch": 1.52, + "learning_rate": 5.086286656744037e-05, + "loss": 1.4138, + "step": 19860 + }, + { + "epoch": 1.52, + "learning_rate": 5.082263968783941e-05, + "loss": 2.8188, + "step": 19865 + }, + { + "epoch": 1.52, + "learning_rate": 5.078241280823847e-05, + "loss": 3.5405, + "step": 19870 + }, + { + "epoch": 1.52, + "learning_rate": 5.074218592863752e-05, + "loss": 2.9546, + "step": 19875 + }, + { + "epoch": 1.52, + "learning_rate": 5.070195904903657e-05, + "loss": 3.7135, + "step": 19880 + }, + { + "epoch": 1.52, + "learning_rate": 5.0661732169435616e-05, + "loss": 3.4582, + "step": 19885 + }, + { + "epoch": 1.52, + "learning_rate": 5.062150528983467e-05, + "loss": 4.2113, + "step": 19890 + }, + { + "epoch": 1.52, + "learning_rate": 5.0581278410233726e-05, + "loss": 4.4158, + "step": 19895 + }, + { + "epoch": 1.52, + "learning_rate": 5.0541051530632764e-05, + "loss": 3.453, + "step": 19900 + }, + { + "epoch": 1.52, + "learning_rate": 5.050082465103182e-05, + "loss": 3.6619, + "step": 19905 + }, + { + "epoch": 1.52, + "learning_rate": 5.0460597771430874e-05, + "loss": 3.9289, + "step": 19910 + }, + { + "epoch": 1.52, + "learning_rate": 5.0420370891829925e-05, + "loss": 2.6809, + "step": 19915 + }, + { + "epoch": 1.52, + "learning_rate": 5.038014401222897e-05, + "loss": 0.5294, + "step": 19920 + }, + { + "epoch": 1.52, + "learning_rate": 5.033991713262802e-05, + "loss": 0.7334, + "step": 19925 + }, + { + "epoch": 1.52, + "learning_rate": 5.029969025302708e-05, + "loss": 3.913, + "step": 19930 + }, + { + "epoch": 1.52, + "learning_rate": 5.025946337342613e-05, + "loss": 0.9632, + "step": 19935 + }, + { + "epoch": 1.52, + "learning_rate": 5.0219236493825176e-05, + "loss": 4.6021, + "step": 19940 + }, + { + "epoch": 1.52, + "learning_rate": 5.017900961422423e-05, + "loss": 4.8836, + "step": 19945 + }, + { + "epoch": 1.52, + "learning_rate": 5.013878273462328e-05, + "loss": 3.9064, + "step": 19950 + }, + { + "epoch": 1.53, + "learning_rate": 5.009855585502233e-05, + "loss": 3.9792, + "step": 19955 + }, + { + "epoch": 1.53, + "learning_rate": 5.0058328975421375e-05, + "loss": 4.1586, + "step": 19960 + }, + { + "epoch": 1.53, + "learning_rate": 5.0018102095820426e-05, + "loss": 3.8375, + "step": 19965 + }, + { + "epoch": 1.53, + "learning_rate": 4.9977875216219484e-05, + "loss": 2.0574, + "step": 19970 + }, + { + "epoch": 1.53, + "learning_rate": 4.993764833661853e-05, + "loss": 1.9708, + "step": 19975 + }, + { + "epoch": 1.53, + "learning_rate": 4.989742145701758e-05, + "loss": 1.9487, + "step": 19980 + }, + { + "epoch": 1.53, + "learning_rate": 4.985719457741663e-05, + "loss": 3.1091, + "step": 19985 + }, + { + "epoch": 1.53, + "learning_rate": 4.9816967697815684e-05, + "loss": 5.8838, + "step": 19990 + }, + { + "epoch": 1.53, + "learning_rate": 4.9776740818214735e-05, + "loss": 4.9205, + "step": 19995 + }, + { + "epoch": 1.53, + "learning_rate": 4.973651393861378e-05, + "loss": 4.5146, + "step": 20000 + }, + { + "epoch": 1.53, + "learning_rate": 4.969628705901284e-05, + "loss": 3.2711, + "step": 20005 + }, + { + "epoch": 1.53, + "learning_rate": 4.965606017941188e-05, + "loss": 3.8259, + "step": 20010 + }, + { + "epoch": 1.53, + "learning_rate": 4.961583329981094e-05, + "loss": 3.1064, + "step": 20015 + }, + { + "epoch": 1.53, + "learning_rate": 4.9575606420209986e-05, + "loss": 3.5432, + "step": 20020 + }, + { + "epoch": 1.53, + "learning_rate": 4.953537954060904e-05, + "loss": 1.2155, + "step": 20025 + }, + { + "epoch": 1.53, + "learning_rate": 4.949515266100809e-05, + "loss": 2.7059, + "step": 20030 + }, + { + "epoch": 1.53, + "learning_rate": 4.945492578140714e-05, + "loss": 1.1075, + "step": 20035 + }, + { + "epoch": 1.53, + "learning_rate": 4.941469890180619e-05, + "loss": 3.995, + "step": 20040 + }, + { + "epoch": 1.53, + "learning_rate": 4.937447202220524e-05, + "loss": 4.1667, + "step": 20045 + }, + { + "epoch": 1.53, + "learning_rate": 4.933424514260429e-05, + "loss": 3.6951, + "step": 20050 + }, + { + "epoch": 1.53, + "learning_rate": 4.929401826300334e-05, + "loss": 3.6781, + "step": 20055 + }, + { + "epoch": 1.53, + "learning_rate": 4.925379138340239e-05, + "loss": 4.773, + "step": 20060 + }, + { + "epoch": 1.53, + "learning_rate": 4.921356450380144e-05, + "loss": 3.6868, + "step": 20065 + }, + { + "epoch": 1.53, + "learning_rate": 4.9173337624200493e-05, + "loss": 2.524, + "step": 20070 + }, + { + "epoch": 1.53, + "learning_rate": 4.9133110744599545e-05, + "loss": 1.7292, + "step": 20075 + }, + { + "epoch": 1.53, + "learning_rate": 4.9092883864998596e-05, + "loss": 0.7888, + "step": 20080 + }, + { + "epoch": 1.54, + "learning_rate": 4.905265698539764e-05, + "loss": 2.0986, + "step": 20085 + }, + { + "epoch": 1.54, + "learning_rate": 4.90124301057967e-05, + "loss": 3.924, + "step": 20090 + }, + { + "epoch": 1.54, + "learning_rate": 4.8972203226195744e-05, + "loss": 4.8615, + "step": 20095 + }, + { + "epoch": 1.54, + "learning_rate": 4.89319763465948e-05, + "loss": 3.5187, + "step": 20100 + }, + { + "epoch": 1.54, + "learning_rate": 4.889174946699385e-05, + "loss": 3.4105, + "step": 20105 + }, + { + "epoch": 1.54, + "learning_rate": 4.88515225873929e-05, + "loss": 2.8189, + "step": 20110 + }, + { + "epoch": 1.54, + "learning_rate": 4.881129570779195e-05, + "loss": 3.0407, + "step": 20115 + }, + { + "epoch": 1.54, + "learning_rate": 4.8771068828191e-05, + "loss": 1.8893, + "step": 20120 + }, + { + "epoch": 1.54, + "learning_rate": 4.873084194859005e-05, + "loss": 3.3274, + "step": 20125 + }, + { + "epoch": 1.54, + "learning_rate": 4.86906150689891e-05, + "loss": 2.7007, + "step": 20130 + }, + { + "epoch": 1.54, + "learning_rate": 4.865038818938815e-05, + "loss": 2.7956, + "step": 20135 + }, + { + "epoch": 1.54, + "learning_rate": 4.86101613097872e-05, + "loss": 5.4281, + "step": 20140 + }, + { + "epoch": 1.54, + "learning_rate": 4.856993443018625e-05, + "loss": 4.4959, + "step": 20145 + }, + { + "epoch": 1.54, + "learning_rate": 4.85297075505853e-05, + "loss": 4.1445, + "step": 20150 + }, + { + "epoch": 1.54, + "learning_rate": 4.8489480670984355e-05, + "loss": 3.5901, + "step": 20155 + }, + { + "epoch": 1.54, + "learning_rate": 4.8449253791383406e-05, + "loss": 2.736, + "step": 20160 + }, + { + "epoch": 1.54, + "learning_rate": 4.840902691178246e-05, + "loss": 2.9837, + "step": 20165 + }, + { + "epoch": 1.54, + "learning_rate": 4.83688000321815e-05, + "loss": 2.1882, + "step": 20170 + }, + { + "epoch": 1.54, + "learning_rate": 4.832857315258056e-05, + "loss": 2.8191, + "step": 20175 + }, + { + "epoch": 1.54, + "learning_rate": 4.8288346272979605e-05, + "loss": 1.4353, + "step": 20180 + }, + { + "epoch": 1.54, + "learning_rate": 4.824811939337866e-05, + "loss": 1.7028, + "step": 20185 + }, + { + "epoch": 1.54, + "learning_rate": 4.820789251377771e-05, + "loss": 3.8006, + "step": 20190 + }, + { + "epoch": 1.54, + "learning_rate": 4.816766563417676e-05, + "loss": 3.9104, + "step": 20195 + }, + { + "epoch": 1.54, + "learning_rate": 4.812743875457581e-05, + "loss": 3.7228, + "step": 20200 + }, + { + "epoch": 1.54, + "learning_rate": 4.8087211874974856e-05, + "loss": 3.077, + "step": 20205 + }, + { + "epoch": 1.54, + "learning_rate": 4.8046984995373914e-05, + "loss": 4.0723, + "step": 20210 + }, + { + "epoch": 1.55, + "learning_rate": 4.800675811577296e-05, + "loss": 2.538, + "step": 20215 + }, + { + "epoch": 1.55, + "learning_rate": 4.796653123617201e-05, + "loss": 2.944, + "step": 20220 + }, + { + "epoch": 1.55, + "learning_rate": 4.792630435657106e-05, + "loss": 1.8839, + "step": 20225 + }, + { + "epoch": 1.55, + "learning_rate": 4.788607747697011e-05, + "loss": 2.1462, + "step": 20230 + }, + { + "epoch": 1.55, + "learning_rate": 4.7845850597369165e-05, + "loss": 3.1881, + "step": 20235 + }, + { + "epoch": 1.55, + "learning_rate": 4.7805623717768216e-05, + "loss": 3.8365, + "step": 20240 + }, + { + "epoch": 1.55, + "learning_rate": 4.776539683816727e-05, + "loss": 4.1854, + "step": 20245 + }, + { + "epoch": 1.55, + "learning_rate": 4.772516995856632e-05, + "loss": 3.5527, + "step": 20250 + }, + { + "epoch": 1.55, + "learning_rate": 4.7684943078965364e-05, + "loss": 2.907, + "step": 20255 + }, + { + "epoch": 1.55, + "learning_rate": 4.764471619936442e-05, + "loss": 4.211, + "step": 20260 + }, + { + "epoch": 1.55, + "learning_rate": 4.760448931976347e-05, + "loss": 3.4987, + "step": 20265 + }, + { + "epoch": 1.55, + "learning_rate": 4.756426244016252e-05, + "loss": 2.5673, + "step": 20270 + }, + { + "epoch": 1.55, + "learning_rate": 4.752403556056157e-05, + "loss": 2.0378, + "step": 20275 + }, + { + "epoch": 1.55, + "learning_rate": 4.7483808680960614e-05, + "loss": 2.3546, + "step": 20280 + }, + { + "epoch": 1.55, + "learning_rate": 4.744358180135967e-05, + "loss": 0.8048, + "step": 20285 + }, + { + "epoch": 1.55, + "learning_rate": 4.740335492175872e-05, + "loss": 4.9102, + "step": 20290 + }, + { + "epoch": 1.55, + "learning_rate": 4.7363128042157776e-05, + "loss": 5.3023, + "step": 20295 + }, + { + "epoch": 1.55, + "learning_rate": 4.732290116255682e-05, + "loss": 4.1691, + "step": 20300 + }, + { + "epoch": 1.55, + "learning_rate": 4.728267428295587e-05, + "loss": 3.5041, + "step": 20305 + }, + { + "epoch": 1.55, + "learning_rate": 4.724244740335492e-05, + "loss": 2.0665, + "step": 20310 + }, + { + "epoch": 1.55, + "learning_rate": 4.7202220523753975e-05, + "loss": 3.1756, + "step": 20315 + }, + { + "epoch": 1.55, + "learning_rate": 4.7161993644153026e-05, + "loss": 1.7097, + "step": 20320 + }, + { + "epoch": 1.55, + "learning_rate": 4.712176676455208e-05, + "loss": 2.9661, + "step": 20325 + }, + { + "epoch": 1.55, + "learning_rate": 4.708153988495113e-05, + "loss": 2.5157, + "step": 20330 + }, + { + "epoch": 1.55, + "learning_rate": 4.704131300535018e-05, + "loss": 3.5753, + "step": 20335 + }, + { + "epoch": 1.55, + "learning_rate": 4.7001086125749225e-05, + "loss": 4.2086, + "step": 20340 + }, + { + "epoch": 1.55, + "learning_rate": 4.6960859246148283e-05, + "loss": 4.2891, + "step": 20345 + }, + { + "epoch": 1.56, + "learning_rate": 4.692063236654733e-05, + "loss": 4.4199, + "step": 20350 + }, + { + "epoch": 1.56, + "learning_rate": 4.688040548694638e-05, + "loss": 4.1923, + "step": 20355 + }, + { + "epoch": 1.56, + "learning_rate": 4.684017860734543e-05, + "loss": 1.1563, + "step": 20360 + }, + { + "epoch": 1.56, + "learning_rate": 4.6799951727744476e-05, + "loss": 2.6206, + "step": 20365 + }, + { + "epoch": 1.56, + "learning_rate": 4.6759724848143534e-05, + "loss": 2.1299, + "step": 20370 + }, + { + "epoch": 1.56, + "learning_rate": 4.671949796854258e-05, + "loss": 1.8058, + "step": 20375 + }, + { + "epoch": 1.56, + "learning_rate": 4.667927108894164e-05, + "loss": 2.8761, + "step": 20380 + }, + { + "epoch": 1.56, + "learning_rate": 4.663904420934068e-05, + "loss": 2.907, + "step": 20385 + }, + { + "epoch": 1.56, + "learning_rate": 4.659881732973973e-05, + "loss": 3.6823, + "step": 20390 + }, + { + "epoch": 1.56, + "learning_rate": 4.6558590450138785e-05, + "loss": 3.6055, + "step": 20395 + }, + { + "epoch": 1.56, + "learning_rate": 4.6518363570537836e-05, + "loss": 3.5477, + "step": 20400 + }, + { + "epoch": 1.56, + "learning_rate": 4.647813669093689e-05, + "loss": 3.2163, + "step": 20405 + }, + { + "epoch": 1.56, + "learning_rate": 4.643790981133594e-05, + "loss": 3.3979, + "step": 20410 + }, + { + "epoch": 1.56, + "learning_rate": 4.6397682931734984e-05, + "loss": 3.7832, + "step": 20415 + }, + { + "epoch": 1.56, + "learning_rate": 4.635745605213404e-05, + "loss": 2.9825, + "step": 20420 + }, + { + "epoch": 1.56, + "learning_rate": 4.631722917253309e-05, + "loss": 2.2435, + "step": 20425 + }, + { + "epoch": 1.56, + "learning_rate": 4.627700229293214e-05, + "loss": 1.402, + "step": 20430 + }, + { + "epoch": 1.56, + "learning_rate": 4.623677541333119e-05, + "loss": 1.3045, + "step": 20435 + }, + { + "epoch": 1.56, + "learning_rate": 4.619654853373024e-05, + "loss": 4.5785, + "step": 20440 + }, + { + "epoch": 1.56, + "learning_rate": 4.615632165412929e-05, + "loss": 5.1027, + "step": 20445 + }, + { + "epoch": 1.56, + "learning_rate": 4.611609477452834e-05, + "loss": 3.85, + "step": 20450 + }, + { + "epoch": 1.56, + "learning_rate": 4.6075867894927395e-05, + "loss": 3.7928, + "step": 20455 + }, + { + "epoch": 1.56, + "learning_rate": 4.603564101532644e-05, + "loss": 3.9852, + "step": 20460 + }, + { + "epoch": 1.56, + "learning_rate": 4.59954141357255e-05, + "loss": 3.3365, + "step": 20465 + }, + { + "epoch": 1.56, + "learning_rate": 4.595518725612454e-05, + "loss": 2.475, + "step": 20470 + }, + { + "epoch": 1.56, + "learning_rate": 4.5914960376523595e-05, + "loss": 3.7325, + "step": 20475 + }, + { + "epoch": 1.57, + "learning_rate": 4.5874733496922646e-05, + "loss": 2.4769, + "step": 20480 + }, + { + "epoch": 1.57, + "learning_rate": 4.58345066173217e-05, + "loss": 3.2034, + "step": 20485 + }, + { + "epoch": 1.57, + "learning_rate": 4.579427973772075e-05, + "loss": 4.4615, + "step": 20490 + }, + { + "epoch": 1.57, + "learning_rate": 4.57540528581198e-05, + "loss": 3.6992, + "step": 20495 + }, + { + "epoch": 1.57, + "learning_rate": 4.5713825978518845e-05, + "loss": 3.141, + "step": 20500 + }, + { + "epoch": 1.57, + "learning_rate": 4.56735990989179e-05, + "loss": 3.8582, + "step": 20505 + }, + { + "epoch": 1.57, + "learning_rate": 4.563337221931695e-05, + "loss": 4.3367, + "step": 20510 + }, + { + "epoch": 1.57, + "learning_rate": 4.5593145339716e-05, + "loss": 4.4576, + "step": 20515 + }, + { + "epoch": 1.57, + "learning_rate": 4.555291846011505e-05, + "loss": 0.9217, + "step": 20520 + }, + { + "epoch": 1.57, + "learning_rate": 4.55126915805141e-05, + "loss": 2.4517, + "step": 20525 + }, + { + "epoch": 1.57, + "learning_rate": 4.5472464700913154e-05, + "loss": 0.8062, + "step": 20530 + }, + { + "epoch": 1.57, + "learning_rate": 4.54322378213122e-05, + "loss": 0.7989, + "step": 20535 + }, + { + "epoch": 1.57, + "learning_rate": 4.539201094171126e-05, + "loss": 3.5436, + "step": 20540 + }, + { + "epoch": 1.57, + "learning_rate": 4.53517840621103e-05, + "loss": 3.1012, + "step": 20545 + }, + { + "epoch": 1.57, + "learning_rate": 4.531155718250936e-05, + "loss": 3.8428, + "step": 20550 + }, + { + "epoch": 1.57, + "learning_rate": 4.5271330302908404e-05, + "loss": 3.9062, + "step": 20555 + }, + { + "epoch": 1.57, + "learning_rate": 4.5231103423307456e-05, + "loss": 2.7281, + "step": 20560 + }, + { + "epoch": 1.57, + "learning_rate": 4.519087654370651e-05, + "loss": 3.4412, + "step": 20565 + }, + { + "epoch": 1.57, + "learning_rate": 4.515064966410556e-05, + "loss": 1.8876, + "step": 20570 + }, + { + "epoch": 1.57, + "learning_rate": 4.511042278450461e-05, + "loss": 1.8162, + "step": 20575 + }, + { + "epoch": 1.57, + "learning_rate": 4.507019590490366e-05, + "loss": 3.2144, + "step": 20580 + }, + { + "epoch": 1.57, + "learning_rate": 4.5029969025302706e-05, + "loss": 1.301, + "step": 20585 + }, + { + "epoch": 1.57, + "learning_rate": 4.498974214570176e-05, + "loss": 4.8656, + "step": 20590 + }, + { + "epoch": 1.57, + "learning_rate": 4.494951526610081e-05, + "loss": 4.2117, + "step": 20595 + }, + { + "epoch": 1.57, + "learning_rate": 4.490928838649986e-05, + "loss": 4.4994, + "step": 20600 + }, + { + "epoch": 1.57, + "learning_rate": 4.486906150689891e-05, + "loss": 2.6848, + "step": 20605 + }, + { + "epoch": 1.58, + "learning_rate": 4.4828834627297964e-05, + "loss": 3.4116, + "step": 20610 + }, + { + "epoch": 1.58, + "learning_rate": 4.4788607747697015e-05, + "loss": 3.0542, + "step": 20615 + }, + { + "epoch": 1.58, + "learning_rate": 4.474838086809606e-05, + "loss": 3.0172, + "step": 20620 + }, + { + "epoch": 1.58, + "learning_rate": 4.470815398849512e-05, + "loss": 1.6656, + "step": 20625 + }, + { + "epoch": 1.58, + "learning_rate": 4.466792710889416e-05, + "loss": 2.6862, + "step": 20630 + }, + { + "epoch": 1.58, + "learning_rate": 4.462770022929322e-05, + "loss": 2.2119, + "step": 20635 + }, + { + "epoch": 1.58, + "learning_rate": 4.4587473349692266e-05, + "loss": 5.6586, + "step": 20640 + }, + { + "epoch": 1.58, + "learning_rate": 4.454724647009132e-05, + "loss": 3.9809, + "step": 20645 + }, + { + "epoch": 1.58, + "learning_rate": 4.450701959049037e-05, + "loss": 4.7715, + "step": 20650 + }, + { + "epoch": 1.58, + "learning_rate": 4.446679271088942e-05, + "loss": 4.0703, + "step": 20655 + }, + { + "epoch": 1.58, + "learning_rate": 4.442656583128847e-05, + "loss": 3.8097, + "step": 20660 + }, + { + "epoch": 1.58, + "learning_rate": 4.4386338951687516e-05, + "loss": 3.4632, + "step": 20665 + }, + { + "epoch": 1.58, + "learning_rate": 4.434611207208657e-05, + "loss": 3.296, + "step": 20670 + }, + { + "epoch": 1.58, + "learning_rate": 4.430588519248562e-05, + "loss": 3.2801, + "step": 20675 + }, + { + "epoch": 1.58, + "learning_rate": 4.426565831288467e-05, + "loss": 2.3163, + "step": 20680 + }, + { + "epoch": 1.58, + "learning_rate": 4.422543143328372e-05, + "loss": 4.8768, + "step": 20685 + }, + { + "epoch": 1.58, + "learning_rate": 4.4185204553682774e-05, + "loss": 4.2062, + "step": 20690 + }, + { + "epoch": 1.58, + "learning_rate": 4.4144977674081825e-05, + "loss": 3.9586, + "step": 20695 + }, + { + "epoch": 1.58, + "learning_rate": 4.410475079448088e-05, + "loss": 4.7627, + "step": 20700 + }, + { + "epoch": 1.58, + "learning_rate": 4.406452391487992e-05, + "loss": 4.001, + "step": 20705 + }, + { + "epoch": 1.58, + "learning_rate": 4.402429703527898e-05, + "loss": 3.7379, + "step": 20710 + }, + { + "epoch": 1.58, + "learning_rate": 4.3984070155678024e-05, + "loss": 4.5615, + "step": 20715 + }, + { + "epoch": 1.58, + "learning_rate": 4.3943843276077076e-05, + "loss": 3.2771, + "step": 20720 + }, + { + "epoch": 1.58, + "learning_rate": 4.390361639647613e-05, + "loss": 2.4874, + "step": 20725 + }, + { + "epoch": 1.58, + "learning_rate": 4.386338951687518e-05, + "loss": 1.7379, + "step": 20730 + }, + { + "epoch": 1.58, + "learning_rate": 4.382316263727423e-05, + "loss": 1.266, + "step": 20735 + }, + { + "epoch": 1.59, + "learning_rate": 4.3782935757673275e-05, + "loss": 4.9725, + "step": 20740 + }, + { + "epoch": 1.59, + "learning_rate": 4.374270887807233e-05, + "loss": 4.191, + "step": 20745 + }, + { + "epoch": 1.59, + "learning_rate": 4.370248199847138e-05, + "loss": 3.8467, + "step": 20750 + }, + { + "epoch": 1.59, + "learning_rate": 4.366225511887043e-05, + "loss": 3.1284, + "step": 20755 + }, + { + "epoch": 1.59, + "learning_rate": 4.362202823926948e-05, + "loss": 3.0203, + "step": 20760 + }, + { + "epoch": 1.59, + "learning_rate": 4.358180135966853e-05, + "loss": 3.1714, + "step": 20765 + }, + { + "epoch": 1.59, + "learning_rate": 4.3541574480067584e-05, + "loss": 1.5033, + "step": 20770 + }, + { + "epoch": 1.59, + "learning_rate": 4.3501347600466635e-05, + "loss": 2.4259, + "step": 20775 + }, + { + "epoch": 1.59, + "learning_rate": 4.3461120720865687e-05, + "loss": 3.8117, + "step": 20780 + }, + { + "epoch": 1.59, + "learning_rate": 4.342089384126474e-05, + "loss": 1.745, + "step": 20785 + }, + { + "epoch": 1.59, + "learning_rate": 4.338066696166378e-05, + "loss": 4.2043, + "step": 20790 + }, + { + "epoch": 1.59, + "learning_rate": 4.334044008206284e-05, + "loss": 2.6391, + "step": 20795 + }, + { + "epoch": 1.59, + "learning_rate": 4.3300213202461886e-05, + "loss": 3.6945, + "step": 20800 + }, + { + "epoch": 1.59, + "learning_rate": 4.325998632286094e-05, + "loss": 4.534, + "step": 20805 + }, + { + "epoch": 1.59, + "learning_rate": 4.321975944325999e-05, + "loss": 2.93, + "step": 20810 + }, + { + "epoch": 1.59, + "learning_rate": 4.317953256365903e-05, + "loss": 4.0686, + "step": 20815 + }, + { + "epoch": 1.59, + "learning_rate": 4.313930568405809e-05, + "loss": 4.2174, + "step": 20820 + }, + { + "epoch": 1.59, + "learning_rate": 4.3099078804457136e-05, + "loss": 2.6016, + "step": 20825 + }, + { + "epoch": 1.59, + "learning_rate": 4.3058851924856194e-05, + "loss": 2.1726, + "step": 20830 + }, + { + "epoch": 1.59, + "learning_rate": 4.301862504525524e-05, + "loss": 2.44, + "step": 20835 + }, + { + "epoch": 1.59, + "learning_rate": 4.297839816565429e-05, + "loss": 3.3509, + "step": 20840 + }, + { + "epoch": 1.59, + "learning_rate": 4.293817128605334e-05, + "loss": 3.4732, + "step": 20845 + }, + { + "epoch": 1.59, + "learning_rate": 4.2897944406452394e-05, + "loss": 4.4779, + "step": 20850 + }, + { + "epoch": 1.59, + "learning_rate": 4.2857717526851445e-05, + "loss": 3.4031, + "step": 20855 + }, + { + "epoch": 1.59, + "learning_rate": 4.2817490647250497e-05, + "loss": 2.8928, + "step": 20860 + }, + { + "epoch": 1.59, + "learning_rate": 4.277726376764955e-05, + "loss": 2.8609, + "step": 20865 + }, + { + "epoch": 1.6, + "learning_rate": 4.27370368880486e-05, + "loss": 2.7903, + "step": 20870 + }, + { + "epoch": 1.6, + "learning_rate": 4.2696810008447644e-05, + "loss": 1.7776, + "step": 20875 + }, + { + "epoch": 1.6, + "learning_rate": 4.26565831288467e-05, + "loss": 2.5144, + "step": 20880 + }, + { + "epoch": 1.6, + "learning_rate": 4.261635624924575e-05, + "loss": 1.6099, + "step": 20885 + }, + { + "epoch": 1.6, + "learning_rate": 4.25761293696448e-05, + "loss": 4.2301, + "step": 20890 + }, + { + "epoch": 1.6, + "learning_rate": 4.253590249004385e-05, + "loss": 3.3306, + "step": 20895 + }, + { + "epoch": 1.6, + "learning_rate": 4.2495675610442895e-05, + "loss": 3.6641, + "step": 20900 + }, + { + "epoch": 1.6, + "learning_rate": 4.245544873084195e-05, + "loss": 3.1027, + "step": 20905 + }, + { + "epoch": 1.6, + "learning_rate": 4.2415221851241e-05, + "loss": 4.1236, + "step": 20910 + }, + { + "epoch": 1.6, + "learning_rate": 4.2374994971640056e-05, + "loss": 1.9495, + "step": 20915 + }, + { + "epoch": 1.6, + "learning_rate": 4.23347680920391e-05, + "loss": 1.7783, + "step": 20920 + }, + { + "epoch": 1.6, + "learning_rate": 4.229454121243815e-05, + "loss": 2.336, + "step": 20925 + }, + { + "epoch": 1.6, + "learning_rate": 4.2254314332837204e-05, + "loss": 2.1791, + "step": 20930 + }, + { + "epoch": 1.6, + "learning_rate": 4.2214087453236255e-05, + "loss": 3.2233, + "step": 20935 + }, + { + "epoch": 1.6, + "learning_rate": 4.2173860573635306e-05, + "loss": 3.8101, + "step": 20940 + }, + { + "epoch": 1.6, + "learning_rate": 4.213363369403436e-05, + "loss": 3.4906, + "step": 20945 + }, + { + "epoch": 1.6, + "learning_rate": 4.20934068144334e-05, + "loss": 5.3133, + "step": 20950 + }, + { + "epoch": 1.6, + "learning_rate": 4.205317993483246e-05, + "loss": 3.0161, + "step": 20955 + }, + { + "epoch": 1.6, + "learning_rate": 4.2012953055231506e-05, + "loss": 3.6072, + "step": 20960 + }, + { + "epoch": 1.6, + "learning_rate": 4.197272617563056e-05, + "loss": 3.5251, + "step": 20965 + }, + { + "epoch": 1.6, + "learning_rate": 4.193249929602961e-05, + "loss": 2.6024, + "step": 20970 + }, + { + "epoch": 1.6, + "learning_rate": 4.189227241642866e-05, + "loss": 2.6017, + "step": 20975 + }, + { + "epoch": 1.6, + "learning_rate": 4.185204553682771e-05, + "loss": 3.2057, + "step": 20980 + }, + { + "epoch": 1.6, + "learning_rate": 4.1811818657226756e-05, + "loss": 1.6699, + "step": 20985 + }, + { + "epoch": 1.6, + "learning_rate": 4.1771591777625814e-05, + "loss": 4.5773, + "step": 20990 + }, + { + "epoch": 1.6, + "learning_rate": 4.173136489802486e-05, + "loss": 4.158, + "step": 20995 + }, + { + "epoch": 1.61, + "learning_rate": 4.169113801842392e-05, + "loss": 5.7055, + "step": 21000 + }, + { + "epoch": 1.61, + "learning_rate": 4.165091113882296e-05, + "loss": 3.018, + "step": 21005 + }, + { + "epoch": 1.61, + "learning_rate": 4.1610684259222013e-05, + "loss": 2.5031, + "step": 21010 + }, + { + "epoch": 1.61, + "learning_rate": 4.1570457379621065e-05, + "loss": 3.1937, + "step": 21015 + }, + { + "epoch": 1.61, + "learning_rate": 4.1530230500020116e-05, + "loss": 1.6752, + "step": 21020 + }, + { + "epoch": 1.61, + "learning_rate": 4.149000362041917e-05, + "loss": 1.4042, + "step": 21025 + }, + { + "epoch": 1.61, + "learning_rate": 4.144977674081822e-05, + "loss": 2.4617, + "step": 21030 + }, + { + "epoch": 1.61, + "learning_rate": 4.1409549861217264e-05, + "loss": 3.0018, + "step": 21035 + }, + { + "epoch": 1.61, + "learning_rate": 4.1369322981616315e-05, + "loss": 4.4467, + "step": 21040 + }, + { + "epoch": 1.61, + "learning_rate": 4.132909610201537e-05, + "loss": 4.9004, + "step": 21045 + }, + { + "epoch": 1.61, + "learning_rate": 4.128886922241442e-05, + "loss": 4.0383, + "step": 21050 + }, + { + "epoch": 1.61, + "learning_rate": 4.124864234281347e-05, + "loss": 4.24, + "step": 21055 + }, + { + "epoch": 1.61, + "learning_rate": 4.120841546321252e-05, + "loss": 3.2634, + "step": 21060 + }, + { + "epoch": 1.61, + "learning_rate": 4.116818858361157e-05, + "loss": 2.5876, + "step": 21065 + }, + { + "epoch": 1.61, + "learning_rate": 4.112796170401062e-05, + "loss": 4.6117, + "step": 21070 + }, + { + "epoch": 1.61, + "learning_rate": 4.1087734824409676e-05, + "loss": 0.7939, + "step": 21075 + }, + { + "epoch": 1.61, + "learning_rate": 4.104750794480872e-05, + "loss": 2.6782, + "step": 21080 + }, + { + "epoch": 1.61, + "learning_rate": 4.100728106520778e-05, + "loss": 3.213, + "step": 21085 + }, + { + "epoch": 1.61, + "learning_rate": 4.096705418560682e-05, + "loss": 5.3123, + "step": 21090 + }, + { + "epoch": 1.61, + "learning_rate": 4.0926827306005875e-05, + "loss": 3.9619, + "step": 21095 + }, + { + "epoch": 1.61, + "learning_rate": 4.0886600426404926e-05, + "loss": 4.3588, + "step": 21100 + }, + { + "epoch": 1.61, + "learning_rate": 4.084637354680398e-05, + "loss": 3.2867, + "step": 21105 + }, + { + "epoch": 1.61, + "learning_rate": 4.080614666720303e-05, + "loss": 2.582, + "step": 21110 + }, + { + "epoch": 1.61, + "learning_rate": 4.076591978760208e-05, + "loss": 1.9699, + "step": 21115 + }, + { + "epoch": 1.61, + "learning_rate": 4.0725692908001125e-05, + "loss": 2.4965, + "step": 21120 + }, + { + "epoch": 1.61, + "learning_rate": 4.068546602840018e-05, + "loss": 3.1243, + "step": 21125 + }, + { + "epoch": 1.61, + "learning_rate": 4.064523914879923e-05, + "loss": 1.0647, + "step": 21130 + }, + { + "epoch": 1.62, + "learning_rate": 4.060501226919828e-05, + "loss": 2.6103, + "step": 21135 + }, + { + "epoch": 1.62, + "learning_rate": 4.056478538959733e-05, + "loss": 3.5543, + "step": 21140 + }, + { + "epoch": 1.62, + "learning_rate": 4.052455850999638e-05, + "loss": 4.9754, + "step": 21145 + }, + { + "epoch": 1.62, + "learning_rate": 4.0484331630395434e-05, + "loss": 3.5666, + "step": 21150 + }, + { + "epoch": 1.62, + "learning_rate": 4.044410475079448e-05, + "loss": 3.3064, + "step": 21155 + }, + { + "epoch": 1.62, + "learning_rate": 4.040387787119354e-05, + "loss": 3.4265, + "step": 21160 + }, + { + "epoch": 1.62, + "learning_rate": 4.036365099159258e-05, + "loss": 2.6561, + "step": 21165 + }, + { + "epoch": 1.62, + "learning_rate": 4.032342411199164e-05, + "loss": 1.9494, + "step": 21170 + }, + { + "epoch": 1.62, + "learning_rate": 4.0283197232390685e-05, + "loss": 1.829, + "step": 21175 + }, + { + "epoch": 1.62, + "learning_rate": 4.0242970352789736e-05, + "loss": 2.8679, + "step": 21180 + }, + { + "epoch": 1.62, + "learning_rate": 4.020274347318879e-05, + "loss": 2.5059, + "step": 21185 + }, + { + "epoch": 1.62, + "learning_rate": 4.016251659358784e-05, + "loss": 3.6293, + "step": 21190 + }, + { + "epoch": 1.62, + "learning_rate": 4.012228971398689e-05, + "loss": 4.1984, + "step": 21195 + }, + { + "epoch": 1.62, + "learning_rate": 4.0082062834385935e-05, + "loss": 3.8107, + "step": 21200 + }, + { + "epoch": 1.62, + "learning_rate": 4.004183595478499e-05, + "loss": 2.8273, + "step": 21205 + }, + { + "epoch": 1.62, + "learning_rate": 4.000160907518404e-05, + "loss": 3.2559, + "step": 21210 + }, + { + "epoch": 1.62, + "learning_rate": 3.996138219558309e-05, + "loss": 2.5233, + "step": 21215 + }, + { + "epoch": 1.62, + "learning_rate": 3.992115531598214e-05, + "loss": 2.903, + "step": 21220 + }, + { + "epoch": 1.62, + "learning_rate": 3.988092843638119e-05, + "loss": 3.3081, + "step": 21225 + }, + { + "epoch": 1.62, + "learning_rate": 3.9840701556780244e-05, + "loss": 3.3352, + "step": 21230 + }, + { + "epoch": 1.62, + "learning_rate": 3.9800474677179296e-05, + "loss": 1.8759, + "step": 21235 + }, + { + "epoch": 1.62, + "learning_rate": 3.976024779757834e-05, + "loss": 5.8707, + "step": 21240 + }, + { + "epoch": 1.62, + "learning_rate": 3.97200209179774e-05, + "loss": 4.9869, + "step": 21245 + }, + { + "epoch": 1.62, + "learning_rate": 3.967979403837644e-05, + "loss": 4.4971, + "step": 21250 + }, + { + "epoch": 1.62, + "learning_rate": 3.9639567158775495e-05, + "loss": 3.6207, + "step": 21255 + }, + { + "epoch": 1.62, + "learning_rate": 3.9599340279174546e-05, + "loss": 2.1977, + "step": 21260 + }, + { + "epoch": 1.63, + "learning_rate": 3.95591133995736e-05, + "loss": 2.8388, + "step": 21265 + }, + { + "epoch": 1.63, + "learning_rate": 3.951888651997265e-05, + "loss": 1.8477, + "step": 21270 + }, + { + "epoch": 1.63, + "learning_rate": 3.9478659640371694e-05, + "loss": 1.4987, + "step": 21275 + }, + { + "epoch": 1.63, + "learning_rate": 3.943843276077075e-05, + "loss": 1.4276, + "step": 21280 + }, + { + "epoch": 1.63, + "learning_rate": 3.93982058811698e-05, + "loss": 1.3133, + "step": 21285 + }, + { + "epoch": 1.63, + "learning_rate": 3.935797900156885e-05, + "loss": 4.0492, + "step": 21290 + }, + { + "epoch": 1.63, + "learning_rate": 3.93177521219679e-05, + "loss": 4.2029, + "step": 21295 + }, + { + "epoch": 1.63, + "learning_rate": 3.927752524236695e-05, + "loss": 3.3377, + "step": 21300 + }, + { + "epoch": 1.63, + "learning_rate": 3.9237298362766e-05, + "loss": 4.1035, + "step": 21305 + }, + { + "epoch": 1.63, + "learning_rate": 3.9197071483165054e-05, + "loss": 4.1178, + "step": 21310 + }, + { + "epoch": 1.63, + "learning_rate": 3.9156844603564105e-05, + "loss": 2.9779, + "step": 21315 + }, + { + "epoch": 1.63, + "learning_rate": 3.911661772396316e-05, + "loss": 1.2614, + "step": 21320 + }, + { + "epoch": 1.63, + "learning_rate": 3.90763908443622e-05, + "loss": 1.6433, + "step": 21325 + }, + { + "epoch": 1.63, + "learning_rate": 3.903616396476126e-05, + "loss": 1.6593, + "step": 21330 + }, + { + "epoch": 1.63, + "learning_rate": 3.8995937085160305e-05, + "loss": 4.43, + "step": 21335 + }, + { + "epoch": 1.63, + "learning_rate": 3.8955710205559356e-05, + "loss": 5.0719, + "step": 21340 + }, + { + "epoch": 1.63, + "learning_rate": 3.891548332595841e-05, + "loss": 3.8008, + "step": 21345 + }, + { + "epoch": 1.63, + "learning_rate": 3.887525644635745e-05, + "loss": 4.433, + "step": 21350 + }, + { + "epoch": 1.63, + "learning_rate": 3.883502956675651e-05, + "loss": 3.3838, + "step": 21355 + }, + { + "epoch": 1.63, + "learning_rate": 3.8794802687155555e-05, + "loss": 3.7695, + "step": 21360 + }, + { + "epoch": 1.63, + "learning_rate": 3.875457580755461e-05, + "loss": 2.0561, + "step": 21365 + }, + { + "epoch": 1.63, + "learning_rate": 3.871434892795366e-05, + "loss": 2.6766, + "step": 21370 + }, + { + "epoch": 1.63, + "learning_rate": 3.867412204835271e-05, + "loss": 2.1337, + "step": 21375 + }, + { + "epoch": 1.63, + "learning_rate": 3.863389516875176e-05, + "loss": 2.6355, + "step": 21380 + }, + { + "epoch": 1.63, + "learning_rate": 3.859366828915081e-05, + "loss": 1.5269, + "step": 21385 + }, + { + "epoch": 1.63, + "learning_rate": 3.8553441409549864e-05, + "loss": 4.2592, + "step": 21390 + }, + { + "epoch": 1.64, + "learning_rate": 3.8513214529948915e-05, + "loss": 4.6906, + "step": 21395 + }, + { + "epoch": 1.64, + "learning_rate": 3.847298765034797e-05, + "loss": 3.7479, + "step": 21400 + }, + { + "epoch": 1.64, + "learning_rate": 3.843276077074702e-05, + "loss": 4.2096, + "step": 21405 + }, + { + "epoch": 1.64, + "learning_rate": 3.839253389114606e-05, + "loss": 3.7812, + "step": 21410 + }, + { + "epoch": 1.64, + "learning_rate": 3.835230701154512e-05, + "loss": 5.1412, + "step": 21415 + }, + { + "epoch": 1.64, + "learning_rate": 3.8312080131944166e-05, + "loss": 2.5208, + "step": 21420 + }, + { + "epoch": 1.64, + "learning_rate": 3.827185325234322e-05, + "loss": 1.2869, + "step": 21425 + }, + { + "epoch": 1.64, + "learning_rate": 3.823162637274227e-05, + "loss": 1.4612, + "step": 21430 + }, + { + "epoch": 1.64, + "learning_rate": 3.8191399493141314e-05, + "loss": 4.1741, + "step": 21435 + }, + { + "epoch": 1.64, + "learning_rate": 3.815117261354037e-05, + "loss": 4.3588, + "step": 21440 + }, + { + "epoch": 1.64, + "learning_rate": 3.8110945733939417e-05, + "loss": 3.8693, + "step": 21445 + }, + { + "epoch": 1.64, + "learning_rate": 3.8070718854338475e-05, + "loss": 3.3549, + "step": 21450 + }, + { + "epoch": 1.64, + "learning_rate": 3.803049197473752e-05, + "loss": 3.8469, + "step": 21455 + }, + { + "epoch": 1.64, + "learning_rate": 3.799026509513657e-05, + "loss": 2.6488, + "step": 21460 + }, + { + "epoch": 1.64, + "learning_rate": 3.795003821553562e-05, + "loss": 4.3189, + "step": 21465 + }, + { + "epoch": 1.64, + "learning_rate": 3.7909811335934674e-05, + "loss": 2.1022, + "step": 21470 + }, + { + "epoch": 1.64, + "learning_rate": 3.7869584456333725e-05, + "loss": 3.1552, + "step": 21475 + }, + { + "epoch": 1.64, + "learning_rate": 3.782935757673278e-05, + "loss": 0.6143, + "step": 21480 + }, + { + "epoch": 1.64, + "learning_rate": 3.778913069713182e-05, + "loss": 2.1666, + "step": 21485 + }, + { + "epoch": 1.64, + "learning_rate": 3.774890381753088e-05, + "loss": 4.232, + "step": 21490 + }, + { + "epoch": 1.64, + "learning_rate": 3.7708676937929924e-05, + "loss": 4.3281, + "step": 21495 + }, + { + "epoch": 1.64, + "learning_rate": 3.7668450058328976e-05, + "loss": 4.859, + "step": 21500 + }, + { + "epoch": 1.64, + "learning_rate": 3.762822317872803e-05, + "loss": 3.485, + "step": 21505 + }, + { + "epoch": 1.64, + "learning_rate": 3.758799629912708e-05, + "loss": 3.0216, + "step": 21510 + }, + { + "epoch": 1.64, + "learning_rate": 3.754776941952613e-05, + "loss": 2.9945, + "step": 21515 + }, + { + "epoch": 1.64, + "learning_rate": 3.7507542539925175e-05, + "loss": 3.9133, + "step": 21520 + }, + { + "epoch": 1.65, + "learning_rate": 3.746731566032423e-05, + "loss": 1.9285, + "step": 21525 + }, + { + "epoch": 1.65, + "learning_rate": 3.742708878072328e-05, + "loss": 2.2346, + "step": 21530 + }, + { + "epoch": 1.65, + "learning_rate": 3.7386861901122336e-05, + "loss": 2.376, + "step": 21535 + }, + { + "epoch": 1.65, + "learning_rate": 3.734663502152138e-05, + "loss": 5.018, + "step": 21540 + }, + { + "epoch": 1.65, + "learning_rate": 3.730640814192043e-05, + "loss": 3.4057, + "step": 21545 + }, + { + "epoch": 1.65, + "learning_rate": 3.7266181262319484e-05, + "loss": 4.6818, + "step": 21550 + }, + { + "epoch": 1.65, + "learning_rate": 3.7225954382718535e-05, + "loss": 3.9047, + "step": 21555 + }, + { + "epoch": 1.65, + "learning_rate": 3.718572750311759e-05, + "loss": 3.6093, + "step": 21560 + }, + { + "epoch": 1.65, + "learning_rate": 3.714550062351664e-05, + "loss": 4.4719, + "step": 21565 + }, + { + "epoch": 1.65, + "learning_rate": 3.710527374391568e-05, + "loss": 3.2473, + "step": 21570 + }, + { + "epoch": 1.65, + "learning_rate": 3.7065046864314734e-05, + "loss": 2.2506, + "step": 21575 + }, + { + "epoch": 1.65, + "learning_rate": 3.7024819984713786e-05, + "loss": 2.268, + "step": 21580 + }, + { + "epoch": 1.65, + "learning_rate": 3.698459310511284e-05, + "loss": 3.6383, + "step": 21585 + }, + { + "epoch": 1.65, + "learning_rate": 3.694436622551189e-05, + "loss": 4.4379, + "step": 21590 + }, + { + "epoch": 1.65, + "learning_rate": 3.690413934591094e-05, + "loss": 4.248, + "step": 21595 + }, + { + "epoch": 1.65, + "learning_rate": 3.686391246630999e-05, + "loss": 4.4307, + "step": 21600 + }, + { + "epoch": 1.65, + "learning_rate": 3.6823685586709036e-05, + "loss": 3.4584, + "step": 21605 + }, + { + "epoch": 1.65, + "learning_rate": 3.6783458707108095e-05, + "loss": 4.1742, + "step": 21610 + }, + { + "epoch": 1.65, + "learning_rate": 3.674323182750714e-05, + "loss": 5.0411, + "step": 21615 + }, + { + "epoch": 1.65, + "learning_rate": 3.67030049479062e-05, + "loss": 2.3921, + "step": 21620 + }, + { + "epoch": 1.65, + "learning_rate": 3.666277806830524e-05, + "loss": 2.7103, + "step": 21625 + }, + { + "epoch": 1.65, + "learning_rate": 3.6622551188704294e-05, + "loss": 2.1656, + "step": 21630 + }, + { + "epoch": 1.65, + "learning_rate": 3.6582324309103345e-05, + "loss": 1.949, + "step": 21635 + }, + { + "epoch": 1.65, + "learning_rate": 3.65420974295024e-05, + "loss": 2.9959, + "step": 21640 + }, + { + "epoch": 1.65, + "learning_rate": 3.650187054990145e-05, + "loss": 3.8207, + "step": 21645 + }, + { + "epoch": 1.65, + "learning_rate": 3.646164367030049e-05, + "loss": 3.8807, + "step": 21650 + }, + { + "epoch": 1.66, + "learning_rate": 3.6421416790699544e-05, + "loss": 4.1562, + "step": 21655 + }, + { + "epoch": 1.66, + "learning_rate": 3.6381189911098596e-05, + "loss": 3.2987, + "step": 21660 + }, + { + "epoch": 1.66, + "learning_rate": 3.634096303149765e-05, + "loss": 2.7125, + "step": 21665 + }, + { + "epoch": 1.66, + "learning_rate": 3.63007361518967e-05, + "loss": 2.6132, + "step": 21670 + }, + { + "epoch": 1.66, + "learning_rate": 3.626050927229575e-05, + "loss": 2.0622, + "step": 21675 + }, + { + "epoch": 1.66, + "learning_rate": 3.62202823926948e-05, + "loss": 2.7405, + "step": 21680 + }, + { + "epoch": 1.66, + "learning_rate": 3.618005551309385e-05, + "loss": 2.9218, + "step": 21685 + }, + { + "epoch": 1.66, + "learning_rate": 3.61398286334929e-05, + "loss": 4.7357, + "step": 21690 + }, + { + "epoch": 1.66, + "learning_rate": 3.6099601753891956e-05, + "loss": 4.2771, + "step": 21695 + }, + { + "epoch": 1.66, + "learning_rate": 3.6059374874291e-05, + "loss": 4.242, + "step": 21700 + }, + { + "epoch": 1.66, + "learning_rate": 3.601914799469006e-05, + "loss": 4.1357, + "step": 21705 + }, + { + "epoch": 1.66, + "learning_rate": 3.5978921115089104e-05, + "loss": 2.1266, + "step": 21710 + }, + { + "epoch": 1.66, + "learning_rate": 3.5938694235488155e-05, + "loss": 4.3561, + "step": 21715 + }, + { + "epoch": 1.66, + "learning_rate": 3.5898467355887207e-05, + "loss": 2.5493, + "step": 21720 + }, + { + "epoch": 1.66, + "learning_rate": 3.585824047628626e-05, + "loss": 3.3787, + "step": 21725 + }, + { + "epoch": 1.66, + "learning_rate": 3.581801359668531e-05, + "loss": 2.3158, + "step": 21730 + }, + { + "epoch": 1.66, + "learning_rate": 3.5777786717084354e-05, + "loss": 0.401, + "step": 21735 + }, + { + "epoch": 1.66, + "learning_rate": 3.5737559837483406e-05, + "loss": 4.6857, + "step": 21740 + }, + { + "epoch": 1.66, + "learning_rate": 3.569733295788246e-05, + "loss": 4.5549, + "step": 21745 + }, + { + "epoch": 1.66, + "learning_rate": 3.565710607828151e-05, + "loss": 4.2082, + "step": 21750 + }, + { + "epoch": 1.66, + "learning_rate": 3.561687919868056e-05, + "loss": 3.7506, + "step": 21755 + }, + { + "epoch": 1.66, + "learning_rate": 3.557665231907961e-05, + "loss": 3.4496, + "step": 21760 + }, + { + "epoch": 1.66, + "learning_rate": 3.553642543947866e-05, + "loss": 3.6854, + "step": 21765 + }, + { + "epoch": 1.66, + "learning_rate": 3.5496198559877714e-05, + "loss": 2.3951, + "step": 21770 + }, + { + "epoch": 1.66, + "learning_rate": 3.545597168027676e-05, + "loss": 1.496, + "step": 21775 + }, + { + "epoch": 1.66, + "learning_rate": 3.541574480067582e-05, + "loss": 2.3501, + "step": 21780 + }, + { + "epoch": 1.67, + "learning_rate": 3.537551792107486e-05, + "loss": 2.6589, + "step": 21785 + }, + { + "epoch": 1.67, + "learning_rate": 3.5335291041473914e-05, + "loss": 4.2689, + "step": 21790 + }, + { + "epoch": 1.67, + "learning_rate": 3.5295064161872965e-05, + "loss": 3.6809, + "step": 21795 + }, + { + "epoch": 1.67, + "learning_rate": 3.5254837282272016e-05, + "loss": 3.1069, + "step": 21800 + }, + { + "epoch": 1.67, + "learning_rate": 3.521461040267107e-05, + "loss": 2.6169, + "step": 21805 + }, + { + "epoch": 1.67, + "learning_rate": 3.517438352307011e-05, + "loss": 2.609, + "step": 21810 + }, + { + "epoch": 1.67, + "learning_rate": 3.513415664346917e-05, + "loss": 1.233, + "step": 21815 + }, + { + "epoch": 1.67, + "learning_rate": 3.5093929763868216e-05, + "loss": 3.612, + "step": 21820 + }, + { + "epoch": 1.67, + "learning_rate": 3.505370288426727e-05, + "loss": 2.2503, + "step": 21825 + }, + { + "epoch": 1.67, + "learning_rate": 3.501347600466632e-05, + "loss": 2.9724, + "step": 21830 + }, + { + "epoch": 1.67, + "learning_rate": 3.497324912506537e-05, + "loss": 1.8672, + "step": 21835 + }, + { + "epoch": 1.67, + "learning_rate": 3.493302224546442e-05, + "loss": 5.3764, + "step": 21840 + }, + { + "epoch": 1.67, + "learning_rate": 3.489279536586347e-05, + "loss": 4.4464, + "step": 21845 + }, + { + "epoch": 1.67, + "learning_rate": 3.4852568486262524e-05, + "loss": 3.6959, + "step": 21850 + }, + { + "epoch": 1.67, + "learning_rate": 3.4812341606661576e-05, + "loss": 3.6832, + "step": 21855 + }, + { + "epoch": 1.67, + "learning_rate": 3.477211472706062e-05, + "loss": 3.5953, + "step": 21860 + }, + { + "epoch": 1.67, + "learning_rate": 3.473188784745968e-05, + "loss": 3.1239, + "step": 21865 + }, + { + "epoch": 1.67, + "learning_rate": 3.4691660967858723e-05, + "loss": 2.1076, + "step": 21870 + }, + { + "epoch": 1.67, + "learning_rate": 3.4651434088257775e-05, + "loss": 3.3783, + "step": 21875 + }, + { + "epoch": 1.67, + "learning_rate": 3.4611207208656826e-05, + "loss": 1.4034, + "step": 21880 + }, + { + "epoch": 1.67, + "learning_rate": 3.457098032905587e-05, + "loss": 1.7014, + "step": 21885 + }, + { + "epoch": 1.67, + "learning_rate": 3.453075344945493e-05, + "loss": 5.3859, + "step": 21890 + }, + { + "epoch": 1.67, + "learning_rate": 3.4490526569853974e-05, + "loss": 4.724, + "step": 21895 + }, + { + "epoch": 1.67, + "learning_rate": 3.445029969025303e-05, + "loss": 3.8088, + "step": 21900 + }, + { + "epoch": 1.67, + "learning_rate": 3.441007281065208e-05, + "loss": 3.1448, + "step": 21905 + }, + { + "epoch": 1.67, + "learning_rate": 3.436984593105113e-05, + "loss": 3.1462, + "step": 21910 + }, + { + "epoch": 1.67, + "learning_rate": 3.432961905145018e-05, + "loss": 2.371, + "step": 21915 + }, + { + "epoch": 1.68, + "learning_rate": 3.428939217184923e-05, + "loss": 2.8063, + "step": 21920 + }, + { + "epoch": 1.68, + "learning_rate": 3.424916529224828e-05, + "loss": 2.514, + "step": 21925 + }, + { + "epoch": 1.68, + "learning_rate": 3.4208938412647334e-05, + "loss": 3.2729, + "step": 21930 + }, + { + "epoch": 1.68, + "learning_rate": 3.4168711533046386e-05, + "loss": 2.0309, + "step": 21935 + }, + { + "epoch": 1.68, + "learning_rate": 3.412848465344544e-05, + "loss": 3.9336, + "step": 21940 + }, + { + "epoch": 1.68, + "learning_rate": 3.408825777384448e-05, + "loss": 4.7568, + "step": 21945 + }, + { + "epoch": 1.68, + "learning_rate": 3.404803089424354e-05, + "loss": 3.4958, + "step": 21950 + }, + { + "epoch": 1.68, + "learning_rate": 3.4007804014642585e-05, + "loss": 3.6162, + "step": 21955 + }, + { + "epoch": 1.68, + "learning_rate": 3.3967577135041636e-05, + "loss": 3.7423, + "step": 21960 + }, + { + "epoch": 1.68, + "learning_rate": 3.392735025544069e-05, + "loss": 1.3449, + "step": 21965 + }, + { + "epoch": 1.68, + "learning_rate": 3.388712337583973e-05, + "loss": 2.8116, + "step": 21970 + }, + { + "epoch": 1.68, + "learning_rate": 3.384689649623879e-05, + "loss": 2.0983, + "step": 21975 + }, + { + "epoch": 1.68, + "learning_rate": 3.3806669616637835e-05, + "loss": 3.2099, + "step": 21980 + }, + { + "epoch": 1.68, + "learning_rate": 3.3766442737036894e-05, + "loss": 1.4468, + "step": 21985 + }, + { + "epoch": 1.68, + "learning_rate": 3.372621585743594e-05, + "loss": 4.3094, + "step": 21990 + }, + { + "epoch": 1.68, + "learning_rate": 3.368598897783499e-05, + "loss": 2.935, + "step": 21995 + }, + { + "epoch": 1.68, + "learning_rate": 3.364576209823404e-05, + "loss": 4.1764, + "step": 22000 + }, + { + "epoch": 1.68, + "learning_rate": 3.360553521863309e-05, + "loss": 4.1816, + "step": 22005 + }, + { + "epoch": 1.68, + "learning_rate": 3.3565308339032144e-05, + "loss": 2.4688, + "step": 22010 + }, + { + "epoch": 1.68, + "learning_rate": 3.3525081459431196e-05, + "loss": 3.324, + "step": 22015 + }, + { + "epoch": 1.68, + "learning_rate": 3.348485457983024e-05, + "loss": 3.443, + "step": 22020 + }, + { + "epoch": 1.68, + "learning_rate": 3.34446277002293e-05, + "loss": 2.2952, + "step": 22025 + }, + { + "epoch": 1.68, + "learning_rate": 3.340440082062834e-05, + "loss": 2.7904, + "step": 22030 + }, + { + "epoch": 1.68, + "learning_rate": 3.3364173941027395e-05, + "loss": 3.2367, + "step": 22035 + }, + { + "epoch": 1.68, + "learning_rate": 3.3323947061426446e-05, + "loss": 3.6912, + "step": 22040 + }, + { + "epoch": 1.68, + "learning_rate": 3.32837201818255e-05, + "loss": 4.3529, + "step": 22045 + }, + { + "epoch": 1.69, + "learning_rate": 3.324349330222455e-05, + "loss": 4.1754, + "step": 22050 + }, + { + "epoch": 1.69, + "learning_rate": 3.3203266422623594e-05, + "loss": 4.0643, + "step": 22055 + }, + { + "epoch": 1.69, + "learning_rate": 3.316303954302265e-05, + "loss": 3.0192, + "step": 22060 + }, + { + "epoch": 1.69, + "learning_rate": 3.31228126634217e-05, + "loss": 3.0924, + "step": 22065 + }, + { + "epoch": 1.69, + "learning_rate": 3.3082585783820755e-05, + "loss": 2.415, + "step": 22070 + }, + { + "epoch": 1.69, + "learning_rate": 3.30423589042198e-05, + "loss": 3.3243, + "step": 22075 + }, + { + "epoch": 1.69, + "learning_rate": 3.300213202461885e-05, + "loss": 1.2963, + "step": 22080 + }, + { + "epoch": 1.69, + "learning_rate": 3.29619051450179e-05, + "loss": 3.9981, + "step": 22085 + }, + { + "epoch": 1.69, + "learning_rate": 3.2921678265416954e-05, + "loss": 3.3592, + "step": 22090 + }, + { + "epoch": 1.69, + "learning_rate": 3.2881451385816006e-05, + "loss": 3.4289, + "step": 22095 + }, + { + "epoch": 1.69, + "learning_rate": 3.284122450621506e-05, + "loss": 3.7771, + "step": 22100 + }, + { + "epoch": 1.69, + "learning_rate": 3.28009976266141e-05, + "loss": 3.6328, + "step": 22105 + }, + { + "epoch": 1.69, + "learning_rate": 3.276077074701315e-05, + "loss": 3.0618, + "step": 22110 + }, + { + "epoch": 1.69, + "learning_rate": 3.2720543867412205e-05, + "loss": 2.6314, + "step": 22115 + }, + { + "epoch": 1.69, + "learning_rate": 3.2680316987811256e-05, + "loss": 1.7609, + "step": 22120 + }, + { + "epoch": 1.69, + "learning_rate": 3.264009010821031e-05, + "loss": 1.2902, + "step": 22125 + }, + { + "epoch": 1.69, + "learning_rate": 3.259986322860936e-05, + "loss": 0.4134, + "step": 22130 + }, + { + "epoch": 1.69, + "learning_rate": 3.255963634900841e-05, + "loss": 2.865, + "step": 22135 + }, + { + "epoch": 1.69, + "learning_rate": 3.2519409469407455e-05, + "loss": 4.5992, + "step": 22140 + }, + { + "epoch": 1.69, + "learning_rate": 3.2479182589806514e-05, + "loss": 4.1874, + "step": 22145 + }, + { + "epoch": 1.69, + "learning_rate": 3.243895571020556e-05, + "loss": 2.4801, + "step": 22150 + }, + { + "epoch": 1.69, + "learning_rate": 3.2398728830604616e-05, + "loss": 3.9021, + "step": 22155 + }, + { + "epoch": 1.69, + "learning_rate": 3.235850195100366e-05, + "loss": 3.6825, + "step": 22160 + }, + { + "epoch": 1.69, + "learning_rate": 3.231827507140271e-05, + "loss": 3.2391, + "step": 22165 + }, + { + "epoch": 1.69, + "learning_rate": 3.2278048191801764e-05, + "loss": 3.5101, + "step": 22170 + }, + { + "epoch": 1.69, + "learning_rate": 3.2237821312200816e-05, + "loss": 3.7007, + "step": 22175 + }, + { + "epoch": 1.7, + "learning_rate": 3.219759443259987e-05, + "loss": 1.1778, + "step": 22180 + }, + { + "epoch": 1.7, + "learning_rate": 3.215736755299891e-05, + "loss": 0.9388, + "step": 22185 + }, + { + "epoch": 1.7, + "learning_rate": 3.211714067339796e-05, + "loss": 4.8832, + "step": 22190 + }, + { + "epoch": 1.7, + "learning_rate": 3.2076913793797015e-05, + "loss": 3.309, + "step": 22195 + }, + { + "epoch": 1.7, + "learning_rate": 3.2036686914196066e-05, + "loss": 4.3115, + "step": 22200 + }, + { + "epoch": 1.7, + "learning_rate": 3.199646003459512e-05, + "loss": 4.6436, + "step": 22205 + }, + { + "epoch": 1.7, + "learning_rate": 3.195623315499417e-05, + "loss": 3.9236, + "step": 22210 + }, + { + "epoch": 1.7, + "learning_rate": 3.191600627539322e-05, + "loss": 2.4813, + "step": 22215 + }, + { + "epoch": 1.7, + "learning_rate": 3.187577939579227e-05, + "loss": 1.7186, + "step": 22220 + }, + { + "epoch": 1.7, + "learning_rate": 3.183555251619132e-05, + "loss": 3.0956, + "step": 22225 + }, + { + "epoch": 1.7, + "learning_rate": 3.1795325636590375e-05, + "loss": 0.8746, + "step": 22230 + }, + { + "epoch": 1.7, + "learning_rate": 3.175509875698942e-05, + "loss": 2.345, + "step": 22235 + }, + { + "epoch": 1.7, + "learning_rate": 3.171487187738848e-05, + "loss": 3.7502, + "step": 22240 + }, + { + "epoch": 1.7, + "learning_rate": 3.167464499778752e-05, + "loss": 4.0754, + "step": 22245 + }, + { + "epoch": 1.7, + "learning_rate": 3.1634418118186574e-05, + "loss": 5.118, + "step": 22250 + }, + { + "epoch": 1.7, + "learning_rate": 3.1594191238585625e-05, + "loss": 5.1141, + "step": 22255 + }, + { + "epoch": 1.7, + "learning_rate": 3.155396435898468e-05, + "loss": 3.1888, + "step": 22260 + }, + { + "epoch": 1.7, + "learning_rate": 3.151373747938373e-05, + "loss": 2.4457, + "step": 22265 + }, + { + "epoch": 1.7, + "learning_rate": 3.147351059978277e-05, + "loss": 3.7101, + "step": 22270 + }, + { + "epoch": 1.7, + "learning_rate": 3.1433283720181825e-05, + "loss": 2.8647, + "step": 22275 + }, + { + "epoch": 1.7, + "learning_rate": 3.1393056840580876e-05, + "loss": 2.2749, + "step": 22280 + }, + { + "epoch": 1.7, + "learning_rate": 3.135282996097993e-05, + "loss": 2.0576, + "step": 22285 + }, + { + "epoch": 1.7, + "learning_rate": 3.131260308137898e-05, + "loss": 4.5924, + "step": 22290 + }, + { + "epoch": 1.7, + "learning_rate": 3.127237620177803e-05, + "loss": 3.8521, + "step": 22295 + }, + { + "epoch": 1.7, + "learning_rate": 3.123214932217708e-05, + "loss": 3.3519, + "step": 22300 + }, + { + "epoch": 1.7, + "learning_rate": 3.119192244257613e-05, + "loss": 3.1276, + "step": 22305 + }, + { + "epoch": 1.71, + "learning_rate": 3.115169556297518e-05, + "loss": 3.8262, + "step": 22310 + }, + { + "epoch": 1.71, + "learning_rate": 3.1111468683374236e-05, + "loss": 2.2402, + "step": 22315 + }, + { + "epoch": 1.71, + "learning_rate": 3.107124180377328e-05, + "loss": 2.7102, + "step": 22320 + }, + { + "epoch": 1.71, + "learning_rate": 3.103101492417233e-05, + "loss": 1.8521, + "step": 22325 + }, + { + "epoch": 1.71, + "learning_rate": 3.0990788044571384e-05, + "loss": 1.5898, + "step": 22330 + }, + { + "epoch": 1.71, + "learning_rate": 3.0950561164970435e-05, + "loss": 3.3699, + "step": 22335 + }, + { + "epoch": 1.71, + "learning_rate": 3.091033428536949e-05, + "loss": 3.958, + "step": 22340 + }, + { + "epoch": 1.71, + "learning_rate": 3.087010740576853e-05, + "loss": 4.5051, + "step": 22345 + }, + { + "epoch": 1.71, + "learning_rate": 3.082988052616759e-05, + "loss": 3.4871, + "step": 22350 + }, + { + "epoch": 1.71, + "learning_rate": 3.0789653646566634e-05, + "loss": 2.7721, + "step": 22355 + }, + { + "epoch": 1.71, + "learning_rate": 3.0749426766965686e-05, + "loss": 3.9631, + "step": 22360 + }, + { + "epoch": 1.71, + "learning_rate": 3.070919988736474e-05, + "loss": 4.0345, + "step": 22365 + }, + { + "epoch": 1.71, + "learning_rate": 3.066897300776379e-05, + "loss": 2.9204, + "step": 22370 + }, + { + "epoch": 1.71, + "learning_rate": 3.062874612816284e-05, + "loss": 2.6203, + "step": 22375 + }, + { + "epoch": 1.71, + "learning_rate": 3.058851924856189e-05, + "loss": 2.0655, + "step": 22380 + }, + { + "epoch": 1.71, + "learning_rate": 3.054829236896094e-05, + "loss": 2.9641, + "step": 22385 + }, + { + "epoch": 1.71, + "learning_rate": 3.0508065489359995e-05, + "loss": 3.8604, + "step": 22390 + }, + { + "epoch": 1.71, + "learning_rate": 3.0467838609759043e-05, + "loss": 4.3584, + "step": 22395 + }, + { + "epoch": 1.71, + "learning_rate": 3.0427611730158094e-05, + "loss": 4.0123, + "step": 22400 + }, + { + "epoch": 1.71, + "learning_rate": 3.0387384850557142e-05, + "loss": 4.0429, + "step": 22405 + }, + { + "epoch": 1.71, + "learning_rate": 3.0347157970956197e-05, + "loss": 3.8104, + "step": 22410 + }, + { + "epoch": 1.71, + "learning_rate": 3.0306931091355245e-05, + "loss": 2.3843, + "step": 22415 + }, + { + "epoch": 1.71, + "learning_rate": 3.0266704211754293e-05, + "loss": 2.1063, + "step": 22420 + }, + { + "epoch": 1.71, + "learning_rate": 3.0226477332153348e-05, + "loss": 2.0419, + "step": 22425 + }, + { + "epoch": 1.71, + "learning_rate": 3.0186250452552396e-05, + "loss": 1.0568, + "step": 22430 + }, + { + "epoch": 1.71, + "learning_rate": 3.015406894887164e-05, + "loss": 3.2819, + "step": 22435 + }, + { + "epoch": 1.72, + "learning_rate": 3.0113842069270688e-05, + "loss": 5.0303, + "step": 22440 + }, + { + "epoch": 1.72, + "learning_rate": 3.007361518966974e-05, + "loss": 3.7967, + "step": 22445 + }, + { + "epoch": 1.72, + "learning_rate": 3.0033388310068787e-05, + "loss": 3.6611, + "step": 22450 + }, + { + "epoch": 1.72, + "learning_rate": 2.9993161430467842e-05, + "loss": 3.6416, + "step": 22455 + }, + { + "epoch": 1.72, + "learning_rate": 2.995293455086689e-05, + "loss": 1.9096, + "step": 22460 + }, + { + "epoch": 1.72, + "learning_rate": 2.9912707671265945e-05, + "loss": 1.793, + "step": 22465 + }, + { + "epoch": 1.72, + "learning_rate": 2.9872480791664993e-05, + "loss": 2.3129, + "step": 22470 + }, + { + "epoch": 1.72, + "learning_rate": 2.983225391206404e-05, + "loss": 0.621, + "step": 22475 + }, + { + "epoch": 1.72, + "learning_rate": 2.9792027032463093e-05, + "loss": 1.0425, + "step": 22480 + }, + { + "epoch": 1.72, + "learning_rate": 2.975180015286214e-05, + "loss": 2.1539, + "step": 22485 + }, + { + "epoch": 1.72, + "learning_rate": 2.9711573273261196e-05, + "loss": 4.5879, + "step": 22490 + }, + { + "epoch": 1.72, + "learning_rate": 2.9671346393660244e-05, + "loss": 5.5826, + "step": 22495 + }, + { + "epoch": 1.72, + "learning_rate": 2.96311195140593e-05, + "loss": 4.015, + "step": 22500 + }, + { + "epoch": 1.72, + "learning_rate": 2.9590892634458343e-05, + "loss": 3.9393, + "step": 22505 + }, + { + "epoch": 1.72, + "learning_rate": 2.9550665754857398e-05, + "loss": 4.0424, + "step": 22510 + }, + { + "epoch": 1.72, + "learning_rate": 2.9510438875256446e-05, + "loss": 2.3775, + "step": 22515 + }, + { + "epoch": 1.72, + "learning_rate": 2.94702119956555e-05, + "loss": 3.9154, + "step": 22520 + }, + { + "epoch": 1.72, + "learning_rate": 2.942998511605455e-05, + "loss": 3.6949, + "step": 22525 + }, + { + "epoch": 1.72, + "learning_rate": 2.93897582364536e-05, + "loss": 3.6259, + "step": 22530 + }, + { + "epoch": 1.72, + "learning_rate": 2.934953135685265e-05, + "loss": 2.0479, + "step": 22535 + }, + { + "epoch": 1.72, + "learning_rate": 2.9309304477251703e-05, + "loss": 4.7387, + "step": 22540 + }, + { + "epoch": 1.72, + "learning_rate": 2.926907759765075e-05, + "loss": 4.1223, + "step": 22545 + }, + { + "epoch": 1.72, + "learning_rate": 2.9228850718049806e-05, + "loss": 3.8104, + "step": 22550 + }, + { + "epoch": 1.72, + "learning_rate": 2.9188623838448854e-05, + "loss": 2.8752, + "step": 22555 + }, + { + "epoch": 1.72, + "learning_rate": 2.9148396958847902e-05, + "loss": 3.5936, + "step": 22560 + }, + { + "epoch": 1.72, + "learning_rate": 2.9108170079246954e-05, + "loss": 3.1607, + "step": 22565 + }, + { + "epoch": 1.73, + "learning_rate": 2.9067943199646002e-05, + "loss": 2.8457, + "step": 22570 + }, + { + "epoch": 1.73, + "learning_rate": 2.9027716320045057e-05, + "loss": 2.4739, + "step": 22575 + }, + { + "epoch": 1.73, + "learning_rate": 2.8987489440444105e-05, + "loss": 3.7004, + "step": 22580 + }, + { + "epoch": 1.73, + "learning_rate": 2.8947262560843156e-05, + "loss": 2.4516, + "step": 22585 + }, + { + "epoch": 1.73, + "learning_rate": 2.8907035681242205e-05, + "loss": 4.2662, + "step": 22590 + }, + { + "epoch": 1.73, + "learning_rate": 2.886680880164126e-05, + "loss": 3.1941, + "step": 22595 + }, + { + "epoch": 1.73, + "learning_rate": 2.8826581922040307e-05, + "loss": 4.3604, + "step": 22600 + }, + { + "epoch": 1.73, + "learning_rate": 2.8786355042439362e-05, + "loss": 4.5975, + "step": 22605 + }, + { + "epoch": 1.73, + "learning_rate": 2.874612816283841e-05, + "loss": 2.4618, + "step": 22610 + }, + { + "epoch": 1.73, + "learning_rate": 2.8705901283237462e-05, + "loss": 2.5024, + "step": 22615 + }, + { + "epoch": 1.73, + "learning_rate": 2.866567440363651e-05, + "loss": 2.0588, + "step": 22620 + }, + { + "epoch": 1.73, + "learning_rate": 2.8625447524035565e-05, + "loss": 2.6591, + "step": 22625 + }, + { + "epoch": 1.73, + "learning_rate": 2.8585220644434613e-05, + "loss": 1.1996, + "step": 22630 + }, + { + "epoch": 1.73, + "learning_rate": 2.854499376483366e-05, + "loss": 1.5975, + "step": 22635 + }, + { + "epoch": 1.73, + "learning_rate": 2.8504766885232716e-05, + "loss": 4.0307, + "step": 22640 + }, + { + "epoch": 1.73, + "learning_rate": 2.8464540005631764e-05, + "loss": 4.334, + "step": 22645 + }, + { + "epoch": 1.73, + "learning_rate": 2.8424313126030815e-05, + "loss": 3.3168, + "step": 22650 + }, + { + "epoch": 1.73, + "learning_rate": 2.8384086246429863e-05, + "loss": 3.9016, + "step": 22655 + }, + { + "epoch": 1.73, + "learning_rate": 2.8343859366828918e-05, + "loss": 2.4265, + "step": 22660 + }, + { + "epoch": 1.73, + "learning_rate": 2.8303632487227966e-05, + "loss": 2.7111, + "step": 22665 + }, + { + "epoch": 1.73, + "learning_rate": 2.8263405607627018e-05, + "loss": 2.3812, + "step": 22670 + }, + { + "epoch": 1.73, + "learning_rate": 2.8223178728026066e-05, + "loss": 2.0577, + "step": 22675 + }, + { + "epoch": 1.73, + "learning_rate": 2.818295184842512e-05, + "loss": 1.1668, + "step": 22680 + }, + { + "epoch": 1.73, + "learning_rate": 2.814272496882417e-05, + "loss": 2.8699, + "step": 22685 + }, + { + "epoch": 1.73, + "learning_rate": 2.8102498089223224e-05, + "loss": 4.2182, + "step": 22690 + }, + { + "epoch": 1.73, + "learning_rate": 2.8062271209622272e-05, + "loss": 4.5916, + "step": 22695 + }, + { + "epoch": 1.73, + "learning_rate": 2.8022044330021323e-05, + "loss": 4.7451, + "step": 22700 + }, + { + "epoch": 1.74, + "learning_rate": 2.798181745042037e-05, + "loss": 4.1365, + "step": 22705 + }, + { + "epoch": 1.74, + "learning_rate": 2.794159057081942e-05, + "loss": 2.2262, + "step": 22710 + }, + { + "epoch": 1.74, + "learning_rate": 2.7901363691218474e-05, + "loss": 2.2605, + "step": 22715 + }, + { + "epoch": 1.74, + "learning_rate": 2.7861136811617522e-05, + "loss": 3.7399, + "step": 22720 + }, + { + "epoch": 1.74, + "learning_rate": 2.7820909932016577e-05, + "loss": 2.6796, + "step": 22725 + }, + { + "epoch": 1.74, + "learning_rate": 2.7780683052415625e-05, + "loss": 2.1081, + "step": 22730 + }, + { + "epoch": 1.74, + "learning_rate": 2.7740456172814677e-05, + "loss": 3.9569, + "step": 22735 + }, + { + "epoch": 1.74, + "learning_rate": 2.7700229293213725e-05, + "loss": 5.2086, + "step": 22740 + }, + { + "epoch": 1.74, + "learning_rate": 2.766000241361278e-05, + "loss": 4.1189, + "step": 22745 + }, + { + "epoch": 1.74, + "learning_rate": 2.7619775534011828e-05, + "loss": 3.4363, + "step": 22750 + }, + { + "epoch": 1.74, + "learning_rate": 2.757954865441088e-05, + "loss": 4.1549, + "step": 22755 + }, + { + "epoch": 1.74, + "learning_rate": 2.7539321774809927e-05, + "loss": 3.4964, + "step": 22760 + }, + { + "epoch": 1.74, + "learning_rate": 2.7499094895208982e-05, + "loss": 3.3692, + "step": 22765 + }, + { + "epoch": 1.74, + "learning_rate": 2.745886801560803e-05, + "loss": 1.6907, + "step": 22770 + }, + { + "epoch": 1.74, + "learning_rate": 2.7418641136007085e-05, + "loss": 1.4017, + "step": 22775 + }, + { + "epoch": 1.74, + "learning_rate": 2.7378414256406133e-05, + "loss": 1.4608, + "step": 22780 + }, + { + "epoch": 1.74, + "learning_rate": 2.733818737680518e-05, + "loss": 2.8462, + "step": 22785 + }, + { + "epoch": 1.74, + "learning_rate": 2.7297960497204233e-05, + "loss": 4.6463, + "step": 22790 + }, + { + "epoch": 1.74, + "learning_rate": 2.725773361760328e-05, + "loss": 4.1804, + "step": 22795 + }, + { + "epoch": 1.74, + "learning_rate": 2.7217506738002336e-05, + "loss": 3.4373, + "step": 22800 + }, + { + "epoch": 1.74, + "learning_rate": 2.7177279858401384e-05, + "loss": 2.9582, + "step": 22805 + }, + { + "epoch": 1.74, + "learning_rate": 2.7137052978800435e-05, + "loss": 3.3782, + "step": 22810 + }, + { + "epoch": 1.74, + "learning_rate": 2.7096826099199483e-05, + "loss": 3.1693, + "step": 22815 + }, + { + "epoch": 1.74, + "learning_rate": 2.7056599219598538e-05, + "loss": 2.3896, + "step": 22820 + }, + { + "epoch": 1.74, + "learning_rate": 2.7016372339997586e-05, + "loss": 2.9684, + "step": 22825 + }, + { + "epoch": 1.74, + "learning_rate": 2.697614546039664e-05, + "loss": 2.6333, + "step": 22830 + }, + { + "epoch": 1.75, + "learning_rate": 2.693591858079569e-05, + "loss": 4.8025, + "step": 22835 + }, + { + "epoch": 1.75, + "learning_rate": 2.689569170119474e-05, + "loss": 3.923, + "step": 22840 + }, + { + "epoch": 1.75, + "learning_rate": 2.685546482159379e-05, + "loss": 3.8394, + "step": 22845 + }, + { + "epoch": 1.75, + "learning_rate": 2.6815237941992844e-05, + "loss": 4.2166, + "step": 22850 + }, + { + "epoch": 1.75, + "learning_rate": 2.677501106239189e-05, + "loss": 3.2895, + "step": 22855 + }, + { + "epoch": 1.75, + "learning_rate": 2.673478418279094e-05, + "loss": 3.2745, + "step": 22860 + }, + { + "epoch": 1.75, + "learning_rate": 2.6694557303189995e-05, + "loss": 2.9658, + "step": 22865 + }, + { + "epoch": 1.75, + "learning_rate": 2.6654330423589043e-05, + "loss": 2.6981, + "step": 22870 + }, + { + "epoch": 1.75, + "learning_rate": 2.6614103543988094e-05, + "loss": 1.1102, + "step": 22875 + }, + { + "epoch": 1.75, + "learning_rate": 2.6573876664387142e-05, + "loss": 0.2825, + "step": 22880 + }, + { + "epoch": 1.75, + "learning_rate": 2.6533649784786197e-05, + "loss": 0.6763, + "step": 22885 + }, + { + "epoch": 1.75, + "learning_rate": 2.6493422905185245e-05, + "loss": 3.8113, + "step": 22890 + }, + { + "epoch": 1.75, + "learning_rate": 2.6453196025584297e-05, + "loss": 3.5855, + "step": 22895 + }, + { + "epoch": 1.75, + "learning_rate": 2.6412969145983345e-05, + "loss": 4.2107, + "step": 22900 + }, + { + "epoch": 1.75, + "learning_rate": 2.63727422663824e-05, + "loss": 3.6761, + "step": 22905 + }, + { + "epoch": 1.75, + "learning_rate": 2.6332515386781448e-05, + "loss": 2.6449, + "step": 22910 + }, + { + "epoch": 1.75, + "learning_rate": 2.6292288507180502e-05, + "loss": 1.46, + "step": 22915 + }, + { + "epoch": 1.75, + "learning_rate": 2.625206162757955e-05, + "loss": 2.5006, + "step": 22920 + }, + { + "epoch": 1.75, + "learning_rate": 2.6211834747978602e-05, + "loss": 1.2944, + "step": 22925 + }, + { + "epoch": 1.75, + "learning_rate": 2.617160786837765e-05, + "loss": 3.3048, + "step": 22930 + }, + { + "epoch": 1.75, + "learning_rate": 2.6131380988776698e-05, + "loss": 2.9723, + "step": 22935 + }, + { + "epoch": 1.75, + "learning_rate": 2.6091154109175753e-05, + "loss": 4.3123, + "step": 22940 + }, + { + "epoch": 1.75, + "learning_rate": 2.60509272295748e-05, + "loss": 5.226, + "step": 22945 + }, + { + "epoch": 1.75, + "learning_rate": 2.6010700349973856e-05, + "loss": 3.9012, + "step": 22950 + }, + { + "epoch": 1.75, + "learning_rate": 2.5970473470372904e-05, + "loss": 3.7363, + "step": 22955 + }, + { + "epoch": 1.75, + "learning_rate": 2.5930246590771956e-05, + "loss": 3.6529, + "step": 22960 + }, + { + "epoch": 1.76, + "learning_rate": 2.5890019711171004e-05, + "loss": 2.3566, + "step": 22965 + }, + { + "epoch": 1.76, + "learning_rate": 2.584979283157006e-05, + "loss": 2.1602, + "step": 22970 + }, + { + "epoch": 1.76, + "learning_rate": 2.5809565951969107e-05, + "loss": 3.2619, + "step": 22975 + }, + { + "epoch": 1.76, + "learning_rate": 2.5769339072368158e-05, + "loss": 2.565, + "step": 22980 + }, + { + "epoch": 1.76, + "learning_rate": 2.5729112192767206e-05, + "loss": 3.3445, + "step": 22985 + }, + { + "epoch": 1.76, + "learning_rate": 2.568888531316626e-05, + "loss": 4.1391, + "step": 22990 + }, + { + "epoch": 1.76, + "learning_rate": 2.564865843356531e-05, + "loss": 3.7564, + "step": 22995 + }, + { + "epoch": 1.76, + "learning_rate": 2.5608431553964364e-05, + "loss": 3.3184, + "step": 23000 + }, + { + "epoch": 1.76, + "learning_rate": 2.5568204674363412e-05, + "loss": 3.8339, + "step": 23005 + }, + { + "epoch": 1.76, + "learning_rate": 2.552797779476246e-05, + "loss": 2.4366, + "step": 23010 + }, + { + "epoch": 1.76, + "learning_rate": 2.548775091516151e-05, + "loss": 2.4264, + "step": 23015 + }, + { + "epoch": 1.76, + "learning_rate": 2.544752403556056e-05, + "loss": 1.9581, + "step": 23020 + }, + { + "epoch": 1.76, + "learning_rate": 2.5407297155959614e-05, + "loss": 1.901, + "step": 23025 + }, + { + "epoch": 1.76, + "learning_rate": 2.5367070276358662e-05, + "loss": 0.0299, + "step": 23030 + }, + { + "epoch": 1.76, + "learning_rate": 2.5326843396757717e-05, + "loss": 1.0677, + "step": 23035 + }, + { + "epoch": 1.76, + "learning_rate": 2.5286616517156762e-05, + "loss": 4.8, + "step": 23040 + }, + { + "epoch": 1.76, + "learning_rate": 2.5246389637555817e-05, + "loss": 3.8922, + "step": 23045 + }, + { + "epoch": 1.76, + "learning_rate": 2.5206162757954865e-05, + "loss": 3.9426, + "step": 23050 + }, + { + "epoch": 1.76, + "learning_rate": 2.516593587835392e-05, + "loss": 4.1615, + "step": 23055 + }, + { + "epoch": 1.76, + "learning_rate": 2.5125708998752968e-05, + "loss": 4.2786, + "step": 23060 + }, + { + "epoch": 1.76, + "learning_rate": 2.508548211915202e-05, + "loss": 3.0869, + "step": 23065 + }, + { + "epoch": 1.76, + "learning_rate": 2.5045255239551067e-05, + "loss": 2.5565, + "step": 23070 + }, + { + "epoch": 1.76, + "learning_rate": 2.5005028359950122e-05, + "loss": 2.0955, + "step": 23075 + }, + { + "epoch": 1.76, + "learning_rate": 2.496480148034917e-05, + "loss": 1.9791, + "step": 23080 + }, + { + "epoch": 1.76, + "learning_rate": 2.4924574600748222e-05, + "loss": 1.7667, + "step": 23085 + }, + { + "epoch": 1.76, + "learning_rate": 2.4884347721147273e-05, + "loss": 3.9285, + "step": 23090 + }, + { + "epoch": 1.77, + "learning_rate": 2.484412084154632e-05, + "loss": 4.3834, + "step": 23095 + }, + { + "epoch": 1.77, + "learning_rate": 2.4803893961945373e-05, + "loss": 4.3969, + "step": 23100 + }, + { + "epoch": 1.77, + "learning_rate": 2.4763667082344424e-05, + "loss": 4.2646, + "step": 23105 + }, + { + "epoch": 1.77, + "learning_rate": 2.4723440202743476e-05, + "loss": 3.1369, + "step": 23110 + }, + { + "epoch": 1.77, + "learning_rate": 2.4683213323142527e-05, + "loss": 3.584, + "step": 23115 + }, + { + "epoch": 1.77, + "learning_rate": 2.4642986443541575e-05, + "loss": 2.7839, + "step": 23120 + }, + { + "epoch": 1.77, + "learning_rate": 2.4602759563940623e-05, + "loss": 1.9654, + "step": 23125 + }, + { + "epoch": 1.77, + "learning_rate": 2.4562532684339675e-05, + "loss": 2.4053, + "step": 23130 + }, + { + "epoch": 1.77, + "learning_rate": 2.4522305804738726e-05, + "loss": 2.3811, + "step": 23135 + }, + { + "epoch": 1.77, + "learning_rate": 2.4482078925137778e-05, + "loss": 4.3805, + "step": 23140 + }, + { + "epoch": 1.77, + "learning_rate": 2.444185204553683e-05, + "loss": 3.634, + "step": 23145 + }, + { + "epoch": 1.77, + "learning_rate": 2.440162516593588e-05, + "loss": 4.3363, + "step": 23150 + }, + { + "epoch": 1.77, + "learning_rate": 2.436139828633493e-05, + "loss": 4.2129, + "step": 23155 + }, + { + "epoch": 1.77, + "learning_rate": 2.432117140673398e-05, + "loss": 3.1537, + "step": 23160 + }, + { + "epoch": 1.77, + "learning_rate": 2.4280944527133032e-05, + "loss": 3.78, + "step": 23165 + }, + { + "epoch": 1.77, + "learning_rate": 2.4240717647532083e-05, + "loss": 3.6497, + "step": 23170 + }, + { + "epoch": 1.77, + "learning_rate": 2.4200490767931135e-05, + "loss": 4.7099, + "step": 23175 + }, + { + "epoch": 1.77, + "learning_rate": 2.4160263888330183e-05, + "loss": 1.8507, + "step": 23180 + }, + { + "epoch": 1.77, + "learning_rate": 2.4120037008729234e-05, + "loss": 2.9047, + "step": 23185 + }, + { + "epoch": 1.77, + "learning_rate": 2.4079810129128286e-05, + "loss": 4.0793, + "step": 23190 + }, + { + "epoch": 1.77, + "learning_rate": 2.4039583249527337e-05, + "loss": 4.4531, + "step": 23195 + }, + { + "epoch": 1.77, + "learning_rate": 2.3999356369926385e-05, + "loss": 5.0652, + "step": 23200 + }, + { + "epoch": 1.77, + "learning_rate": 2.3959129490325437e-05, + "loss": 3.3764, + "step": 23205 + }, + { + "epoch": 1.77, + "learning_rate": 2.3918902610724485e-05, + "loss": 3.6626, + "step": 23210 + }, + { + "epoch": 1.77, + "learning_rate": 2.3878675731123536e-05, + "loss": 3.4744, + "step": 23215 + }, + { + "epoch": 1.77, + "learning_rate": 2.3838448851522588e-05, + "loss": 2.2833, + "step": 23220 + }, + { + "epoch": 1.78, + "learning_rate": 2.379822197192164e-05, + "loss": 0.5065, + "step": 23225 + }, + { + "epoch": 1.78, + "learning_rate": 2.375799509232069e-05, + "loss": 2.4229, + "step": 23230 + }, + { + "epoch": 1.78, + "learning_rate": 2.371776821271974e-05, + "loss": 0.7311, + "step": 23235 + }, + { + "epoch": 1.78, + "learning_rate": 2.367754133311879e-05, + "loss": 3.8694, + "step": 23240 + }, + { + "epoch": 1.78, + "learning_rate": 2.3637314453517842e-05, + "loss": 3.3383, + "step": 23245 + }, + { + "epoch": 1.78, + "learning_rate": 2.3597087573916893e-05, + "loss": 3.9184, + "step": 23250 + }, + { + "epoch": 1.78, + "learning_rate": 2.3556860694315945e-05, + "loss": 3.0708, + "step": 23255 + }, + { + "epoch": 1.78, + "learning_rate": 2.3516633814714996e-05, + "loss": 2.9025, + "step": 23260 + }, + { + "epoch": 1.78, + "learning_rate": 2.3476406935114044e-05, + "loss": 2.6064, + "step": 23265 + }, + { + "epoch": 1.78, + "learning_rate": 2.3436180055513096e-05, + "loss": 1.7971, + "step": 23270 + }, + { + "epoch": 1.78, + "learning_rate": 2.3395953175912147e-05, + "loss": 2.8858, + "step": 23275 + }, + { + "epoch": 1.78, + "learning_rate": 2.3355726296311195e-05, + "loss": 3.6838, + "step": 23280 + }, + { + "epoch": 1.78, + "learning_rate": 2.3315499416710247e-05, + "loss": 2.2048, + "step": 23285 + }, + { + "epoch": 1.78, + "learning_rate": 2.3275272537109298e-05, + "loss": 4.4943, + "step": 23290 + }, + { + "epoch": 1.78, + "learning_rate": 2.3235045657508346e-05, + "loss": 4.283, + "step": 23295 + }, + { + "epoch": 1.78, + "learning_rate": 2.3194818777907398e-05, + "loss": 4.518, + "step": 23300 + }, + { + "epoch": 1.78, + "learning_rate": 2.315459189830645e-05, + "loss": 3.516, + "step": 23305 + }, + { + "epoch": 1.78, + "learning_rate": 2.31143650187055e-05, + "loss": 4.2672, + "step": 23310 + }, + { + "epoch": 1.78, + "learning_rate": 2.3074138139104552e-05, + "loss": 3.4592, + "step": 23315 + }, + { + "epoch": 1.78, + "learning_rate": 2.30339112595036e-05, + "loss": 3.5898, + "step": 23320 + }, + { + "epoch": 1.78, + "learning_rate": 2.299368437990265e-05, + "loss": 2.7908, + "step": 23325 + }, + { + "epoch": 1.78, + "learning_rate": 2.2953457500301703e-05, + "loss": 2.1385, + "step": 23330 + }, + { + "epoch": 1.78, + "learning_rate": 2.2913230620700755e-05, + "loss": 1.7825, + "step": 23335 + }, + { + "epoch": 1.78, + "learning_rate": 2.2873003741099806e-05, + "loss": 4.9459, + "step": 23340 + }, + { + "epoch": 1.78, + "learning_rate": 2.2832776861498854e-05, + "loss": 3.7873, + "step": 23345 + }, + { + "epoch": 1.78, + "learning_rate": 2.2792549981897906e-05, + "loss": 3.7168, + "step": 23350 + }, + { + "epoch": 1.79, + "learning_rate": 2.2752323102296954e-05, + "loss": 3.9023, + "step": 23355 + }, + { + "epoch": 1.79, + "learning_rate": 2.2712096222696005e-05, + "loss": 1.3564, + "step": 23360 + }, + { + "epoch": 1.79, + "learning_rate": 2.2671869343095057e-05, + "loss": 3.3062, + "step": 23365 + }, + { + "epoch": 1.79, + "learning_rate": 2.2631642463494108e-05, + "loss": 4.416, + "step": 23370 + }, + { + "epoch": 1.79, + "learning_rate": 2.259141558389316e-05, + "loss": 3.2855, + "step": 23375 + }, + { + "epoch": 1.79, + "learning_rate": 2.2551188704292208e-05, + "loss": 3.3284, + "step": 23380 + }, + { + "epoch": 1.79, + "learning_rate": 2.251096182469126e-05, + "loss": 2.627, + "step": 23385 + }, + { + "epoch": 1.79, + "learning_rate": 2.247073494509031e-05, + "loss": 4.008, + "step": 23390 + }, + { + "epoch": 1.79, + "learning_rate": 2.2430508065489362e-05, + "loss": 4.5375, + "step": 23395 + }, + { + "epoch": 1.79, + "learning_rate": 2.2390281185888413e-05, + "loss": 4.3504, + "step": 23400 + }, + { + "epoch": 1.79, + "learning_rate": 2.235005430628746e-05, + "loss": 2.7492, + "step": 23405 + }, + { + "epoch": 1.79, + "learning_rate": 2.2309827426686513e-05, + "loss": 2.9355, + "step": 23410 + }, + { + "epoch": 1.79, + "learning_rate": 2.2269600547085564e-05, + "loss": 2.2093, + "step": 23415 + }, + { + "epoch": 1.79, + "learning_rate": 2.2229373667484616e-05, + "loss": 2.8348, + "step": 23420 + }, + { + "epoch": 1.79, + "learning_rate": 2.2189146787883667e-05, + "loss": 2.7705, + "step": 23425 + }, + { + "epoch": 1.79, + "learning_rate": 2.2148919908282715e-05, + "loss": 1.4777, + "step": 23430 + }, + { + "epoch": 1.79, + "learning_rate": 2.2108693028681764e-05, + "loss": 2.0384, + "step": 23435 + }, + { + "epoch": 1.79, + "learning_rate": 2.2068466149080815e-05, + "loss": 3.5889, + "step": 23440 + }, + { + "epoch": 1.79, + "learning_rate": 2.2028239269479867e-05, + "loss": 4.2697, + "step": 23445 + }, + { + "epoch": 1.79, + "learning_rate": 2.1988012389878918e-05, + "loss": 3.7738, + "step": 23450 + }, + { + "epoch": 1.79, + "learning_rate": 2.194778551027797e-05, + "loss": 3.7055, + "step": 23455 + }, + { + "epoch": 1.79, + "learning_rate": 2.1907558630677018e-05, + "loss": 3.8967, + "step": 23460 + }, + { + "epoch": 1.79, + "learning_rate": 2.186733175107607e-05, + "loss": 4.0676, + "step": 23465 + }, + { + "epoch": 1.79, + "learning_rate": 2.182710487147512e-05, + "loss": 3.5904, + "step": 23470 + }, + { + "epoch": 1.79, + "learning_rate": 2.1786877991874172e-05, + "loss": 1.4659, + "step": 23475 + }, + { + "epoch": 1.79, + "learning_rate": 2.1746651112273223e-05, + "loss": 4.3659, + "step": 23480 + }, + { + "epoch": 1.79, + "learning_rate": 2.1706424232672275e-05, + "loss": 2.4338, + "step": 23485 + }, + { + "epoch": 1.8, + "learning_rate": 2.1666197353071323e-05, + "loss": 3.2225, + "step": 23490 + }, + { + "epoch": 1.8, + "learning_rate": 2.1625970473470374e-05, + "loss": 3.9035, + "step": 23495 + }, + { + "epoch": 1.8, + "learning_rate": 2.1585743593869426e-05, + "loss": 4.0298, + "step": 23500 + }, + { + "epoch": 1.8, + "learning_rate": 2.1545516714268474e-05, + "loss": 4.2254, + "step": 23505 + }, + { + "epoch": 1.8, + "learning_rate": 2.1505289834667525e-05, + "loss": 2.9377, + "step": 23510 + }, + { + "epoch": 1.8, + "learning_rate": 2.1465062955066577e-05, + "loss": 2.3562, + "step": 23515 + }, + { + "epoch": 1.8, + "learning_rate": 2.1424836075465625e-05, + "loss": 1.3816, + "step": 23520 + }, + { + "epoch": 1.8, + "learning_rate": 2.1384609195864676e-05, + "loss": 2.5952, + "step": 23525 + }, + { + "epoch": 1.8, + "learning_rate": 2.1344382316263728e-05, + "loss": 1.156, + "step": 23530 + }, + { + "epoch": 1.8, + "learning_rate": 2.130415543666278e-05, + "loss": 2.085, + "step": 23535 + }, + { + "epoch": 1.8, + "learning_rate": 2.126392855706183e-05, + "loss": 4.2512, + "step": 23540 + }, + { + "epoch": 1.8, + "learning_rate": 2.122370167746088e-05, + "loss": 3.7354, + "step": 23545 + }, + { + "epoch": 1.8, + "learning_rate": 2.118347479785993e-05, + "loss": 3.8537, + "step": 23550 + }, + { + "epoch": 1.8, + "learning_rate": 2.1143247918258982e-05, + "loss": 3.9422, + "step": 23555 + }, + { + "epoch": 1.8, + "learning_rate": 2.1103021038658033e-05, + "loss": 2.9316, + "step": 23560 + }, + { + "epoch": 1.8, + "learning_rate": 2.1062794159057085e-05, + "loss": 3.9269, + "step": 23565 + }, + { + "epoch": 1.8, + "learning_rate": 2.1022567279456136e-05, + "loss": 1.4704, + "step": 23570 + }, + { + "epoch": 1.8, + "learning_rate": 2.0982340399855184e-05, + "loss": 2.5099, + "step": 23575 + }, + { + "epoch": 1.8, + "learning_rate": 2.0942113520254236e-05, + "loss": 2.5829, + "step": 23580 + }, + { + "epoch": 1.8, + "learning_rate": 2.0901886640653284e-05, + "loss": 4.3772, + "step": 23585 + }, + { + "epoch": 1.8, + "learning_rate": 2.0861659761052335e-05, + "loss": 3.4992, + "step": 23590 + }, + { + "epoch": 1.8, + "learning_rate": 2.0821432881451387e-05, + "loss": 4.1639, + "step": 23595 + }, + { + "epoch": 1.8, + "learning_rate": 2.0781206001850438e-05, + "loss": 3.1853, + "step": 23600 + }, + { + "epoch": 1.8, + "learning_rate": 2.0740979122249486e-05, + "loss": 3.6627, + "step": 23605 + }, + { + "epoch": 1.8, + "learning_rate": 2.0700752242648538e-05, + "loss": 3.7691, + "step": 23610 + }, + { + "epoch": 1.8, + "learning_rate": 2.066052536304759e-05, + "loss": 2.473, + "step": 23615 + }, + { + "epoch": 1.81, + "learning_rate": 2.062029848344664e-05, + "loss": 2.8132, + "step": 23620 + }, + { + "epoch": 1.81, + "learning_rate": 2.0580071603845692e-05, + "loss": 1.6381, + "step": 23625 + }, + { + "epoch": 1.81, + "learning_rate": 2.053984472424474e-05, + "loss": 1.6351, + "step": 23630 + }, + { + "epoch": 1.81, + "learning_rate": 2.0499617844643792e-05, + "loss": 2.8561, + "step": 23635 + }, + { + "epoch": 1.81, + "learning_rate": 2.0459390965042843e-05, + "loss": 3.5916, + "step": 23640 + }, + { + "epoch": 1.81, + "learning_rate": 2.0419164085441895e-05, + "loss": 3.0802, + "step": 23645 + }, + { + "epoch": 1.81, + "learning_rate": 2.0378937205840946e-05, + "loss": 4.2158, + "step": 23650 + }, + { + "epoch": 1.81, + "learning_rate": 2.0338710326239994e-05, + "loss": 1.5458, + "step": 23655 + }, + { + "epoch": 1.81, + "learning_rate": 2.0298483446639042e-05, + "loss": 3.1277, + "step": 23660 + }, + { + "epoch": 1.81, + "learning_rate": 2.0258256567038094e-05, + "loss": 3.5196, + "step": 23665 + }, + { + "epoch": 1.81, + "learning_rate": 2.0218029687437145e-05, + "loss": 3.1044, + "step": 23670 + }, + { + "epoch": 1.81, + "learning_rate": 2.0177802807836197e-05, + "loss": 4.1605, + "step": 23675 + }, + { + "epoch": 1.81, + "learning_rate": 2.0137575928235248e-05, + "loss": 1.0403, + "step": 23680 + }, + { + "epoch": 1.81, + "learning_rate": 2.00973490486343e-05, + "loss": 3.4376, + "step": 23685 + }, + { + "epoch": 1.81, + "learning_rate": 2.0057122169033348e-05, + "loss": 3.7799, + "step": 23690 + }, + { + "epoch": 1.81, + "learning_rate": 2.00168952894324e-05, + "loss": 5.3645, + "step": 23695 + }, + { + "epoch": 1.81, + "learning_rate": 1.997666840983145e-05, + "loss": 4.4109, + "step": 23700 + }, + { + "epoch": 1.81, + "learning_rate": 1.9936441530230502e-05, + "loss": 4.6688, + "step": 23705 + }, + { + "epoch": 1.81, + "learning_rate": 1.9896214650629554e-05, + "loss": 3.8071, + "step": 23710 + }, + { + "epoch": 1.81, + "learning_rate": 1.98559877710286e-05, + "loss": 3.7666, + "step": 23715 + }, + { + "epoch": 1.81, + "learning_rate": 1.9815760891427653e-05, + "loss": 2.0954, + "step": 23720 + }, + { + "epoch": 1.81, + "learning_rate": 1.9775534011826705e-05, + "loss": 2.5181, + "step": 23725 + }, + { + "epoch": 1.81, + "learning_rate": 1.9735307132225756e-05, + "loss": 3.2477, + "step": 23730 + }, + { + "epoch": 1.81, + "learning_rate": 1.9695080252624804e-05, + "loss": 1.3823, + "step": 23735 + }, + { + "epoch": 1.81, + "learning_rate": 1.9654853373023856e-05, + "loss": 4.2088, + "step": 23740 + }, + { + "epoch": 1.81, + "learning_rate": 1.9614626493422904e-05, + "loss": 4.3371, + "step": 23745 + }, + { + "epoch": 1.82, + "learning_rate": 1.9574399613821955e-05, + "loss": 4.924, + "step": 23750 + }, + { + "epoch": 1.82, + "learning_rate": 1.9534172734221007e-05, + "loss": 2.5295, + "step": 23755 + }, + { + "epoch": 1.82, + "learning_rate": 1.9493945854620058e-05, + "loss": 2.1711, + "step": 23760 + }, + { + "epoch": 1.82, + "learning_rate": 1.945371897501911e-05, + "loss": 1.7804, + "step": 23765 + }, + { + "epoch": 1.82, + "learning_rate": 1.9413492095418158e-05, + "loss": 2.7061, + "step": 23770 + }, + { + "epoch": 1.82, + "learning_rate": 1.937326521581721e-05, + "loss": 1.8079, + "step": 23775 + }, + { + "epoch": 1.82, + "learning_rate": 1.933303833621626e-05, + "loss": 2.6042, + "step": 23780 + }, + { + "epoch": 1.82, + "learning_rate": 1.9292811456615312e-05, + "loss": 1.4644, + "step": 23785 + }, + { + "epoch": 1.82, + "learning_rate": 1.9252584577014364e-05, + "loss": 3.4973, + "step": 23790 + }, + { + "epoch": 1.82, + "learning_rate": 1.9212357697413415e-05, + "loss": 5.3607, + "step": 23795 + }, + { + "epoch": 1.82, + "learning_rate": 1.9172130817812463e-05, + "loss": 2.8048, + "step": 23800 + }, + { + "epoch": 1.82, + "learning_rate": 1.9131903938211515e-05, + "loss": 1.9658, + "step": 23805 + }, + { + "epoch": 1.82, + "learning_rate": 1.9091677058610566e-05, + "loss": 3.1126, + "step": 23810 + }, + { + "epoch": 1.82, + "learning_rate": 1.9051450179009614e-05, + "loss": 2.9254, + "step": 23815 + }, + { + "epoch": 1.82, + "learning_rate": 1.9011223299408666e-05, + "loss": 0.5672, + "step": 23820 + }, + { + "epoch": 1.82, + "learning_rate": 1.8970996419807717e-05, + "loss": 2.4323, + "step": 23825 + }, + { + "epoch": 1.82, + "learning_rate": 1.8930769540206765e-05, + "loss": 1.714, + "step": 23830 + }, + { + "epoch": 1.82, + "learning_rate": 1.8890542660605817e-05, + "loss": 1.6771, + "step": 23835 + }, + { + "epoch": 1.82, + "learning_rate": 1.8850315781004868e-05, + "loss": 5.8141, + "step": 23840 + }, + { + "epoch": 1.82, + "learning_rate": 1.881008890140392e-05, + "loss": 3.5088, + "step": 23845 + }, + { + "epoch": 1.82, + "learning_rate": 1.876986202180297e-05, + "loss": 3.4639, + "step": 23850 + }, + { + "epoch": 1.82, + "learning_rate": 1.872963514220202e-05, + "loss": 4.4504, + "step": 23855 + }, + { + "epoch": 1.82, + "learning_rate": 1.868940826260107e-05, + "loss": 3.6905, + "step": 23860 + }, + { + "epoch": 1.82, + "learning_rate": 1.8649181383000122e-05, + "loss": 3.9271, + "step": 23865 + }, + { + "epoch": 1.82, + "learning_rate": 1.8608954503399173e-05, + "loss": 2.276, + "step": 23870 + }, + { + "epoch": 1.82, + "learning_rate": 1.8568727623798225e-05, + "loss": 1.4453, + "step": 23875 + }, + { + "epoch": 1.83, + "learning_rate": 1.8528500744197273e-05, + "loss": 1.511, + "step": 23880 + }, + { + "epoch": 1.83, + "learning_rate": 1.8488273864596324e-05, + "loss": 1.0376, + "step": 23885 + }, + { + "epoch": 1.83, + "learning_rate": 1.8448046984995373e-05, + "loss": 4.748, + "step": 23890 + }, + { + "epoch": 1.83, + "learning_rate": 1.8407820105394424e-05, + "loss": 3.6019, + "step": 23895 + }, + { + "epoch": 1.83, + "learning_rate": 1.8367593225793475e-05, + "loss": 4.3068, + "step": 23900 + }, + { + "epoch": 1.83, + "learning_rate": 1.8327366346192527e-05, + "loss": 3.8338, + "step": 23905 + }, + { + "epoch": 1.83, + "learning_rate": 1.828713946659158e-05, + "loss": 1.3604, + "step": 23910 + }, + { + "epoch": 1.83, + "learning_rate": 1.8246912586990626e-05, + "loss": 2.0121, + "step": 23915 + }, + { + "epoch": 1.83, + "learning_rate": 1.8206685707389678e-05, + "loss": 3.177, + "step": 23920 + }, + { + "epoch": 1.83, + "learning_rate": 1.816645882778873e-05, + "loss": 2.4406, + "step": 23925 + }, + { + "epoch": 1.83, + "learning_rate": 1.812623194818778e-05, + "loss": 2.1305, + "step": 23930 + }, + { + "epoch": 1.83, + "learning_rate": 1.8086005068586832e-05, + "loss": 0.9836, + "step": 23935 + }, + { + "epoch": 1.83, + "learning_rate": 1.804577818898588e-05, + "loss": 4.1375, + "step": 23940 + }, + { + "epoch": 1.83, + "learning_rate": 1.8005551309384932e-05, + "loss": 3.9156, + "step": 23945 + }, + { + "epoch": 1.83, + "learning_rate": 1.7965324429783983e-05, + "loss": 3.7585, + "step": 23950 + }, + { + "epoch": 1.83, + "learning_rate": 1.7925097550183035e-05, + "loss": 3.27, + "step": 23955 + }, + { + "epoch": 1.83, + "learning_rate": 1.7884870670582086e-05, + "loss": 4.2422, + "step": 23960 + }, + { + "epoch": 1.83, + "learning_rate": 1.7844643790981134e-05, + "loss": 3.0293, + "step": 23965 + }, + { + "epoch": 1.83, + "learning_rate": 1.7804416911380182e-05, + "loss": 3.3175, + "step": 23970 + }, + { + "epoch": 1.83, + "learning_rate": 1.7764190031779234e-05, + "loss": 2.4429, + "step": 23975 + }, + { + "epoch": 1.83, + "learning_rate": 1.7723963152178285e-05, + "loss": 1.251, + "step": 23980 + }, + { + "epoch": 1.83, + "learning_rate": 1.7683736272577337e-05, + "loss": 2.908, + "step": 23985 + }, + { + "epoch": 1.83, + "learning_rate": 1.764350939297639e-05, + "loss": 4.0328, + "step": 23990 + }, + { + "epoch": 1.83, + "learning_rate": 1.7603282513375436e-05, + "loss": 3.9133, + "step": 23995 + }, + { + "epoch": 1.83, + "learning_rate": 1.7563055633774488e-05, + "loss": 4.8342, + "step": 24000 + }, + { + "epoch": 1.83, + "learning_rate": 1.752282875417354e-05, + "loss": 3.8279, + "step": 24005 + }, + { + "epoch": 1.84, + "learning_rate": 1.748260187457259e-05, + "loss": 1.9251, + "step": 24010 + }, + { + "epoch": 1.84, + "learning_rate": 1.7442374994971642e-05, + "loss": 2.9262, + "step": 24015 + }, + { + "epoch": 1.84, + "learning_rate": 1.7402148115370694e-05, + "loss": 2.1363, + "step": 24020 + }, + { + "epoch": 1.84, + "learning_rate": 1.7361921235769742e-05, + "loss": 1.7946, + "step": 24025 + }, + { + "epoch": 1.84, + "learning_rate": 1.7321694356168793e-05, + "loss": 1.5292, + "step": 24030 + }, + { + "epoch": 1.84, + "learning_rate": 1.7281467476567845e-05, + "loss": 4.4959, + "step": 24035 + }, + { + "epoch": 1.84, + "learning_rate": 1.7241240596966893e-05, + "loss": 4.6133, + "step": 24040 + }, + { + "epoch": 1.84, + "learning_rate": 1.7201013717365944e-05, + "loss": 4.9404, + "step": 24045 + }, + { + "epoch": 1.84, + "learning_rate": 1.7160786837764996e-05, + "loss": 3.764, + "step": 24050 + }, + { + "epoch": 1.84, + "learning_rate": 1.7120559958164044e-05, + "loss": 4.8266, + "step": 24055 + }, + { + "epoch": 1.84, + "learning_rate": 1.7080333078563095e-05, + "loss": 3.7552, + "step": 24060 + }, + { + "epoch": 1.84, + "learning_rate": 1.7040106198962147e-05, + "loss": 2.8064, + "step": 24065 + }, + { + "epoch": 1.84, + "learning_rate": 1.6999879319361198e-05, + "loss": 3.0139, + "step": 24070 + }, + { + "epoch": 1.84, + "learning_rate": 1.695965243976025e-05, + "loss": 2.4623, + "step": 24075 + }, + { + "epoch": 1.84, + "learning_rate": 1.6919425560159298e-05, + "loss": 3.624, + "step": 24080 + }, + { + "epoch": 1.84, + "learning_rate": 1.687919868055835e-05, + "loss": 1.5064, + "step": 24085 + }, + { + "epoch": 1.84, + "learning_rate": 1.68389718009574e-05, + "loss": 4.409, + "step": 24090 + }, + { + "epoch": 1.84, + "learning_rate": 1.6798744921356452e-05, + "loss": 5.5877, + "step": 24095 + }, + { + "epoch": 1.84, + "learning_rate": 1.6758518041755504e-05, + "loss": 3.7055, + "step": 24100 + }, + { + "epoch": 1.84, + "learning_rate": 1.6718291162154555e-05, + "loss": 2.6408, + "step": 24105 + }, + { + "epoch": 1.84, + "learning_rate": 1.6678064282553603e-05, + "loss": 4.0404, + "step": 24110 + }, + { + "epoch": 1.84, + "learning_rate": 1.6637837402952655e-05, + "loss": 2.8409, + "step": 24115 + }, + { + "epoch": 1.84, + "learning_rate": 1.6597610523351703e-05, + "loss": 2.9476, + "step": 24120 + }, + { + "epoch": 1.84, + "learning_rate": 1.6557383643750754e-05, + "loss": 0.8873, + "step": 24125 + }, + { + "epoch": 1.84, + "learning_rate": 1.6517156764149806e-05, + "loss": 2.8307, + "step": 24130 + }, + { + "epoch": 1.84, + "learning_rate": 1.6476929884548857e-05, + "loss": 2.2032, + "step": 24135 + }, + { + "epoch": 1.85, + "learning_rate": 1.6436703004947905e-05, + "loss": 4.2484, + "step": 24140 + }, + { + "epoch": 1.85, + "learning_rate": 1.6396476125346957e-05, + "loss": 3.6621, + "step": 24145 + }, + { + "epoch": 1.85, + "learning_rate": 1.6356249245746008e-05, + "loss": 4.5232, + "step": 24150 + }, + { + "epoch": 1.85, + "learning_rate": 1.631602236614506e-05, + "loss": 3.4658, + "step": 24155 + }, + { + "epoch": 1.85, + "learning_rate": 1.627579548654411e-05, + "loss": 3.6425, + "step": 24160 + }, + { + "epoch": 1.85, + "learning_rate": 1.623556860694316e-05, + "loss": 2.3619, + "step": 24165 + }, + { + "epoch": 1.85, + "learning_rate": 1.619534172734221e-05, + "loss": 2.8156, + "step": 24170 + }, + { + "epoch": 1.85, + "learning_rate": 1.6155114847741262e-05, + "loss": 0.9569, + "step": 24175 + }, + { + "epoch": 1.85, + "learning_rate": 1.6114887968140314e-05, + "loss": 1.9015, + "step": 24180 + }, + { + "epoch": 1.85, + "learning_rate": 1.6074661088539365e-05, + "loss": 1.9707, + "step": 24185 + }, + { + "epoch": 1.85, + "learning_rate": 1.6034434208938413e-05, + "loss": 4.2664, + "step": 24190 + }, + { + "epoch": 1.85, + "learning_rate": 1.599420732933746e-05, + "loss": 4.7297, + "step": 24195 + }, + { + "epoch": 1.85, + "learning_rate": 1.5953980449736513e-05, + "loss": 3.9335, + "step": 24200 + }, + { + "epoch": 1.85, + "learning_rate": 1.5913753570135564e-05, + "loss": 3.4213, + "step": 24205 + }, + { + "epoch": 1.85, + "learning_rate": 1.5873526690534616e-05, + "loss": 3.0822, + "step": 24210 + }, + { + "epoch": 1.85, + "learning_rate": 1.5833299810933667e-05, + "loss": 2.5482, + "step": 24215 + }, + { + "epoch": 1.85, + "learning_rate": 1.579307293133272e-05, + "loss": 3.5275, + "step": 24220 + }, + { + "epoch": 1.85, + "learning_rate": 1.5752846051731767e-05, + "loss": 2.0448, + "step": 24225 + }, + { + "epoch": 1.85, + "learning_rate": 1.5712619172130818e-05, + "loss": 1.7843, + "step": 24230 + }, + { + "epoch": 1.85, + "learning_rate": 1.567239229252987e-05, + "loss": 1.5835, + "step": 24235 + }, + { + "epoch": 1.85, + "learning_rate": 1.563216541292892e-05, + "loss": 4.832, + "step": 24240 + }, + { + "epoch": 1.85, + "learning_rate": 1.5591938533327973e-05, + "loss": 3.8062, + "step": 24245 + }, + { + "epoch": 1.85, + "learning_rate": 1.555171165372702e-05, + "loss": 3.6148, + "step": 24250 + }, + { + "epoch": 1.85, + "learning_rate": 1.5511484774126072e-05, + "loss": 4.2596, + "step": 24255 + }, + { + "epoch": 1.85, + "learning_rate": 1.5471257894525124e-05, + "loss": 3.2226, + "step": 24260 + }, + { + "epoch": 1.85, + "learning_rate": 1.5431031014924175e-05, + "loss": 2.3329, + "step": 24265 + }, + { + "epoch": 1.85, + "learning_rate": 1.5390804135323223e-05, + "loss": 3.3771, + "step": 24270 + }, + { + "epoch": 1.86, + "learning_rate": 1.5350577255722275e-05, + "loss": 2.6768, + "step": 24275 + }, + { + "epoch": 1.86, + "learning_rate": 1.5310350376121323e-05, + "loss": 3.1228, + "step": 24280 + }, + { + "epoch": 1.86, + "learning_rate": 1.5270123496520374e-05, + "loss": 1.9548, + "step": 24285 + }, + { + "epoch": 1.86, + "learning_rate": 1.5229896616919426e-05, + "loss": 4.4299, + "step": 24290 + }, + { + "epoch": 1.86, + "learning_rate": 1.5189669737318477e-05, + "loss": 4.6846, + "step": 24295 + }, + { + "epoch": 1.86, + "learning_rate": 1.5149442857717527e-05, + "loss": 4.5248, + "step": 24300 + }, + { + "epoch": 1.86, + "learning_rate": 1.5109215978116578e-05, + "loss": 3.1845, + "step": 24305 + }, + { + "epoch": 1.86, + "learning_rate": 1.506898909851563e-05, + "loss": 2.2644, + "step": 24310 + }, + { + "epoch": 1.86, + "learning_rate": 1.502876221891468e-05, + "loss": 1.6771, + "step": 24315 + }, + { + "epoch": 1.86, + "learning_rate": 1.4988535339313731e-05, + "loss": 2.1373, + "step": 24320 + }, + { + "epoch": 1.86, + "learning_rate": 1.494830845971278e-05, + "loss": 1.9354, + "step": 24325 + }, + { + "epoch": 1.86, + "learning_rate": 1.4908081580111832e-05, + "loss": 1.3599, + "step": 24330 + }, + { + "epoch": 1.86, + "learning_rate": 1.4867854700510884e-05, + "loss": 1.6992, + "step": 24335 + }, + { + "epoch": 1.86, + "learning_rate": 1.4827627820909933e-05, + "loss": 4.1914, + "step": 24340 + }, + { + "epoch": 1.86, + "learning_rate": 1.4787400941308982e-05, + "loss": 5.009, + "step": 24345 + }, + { + "epoch": 1.86, + "learning_rate": 1.4747174061708033e-05, + "loss": 3.6693, + "step": 24350 + }, + { + "epoch": 1.86, + "learning_rate": 1.4706947182107084e-05, + "loss": 3.537, + "step": 24355 + }, + { + "epoch": 1.86, + "learning_rate": 1.4666720302506134e-05, + "loss": 2.5959, + "step": 24360 + }, + { + "epoch": 1.86, + "learning_rate": 1.4626493422905186e-05, + "loss": 2.8873, + "step": 24365 + }, + { + "epoch": 1.86, + "learning_rate": 1.4586266543304235e-05, + "loss": 2.0024, + "step": 24370 + }, + { + "epoch": 1.86, + "learning_rate": 1.4546039663703287e-05, + "loss": 3.3242, + "step": 24375 + }, + { + "epoch": 1.86, + "learning_rate": 1.4505812784102338e-05, + "loss": 1.5681, + "step": 24380 + }, + { + "epoch": 1.86, + "learning_rate": 1.4465585904501388e-05, + "loss": 4.3365, + "step": 24385 + }, + { + "epoch": 1.86, + "learning_rate": 1.442535902490044e-05, + "loss": 4.5049, + "step": 24390 + }, + { + "epoch": 1.86, + "learning_rate": 1.4385132145299491e-05, + "loss": 3.7834, + "step": 24395 + }, + { + "epoch": 1.86, + "learning_rate": 1.4344905265698541e-05, + "loss": 3.6518, + "step": 24400 + }, + { + "epoch": 1.87, + "learning_rate": 1.4304678386097592e-05, + "loss": 3.5904, + "step": 24405 + }, + { + "epoch": 1.87, + "learning_rate": 1.4264451506496642e-05, + "loss": 2.5824, + "step": 24410 + }, + { + "epoch": 1.87, + "learning_rate": 1.4224224626895694e-05, + "loss": 3.6668, + "step": 24415 + }, + { + "epoch": 1.87, + "learning_rate": 1.4183997747294745e-05, + "loss": 2.6437, + "step": 24420 + }, + { + "epoch": 1.87, + "learning_rate": 1.4143770867693793e-05, + "loss": 1.704, + "step": 24425 + }, + { + "epoch": 1.87, + "learning_rate": 1.4103543988092843e-05, + "loss": 2.013, + "step": 24430 + }, + { + "epoch": 1.87, + "learning_rate": 1.4063317108491894e-05, + "loss": 0.8377, + "step": 24435 + }, + { + "epoch": 1.87, + "learning_rate": 1.4023090228890944e-05, + "loss": 4.3678, + "step": 24440 + }, + { + "epoch": 1.87, + "learning_rate": 1.3982863349289996e-05, + "loss": 4.1889, + "step": 24445 + }, + { + "epoch": 1.87, + "learning_rate": 1.3942636469689047e-05, + "loss": 4.3562, + "step": 24450 + }, + { + "epoch": 1.87, + "learning_rate": 1.3902409590088097e-05, + "loss": 4.6929, + "step": 24455 + }, + { + "epoch": 1.87, + "learning_rate": 1.3862182710487148e-05, + "loss": 3.6932, + "step": 24460 + }, + { + "epoch": 1.87, + "learning_rate": 1.38219558308862e-05, + "loss": 2.9506, + "step": 24465 + }, + { + "epoch": 1.87, + "learning_rate": 1.378172895128525e-05, + "loss": 1.9533, + "step": 24470 + }, + { + "epoch": 1.87, + "learning_rate": 1.3741502071684301e-05, + "loss": 1.3351, + "step": 24475 + }, + { + "epoch": 1.87, + "learning_rate": 1.370127519208335e-05, + "loss": 2.0779, + "step": 24480 + }, + { + "epoch": 1.87, + "learning_rate": 1.3661048312482402e-05, + "loss": 1.5529, + "step": 24485 + }, + { + "epoch": 1.87, + "learning_rate": 1.3620821432881454e-05, + "loss": 4.1068, + "step": 24490 + }, + { + "epoch": 1.87, + "learning_rate": 1.3580594553280504e-05, + "loss": 4.8277, + "step": 24495 + }, + { + "epoch": 1.87, + "learning_rate": 1.3540367673679552e-05, + "loss": 3.857, + "step": 24500 + }, + { + "epoch": 1.87, + "learning_rate": 1.3500140794078603e-05, + "loss": 4.1428, + "step": 24505 + }, + { + "epoch": 1.87, + "learning_rate": 1.3459913914477655e-05, + "loss": 3.019, + "step": 24510 + }, + { + "epoch": 1.87, + "learning_rate": 1.3419687034876704e-05, + "loss": 2.5749, + "step": 24515 + }, + { + "epoch": 1.87, + "learning_rate": 1.3379460155275756e-05, + "loss": 3.4932, + "step": 24520 + }, + { + "epoch": 1.87, + "learning_rate": 1.3339233275674806e-05, + "loss": 2.0479, + "step": 24525 + }, + { + "epoch": 1.87, + "learning_rate": 1.3299006396073857e-05, + "loss": 3.4595, + "step": 24530 + }, + { + "epoch": 1.88, + "learning_rate": 1.3258779516472908e-05, + "loss": 2.5689, + "step": 24535 + }, + { + "epoch": 1.88, + "learning_rate": 1.3218552636871958e-05, + "loss": 3.9021, + "step": 24540 + }, + { + "epoch": 1.88, + "learning_rate": 1.317832575727101e-05, + "loss": 4.1131, + "step": 24545 + }, + { + "epoch": 1.88, + "learning_rate": 1.313809887767006e-05, + "loss": 3.3045, + "step": 24550 + }, + { + "epoch": 1.88, + "learning_rate": 1.3097871998069111e-05, + "loss": 3.8982, + "step": 24555 + }, + { + "epoch": 1.88, + "learning_rate": 1.3057645118468162e-05, + "loss": 2.1605, + "step": 24560 + }, + { + "epoch": 1.88, + "learning_rate": 1.3017418238867212e-05, + "loss": 3.1492, + "step": 24565 + }, + { + "epoch": 1.88, + "learning_rate": 1.2977191359266264e-05, + "loss": 1.427, + "step": 24570 + }, + { + "epoch": 1.88, + "learning_rate": 1.2936964479665312e-05, + "loss": 2.1544, + "step": 24575 + }, + { + "epoch": 1.88, + "learning_rate": 1.2896737600064363e-05, + "loss": 3.2604, + "step": 24580 + }, + { + "epoch": 1.88, + "learning_rate": 1.2856510720463413e-05, + "loss": 1.2599, + "step": 24585 + }, + { + "epoch": 1.88, + "learning_rate": 1.2816283840862464e-05, + "loss": 4.0691, + "step": 24590 + }, + { + "epoch": 1.88, + "learning_rate": 1.2776056961261514e-05, + "loss": 4.2889, + "step": 24595 + }, + { + "epoch": 1.88, + "learning_rate": 1.2735830081660566e-05, + "loss": 3.4133, + "step": 24600 + }, + { + "epoch": 1.88, + "learning_rate": 1.2695603202059617e-05, + "loss": 4.1035, + "step": 24605 + }, + { + "epoch": 1.88, + "learning_rate": 1.2655376322458667e-05, + "loss": 3.4185, + "step": 24610 + }, + { + "epoch": 1.88, + "learning_rate": 1.2615149442857718e-05, + "loss": 2.4912, + "step": 24615 + }, + { + "epoch": 1.88, + "learning_rate": 1.257492256325677e-05, + "loss": 2.678, + "step": 24620 + }, + { + "epoch": 1.88, + "learning_rate": 1.253469568365582e-05, + "loss": 1.9975, + "step": 24625 + }, + { + "epoch": 1.88, + "learning_rate": 1.2494468804054871e-05, + "loss": 0.6405, + "step": 24630 + }, + { + "epoch": 1.88, + "learning_rate": 1.2454241924453921e-05, + "loss": 1.8826, + "step": 24635 + }, + { + "epoch": 1.88, + "learning_rate": 1.241401504485297e-05, + "loss": 4.4268, + "step": 24640 + }, + { + "epoch": 1.88, + "learning_rate": 1.2373788165252022e-05, + "loss": 3.8896, + "step": 24645 + }, + { + "epoch": 1.88, + "learning_rate": 1.2333561285651072e-05, + "loss": 4.8965, + "step": 24650 + }, + { + "epoch": 1.88, + "learning_rate": 1.2293334406050123e-05, + "loss": 3.1094, + "step": 24655 + }, + { + "epoch": 1.88, + "learning_rate": 1.2253107526449175e-05, + "loss": 2.9067, + "step": 24660 + }, + { + "epoch": 1.89, + "learning_rate": 1.2212880646848225e-05, + "loss": 2.9209, + "step": 24665 + }, + { + "epoch": 1.89, + "learning_rate": 1.2172653767247274e-05, + "loss": 2.7946, + "step": 24670 + }, + { + "epoch": 1.89, + "learning_rate": 1.2132426887646326e-05, + "loss": 1.3476, + "step": 24675 + }, + { + "epoch": 1.89, + "learning_rate": 1.2092200008045376e-05, + "loss": 1.7138, + "step": 24680 + }, + { + "epoch": 1.89, + "learning_rate": 1.2051973128444427e-05, + "loss": 2.9379, + "step": 24685 + }, + { + "epoch": 1.89, + "learning_rate": 1.2011746248843479e-05, + "loss": 3.7029, + "step": 24690 + }, + { + "epoch": 1.89, + "learning_rate": 1.1971519369242528e-05, + "loss": 4.3479, + "step": 24695 + }, + { + "epoch": 1.89, + "learning_rate": 1.193129248964158e-05, + "loss": 3.7926, + "step": 24700 + }, + { + "epoch": 1.89, + "learning_rate": 1.189106561004063e-05, + "loss": 3.3841, + "step": 24705 + }, + { + "epoch": 1.89, + "learning_rate": 1.185083873043968e-05, + "loss": 4.1531, + "step": 24710 + }, + { + "epoch": 1.89, + "learning_rate": 1.181061185083873e-05, + "loss": 2.6995, + "step": 24715 + }, + { + "epoch": 1.89, + "learning_rate": 1.1770384971237782e-05, + "loss": 1.0784, + "step": 24720 + }, + { + "epoch": 1.89, + "learning_rate": 1.1730158091636832e-05, + "loss": 2.5422, + "step": 24725 + }, + { + "epoch": 1.89, + "learning_rate": 1.1689931212035884e-05, + "loss": 2.6564, + "step": 24730 + }, + { + "epoch": 1.89, + "learning_rate": 1.1649704332434933e-05, + "loss": 3.302, + "step": 24735 + }, + { + "epoch": 1.89, + "learning_rate": 1.1609477452833985e-05, + "loss": 3.8146, + "step": 24740 + }, + { + "epoch": 1.89, + "learning_rate": 1.1569250573233035e-05, + "loss": 3.9229, + "step": 24745 + }, + { + "epoch": 1.89, + "learning_rate": 1.1529023693632084e-05, + "loss": 4.4492, + "step": 24750 + }, + { + "epoch": 1.89, + "learning_rate": 1.1488796814031136e-05, + "loss": 3.6742, + "step": 24755 + }, + { + "epoch": 1.89, + "learning_rate": 1.1448569934430187e-05, + "loss": 2.4106, + "step": 24760 + }, + { + "epoch": 1.89, + "learning_rate": 1.1408343054829237e-05, + "loss": 2.7309, + "step": 24765 + }, + { + "epoch": 1.89, + "learning_rate": 1.1368116175228288e-05, + "loss": 1.6355, + "step": 24770 + }, + { + "epoch": 1.89, + "learning_rate": 1.132788929562734e-05, + "loss": 2.7919, + "step": 24775 + }, + { + "epoch": 1.89, + "learning_rate": 1.128766241602639e-05, + "loss": 4.5257, + "step": 24780 + }, + { + "epoch": 1.89, + "learning_rate": 1.124743553642544e-05, + "loss": 3.9398, + "step": 24785 + }, + { + "epoch": 1.89, + "learning_rate": 1.1207208656824491e-05, + "loss": 4.6109, + "step": 24790 + }, + { + "epoch": 1.9, + "learning_rate": 1.116698177722354e-05, + "loss": 3.3293, + "step": 24795 + }, + { + "epoch": 1.9, + "learning_rate": 1.1126754897622592e-05, + "loss": 3.921, + "step": 24800 + }, + { + "epoch": 1.9, + "learning_rate": 1.1086528018021642e-05, + "loss": 4.2807, + "step": 24805 + }, + { + "epoch": 1.9, + "learning_rate": 1.1046301138420693e-05, + "loss": 3.1406, + "step": 24810 + }, + { + "epoch": 1.9, + "learning_rate": 1.1006074258819745e-05, + "loss": 3.4969, + "step": 24815 + }, + { + "epoch": 1.9, + "learning_rate": 1.0965847379218795e-05, + "loss": 2.3031, + "step": 24820 + }, + { + "epoch": 1.9, + "learning_rate": 1.0925620499617844e-05, + "loss": 3.3691, + "step": 24825 + }, + { + "epoch": 1.9, + "learning_rate": 1.0885393620016896e-05, + "loss": 2.8064, + "step": 24830 + }, + { + "epoch": 1.9, + "learning_rate": 1.0845166740415946e-05, + "loss": 1.1896, + "step": 24835 + }, + { + "epoch": 1.9, + "learning_rate": 1.0804939860814997e-05, + "loss": 4.5449, + "step": 24840 + }, + { + "epoch": 1.9, + "learning_rate": 1.0764712981214049e-05, + "loss": 3.3299, + "step": 24845 + }, + { + "epoch": 1.9, + "learning_rate": 1.0724486101613098e-05, + "loss": 4.4686, + "step": 24850 + }, + { + "epoch": 1.9, + "learning_rate": 1.068425922201215e-05, + "loss": 3.9811, + "step": 24855 + }, + { + "epoch": 1.9, + "learning_rate": 1.06440323424112e-05, + "loss": 3.383, + "step": 24860 + }, + { + "epoch": 1.9, + "learning_rate": 1.060380546281025e-05, + "loss": 2.5898, + "step": 24865 + }, + { + "epoch": 1.9, + "learning_rate": 1.0563578583209301e-05, + "loss": 2.4245, + "step": 24870 + }, + { + "epoch": 1.9, + "learning_rate": 1.052335170360835e-05, + "loss": 2.3957, + "step": 24875 + }, + { + "epoch": 1.9, + "learning_rate": 1.0483124824007402e-05, + "loss": 1.7501, + "step": 24880 + }, + { + "epoch": 1.9, + "learning_rate": 1.0442897944406454e-05, + "loss": 1.7959, + "step": 24885 + }, + { + "epoch": 1.9, + "learning_rate": 1.0402671064805503e-05, + "loss": 4.0732, + "step": 24890 + }, + { + "epoch": 1.9, + "learning_rate": 1.0362444185204555e-05, + "loss": 4.116, + "step": 24895 + }, + { + "epoch": 1.9, + "learning_rate": 1.0322217305603605e-05, + "loss": 4.5895, + "step": 24900 + }, + { + "epoch": 1.9, + "learning_rate": 1.0281990426002654e-05, + "loss": 3.9937, + "step": 24905 + }, + { + "epoch": 1.9, + "learning_rate": 1.0241763546401706e-05, + "loss": 4.1619, + "step": 24910 + }, + { + "epoch": 1.9, + "learning_rate": 1.0201536666800757e-05, + "loss": 4.0855, + "step": 24915 + }, + { + "epoch": 1.9, + "learning_rate": 1.0161309787199807e-05, + "loss": 2.9171, + "step": 24920 + }, + { + "epoch": 1.9, + "learning_rate": 1.0121082907598859e-05, + "loss": 1.7636, + "step": 24925 + }, + { + "epoch": 1.91, + "learning_rate": 1.008085602799791e-05, + "loss": 2.8449, + "step": 24930 + }, + { + "epoch": 1.91, + "learning_rate": 1.004062914839696e-05, + "loss": 2.8344, + "step": 24935 + }, + { + "epoch": 1.91, + "learning_rate": 1.000040226879601e-05, + "loss": 3.7783, + "step": 24940 + }, + { + "epoch": 1.91, + "learning_rate": 9.960175389195061e-06, + "loss": 4.4023, + "step": 24945 + }, + { + "epoch": 1.91, + "learning_rate": 9.91994850959411e-06, + "loss": 3.5332, + "step": 24950 + }, + { + "epoch": 1.91, + "learning_rate": 9.879721629993162e-06, + "loss": 2.6086, + "step": 24955 + }, + { + "epoch": 1.91, + "learning_rate": 9.839494750392212e-06, + "loss": 3.8582, + "step": 24960 + }, + { + "epoch": 1.91, + "learning_rate": 9.799267870791264e-06, + "loss": 2.062, + "step": 24965 + }, + { + "epoch": 1.91, + "learning_rate": 9.759040991190315e-06, + "loss": 3.416, + "step": 24970 + }, + { + "epoch": 1.91, + "learning_rate": 9.718814111589363e-06, + "loss": 2.2982, + "step": 24975 + }, + { + "epoch": 1.91, + "learning_rate": 9.678587231988415e-06, + "loss": 0.8549, + "step": 24980 + }, + { + "epoch": 1.91, + "learning_rate": 9.638360352387466e-06, + "loss": 2.1826, + "step": 24985 + }, + { + "epoch": 1.91, + "learning_rate": 9.598133472786516e-06, + "loss": 5.0311, + "step": 24990 + }, + { + "epoch": 1.91, + "learning_rate": 9.557906593185567e-06, + "loss": 4.0307, + "step": 24995 + }, + { + "epoch": 1.91, + "learning_rate": 9.517679713584619e-06, + "loss": 3.3335, + "step": 25000 + }, + { + "epoch": 1.91, + "learning_rate": 9.477452833983668e-06, + "loss": 2.483, + "step": 25005 + }, + { + "epoch": 1.91, + "learning_rate": 9.43722595438272e-06, + "loss": 3.2506, + "step": 25010 + }, + { + "epoch": 1.91, + "learning_rate": 9.39699907478177e-06, + "loss": 3.9112, + "step": 25015 + }, + { + "epoch": 1.91, + "learning_rate": 9.35677219518082e-06, + "loss": 2.9277, + "step": 25020 + }, + { + "epoch": 1.91, + "learning_rate": 9.316545315579871e-06, + "loss": 1.3599, + "step": 25025 + }, + { + "epoch": 1.91, + "learning_rate": 9.27631843597892e-06, + "loss": 0.6077, + "step": 25030 + }, + { + "epoch": 1.91, + "learning_rate": 9.236091556377972e-06, + "loss": 1.0871, + "step": 25035 + }, + { + "epoch": 1.91, + "learning_rate": 9.195864676777024e-06, + "loss": 3.9141, + "step": 25040 + }, + { + "epoch": 1.91, + "learning_rate": 9.155637797176073e-06, + "loss": 4.5723, + "step": 25045 + }, + { + "epoch": 1.91, + "learning_rate": 9.115410917575123e-06, + "loss": 3.6136, + "step": 25050 + }, + { + "epoch": 1.91, + "learning_rate": 9.075184037974175e-06, + "loss": 3.3955, + "step": 25055 + }, + { + "epoch": 1.92, + "learning_rate": 9.034957158373224e-06, + "loss": 3.2014, + "step": 25060 + }, + { + "epoch": 1.92, + "learning_rate": 8.994730278772276e-06, + "loss": 2.9881, + "step": 25065 + }, + { + "epoch": 1.92, + "learning_rate": 8.954503399171327e-06, + "loss": 3.3361, + "step": 25070 + }, + { + "epoch": 1.92, + "learning_rate": 8.914276519570377e-06, + "loss": 2.0125, + "step": 25075 + }, + { + "epoch": 1.92, + "learning_rate": 8.874049639969429e-06, + "loss": 2.2374, + "step": 25080 + }, + { + "epoch": 1.92, + "learning_rate": 8.833822760368478e-06, + "loss": 2.4366, + "step": 25085 + }, + { + "epoch": 1.92, + "learning_rate": 8.793595880767528e-06, + "loss": 3.9693, + "step": 25090 + }, + { + "epoch": 1.92, + "learning_rate": 8.75336900116658e-06, + "loss": 4.5941, + "step": 25095 + }, + { + "epoch": 1.92, + "learning_rate": 8.713142121565631e-06, + "loss": 4.5533, + "step": 25100 + }, + { + "epoch": 1.92, + "learning_rate": 8.672915241964681e-06, + "loss": 4.7197, + "step": 25105 + }, + { + "epoch": 1.92, + "learning_rate": 8.632688362363732e-06, + "loss": 3.6422, + "step": 25110 + }, + { + "epoch": 1.92, + "learning_rate": 8.592461482762782e-06, + "loss": 2.0502, + "step": 25115 + }, + { + "epoch": 1.92, + "learning_rate": 8.552234603161834e-06, + "loss": 3.1903, + "step": 25120 + }, + { + "epoch": 1.92, + "learning_rate": 8.512007723560885e-06, + "loss": 1.453, + "step": 25125 + }, + { + "epoch": 1.92, + "learning_rate": 8.471780843959933e-06, + "loss": 2.6905, + "step": 25130 + }, + { + "epoch": 1.92, + "learning_rate": 8.431553964358985e-06, + "loss": 1.6192, + "step": 25135 + }, + { + "epoch": 1.92, + "learning_rate": 8.391327084758036e-06, + "loss": 3.7004, + "step": 25140 + }, + { + "epoch": 1.92, + "learning_rate": 8.351100205157086e-06, + "loss": 3.7824, + "step": 25145 + }, + { + "epoch": 1.92, + "learning_rate": 8.310873325556137e-06, + "loss": 5.5062, + "step": 25150 + }, + { + "epoch": 1.92, + "learning_rate": 8.270646445955189e-06, + "loss": 3.8611, + "step": 25155 + }, + { + "epoch": 1.92, + "learning_rate": 8.230419566354239e-06, + "loss": 3.7684, + "step": 25160 + }, + { + "epoch": 1.92, + "learning_rate": 8.190192686753288e-06, + "loss": 3.4285, + "step": 25165 + }, + { + "epoch": 1.92, + "learning_rate": 8.14996580715234e-06, + "loss": 3.1098, + "step": 25170 + }, + { + "epoch": 1.92, + "learning_rate": 8.10973892755139e-06, + "loss": 3.1606, + "step": 25175 + }, + { + "epoch": 1.92, + "learning_rate": 8.069512047950441e-06, + "loss": 1.8826, + "step": 25180 + }, + { + "epoch": 1.92, + "learning_rate": 8.02928516834949e-06, + "loss": 2.1713, + "step": 25185 + }, + { + "epoch": 1.93, + "learning_rate": 7.989058288748542e-06, + "loss": 4.7242, + "step": 25190 + }, + { + "epoch": 1.93, + "learning_rate": 7.948831409147594e-06, + "loss": 4.0975, + "step": 25195 + }, + { + "epoch": 1.93, + "learning_rate": 7.908604529546644e-06, + "loss": 3.651, + "step": 25200 + }, + { + "epoch": 1.93, + "learning_rate": 7.868377649945693e-06, + "loss": 4.6125, + "step": 25205 + }, + { + "epoch": 1.93, + "learning_rate": 7.828150770344745e-06, + "loss": 3.1507, + "step": 25210 + }, + { + "epoch": 1.93, + "learning_rate": 7.787923890743795e-06, + "loss": 2.712, + "step": 25215 + }, + { + "epoch": 1.93, + "learning_rate": 7.747697011142846e-06, + "loss": 3.0889, + "step": 25220 + }, + { + "epoch": 1.93, + "learning_rate": 7.707470131541897e-06, + "loss": 1.9682, + "step": 25225 + }, + { + "epoch": 1.93, + "learning_rate": 7.667243251940947e-06, + "loss": 3.9404, + "step": 25230 + }, + { + "epoch": 1.93, + "learning_rate": 7.627016372339999e-06, + "loss": 2.6924, + "step": 25235 + }, + { + "epoch": 1.93, + "learning_rate": 7.586789492739049e-06, + "loss": 4.6404, + "step": 25240 + }, + { + "epoch": 1.93, + "learning_rate": 7.546562613138099e-06, + "loss": 4.0658, + "step": 25245 + }, + { + "epoch": 1.93, + "learning_rate": 7.50633573353715e-06, + "loss": 3.5027, + "step": 25250 + }, + { + "epoch": 1.93, + "learning_rate": 7.4661088539362e-06, + "loss": 4.2753, + "step": 25255 + }, + { + "epoch": 1.93, + "learning_rate": 7.425881974335251e-06, + "loss": 2.2604, + "step": 25260 + }, + { + "epoch": 1.93, + "learning_rate": 7.385655094734302e-06, + "loss": 2.5162, + "step": 25265 + }, + { + "epoch": 1.93, + "learning_rate": 7.345428215133353e-06, + "loss": 3.1524, + "step": 25270 + }, + { + "epoch": 1.93, + "learning_rate": 7.305201335532404e-06, + "loss": 2.0495, + "step": 25275 + }, + { + "epoch": 1.93, + "learning_rate": 7.2649744559314534e-06, + "loss": 2.8922, + "step": 25280 + }, + { + "epoch": 1.93, + "learning_rate": 7.224747576330504e-06, + "loss": 0.9049, + "step": 25285 + }, + { + "epoch": 1.93, + "learning_rate": 7.184520696729555e-06, + "loss": 3.7818, + "step": 25290 + }, + { + "epoch": 1.93, + "learning_rate": 7.144293817128605e-06, + "loss": 3.5922, + "step": 25295 + }, + { + "epoch": 1.93, + "learning_rate": 7.104066937527657e-06, + "loss": 3.708, + "step": 25300 + }, + { + "epoch": 1.93, + "learning_rate": 7.063840057926707e-06, + "loss": 3.2295, + "step": 25305 + }, + { + "epoch": 1.93, + "learning_rate": 7.023613178325758e-06, + "loss": 3.4177, + "step": 25310 + }, + { + "epoch": 1.93, + "learning_rate": 6.983386298724809e-06, + "loss": 2.8741, + "step": 25315 + }, + { + "epoch": 1.94, + "learning_rate": 6.943159419123858e-06, + "loss": 2.726, + "step": 25320 + }, + { + "epoch": 1.94, + "learning_rate": 6.902932539522909e-06, + "loss": 3.9523, + "step": 25325 + }, + { + "epoch": 1.94, + "learning_rate": 6.8707510358421505e-06, + "loss": 3.2671, + "step": 25330 + }, + { + "epoch": 1.94, + "learning_rate": 6.830524156241201e-06, + "loss": 3.2882, + "step": 25335 + }, + { + "epoch": 1.94, + "learning_rate": 6.790297276640252e-06, + "loss": 3.732, + "step": 25340 + }, + { + "epoch": 1.94, + "learning_rate": 6.7500703970393015e-06, + "loss": 5.0133, + "step": 25345 + }, + { + "epoch": 1.94, + "learning_rate": 6.709843517438352e-06, + "loss": 2.8558, + "step": 25350 + }, + { + "epoch": 1.94, + "learning_rate": 6.669616637837403e-06, + "loss": 3.9777, + "step": 25355 + }, + { + "epoch": 1.94, + "learning_rate": 6.629389758236454e-06, + "loss": 2.9651, + "step": 25360 + }, + { + "epoch": 1.94, + "learning_rate": 6.589162878635505e-06, + "loss": 3.3267, + "step": 25365 + }, + { + "epoch": 1.94, + "learning_rate": 6.5489359990345555e-06, + "loss": 2.6909, + "step": 25370 + }, + { + "epoch": 1.94, + "learning_rate": 6.508709119433606e-06, + "loss": 1.3714, + "step": 25375 + }, + { + "epoch": 1.94, + "learning_rate": 6.468482239832656e-06, + "loss": 3.6478, + "step": 25380 + }, + { + "epoch": 1.94, + "learning_rate": 6.4282553602317065e-06, + "loss": 0.7816, + "step": 25385 + }, + { + "epoch": 1.94, + "learning_rate": 6.388028480630757e-06, + "loss": 3.7924, + "step": 25390 + }, + { + "epoch": 1.94, + "learning_rate": 6.347801601029809e-06, + "loss": 3.7242, + "step": 25395 + }, + { + "epoch": 1.94, + "learning_rate": 6.307574721428859e-06, + "loss": 5.2104, + "step": 25400 + }, + { + "epoch": 1.94, + "learning_rate": 6.26734784182791e-06, + "loss": 3.7268, + "step": 25405 + }, + { + "epoch": 1.94, + "learning_rate": 6.2271209622269604e-06, + "loss": 3.1541, + "step": 25410 + }, + { + "epoch": 1.94, + "learning_rate": 6.186894082626011e-06, + "loss": 4.2297, + "step": 25415 + }, + { + "epoch": 1.94, + "learning_rate": 6.146667203025062e-06, + "loss": 2.2711, + "step": 25420 + }, + { + "epoch": 1.94, + "learning_rate": 6.106440323424112e-06, + "loss": 3.1184, + "step": 25425 + }, + { + "epoch": 1.94, + "learning_rate": 6.066213443823163e-06, + "loss": 2.7837, + "step": 25430 + }, + { + "epoch": 1.94, + "learning_rate": 6.0259865642222135e-06, + "loss": 1.4746, + "step": 25435 + }, + { + "epoch": 1.94, + "learning_rate": 5.985759684621264e-06, + "loss": 5.2686, + "step": 25440 + }, + { + "epoch": 1.94, + "learning_rate": 5.945532805020315e-06, + "loss": 4.7791, + "step": 25445 + }, + { + "epoch": 1.95, + "learning_rate": 5.905305925419365e-06, + "loss": 3.8869, + "step": 25450 + }, + { + "epoch": 1.95, + "learning_rate": 5.865079045818416e-06, + "loss": 3.2348, + "step": 25455 + }, + { + "epoch": 1.95, + "learning_rate": 5.824852166217467e-06, + "loss": 4.1375, + "step": 25460 + }, + { + "epoch": 1.95, + "learning_rate": 5.784625286616517e-06, + "loss": 2.7724, + "step": 25465 + }, + { + "epoch": 1.95, + "learning_rate": 5.744398407015568e-06, + "loss": 2.3747, + "step": 25470 + }, + { + "epoch": 1.95, + "learning_rate": 5.7041715274146185e-06, + "loss": 1.9912, + "step": 25475 + }, + { + "epoch": 1.95, + "learning_rate": 5.66394464781367e-06, + "loss": 2.2611, + "step": 25480 + }, + { + "epoch": 1.95, + "learning_rate": 5.62371776821272e-06, + "loss": 2.0734, + "step": 25485 + }, + { + "epoch": 1.95, + "learning_rate": 5.58349088861177e-06, + "loss": 4.0169, + "step": 25490 + }, + { + "epoch": 1.95, + "learning_rate": 5.543264009010821e-06, + "loss": 4.0657, + "step": 25495 + }, + { + "epoch": 1.95, + "learning_rate": 5.5030371294098725e-06, + "loss": 3.7256, + "step": 25500 + }, + { + "epoch": 1.95, + "learning_rate": 5.462810249808922e-06, + "loss": 3.7052, + "step": 25505 + }, + { + "epoch": 1.95, + "learning_rate": 5.422583370207973e-06, + "loss": 3.3589, + "step": 25510 + }, + { + "epoch": 1.95, + "learning_rate": 5.382356490607024e-06, + "loss": 3.0988, + "step": 25515 + }, + { + "epoch": 1.95, + "learning_rate": 5.342129611006075e-06, + "loss": 1.339, + "step": 25520 + }, + { + "epoch": 1.95, + "learning_rate": 5.301902731405125e-06, + "loss": 2.7499, + "step": 25525 + }, + { + "epoch": 1.95, + "learning_rate": 5.261675851804175e-06, + "loss": 2.7686, + "step": 25530 + }, + { + "epoch": 1.95, + "learning_rate": 5.221448972203227e-06, + "loss": 3.5382, + "step": 25535 + }, + { + "epoch": 1.95, + "learning_rate": 5.181222092602277e-06, + "loss": 3.9172, + "step": 25540 + }, + { + "epoch": 1.95, + "learning_rate": 5.140995213001327e-06, + "loss": 4.0725, + "step": 25545 + }, + { + "epoch": 1.95, + "learning_rate": 5.100768333400379e-06, + "loss": 4.2646, + "step": 25550 + }, + { + "epoch": 1.95, + "learning_rate": 5.060541453799429e-06, + "loss": 3.8309, + "step": 25555 + }, + { + "epoch": 1.95, + "learning_rate": 5.02031457419848e-06, + "loss": 3.0889, + "step": 25560 + }, + { + "epoch": 1.95, + "learning_rate": 4.9800876945975305e-06, + "loss": 3.4834, + "step": 25565 + }, + { + "epoch": 1.95, + "learning_rate": 4.939860814996581e-06, + "loss": 3.4956, + "step": 25570 + }, + { + "epoch": 1.95, + "learning_rate": 4.899633935395632e-06, + "loss": 1.8908, + "step": 25575 + }, + { + "epoch": 1.96, + "learning_rate": 4.8594070557946815e-06, + "loss": 4.3148, + "step": 25580 + }, + { + "epoch": 1.96, + "learning_rate": 4.819180176193733e-06, + "loss": 3.6078, + "step": 25585 + }, + { + "epoch": 1.96, + "learning_rate": 4.778953296592784e-06, + "loss": 3.7635, + "step": 25590 + }, + { + "epoch": 1.96, + "learning_rate": 4.738726416991834e-06, + "loss": 3.8357, + "step": 25595 + }, + { + "epoch": 1.96, + "learning_rate": 4.698499537390885e-06, + "loss": 3.5507, + "step": 25600 + }, + { + "epoch": 1.96, + "learning_rate": 4.6582726577899355e-06, + "loss": 2.5274, + "step": 25605 + }, + { + "epoch": 1.96, + "learning_rate": 4.618045778188986e-06, + "loss": 2.7862, + "step": 25610 + }, + { + "epoch": 1.96, + "learning_rate": 4.577818898588037e-06, + "loss": 2.16, + "step": 25615 + }, + { + "epoch": 1.96, + "learning_rate": 4.537592018987087e-06, + "loss": 3.5172, + "step": 25620 + }, + { + "epoch": 1.96, + "learning_rate": 4.497365139386138e-06, + "loss": 1.672, + "step": 25625 + }, + { + "epoch": 1.96, + "learning_rate": 4.4571382597851886e-06, + "loss": 0.9123, + "step": 25630 + }, + { + "epoch": 1.96, + "learning_rate": 4.416911380184239e-06, + "loss": 1.4239, + "step": 25635 + }, + { + "epoch": 1.96, + "learning_rate": 4.37668450058329e-06, + "loss": 4.6506, + "step": 25640 + }, + { + "epoch": 1.96, + "learning_rate": 4.3364576209823404e-06, + "loss": 3.7811, + "step": 25645 + }, + { + "epoch": 1.96, + "learning_rate": 4.296230741381391e-06, + "loss": 3.5947, + "step": 25650 + }, + { + "epoch": 1.96, + "learning_rate": 4.2560038617804425e-06, + "loss": 3.8759, + "step": 25655 + }, + { + "epoch": 1.96, + "learning_rate": 4.215776982179492e-06, + "loss": 3.2007, + "step": 25660 + }, + { + "epoch": 1.96, + "learning_rate": 4.175550102578543e-06, + "loss": 2.9636, + "step": 25665 + }, + { + "epoch": 1.96, + "learning_rate": 4.135323222977594e-06, + "loss": 2.5895, + "step": 25670 + }, + { + "epoch": 1.96, + "learning_rate": 4.095096343376644e-06, + "loss": 2.5369, + "step": 25675 + }, + { + "epoch": 1.96, + "learning_rate": 4.054869463775695e-06, + "loss": 1.8998, + "step": 25680 + }, + { + "epoch": 1.96, + "learning_rate": 4.014642584174745e-06, + "loss": 3.4223, + "step": 25685 + }, + { + "epoch": 1.96, + "learning_rate": 3.974415704573797e-06, + "loss": 4.2734, + "step": 25690 + }, + { + "epoch": 1.96, + "learning_rate": 3.934188824972847e-06, + "loss": 3.8895, + "step": 25695 + }, + { + "epoch": 1.96, + "learning_rate": 3.893961945371897e-06, + "loss": 4.1982, + "step": 25700 + }, + { + "epoch": 1.96, + "learning_rate": 3.853735065770949e-06, + "loss": 3.1064, + "step": 25705 + }, + { + "epoch": 1.96, + "learning_rate": 3.8135081861699993e-06, + "loss": 3.512, + "step": 25710 + }, + { + "epoch": 1.97, + "learning_rate": 3.7732813065690495e-06, + "loss": 2.4828, + "step": 25715 + }, + { + "epoch": 1.97, + "learning_rate": 3.7330544269681e-06, + "loss": 3.1687, + "step": 25720 + }, + { + "epoch": 1.97, + "learning_rate": 3.692827547367151e-06, + "loss": 3.2518, + "step": 25725 + }, + { + "epoch": 1.97, + "learning_rate": 3.652600667766202e-06, + "loss": 2.2701, + "step": 25730 + }, + { + "epoch": 1.97, + "learning_rate": 3.612373788165252e-06, + "loss": 2.698, + "step": 25735 + }, + { + "epoch": 1.97, + "learning_rate": 3.5721469085643026e-06, + "loss": 4.4486, + "step": 25740 + }, + { + "epoch": 1.97, + "learning_rate": 3.5319200289633537e-06, + "loss": 4.2291, + "step": 25745 + }, + { + "epoch": 1.97, + "learning_rate": 3.4916931493624043e-06, + "loss": 3.1553, + "step": 25750 + }, + { + "epoch": 1.97, + "learning_rate": 3.4514662697614545e-06, + "loss": 4.0023, + "step": 25755 + }, + { + "epoch": 1.97, + "learning_rate": 3.4112393901605055e-06, + "loss": 5.1721, + "step": 25760 + }, + { + "epoch": 1.97, + "learning_rate": 3.371012510559556e-06, + "loss": 2.4705, + "step": 25765 + }, + { + "epoch": 1.97, + "learning_rate": 3.330785630958607e-06, + "loss": 2.4637, + "step": 25770 + }, + { + "epoch": 1.97, + "learning_rate": 3.290558751357657e-06, + "loss": 2.728, + "step": 25775 + }, + { + "epoch": 1.97, + "learning_rate": 3.250331871756708e-06, + "loss": 2.834, + "step": 25780 + }, + { + "epoch": 1.97, + "learning_rate": 3.2101049921557586e-06, + "loss": 1.8569, + "step": 25785 + }, + { + "epoch": 1.97, + "learning_rate": 3.169878112554809e-06, + "loss": 4.0803, + "step": 25790 + }, + { + "epoch": 1.97, + "learning_rate": 3.12965123295386e-06, + "loss": 3.2705, + "step": 25795 + }, + { + "epoch": 1.97, + "learning_rate": 3.0894243533529105e-06, + "loss": 3.3291, + "step": 25800 + }, + { + "epoch": 1.97, + "learning_rate": 3.049197473751961e-06, + "loss": 5.2221, + "step": 25805 + }, + { + "epoch": 1.97, + "learning_rate": 3.0089705941510117e-06, + "loss": 3.5207, + "step": 25810 + }, + { + "epoch": 1.97, + "learning_rate": 2.9687437145500624e-06, + "loss": 3.5691, + "step": 25815 + }, + { + "epoch": 1.97, + "learning_rate": 2.9285168349491134e-06, + "loss": 1.4946, + "step": 25820 + }, + { + "epoch": 1.97, + "learning_rate": 2.8882899553481636e-06, + "loss": 2.3518, + "step": 25825 + }, + { + "epoch": 1.97, + "learning_rate": 2.8480630757472147e-06, + "loss": 2.2248, + "step": 25830 + }, + { + "epoch": 1.97, + "learning_rate": 2.807836196146265e-06, + "loss": 2.2885, + "step": 25835 + }, + { + "epoch": 1.97, + "learning_rate": 2.767609316545316e-06, + "loss": 3.764, + "step": 25840 + }, + { + "epoch": 1.98, + "learning_rate": 2.7273824369443665e-06, + "loss": 4.1377, + "step": 25845 + }, + { + "epoch": 1.98, + "learning_rate": 2.687155557343417e-06, + "loss": 4.3615, + "step": 25850 + }, + { + "epoch": 1.98, + "learning_rate": 2.6469286777424678e-06, + "loss": 3.9139, + "step": 25855 + }, + { + "epoch": 1.98, + "learning_rate": 2.6067017981415184e-06, + "loss": 3.9283, + "step": 25860 + }, + { + "epoch": 1.98, + "learning_rate": 2.566474918540569e-06, + "loss": 2.8451, + "step": 25865 + }, + { + "epoch": 1.98, + "learning_rate": 2.5262480389396196e-06, + "loss": 2.6068, + "step": 25870 + }, + { + "epoch": 1.98, + "learning_rate": 2.4860211593386702e-06, + "loss": 3.0293, + "step": 25875 + }, + { + "epoch": 1.98, + "learning_rate": 2.445794279737721e-06, + "loss": 1.631, + "step": 25880 + }, + { + "epoch": 1.98, + "learning_rate": 2.4055674001367715e-06, + "loss": 1.9995, + "step": 25885 + }, + { + "epoch": 1.98, + "learning_rate": 2.365340520535822e-06, + "loss": 4.6109, + "step": 25890 + }, + { + "epoch": 1.98, + "learning_rate": 2.3251136409348727e-06, + "loss": 4.3992, + "step": 25895 + }, + { + "epoch": 1.98, + "learning_rate": 2.2848867613339233e-06, + "loss": 3.3955, + "step": 25900 + }, + { + "epoch": 1.98, + "learning_rate": 2.244659881732974e-06, + "loss": 2.3407, + "step": 25905 + }, + { + "epoch": 1.98, + "learning_rate": 2.2044330021320246e-06, + "loss": 2.9357, + "step": 25910 + }, + { + "epoch": 1.98, + "learning_rate": 2.1642061225310756e-06, + "loss": 2.0345, + "step": 25915 + }, + { + "epoch": 1.98, + "learning_rate": 2.123979242930126e-06, + "loss": 2.8767, + "step": 25920 + }, + { + "epoch": 1.98, + "learning_rate": 2.083752363329177e-06, + "loss": 2.4327, + "step": 25925 + }, + { + "epoch": 1.98, + "learning_rate": 2.043525483728227e-06, + "loss": 1.5194, + "step": 25930 + }, + { + "epoch": 1.98, + "learning_rate": 2.003298604127278e-06, + "loss": 2.0557, + "step": 25935 + }, + { + "epoch": 1.98, + "learning_rate": 1.9630717245263287e-06, + "loss": 4.5334, + "step": 25940 + }, + { + "epoch": 1.98, + "learning_rate": 1.9228448449253793e-06, + "loss": 3.535, + "step": 25945 + }, + { + "epoch": 1.98, + "learning_rate": 1.8826179653244297e-06, + "loss": 3.9789, + "step": 25950 + }, + { + "epoch": 1.98, + "learning_rate": 1.8423910857234806e-06, + "loss": 4.2756, + "step": 25955 + }, + { + "epoch": 1.98, + "learning_rate": 1.8021642061225312e-06, + "loss": 3.7443, + "step": 25960 + }, + { + "epoch": 1.98, + "learning_rate": 1.761937326521582e-06, + "loss": 2.4496, + "step": 25965 + }, + { + "epoch": 1.98, + "learning_rate": 1.7217104469206324e-06, + "loss": 2.5245, + "step": 25970 + }, + { + "epoch": 1.99, + "learning_rate": 1.6814835673196833e-06, + "loss": 2.2765, + "step": 25975 + }, + { + "epoch": 1.99, + "learning_rate": 1.6412566877187337e-06, + "loss": 2.3552, + "step": 25980 + }, + { + "epoch": 1.99, + "learning_rate": 1.6010298081177843e-06, + "loss": 3.3843, + "step": 25985 + }, + { + "epoch": 1.99, + "learning_rate": 1.5608029285168351e-06, + "loss": 4.0086, + "step": 25990 + }, + { + "epoch": 1.99, + "learning_rate": 1.5205760489158858e-06, + "loss": 4.1575, + "step": 25995 + }, + { + "epoch": 1.99, + "learning_rate": 1.4803491693149364e-06, + "loss": 4.2672, + "step": 26000 + }, + { + "epoch": 1.99, + "learning_rate": 1.440122289713987e-06, + "loss": 3.7455, + "step": 26005 + }, + { + "epoch": 1.99, + "learning_rate": 1.3998954101130376e-06, + "loss": 3.8271, + "step": 26010 + }, + { + "epoch": 1.99, + "learning_rate": 1.3596685305120882e-06, + "loss": 3.1133, + "step": 26015 + }, + { + "epoch": 1.99, + "learning_rate": 1.3194416509111389e-06, + "loss": 3.2707, + "step": 26020 + }, + { + "epoch": 1.99, + "learning_rate": 1.2792147713101897e-06, + "loss": 1.7346, + "step": 26025 + }, + { + "epoch": 1.99, + "learning_rate": 1.23898789170924e-06, + "loss": 1.4858, + "step": 26030 + }, + { + "epoch": 1.99, + "learning_rate": 1.1987610121082907e-06, + "loss": 2.2901, + "step": 26035 + }, + { + "epoch": 1.99, + "learning_rate": 1.1585341325073413e-06, + "loss": 4.3547, + "step": 26040 + }, + { + "epoch": 1.99, + "learning_rate": 1.118307252906392e-06, + "loss": 4.3891, + "step": 26045 + }, + { + "epoch": 1.99, + "learning_rate": 1.0780803733054428e-06, + "loss": 3.8504, + "step": 26050 + }, + { + "epoch": 1.99, + "learning_rate": 1.0378534937044934e-06, + "loss": 3.1188, + "step": 26055 + }, + { + "epoch": 1.99, + "learning_rate": 9.97626614103544e-07, + "loss": 3.6335, + "step": 26060 + }, + { + "epoch": 1.99, + "learning_rate": 9.573997345025946e-07, + "loss": 1.9387, + "step": 26065 + }, + { + "epoch": 1.99, + "learning_rate": 9.171728549016453e-07, + "loss": 3.2563, + "step": 26070 + }, + { + "epoch": 1.99, + "learning_rate": 8.76945975300696e-07, + "loss": 2.0166, + "step": 26075 + }, + { + "epoch": 1.99, + "learning_rate": 8.367190956997466e-07, + "loss": 3.0024, + "step": 26080 + }, + { + "epoch": 1.99, + "learning_rate": 7.964922160987972e-07, + "loss": 3.026, + "step": 26085 + }, + { + "epoch": 1.99, + "learning_rate": 7.56265336497848e-07, + "loss": 4.6111, + "step": 26090 + }, + { + "epoch": 1.99, + "learning_rate": 7.160384568968986e-07, + "loss": 4.4326, + "step": 26095 + }, + { + "epoch": 1.99, + "learning_rate": 6.758115772959492e-07, + "loss": 4.4527, + "step": 26100 + }, + { + "epoch": 2.0, + "learning_rate": 6.355846976949998e-07, + "loss": 2.902, + "step": 26105 + }, + { + "epoch": 2.0, + "learning_rate": 5.953578180940504e-07, + "loss": 3.3514, + "step": 26110 + }, + { + "epoch": 2.0, + "learning_rate": 5.551309384931011e-07, + "loss": 3.167, + "step": 26115 + }, + { + "epoch": 2.0, + "learning_rate": 5.149040588921518e-07, + "loss": 2.4512, + "step": 26120 + }, + { + "epoch": 2.0, + "learning_rate": 4.746771792912024e-07, + "loss": 1.9114, + "step": 26125 + }, + { + "epoch": 2.0, + "learning_rate": 4.3445029969025303e-07, + "loss": 2.0456, + "step": 26130 + }, + { + "epoch": 2.0, + "learning_rate": 3.942234200893037e-07, + "loss": 1.6755, + "step": 26135 + }, + { + "epoch": 2.0, + "learning_rate": 3.5399654048835437e-07, + "loss": 5.232, + "step": 26140 + }, + { + "epoch": 2.0, + "learning_rate": 3.13769660887405e-07, + "loss": 3.2018, + "step": 26145 + }, + { + "epoch": 2.0, + "learning_rate": 2.735427812864556e-07, + "loss": 2.7205, + "step": 26150 + }, + { + "epoch": 2.0, + "learning_rate": 2.3331590168550626e-07, + "loss": 3.5699, + "step": 26155 + }, + { + "epoch": 2.0, + "learning_rate": 1.930890220845569e-07, + "loss": 3.0736, + "step": 26160 + }, + { + "epoch": 2.0, + "learning_rate": 1.5286214248360755e-07, + "loss": 1.3811, + "step": 26165 + }, + { + "epoch": 2.0, + "eval_exact_match": 19.45205479452055, + "eval_f1": 23.848435348153973, + "eval_loss": 3.1337833404541016, + "eval_runtime": 131.2034, + "eval_samples_per_second": 11.128, + "eval_steps_per_second": 11.128, + "step": 26168 + } + ], + "logging_steps": 5, + "max_steps": 26168, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 500, + "total_flos": 8847303388182072.0, + "train_batch_size": 1, + "trial_name": null, + "trial_params": null +}