{ "best_metric": null, "best_model_checkpoint": null, "epoch": 45.0, "global_step": 6300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 0.0007936507936507937, "loss": 0.4795, "step": 10 }, { "epoch": 0.14, "learning_rate": 0.0015873015873015873, "loss": 0.4792, "step": 20 }, { "epoch": 0.21, "learning_rate": 0.002380952380952381, "loss": 0.4786, "step": 30 }, { "epoch": 0.29, "learning_rate": 0.0031746031746031746, "loss": 0.4773, "step": 40 }, { "epoch": 0.36, "learning_rate": 0.003968253968253968, "loss": 0.4743, "step": 50 }, { "epoch": 0.43, "learning_rate": 0.004761904761904762, "loss": 0.4672, "step": 60 }, { "epoch": 0.5, "learning_rate": 0.005555555555555556, "loss": 0.4456, "step": 70 }, { "epoch": 0.57, "learning_rate": 0.006349206349206349, "loss": 0.3561, "step": 80 }, { "epoch": 0.64, "learning_rate": 0.007142857142857143, "loss": 0.2459, "step": 90 }, { "epoch": 0.71, "learning_rate": 0.007936507936507936, "loss": 0.2057, "step": 100 }, { "epoch": 0.79, "learning_rate": 0.00873015873015873, "loss": 0.1847, "step": 110 }, { "epoch": 0.86, "learning_rate": 0.009523809523809525, "loss": 0.1765, "step": 120 }, { "epoch": 0.93, "learning_rate": 0.010317460317460317, "loss": 0.1404, "step": 130 }, { "epoch": 1.0, "learning_rate": 0.011111111111111112, "loss": 0.1251, "step": 140 }, { "epoch": 1.07, "learning_rate": 0.011904761904761904, "loss": 0.1224, "step": 150 }, { "epoch": 1.14, "learning_rate": 0.012698412698412698, "loss": 0.1241, "step": 160 }, { "epoch": 1.21, "learning_rate": 0.013492063492063493, "loss": 0.1137, "step": 170 }, { "epoch": 1.29, "learning_rate": 0.014285714285714285, "loss": 0.1178, "step": 180 }, { "epoch": 1.36, "learning_rate": 0.01507936507936508, "loss": 0.1117, "step": 190 }, { "epoch": 1.43, "learning_rate": 0.015873015873015872, "loss": 0.1147, "step": 200 }, { "epoch": 1.5, "learning_rate": 0.016666666666666666, "loss": 0.1049, "step": 210 }, { "epoch": 1.57, "learning_rate": 0.01746031746031746, "loss": 0.1153, "step": 220 }, { "epoch": 1.64, "learning_rate": 0.018253968253968255, "loss": 0.1156, "step": 230 }, { "epoch": 1.71, "learning_rate": 0.01904761904761905, "loss": 0.1154, "step": 240 }, { "epoch": 1.79, "learning_rate": 0.01984126984126984, "loss": 0.1049, "step": 250 }, { "epoch": 1.86, "learning_rate": 0.020634920634920634, "loss": 0.0982, "step": 260 }, { "epoch": 1.93, "learning_rate": 0.02142857142857143, "loss": 0.1182, "step": 270 }, { "epoch": 2.0, "learning_rate": 0.022222222222222223, "loss": 0.1183, "step": 280 }, { "epoch": 2.07, "learning_rate": 0.023015873015873017, "loss": 0.1131, "step": 290 }, { "epoch": 2.14, "learning_rate": 0.023809523809523808, "loss": 0.1063, "step": 300 }, { "epoch": 2.21, "learning_rate": 0.024603174603174603, "loss": 0.0985, "step": 310 }, { "epoch": 2.29, "learning_rate": 0.025396825396825397, "loss": 0.1262, "step": 320 }, { "epoch": 2.36, "learning_rate": 0.026190476190476195, "loss": 0.1319, "step": 330 }, { "epoch": 2.43, "learning_rate": 0.026984126984126985, "loss": 0.1123, "step": 340 }, { "epoch": 2.5, "learning_rate": 0.02777777777777778, "loss": 0.1116, "step": 350 }, { "epoch": 2.57, "learning_rate": 0.02857142857142857, "loss": 0.1031, "step": 360 }, { "epoch": 2.64, "learning_rate": 0.02936507936507937, "loss": 0.1021, "step": 370 }, { "epoch": 2.71, "learning_rate": 0.03015873015873016, "loss": 0.1067, "step": 380 }, { "epoch": 2.79, "learning_rate": 0.030952380952380953, "loss": 0.1038, "step": 390 }, { "epoch": 2.86, "learning_rate": 0.031746031746031744, "loss": 0.1127, "step": 400 }, { "epoch": 2.93, "learning_rate": 0.03253968253968254, "loss": 0.1008, "step": 410 }, { "epoch": 3.0, "learning_rate": 0.03333333333333333, "loss": 0.0965, "step": 420 }, { "epoch": 3.07, "learning_rate": 0.03412698412698413, "loss": 0.0965, "step": 430 }, { "epoch": 3.14, "learning_rate": 0.03492063492063492, "loss": 0.095, "step": 440 }, { "epoch": 3.21, "learning_rate": 0.03571428571428572, "loss": 0.0876, "step": 450 }, { "epoch": 3.29, "learning_rate": 0.03650793650793651, "loss": 0.1019, "step": 460 }, { "epoch": 3.36, "learning_rate": 0.0373015873015873, "loss": 0.0881, "step": 470 }, { "epoch": 3.43, "learning_rate": 0.0380952380952381, "loss": 0.0901, "step": 480 }, { "epoch": 3.5, "learning_rate": 0.03888888888888889, "loss": 0.0878, "step": 490 }, { "epoch": 3.57, "learning_rate": 0.03968253968253968, "loss": 0.0861, "step": 500 }, { "epoch": 3.64, "learning_rate": 0.04047619047619048, "loss": 0.0865, "step": 510 }, { "epoch": 3.71, "learning_rate": 0.04126984126984127, "loss": 0.0818, "step": 520 }, { "epoch": 3.79, "learning_rate": 0.04206349206349207, "loss": 0.0789, "step": 530 }, { "epoch": 3.86, "learning_rate": 0.04285714285714286, "loss": 0.1004, "step": 540 }, { "epoch": 3.93, "learning_rate": 0.043650793650793655, "loss": 0.0799, "step": 550 }, { "epoch": 4.0, "learning_rate": 0.044444444444444446, "loss": 0.0818, "step": 560 }, { "epoch": 4.07, "learning_rate": 0.045238095238095244, "loss": 0.0777, "step": 570 }, { "epoch": 4.14, "learning_rate": 0.046031746031746035, "loss": 0.0767, "step": 580 }, { "epoch": 4.21, "learning_rate": 0.046825396825396826, "loss": 0.0711, "step": 590 }, { "epoch": 4.29, "learning_rate": 0.047619047619047616, "loss": 0.0723, "step": 600 }, { "epoch": 4.36, "learning_rate": 0.048412698412698414, "loss": 0.0694, "step": 610 }, { "epoch": 4.43, "learning_rate": 0.049206349206349205, "loss": 0.0657, "step": 620 }, { "epoch": 4.5, "learning_rate": 0.05, "loss": 0.0567, "step": 630 }, { "epoch": 4.57, "learning_rate": 0.049911816578483245, "loss": 0.0617, "step": 640 }, { "epoch": 4.64, "learning_rate": 0.049823633156966494, "loss": 0.0678, "step": 650 }, { "epoch": 4.71, "learning_rate": 0.04973544973544974, "loss": 0.0647, "step": 660 }, { "epoch": 4.79, "learning_rate": 0.049647266313932986, "loss": 0.0543, "step": 670 }, { "epoch": 4.86, "learning_rate": 0.04955908289241623, "loss": 0.0558, "step": 680 }, { "epoch": 4.93, "learning_rate": 0.04947089947089947, "loss": 0.0519, "step": 690 }, { "epoch": 5.0, "learning_rate": 0.04938271604938271, "loss": 0.0506, "step": 700 }, { "epoch": 5.07, "learning_rate": 0.04929453262786596, "loss": 0.0583, "step": 710 }, { "epoch": 5.14, "learning_rate": 0.049206349206349205, "loss": 0.0518, "step": 720 }, { "epoch": 5.21, "learning_rate": 0.049118165784832454, "loss": 0.0491, "step": 730 }, { "epoch": 5.29, "learning_rate": 0.0490299823633157, "loss": 0.0471, "step": 740 }, { "epoch": 5.36, "learning_rate": 0.048941798941798946, "loss": 0.0488, "step": 750 }, { "epoch": 5.43, "learning_rate": 0.04885361552028219, "loss": 0.0449, "step": 760 }, { "epoch": 5.5, "learning_rate": 0.04876543209876544, "loss": 0.0417, "step": 770 }, { "epoch": 5.57, "learning_rate": 0.04867724867724868, "loss": 0.0409, "step": 780 }, { "epoch": 5.64, "learning_rate": 0.04858906525573192, "loss": 0.0485, "step": 790 }, { "epoch": 5.71, "learning_rate": 0.048500881834215165, "loss": 0.0457, "step": 800 }, { "epoch": 5.79, "learning_rate": 0.048412698412698414, "loss": 0.0422, "step": 810 }, { "epoch": 5.86, "learning_rate": 0.04832451499118166, "loss": 0.0407, "step": 820 }, { "epoch": 5.93, "learning_rate": 0.048236331569664906, "loss": 0.0394, "step": 830 }, { "epoch": 6.0, "learning_rate": 0.04814814814814815, "loss": 0.0417, "step": 840 }, { "epoch": 6.07, "learning_rate": 0.0480599647266314, "loss": 0.0358, "step": 850 }, { "epoch": 6.14, "learning_rate": 0.04797178130511464, "loss": 0.0352, "step": 860 }, { "epoch": 6.21, "learning_rate": 0.04788359788359789, "loss": 0.0389, "step": 870 }, { "epoch": 6.29, "learning_rate": 0.04779541446208113, "loss": 0.036, "step": 880 }, { "epoch": 6.36, "learning_rate": 0.047707231040564374, "loss": 0.034, "step": 890 }, { "epoch": 6.43, "learning_rate": 0.047619047619047616, "loss": 0.0359, "step": 900 }, { "epoch": 6.5, "learning_rate": 0.047530864197530866, "loss": 0.0355, "step": 910 }, { "epoch": 6.57, "learning_rate": 0.04744268077601411, "loss": 0.03, "step": 920 }, { "epoch": 6.64, "learning_rate": 0.04735449735449736, "loss": 0.0278, "step": 930 }, { "epoch": 6.71, "learning_rate": 0.0472663139329806, "loss": 0.0337, "step": 940 }, { "epoch": 6.79, "learning_rate": 0.04717813051146385, "loss": 0.0293, "step": 950 }, { "epoch": 6.86, "learning_rate": 0.04708994708994709, "loss": 0.0302, "step": 960 }, { "epoch": 6.93, "learning_rate": 0.04700176366843034, "loss": 0.0332, "step": 970 }, { "epoch": 7.0, "learning_rate": 0.04691358024691358, "loss": 0.0292, "step": 980 }, { "epoch": 7.07, "learning_rate": 0.046825396825396826, "loss": 0.0298, "step": 990 }, { "epoch": 7.14, "learning_rate": 0.04673721340388007, "loss": 0.0269, "step": 1000 }, { "epoch": 7.21, "learning_rate": 0.04664902998236332, "loss": 0.0236, "step": 1010 }, { "epoch": 7.29, "learning_rate": 0.04656084656084656, "loss": 0.025, "step": 1020 }, { "epoch": 7.36, "learning_rate": 0.04647266313932981, "loss": 0.0266, "step": 1030 }, { "epoch": 7.43, "learning_rate": 0.04638447971781305, "loss": 0.0229, "step": 1040 }, { "epoch": 7.5, "learning_rate": 0.0462962962962963, "loss": 0.0232, "step": 1050 }, { "epoch": 7.57, "learning_rate": 0.04620811287477954, "loss": 0.0244, "step": 1060 }, { "epoch": 7.64, "learning_rate": 0.04611992945326279, "loss": 0.0241, "step": 1070 }, { "epoch": 7.71, "learning_rate": 0.046031746031746035, "loss": 0.0256, "step": 1080 }, { "epoch": 7.79, "learning_rate": 0.04594356261022928, "loss": 0.0266, "step": 1090 }, { "epoch": 7.86, "learning_rate": 0.04585537918871252, "loss": 0.0234, "step": 1100 }, { "epoch": 7.93, "learning_rate": 0.04576719576719577, "loss": 0.0223, "step": 1110 }, { "epoch": 8.0, "learning_rate": 0.04567901234567901, "loss": 0.02, "step": 1120 }, { "epoch": 8.07, "learning_rate": 0.04559082892416226, "loss": 0.022, "step": 1130 }, { "epoch": 8.14, "learning_rate": 0.0455026455026455, "loss": 0.0217, "step": 1140 }, { "epoch": 8.21, "learning_rate": 0.04541446208112875, "loss": 0.0219, "step": 1150 }, { "epoch": 8.29, "learning_rate": 0.045326278659611995, "loss": 0.0197, "step": 1160 }, { "epoch": 8.36, "learning_rate": 0.045238095238095244, "loss": 0.0209, "step": 1170 }, { "epoch": 8.43, "learning_rate": 0.045149911816578486, "loss": 0.0221, "step": 1180 }, { "epoch": 8.5, "learning_rate": 0.04506172839506173, "loss": 0.0198, "step": 1190 }, { "epoch": 8.57, "learning_rate": 0.04497354497354497, "loss": 0.0199, "step": 1200 }, { "epoch": 8.64, "learning_rate": 0.04488536155202822, "loss": 0.0196, "step": 1210 }, { "epoch": 8.71, "learning_rate": 0.04479717813051146, "loss": 0.019, "step": 1220 }, { "epoch": 8.79, "learning_rate": 0.04470899470899471, "loss": 0.0195, "step": 1230 }, { "epoch": 8.86, "learning_rate": 0.044620811287477954, "loss": 0.0196, "step": 1240 }, { "epoch": 8.93, "learning_rate": 0.044532627865961204, "loss": 0.0184, "step": 1250 }, { "epoch": 9.0, "learning_rate": 0.044444444444444446, "loss": 0.0205, "step": 1260 }, { "epoch": 9.07, "learning_rate": 0.044356261022927695, "loss": 0.0191, "step": 1270 }, { "epoch": 9.14, "learning_rate": 0.04426807760141094, "loss": 0.0184, "step": 1280 }, { "epoch": 9.21, "learning_rate": 0.04417989417989418, "loss": 0.0211, "step": 1290 }, { "epoch": 9.29, "learning_rate": 0.04409171075837742, "loss": 0.0184, "step": 1300 }, { "epoch": 9.36, "learning_rate": 0.04400352733686067, "loss": 0.0188, "step": 1310 }, { "epoch": 9.43, "learning_rate": 0.043915343915343914, "loss": 0.017, "step": 1320 }, { "epoch": 9.5, "learning_rate": 0.043827160493827164, "loss": 0.0158, "step": 1330 }, { "epoch": 9.57, "learning_rate": 0.043738977072310406, "loss": 0.0158, "step": 1340 }, { "epoch": 9.64, "learning_rate": 0.043650793650793655, "loss": 0.0165, "step": 1350 }, { "epoch": 9.71, "learning_rate": 0.0435626102292769, "loss": 0.0151, "step": 1360 }, { "epoch": 9.79, "learning_rate": 0.04347442680776015, "loss": 0.0144, "step": 1370 }, { "epoch": 9.86, "learning_rate": 0.04338624338624339, "loss": 0.0168, "step": 1380 }, { "epoch": 9.93, "learning_rate": 0.04329805996472663, "loss": 0.0162, "step": 1390 }, { "epoch": 10.0, "learning_rate": 0.043209876543209874, "loss": 0.0177, "step": 1400 }, { "epoch": 10.07, "learning_rate": 0.04312169312169312, "loss": 0.0144, "step": 1410 }, { "epoch": 10.14, "learning_rate": 0.043033509700176366, "loss": 0.0145, "step": 1420 }, { "epoch": 10.21, "learning_rate": 0.042945326278659615, "loss": 0.0153, "step": 1430 }, { "epoch": 10.29, "learning_rate": 0.04285714285714286, "loss": 0.0147, "step": 1440 }, { "epoch": 10.36, "learning_rate": 0.04276895943562611, "loss": 0.0133, "step": 1450 }, { "epoch": 10.43, "learning_rate": 0.04268077601410935, "loss": 0.0139, "step": 1460 }, { "epoch": 10.5, "learning_rate": 0.0425925925925926, "loss": 0.0133, "step": 1470 }, { "epoch": 10.57, "learning_rate": 0.04250440917107584, "loss": 0.0135, "step": 1480 }, { "epoch": 10.64, "learning_rate": 0.04241622574955908, "loss": 0.0106, "step": 1490 }, { "epoch": 10.71, "learning_rate": 0.042328042328042326, "loss": 0.0132, "step": 1500 }, { "epoch": 10.79, "learning_rate": 0.042239858906525575, "loss": 0.0137, "step": 1510 }, { "epoch": 10.86, "learning_rate": 0.04215167548500882, "loss": 0.0127, "step": 1520 }, { "epoch": 10.93, "learning_rate": 0.04206349206349207, "loss": 0.0116, "step": 1530 }, { "epoch": 11.0, "learning_rate": 0.04197530864197531, "loss": 0.0123, "step": 1540 }, { "epoch": 11.07, "learning_rate": 0.04188712522045856, "loss": 0.0125, "step": 1550 }, { "epoch": 11.14, "learning_rate": 0.0417989417989418, "loss": 0.0111, "step": 1560 }, { "epoch": 11.21, "learning_rate": 0.04171075837742505, "loss": 0.0106, "step": 1570 }, { "epoch": 11.29, "learning_rate": 0.04162257495590829, "loss": 0.0105, "step": 1580 }, { "epoch": 11.36, "learning_rate": 0.041534391534391535, "loss": 0.0099, "step": 1590 }, { "epoch": 11.43, "learning_rate": 0.04144620811287478, "loss": 0.0116, "step": 1600 }, { "epoch": 11.5, "learning_rate": 0.04135802469135803, "loss": 0.0115, "step": 1610 }, { "epoch": 11.57, "learning_rate": 0.04126984126984127, "loss": 0.0113, "step": 1620 }, { "epoch": 11.64, "learning_rate": 0.04118165784832452, "loss": 0.012, "step": 1630 }, { "epoch": 11.71, "learning_rate": 0.04109347442680776, "loss": 0.0115, "step": 1640 }, { "epoch": 11.79, "learning_rate": 0.04100529100529101, "loss": 0.0117, "step": 1650 }, { "epoch": 11.86, "learning_rate": 0.04091710758377425, "loss": 0.0112, "step": 1660 }, { "epoch": 11.93, "learning_rate": 0.0408289241622575, "loss": 0.0095, "step": 1670 }, { "epoch": 12.0, "learning_rate": 0.040740740740740744, "loss": 0.0114, "step": 1680 }, { "epoch": 12.07, "learning_rate": 0.040652557319223986, "loss": 0.0114, "step": 1690 }, { "epoch": 12.14, "learning_rate": 0.04056437389770723, "loss": 0.0105, "step": 1700 }, { "epoch": 12.21, "learning_rate": 0.04047619047619048, "loss": 0.011, "step": 1710 }, { "epoch": 12.29, "learning_rate": 0.04038800705467372, "loss": 0.0113, "step": 1720 }, { "epoch": 12.36, "learning_rate": 0.04029982363315697, "loss": 0.0098, "step": 1730 }, { "epoch": 12.43, "learning_rate": 0.04021164021164021, "loss": 0.0093, "step": 1740 }, { "epoch": 12.5, "learning_rate": 0.04012345679012346, "loss": 0.0086, "step": 1750 }, { "epoch": 12.57, "learning_rate": 0.040035273368606704, "loss": 0.0086, "step": 1760 }, { "epoch": 12.64, "learning_rate": 0.03994708994708995, "loss": 0.0077, "step": 1770 }, { "epoch": 12.71, "learning_rate": 0.039858906525573196, "loss": 0.0088, "step": 1780 }, { "epoch": 12.79, "learning_rate": 0.03977072310405644, "loss": 0.0092, "step": 1790 }, { "epoch": 12.86, "learning_rate": 0.03968253968253968, "loss": 0.0097, "step": 1800 }, { "epoch": 12.93, "learning_rate": 0.03959435626102293, "loss": 0.0089, "step": 1810 }, { "epoch": 13.0, "learning_rate": 0.03950617283950617, "loss": 0.007, "step": 1820 }, { "epoch": 13.07, "learning_rate": 0.03941798941798942, "loss": 0.0071, "step": 1830 }, { "epoch": 13.14, "learning_rate": 0.039329805996472664, "loss": 0.008, "step": 1840 }, { "epoch": 13.21, "learning_rate": 0.03924162257495591, "loss": 0.0074, "step": 1850 }, { "epoch": 13.29, "learning_rate": 0.039153439153439155, "loss": 0.0084, "step": 1860 }, { "epoch": 13.36, "learning_rate": 0.039065255731922405, "loss": 0.0079, "step": 1870 }, { "epoch": 13.43, "learning_rate": 0.03897707231040565, "loss": 0.0082, "step": 1880 }, { "epoch": 13.5, "learning_rate": 0.03888888888888889, "loss": 0.0079, "step": 1890 }, { "epoch": 13.57, "learning_rate": 0.03880070546737213, "loss": 0.007, "step": 1900 }, { "epoch": 13.64, "learning_rate": 0.03871252204585538, "loss": 0.0075, "step": 1910 }, { "epoch": 13.71, "learning_rate": 0.038624338624338624, "loss": 0.0071, "step": 1920 }, { "epoch": 13.79, "learning_rate": 0.03853615520282187, "loss": 0.0073, "step": 1930 }, { "epoch": 13.86, "learning_rate": 0.038447971781305115, "loss": 0.0073, "step": 1940 }, { "epoch": 13.93, "learning_rate": 0.038359788359788365, "loss": 0.0069, "step": 1950 }, { "epoch": 14.0, "learning_rate": 0.03827160493827161, "loss": 0.0062, "step": 1960 }, { "epoch": 14.07, "learning_rate": 0.038183421516754856, "loss": 0.0066, "step": 1970 }, { "epoch": 14.14, "learning_rate": 0.0380952380952381, "loss": 0.0067, "step": 1980 }, { "epoch": 14.21, "learning_rate": 0.03800705467372134, "loss": 0.0086, "step": 1990 }, { "epoch": 14.29, "learning_rate": 0.03791887125220458, "loss": 0.0075, "step": 2000 }, { "epoch": 14.36, "learning_rate": 0.03783068783068783, "loss": 0.0072, "step": 2010 }, { "epoch": 14.43, "learning_rate": 0.037742504409171075, "loss": 0.008, "step": 2020 }, { "epoch": 14.5, "learning_rate": 0.037654320987654324, "loss": 0.0074, "step": 2030 }, { "epoch": 14.57, "learning_rate": 0.03756613756613757, "loss": 0.0054, "step": 2040 }, { "epoch": 14.64, "learning_rate": 0.037477954144620816, "loss": 0.0067, "step": 2050 }, { "epoch": 14.71, "learning_rate": 0.03738977072310406, "loss": 0.0062, "step": 2060 }, { "epoch": 14.79, "learning_rate": 0.0373015873015873, "loss": 0.0058, "step": 2070 }, { "epoch": 14.86, "learning_rate": 0.03721340388007054, "loss": 0.0064, "step": 2080 }, { "epoch": 14.93, "learning_rate": 0.03712522045855379, "loss": 0.0055, "step": 2090 }, { "epoch": 15.0, "learning_rate": 0.037037037037037035, "loss": 0.0059, "step": 2100 }, { "epoch": 15.07, "learning_rate": 0.036948853615520284, "loss": 0.0058, "step": 2110 }, { "epoch": 15.14, "learning_rate": 0.03686067019400353, "loss": 0.006, "step": 2120 }, { "epoch": 15.21, "learning_rate": 0.036772486772486776, "loss": 0.0055, "step": 2130 }, { "epoch": 15.29, "learning_rate": 0.03668430335097002, "loss": 0.0065, "step": 2140 }, { "epoch": 15.36, "learning_rate": 0.03659611992945327, "loss": 0.0063, "step": 2150 }, { "epoch": 15.43, "learning_rate": 0.03650793650793651, "loss": 0.0059, "step": 2160 }, { "epoch": 15.5, "learning_rate": 0.03641975308641975, "loss": 0.0052, "step": 2170 }, { "epoch": 15.57, "learning_rate": 0.036331569664902995, "loss": 0.0056, "step": 2180 }, { "epoch": 15.64, "learning_rate": 0.036243386243386244, "loss": 0.0052, "step": 2190 }, { "epoch": 15.71, "learning_rate": 0.036155202821869487, "loss": 0.0056, "step": 2200 }, { "epoch": 15.79, "learning_rate": 0.036067019400352736, "loss": 0.0051, "step": 2210 }, { "epoch": 15.86, "learning_rate": 0.03597883597883598, "loss": 0.0055, "step": 2220 }, { "epoch": 15.93, "learning_rate": 0.03589065255731923, "loss": 0.0059, "step": 2230 }, { "epoch": 16.0, "learning_rate": 0.03580246913580247, "loss": 0.0061, "step": 2240 }, { "epoch": 16.07, "learning_rate": 0.03571428571428572, "loss": 0.0061, "step": 2250 }, { "epoch": 16.14, "learning_rate": 0.03562610229276896, "loss": 0.0054, "step": 2260 }, { "epoch": 16.21, "learning_rate": 0.035537918871252204, "loss": 0.0064, "step": 2270 }, { "epoch": 16.29, "learning_rate": 0.035449735449735446, "loss": 0.0055, "step": 2280 }, { "epoch": 16.36, "learning_rate": 0.035361552028218696, "loss": 0.0052, "step": 2290 }, { "epoch": 16.43, "learning_rate": 0.03527336860670194, "loss": 0.0051, "step": 2300 }, { "epoch": 16.5, "learning_rate": 0.03518518518518519, "loss": 0.005, "step": 2310 }, { "epoch": 16.57, "learning_rate": 0.03509700176366843, "loss": 0.0055, "step": 2320 }, { "epoch": 16.64, "learning_rate": 0.03500881834215168, "loss": 0.0052, "step": 2330 }, { "epoch": 16.71, "learning_rate": 0.03492063492063492, "loss": 0.0051, "step": 2340 }, { "epoch": 16.79, "learning_rate": 0.03483245149911817, "loss": 0.0052, "step": 2350 }, { "epoch": 16.86, "learning_rate": 0.03474426807760141, "loss": 0.0053, "step": 2360 }, { "epoch": 16.93, "learning_rate": 0.034656084656084656, "loss": 0.0056, "step": 2370 }, { "epoch": 17.0, "learning_rate": 0.0345679012345679, "loss": 0.0054, "step": 2380 }, { "epoch": 17.07, "learning_rate": 0.03447971781305115, "loss": 0.0053, "step": 2390 }, { "epoch": 17.14, "learning_rate": 0.03439153439153439, "loss": 0.0054, "step": 2400 }, { "epoch": 17.21, "learning_rate": 0.03430335097001764, "loss": 0.0064, "step": 2410 }, { "epoch": 17.29, "learning_rate": 0.03421516754850088, "loss": 0.005, "step": 2420 }, { "epoch": 17.36, "learning_rate": 0.03412698412698413, "loss": 0.0054, "step": 2430 }, { "epoch": 17.43, "learning_rate": 0.03403880070546737, "loss": 0.0047, "step": 2440 }, { "epoch": 17.5, "learning_rate": 0.03395061728395062, "loss": 0.0055, "step": 2450 }, { "epoch": 17.57, "learning_rate": 0.033862433862433865, "loss": 0.0052, "step": 2460 }, { "epoch": 17.64, "learning_rate": 0.03377425044091711, "loss": 0.0048, "step": 2470 }, { "epoch": 17.71, "learning_rate": 0.03368606701940035, "loss": 0.0049, "step": 2480 }, { "epoch": 17.79, "learning_rate": 0.0335978835978836, "loss": 0.0049, "step": 2490 }, { "epoch": 17.86, "learning_rate": 0.03350970017636684, "loss": 0.0055, "step": 2500 }, { "epoch": 17.93, "learning_rate": 0.03342151675485009, "loss": 0.0052, "step": 2510 }, { "epoch": 18.0, "learning_rate": 0.03333333333333333, "loss": 0.0059, "step": 2520 }, { "epoch": 18.07, "learning_rate": 0.03324514991181658, "loss": 0.0053, "step": 2530 }, { "epoch": 18.14, "learning_rate": 0.033156966490299825, "loss": 0.0051, "step": 2540 }, { "epoch": 18.21, "learning_rate": 0.033068783068783074, "loss": 0.0043, "step": 2550 }, { "epoch": 18.29, "learning_rate": 0.032980599647266316, "loss": 0.0045, "step": 2560 }, { "epoch": 18.36, "learning_rate": 0.03289241622574956, "loss": 0.0048, "step": 2570 }, { "epoch": 18.43, "learning_rate": 0.0328042328042328, "loss": 0.0042, "step": 2580 }, { "epoch": 18.5, "learning_rate": 0.03271604938271605, "loss": 0.0048, "step": 2590 }, { "epoch": 18.57, "learning_rate": 0.03262786596119929, "loss": 0.0052, "step": 2600 }, { "epoch": 18.64, "learning_rate": 0.03253968253968254, "loss": 0.0061, "step": 2610 }, { "epoch": 18.71, "learning_rate": 0.032451499118165784, "loss": 0.0052, "step": 2620 }, { "epoch": 18.79, "learning_rate": 0.032363315696649034, "loss": 0.0045, "step": 2630 }, { "epoch": 18.86, "learning_rate": 0.032275132275132276, "loss": 0.0045, "step": 2640 }, { "epoch": 18.93, "learning_rate": 0.032186948853615525, "loss": 0.0055, "step": 2650 }, { "epoch": 19.0, "learning_rate": 0.03209876543209877, "loss": 0.0046, "step": 2660 }, { "epoch": 19.07, "learning_rate": 0.03201058201058201, "loss": 0.0047, "step": 2670 }, { "epoch": 19.14, "learning_rate": 0.03192239858906525, "loss": 0.0049, "step": 2680 }, { "epoch": 19.21, "learning_rate": 0.0318342151675485, "loss": 0.0049, "step": 2690 }, { "epoch": 19.29, "learning_rate": 0.031746031746031744, "loss": 0.005, "step": 2700 }, { "epoch": 19.36, "learning_rate": 0.031657848324514994, "loss": 0.0054, "step": 2710 }, { "epoch": 19.43, "learning_rate": 0.031569664902998236, "loss": 0.0049, "step": 2720 }, { "epoch": 19.5, "learning_rate": 0.031481481481481485, "loss": 0.0047, "step": 2730 }, { "epoch": 19.57, "learning_rate": 0.03139329805996473, "loss": 0.0046, "step": 2740 }, { "epoch": 19.64, "learning_rate": 0.03130511463844798, "loss": 0.0045, "step": 2750 }, { "epoch": 19.71, "learning_rate": 0.031216931216931216, "loss": 0.0043, "step": 2760 }, { "epoch": 19.79, "learning_rate": 0.031128747795414465, "loss": 0.0044, "step": 2770 }, { "epoch": 19.86, "learning_rate": 0.031040564373897708, "loss": 0.0051, "step": 2780 }, { "epoch": 19.93, "learning_rate": 0.030952380952380953, "loss": 0.0048, "step": 2790 }, { "epoch": 20.0, "learning_rate": 0.030864197530864196, "loss": 0.0044, "step": 2800 }, { "epoch": 20.07, "learning_rate": 0.030776014109347445, "loss": 0.0048, "step": 2810 }, { "epoch": 20.14, "learning_rate": 0.030687830687830688, "loss": 0.0046, "step": 2820 }, { "epoch": 20.21, "learning_rate": 0.030599647266313937, "loss": 0.0045, "step": 2830 }, { "epoch": 20.29, "learning_rate": 0.03051146384479718, "loss": 0.004, "step": 2840 }, { "epoch": 20.36, "learning_rate": 0.030423280423280425, "loss": 0.0043, "step": 2850 }, { "epoch": 20.43, "learning_rate": 0.030335097001763667, "loss": 0.0052, "step": 2860 }, { "epoch": 20.5, "learning_rate": 0.030246913580246917, "loss": 0.0053, "step": 2870 }, { "epoch": 20.57, "learning_rate": 0.03015873015873016, "loss": 0.0053, "step": 2880 }, { "epoch": 20.64, "learning_rate": 0.030070546737213405, "loss": 0.0048, "step": 2890 }, { "epoch": 20.71, "learning_rate": 0.029982363315696647, "loss": 0.0056, "step": 2900 }, { "epoch": 20.79, "learning_rate": 0.029894179894179897, "loss": 0.0045, "step": 2910 }, { "epoch": 20.86, "learning_rate": 0.02980599647266314, "loss": 0.0048, "step": 2920 }, { "epoch": 20.93, "learning_rate": 0.02971781305114639, "loss": 0.005, "step": 2930 }, { "epoch": 21.0, "learning_rate": 0.02962962962962963, "loss": 0.0047, "step": 2940 }, { "epoch": 21.07, "learning_rate": 0.029541446208112877, "loss": 0.0046, "step": 2950 }, { "epoch": 21.14, "learning_rate": 0.02945326278659612, "loss": 0.0047, "step": 2960 }, { "epoch": 21.21, "learning_rate": 0.02936507936507937, "loss": 0.0046, "step": 2970 }, { "epoch": 21.29, "learning_rate": 0.02927689594356261, "loss": 0.0046, "step": 2980 }, { "epoch": 21.36, "learning_rate": 0.029188712522045857, "loss": 0.0041, "step": 2990 }, { "epoch": 21.43, "learning_rate": 0.0291005291005291, "loss": 0.0048, "step": 3000 }, { "epoch": 21.5, "learning_rate": 0.029012345679012348, "loss": 0.0044, "step": 3010 }, { "epoch": 21.57, "learning_rate": 0.02892416225749559, "loss": 0.0045, "step": 3020 }, { "epoch": 21.64, "learning_rate": 0.02883597883597884, "loss": 0.0045, "step": 3030 }, { "epoch": 21.71, "learning_rate": 0.028747795414462082, "loss": 0.0043, "step": 3040 }, { "epoch": 21.79, "learning_rate": 0.028659611992945328, "loss": 0.0038, "step": 3050 }, { "epoch": 21.86, "learning_rate": 0.02857142857142857, "loss": 0.0044, "step": 3060 }, { "epoch": 21.93, "learning_rate": 0.02848324514991182, "loss": 0.0041, "step": 3070 }, { "epoch": 22.0, "learning_rate": 0.028395061728395062, "loss": 0.0044, "step": 3080 }, { "epoch": 22.07, "learning_rate": 0.028306878306878308, "loss": 0.0038, "step": 3090 }, { "epoch": 22.14, "learning_rate": 0.02821869488536155, "loss": 0.0039, "step": 3100 }, { "epoch": 22.21, "learning_rate": 0.0281305114638448, "loss": 0.0038, "step": 3110 }, { "epoch": 22.29, "learning_rate": 0.028042328042328042, "loss": 0.0041, "step": 3120 }, { "epoch": 22.36, "learning_rate": 0.02795414462081129, "loss": 0.0042, "step": 3130 }, { "epoch": 22.43, "learning_rate": 0.027865961199294534, "loss": 0.0044, "step": 3140 }, { "epoch": 22.5, "learning_rate": 0.02777777777777778, "loss": 0.0044, "step": 3150 }, { "epoch": 22.57, "learning_rate": 0.027689594356261022, "loss": 0.0045, "step": 3160 }, { "epoch": 22.64, "learning_rate": 0.02760141093474427, "loss": 0.0043, "step": 3170 }, { "epoch": 22.71, "learning_rate": 0.027513227513227514, "loss": 0.0042, "step": 3180 }, { "epoch": 22.79, "learning_rate": 0.02742504409171076, "loss": 0.0037, "step": 3190 }, { "epoch": 22.86, "learning_rate": 0.027336860670194002, "loss": 0.0041, "step": 3200 }, { "epoch": 22.93, "learning_rate": 0.02724867724867725, "loss": 0.0039, "step": 3210 }, { "epoch": 23.0, "learning_rate": 0.027160493827160494, "loss": 0.0048, "step": 3220 }, { "epoch": 23.07, "learning_rate": 0.027072310405643743, "loss": 0.004, "step": 3230 }, { "epoch": 23.14, "learning_rate": 0.026984126984126985, "loss": 0.0041, "step": 3240 }, { "epoch": 23.21, "learning_rate": 0.02689594356261023, "loss": 0.0043, "step": 3250 }, { "epoch": 23.29, "learning_rate": 0.026807760141093474, "loss": 0.0044, "step": 3260 }, { "epoch": 23.36, "learning_rate": 0.026719576719576723, "loss": 0.004, "step": 3270 }, { "epoch": 23.43, "learning_rate": 0.026631393298059965, "loss": 0.0041, "step": 3280 }, { "epoch": 23.5, "learning_rate": 0.02654320987654321, "loss": 0.0042, "step": 3290 }, { "epoch": 23.57, "learning_rate": 0.026455026455026454, "loss": 0.004, "step": 3300 }, { "epoch": 23.64, "learning_rate": 0.026366843033509703, "loss": 0.0038, "step": 3310 }, { "epoch": 23.71, "learning_rate": 0.026278659611992945, "loss": 0.0043, "step": 3320 }, { "epoch": 23.79, "learning_rate": 0.026190476190476195, "loss": 0.0043, "step": 3330 }, { "epoch": 23.86, "learning_rate": 0.026102292768959437, "loss": 0.0042, "step": 3340 }, { "epoch": 23.93, "learning_rate": 0.026014109347442683, "loss": 0.0037, "step": 3350 }, { "epoch": 24.0, "learning_rate": 0.025925925925925925, "loss": 0.0032, "step": 3360 }, { "epoch": 24.07, "learning_rate": 0.025837742504409174, "loss": 0.0038, "step": 3370 }, { "epoch": 24.14, "learning_rate": 0.025749559082892417, "loss": 0.0044, "step": 3380 }, { "epoch": 24.21, "learning_rate": 0.025661375661375663, "loss": 0.0035, "step": 3390 }, { "epoch": 24.29, "learning_rate": 0.025573192239858905, "loss": 0.0035, "step": 3400 }, { "epoch": 24.36, "learning_rate": 0.025485008818342154, "loss": 0.0036, "step": 3410 }, { "epoch": 24.43, "learning_rate": 0.025396825396825397, "loss": 0.0035, "step": 3420 }, { "epoch": 24.5, "learning_rate": 0.025308641975308646, "loss": 0.0037, "step": 3430 }, { "epoch": 24.57, "learning_rate": 0.02522045855379189, "loss": 0.0038, "step": 3440 }, { "epoch": 24.64, "learning_rate": 0.025132275132275134, "loss": 0.0042, "step": 3450 }, { "epoch": 24.71, "learning_rate": 0.025044091710758377, "loss": 0.0035, "step": 3460 }, { "epoch": 24.79, "learning_rate": 0.024955908289241623, "loss": 0.0047, "step": 3470 }, { "epoch": 24.86, "learning_rate": 0.02486772486772487, "loss": 0.0041, "step": 3480 }, { "epoch": 24.93, "learning_rate": 0.024779541446208114, "loss": 0.0043, "step": 3490 }, { "epoch": 25.0, "learning_rate": 0.024691358024691357, "loss": 0.0039, "step": 3500 }, { "epoch": 25.07, "learning_rate": 0.024603174603174603, "loss": 0.0041, "step": 3510 }, { "epoch": 25.14, "learning_rate": 0.02451499118165785, "loss": 0.0042, "step": 3520 }, { "epoch": 25.21, "learning_rate": 0.024426807760141094, "loss": 0.0041, "step": 3530 }, { "epoch": 25.29, "learning_rate": 0.02433862433862434, "loss": 0.0037, "step": 3540 }, { "epoch": 25.36, "learning_rate": 0.024250440917107582, "loss": 0.0042, "step": 3550 }, { "epoch": 25.43, "learning_rate": 0.02416225749559083, "loss": 0.0035, "step": 3560 }, { "epoch": 25.5, "learning_rate": 0.024074074074074074, "loss": 0.0042, "step": 3570 }, { "epoch": 25.57, "learning_rate": 0.02398589065255732, "loss": 0.0039, "step": 3580 }, { "epoch": 25.64, "learning_rate": 0.023897707231040566, "loss": 0.0039, "step": 3590 }, { "epoch": 25.71, "learning_rate": 0.023809523809523808, "loss": 0.0035, "step": 3600 }, { "epoch": 25.79, "learning_rate": 0.023721340388007054, "loss": 0.0035, "step": 3610 }, { "epoch": 25.86, "learning_rate": 0.0236331569664903, "loss": 0.0041, "step": 3620 }, { "epoch": 25.93, "learning_rate": 0.023544973544973546, "loss": 0.004, "step": 3630 }, { "epoch": 26.0, "learning_rate": 0.02345679012345679, "loss": 0.0034, "step": 3640 }, { "epoch": 26.07, "learning_rate": 0.023368606701940034, "loss": 0.0039, "step": 3650 }, { "epoch": 26.14, "learning_rate": 0.02328042328042328, "loss": 0.0037, "step": 3660 }, { "epoch": 26.21, "learning_rate": 0.023192239858906526, "loss": 0.0042, "step": 3670 }, { "epoch": 26.29, "learning_rate": 0.02310405643738977, "loss": 0.0034, "step": 3680 }, { "epoch": 26.36, "learning_rate": 0.023015873015873017, "loss": 0.0036, "step": 3690 }, { "epoch": 26.43, "learning_rate": 0.02292768959435626, "loss": 0.0034, "step": 3700 }, { "epoch": 26.5, "learning_rate": 0.022839506172839506, "loss": 0.004, "step": 3710 }, { "epoch": 26.57, "learning_rate": 0.02275132275132275, "loss": 0.0037, "step": 3720 }, { "epoch": 26.64, "learning_rate": 0.022663139329805997, "loss": 0.0039, "step": 3730 }, { "epoch": 26.71, "learning_rate": 0.022574955908289243, "loss": 0.0034, "step": 3740 }, { "epoch": 26.79, "learning_rate": 0.022486772486772486, "loss": 0.0035, "step": 3750 }, { "epoch": 26.86, "learning_rate": 0.02239858906525573, "loss": 0.0035, "step": 3760 }, { "epoch": 26.93, "learning_rate": 0.022310405643738977, "loss": 0.0039, "step": 3770 }, { "epoch": 27.0, "learning_rate": 0.022222222222222223, "loss": 0.0035, "step": 3780 }, { "epoch": 27.07, "learning_rate": 0.02213403880070547, "loss": 0.0038, "step": 3790 }, { "epoch": 27.14, "learning_rate": 0.02204585537918871, "loss": 0.0039, "step": 3800 }, { "epoch": 27.21, "learning_rate": 0.021957671957671957, "loss": 0.0033, "step": 3810 }, { "epoch": 27.29, "learning_rate": 0.021869488536155203, "loss": 0.0035, "step": 3820 }, { "epoch": 27.36, "learning_rate": 0.02178130511463845, "loss": 0.0038, "step": 3830 }, { "epoch": 27.43, "learning_rate": 0.021693121693121695, "loss": 0.0032, "step": 3840 }, { "epoch": 27.5, "learning_rate": 0.021604938271604937, "loss": 0.0042, "step": 3850 }, { "epoch": 27.57, "learning_rate": 0.021516754850088183, "loss": 0.0036, "step": 3860 }, { "epoch": 27.64, "learning_rate": 0.02142857142857143, "loss": 0.0034, "step": 3870 }, { "epoch": 27.71, "learning_rate": 0.021340388007054675, "loss": 0.0037, "step": 3880 }, { "epoch": 27.79, "learning_rate": 0.02125220458553792, "loss": 0.0036, "step": 3890 }, { "epoch": 27.86, "learning_rate": 0.021164021164021163, "loss": 0.0033, "step": 3900 }, { "epoch": 27.93, "learning_rate": 0.02107583774250441, "loss": 0.0035, "step": 3910 }, { "epoch": 28.0, "learning_rate": 0.020987654320987655, "loss": 0.0034, "step": 3920 }, { "epoch": 28.07, "learning_rate": 0.0208994708994709, "loss": 0.0037, "step": 3930 }, { "epoch": 28.14, "learning_rate": 0.020811287477954146, "loss": 0.003, "step": 3940 }, { "epoch": 28.21, "learning_rate": 0.02072310405643739, "loss": 0.0033, "step": 3950 }, { "epoch": 28.29, "learning_rate": 0.020634920634920634, "loss": 0.004, "step": 3960 }, { "epoch": 28.36, "learning_rate": 0.02054673721340388, "loss": 0.0036, "step": 3970 }, { "epoch": 28.43, "learning_rate": 0.020458553791887126, "loss": 0.0034, "step": 3980 }, { "epoch": 28.5, "learning_rate": 0.020370370370370372, "loss": 0.0035, "step": 3990 }, { "epoch": 28.57, "learning_rate": 0.020282186948853614, "loss": 0.0034, "step": 4000 }, { "epoch": 28.64, "learning_rate": 0.02019400352733686, "loss": 0.0035, "step": 4010 }, { "epoch": 28.71, "learning_rate": 0.020105820105820106, "loss": 0.0037, "step": 4020 }, { "epoch": 28.79, "learning_rate": 0.020017636684303352, "loss": 0.0033, "step": 4030 }, { "epoch": 28.86, "learning_rate": 0.019929453262786598, "loss": 0.0037, "step": 4040 }, { "epoch": 28.93, "learning_rate": 0.01984126984126984, "loss": 0.0033, "step": 4050 }, { "epoch": 29.0, "learning_rate": 0.019753086419753086, "loss": 0.0032, "step": 4060 }, { "epoch": 29.07, "learning_rate": 0.019664902998236332, "loss": 0.0034, "step": 4070 }, { "epoch": 29.14, "learning_rate": 0.019576719576719578, "loss": 0.0036, "step": 4080 }, { "epoch": 29.21, "learning_rate": 0.019488536155202824, "loss": 0.0032, "step": 4090 }, { "epoch": 29.29, "learning_rate": 0.019400352733686066, "loss": 0.003, "step": 4100 }, { "epoch": 29.36, "learning_rate": 0.019312169312169312, "loss": 0.0033, "step": 4110 }, { "epoch": 29.43, "learning_rate": 0.019223985890652558, "loss": 0.003, "step": 4120 }, { "epoch": 29.5, "learning_rate": 0.019135802469135803, "loss": 0.0034, "step": 4130 }, { "epoch": 29.57, "learning_rate": 0.01904761904761905, "loss": 0.0032, "step": 4140 }, { "epoch": 29.64, "learning_rate": 0.01895943562610229, "loss": 0.0031, "step": 4150 }, { "epoch": 29.71, "learning_rate": 0.018871252204585538, "loss": 0.0031, "step": 4160 }, { "epoch": 29.79, "learning_rate": 0.018783068783068783, "loss": 0.003, "step": 4170 }, { "epoch": 29.86, "learning_rate": 0.01869488536155203, "loss": 0.0034, "step": 4180 }, { "epoch": 29.93, "learning_rate": 0.01860670194003527, "loss": 0.0037, "step": 4190 }, { "epoch": 30.0, "learning_rate": 0.018518518518518517, "loss": 0.0031, "step": 4200 }, { "epoch": 30.07, "learning_rate": 0.018430335097001763, "loss": 0.0032, "step": 4210 }, { "epoch": 30.14, "learning_rate": 0.01834215167548501, "loss": 0.0032, "step": 4220 }, { "epoch": 30.21, "learning_rate": 0.018253968253968255, "loss": 0.003, "step": 4230 }, { "epoch": 30.29, "learning_rate": 0.018165784832451497, "loss": 0.0033, "step": 4240 }, { "epoch": 30.36, "learning_rate": 0.018077601410934743, "loss": 0.0033, "step": 4250 }, { "epoch": 30.43, "learning_rate": 0.01798941798941799, "loss": 0.003, "step": 4260 }, { "epoch": 30.5, "learning_rate": 0.017901234567901235, "loss": 0.0034, "step": 4270 }, { "epoch": 30.57, "learning_rate": 0.01781305114638448, "loss": 0.0033, "step": 4280 }, { "epoch": 30.64, "learning_rate": 0.017724867724867723, "loss": 0.0036, "step": 4290 }, { "epoch": 30.71, "learning_rate": 0.01763668430335097, "loss": 0.0035, "step": 4300 }, { "epoch": 30.79, "learning_rate": 0.017548500881834215, "loss": 0.0034, "step": 4310 }, { "epoch": 30.86, "learning_rate": 0.01746031746031746, "loss": 0.0034, "step": 4320 }, { "epoch": 30.93, "learning_rate": 0.017372134038800707, "loss": 0.0036, "step": 4330 }, { "epoch": 31.0, "learning_rate": 0.01728395061728395, "loss": 0.0036, "step": 4340 }, { "epoch": 31.07, "learning_rate": 0.017195767195767195, "loss": 0.0035, "step": 4350 }, { "epoch": 31.14, "learning_rate": 0.01710758377425044, "loss": 0.0035, "step": 4360 }, { "epoch": 31.21, "learning_rate": 0.017019400352733687, "loss": 0.0034, "step": 4370 }, { "epoch": 31.29, "learning_rate": 0.016931216931216932, "loss": 0.0035, "step": 4380 }, { "epoch": 31.36, "learning_rate": 0.016843033509700175, "loss": 0.0035, "step": 4390 }, { "epoch": 31.43, "learning_rate": 0.01675485008818342, "loss": 0.0043, "step": 4400 }, { "epoch": 31.5, "learning_rate": 0.016666666666666666, "loss": 0.0033, "step": 4410 }, { "epoch": 31.57, "learning_rate": 0.016578483245149912, "loss": 0.0032, "step": 4420 }, { "epoch": 31.64, "learning_rate": 0.016490299823633158, "loss": 0.003, "step": 4430 }, { "epoch": 31.71, "learning_rate": 0.0164021164021164, "loss": 0.0033, "step": 4440 }, { "epoch": 31.79, "learning_rate": 0.016313932980599646, "loss": 0.0033, "step": 4450 }, { "epoch": 31.86, "learning_rate": 0.016225749559082892, "loss": 0.0033, "step": 4460 }, { "epoch": 31.93, "learning_rate": 0.016137566137566138, "loss": 0.0033, "step": 4470 }, { "epoch": 32.0, "learning_rate": 0.016049382716049384, "loss": 0.0031, "step": 4480 }, { "epoch": 32.07, "learning_rate": 0.015961199294532626, "loss": 0.0031, "step": 4490 }, { "epoch": 32.14, "learning_rate": 0.015873015873015872, "loss": 0.0034, "step": 4500 }, { "epoch": 32.21, "learning_rate": 0.015784832451499118, "loss": 0.0033, "step": 4510 }, { "epoch": 32.29, "learning_rate": 0.015696649029982364, "loss": 0.003, "step": 4520 }, { "epoch": 32.36, "learning_rate": 0.015608465608465608, "loss": 0.0032, "step": 4530 }, { "epoch": 32.43, "learning_rate": 0.015520282186948854, "loss": 0.0031, "step": 4540 }, { "epoch": 32.5, "learning_rate": 0.015432098765432098, "loss": 0.0034, "step": 4550 }, { "epoch": 32.57, "learning_rate": 0.015343915343915344, "loss": 0.0037, "step": 4560 }, { "epoch": 32.64, "learning_rate": 0.01525573192239859, "loss": 0.0031, "step": 4570 }, { "epoch": 32.71, "learning_rate": 0.015167548500881834, "loss": 0.003, "step": 4580 }, { "epoch": 32.79, "learning_rate": 0.01507936507936508, "loss": 0.003, "step": 4590 }, { "epoch": 32.86, "learning_rate": 0.014991181657848324, "loss": 0.0035, "step": 4600 }, { "epoch": 32.93, "learning_rate": 0.01490299823633157, "loss": 0.0031, "step": 4610 }, { "epoch": 33.0, "learning_rate": 0.014814814814814815, "loss": 0.0033, "step": 4620 }, { "epoch": 33.07, "learning_rate": 0.01472663139329806, "loss": 0.0031, "step": 4630 }, { "epoch": 33.14, "learning_rate": 0.014638447971781305, "loss": 0.0032, "step": 4640 }, { "epoch": 33.21, "learning_rate": 0.01455026455026455, "loss": 0.0036, "step": 4650 }, { "epoch": 33.29, "learning_rate": 0.014462081128747795, "loss": 0.0035, "step": 4660 }, { "epoch": 33.36, "learning_rate": 0.014373897707231041, "loss": 0.0033, "step": 4670 }, { "epoch": 33.43, "learning_rate": 0.014285714285714285, "loss": 0.0036, "step": 4680 }, { "epoch": 33.5, "learning_rate": 0.014197530864197531, "loss": 0.003, "step": 4690 }, { "epoch": 33.57, "learning_rate": 0.014109347442680775, "loss": 0.0034, "step": 4700 }, { "epoch": 33.64, "learning_rate": 0.014021164021164021, "loss": 0.0033, "step": 4710 }, { "epoch": 33.71, "learning_rate": 0.013932980599647267, "loss": 0.0033, "step": 4720 }, { "epoch": 33.79, "learning_rate": 0.013844797178130511, "loss": 0.0032, "step": 4730 }, { "epoch": 33.86, "learning_rate": 0.013756613756613757, "loss": 0.0032, "step": 4740 }, { "epoch": 33.93, "learning_rate": 0.013668430335097001, "loss": 0.0033, "step": 4750 }, { "epoch": 34.0, "learning_rate": 0.013580246913580247, "loss": 0.0031, "step": 4760 }, { "epoch": 34.07, "learning_rate": 0.013492063492063493, "loss": 0.0029, "step": 4770 }, { "epoch": 34.14, "learning_rate": 0.013403880070546737, "loss": 0.003, "step": 4780 }, { "epoch": 34.21, "learning_rate": 0.013315696649029983, "loss": 0.003, "step": 4790 }, { "epoch": 34.29, "learning_rate": 0.013227513227513227, "loss": 0.0032, "step": 4800 }, { "epoch": 34.36, "learning_rate": 0.013139329805996473, "loss": 0.0029, "step": 4810 }, { "epoch": 34.43, "learning_rate": 0.013051146384479718, "loss": 0.0034, "step": 4820 }, { "epoch": 34.5, "learning_rate": 0.012962962962962963, "loss": 0.0032, "step": 4830 }, { "epoch": 34.57, "learning_rate": 0.012874779541446208, "loss": 0.0035, "step": 4840 }, { "epoch": 34.64, "learning_rate": 0.012786596119929453, "loss": 0.003, "step": 4850 }, { "epoch": 34.71, "learning_rate": 0.012698412698412698, "loss": 0.0029, "step": 4860 }, { "epoch": 34.79, "learning_rate": 0.012610229276895944, "loss": 0.0031, "step": 4870 }, { "epoch": 34.86, "learning_rate": 0.012522045855379188, "loss": 0.0031, "step": 4880 }, { "epoch": 34.93, "learning_rate": 0.012433862433862434, "loss": 0.0034, "step": 4890 }, { "epoch": 35.0, "learning_rate": 0.012345679012345678, "loss": 0.0028, "step": 4900 }, { "epoch": 35.07, "learning_rate": 0.012257495590828924, "loss": 0.0033, "step": 4910 }, { "epoch": 35.14, "learning_rate": 0.01216931216931217, "loss": 0.0031, "step": 4920 }, { "epoch": 35.21, "learning_rate": 0.012081128747795414, "loss": 0.0032, "step": 4930 }, { "epoch": 35.29, "learning_rate": 0.01199294532627866, "loss": 0.0034, "step": 4940 }, { "epoch": 35.36, "learning_rate": 0.011904761904761904, "loss": 0.0031, "step": 4950 }, { "epoch": 35.43, "learning_rate": 0.01181657848324515, "loss": 0.003, "step": 4960 }, { "epoch": 35.5, "learning_rate": 0.011728395061728396, "loss": 0.003, "step": 4970 }, { "epoch": 35.57, "learning_rate": 0.01164021164021164, "loss": 0.003, "step": 4980 }, { "epoch": 35.64, "learning_rate": 0.011552028218694886, "loss": 0.003, "step": 4990 }, { "epoch": 35.71, "learning_rate": 0.01146384479717813, "loss": 0.0033, "step": 5000 }, { "epoch": 35.79, "learning_rate": 0.011375661375661376, "loss": 0.0032, "step": 5010 }, { "epoch": 35.86, "learning_rate": 0.011287477954144622, "loss": 0.003, "step": 5020 }, { "epoch": 35.93, "learning_rate": 0.011199294532627866, "loss": 0.003, "step": 5030 }, { "epoch": 36.0, "learning_rate": 0.011111111111111112, "loss": 0.0032, "step": 5040 }, { "epoch": 36.07, "learning_rate": 0.011022927689594356, "loss": 0.0034, "step": 5050 }, { "epoch": 36.14, "learning_rate": 0.010934744268077601, "loss": 0.0029, "step": 5060 }, { "epoch": 36.21, "learning_rate": 0.010846560846560847, "loss": 0.0032, "step": 5070 }, { "epoch": 36.29, "learning_rate": 0.010758377425044091, "loss": 0.0029, "step": 5080 }, { "epoch": 36.36, "learning_rate": 0.010670194003527337, "loss": 0.0031, "step": 5090 }, { "epoch": 36.43, "learning_rate": 0.010582010582010581, "loss": 0.0036, "step": 5100 }, { "epoch": 36.5, "learning_rate": 0.010493827160493827, "loss": 0.003, "step": 5110 }, { "epoch": 36.57, "learning_rate": 0.010405643738977073, "loss": 0.0031, "step": 5120 }, { "epoch": 36.64, "learning_rate": 0.010317460317460317, "loss": 0.0031, "step": 5130 }, { "epoch": 36.71, "learning_rate": 0.010229276895943563, "loss": 0.0035, "step": 5140 }, { "epoch": 36.79, "learning_rate": 0.010141093474426807, "loss": 0.003, "step": 5150 }, { "epoch": 36.86, "learning_rate": 0.010052910052910053, "loss": 0.0029, "step": 5160 }, { "epoch": 36.93, "learning_rate": 0.009964726631393299, "loss": 0.0029, "step": 5170 }, { "epoch": 37.0, "learning_rate": 0.009876543209876543, "loss": 0.0031, "step": 5180 }, { "epoch": 37.07, "learning_rate": 0.009788359788359789, "loss": 0.0035, "step": 5190 }, { "epoch": 37.14, "learning_rate": 0.009700176366843033, "loss": 0.0034, "step": 5200 }, { "epoch": 37.21, "learning_rate": 0.009611992945326279, "loss": 0.003, "step": 5210 }, { "epoch": 37.29, "learning_rate": 0.009523809523809525, "loss": 0.0031, "step": 5220 }, { "epoch": 37.36, "learning_rate": 0.009435626102292769, "loss": 0.003, "step": 5230 }, { "epoch": 37.43, "learning_rate": 0.009347442680776015, "loss": 0.0032, "step": 5240 }, { "epoch": 37.5, "learning_rate": 0.009259259259259259, "loss": 0.003, "step": 5250 }, { "epoch": 37.57, "learning_rate": 0.009171075837742505, "loss": 0.0032, "step": 5260 }, { "epoch": 37.64, "learning_rate": 0.009082892416225749, "loss": 0.0029, "step": 5270 }, { "epoch": 37.71, "learning_rate": 0.008994708994708995, "loss": 0.0029, "step": 5280 }, { "epoch": 37.79, "learning_rate": 0.00890652557319224, "loss": 0.0034, "step": 5290 }, { "epoch": 37.86, "learning_rate": 0.008818342151675485, "loss": 0.0029, "step": 5300 }, { "epoch": 37.93, "learning_rate": 0.00873015873015873, "loss": 0.0027, "step": 5310 }, { "epoch": 38.0, "learning_rate": 0.008641975308641974, "loss": 0.0027, "step": 5320 }, { "epoch": 38.07, "learning_rate": 0.00855379188712522, "loss": 0.0029, "step": 5330 }, { "epoch": 38.14, "learning_rate": 0.008465608465608466, "loss": 0.0029, "step": 5340 }, { "epoch": 38.21, "learning_rate": 0.00837742504409171, "loss": 0.0029, "step": 5350 }, { "epoch": 38.29, "learning_rate": 0.008289241622574956, "loss": 0.0029, "step": 5360 }, { "epoch": 38.36, "learning_rate": 0.0082010582010582, "loss": 0.0033, "step": 5370 }, { "epoch": 38.43, "learning_rate": 0.008112874779541446, "loss": 0.0027, "step": 5380 }, { "epoch": 38.5, "learning_rate": 0.008024691358024692, "loss": 0.0033, "step": 5390 }, { "epoch": 38.57, "learning_rate": 0.007936507936507936, "loss": 0.0027, "step": 5400 }, { "epoch": 38.64, "learning_rate": 0.007848324514991182, "loss": 0.0029, "step": 5410 }, { "epoch": 38.71, "learning_rate": 0.007760141093474427, "loss": 0.0034, "step": 5420 }, { "epoch": 38.79, "learning_rate": 0.007671957671957672, "loss": 0.0032, "step": 5430 }, { "epoch": 38.86, "learning_rate": 0.007583774250440917, "loss": 0.0033, "step": 5440 }, { "epoch": 38.93, "learning_rate": 0.007495590828924162, "loss": 0.0031, "step": 5450 }, { "epoch": 39.0, "learning_rate": 0.007407407407407408, "loss": 0.0031, "step": 5460 }, { "epoch": 39.07, "learning_rate": 0.007319223985890653, "loss": 0.0033, "step": 5470 }, { "epoch": 39.14, "learning_rate": 0.007231040564373898, "loss": 0.0029, "step": 5480 }, { "epoch": 39.21, "learning_rate": 0.007142857142857143, "loss": 0.0029, "step": 5490 }, { "epoch": 39.29, "learning_rate": 0.007054673721340388, "loss": 0.003, "step": 5500 }, { "epoch": 39.36, "learning_rate": 0.0069664902998236335, "loss": 0.0026, "step": 5510 }, { "epoch": 39.43, "learning_rate": 0.0068783068783068784, "loss": 0.0029, "step": 5520 }, { "epoch": 39.5, "learning_rate": 0.006790123456790123, "loss": 0.0032, "step": 5530 }, { "epoch": 39.57, "learning_rate": 0.006701940035273368, "loss": 0.0029, "step": 5540 }, { "epoch": 39.64, "learning_rate": 0.006613756613756613, "loss": 0.003, "step": 5550 }, { "epoch": 39.71, "learning_rate": 0.006525573192239859, "loss": 0.0029, "step": 5560 }, { "epoch": 39.79, "learning_rate": 0.006437389770723104, "loss": 0.003, "step": 5570 }, { "epoch": 39.86, "learning_rate": 0.006349206349206349, "loss": 0.003, "step": 5580 }, { "epoch": 39.93, "learning_rate": 0.006261022927689594, "loss": 0.0028, "step": 5590 }, { "epoch": 40.0, "learning_rate": 0.006172839506172839, "loss": 0.0031, "step": 5600 }, { "epoch": 40.07, "learning_rate": 0.006084656084656085, "loss": 0.003, "step": 5610 }, { "epoch": 40.14, "learning_rate": 0.00599647266313933, "loss": 0.003, "step": 5620 }, { "epoch": 40.21, "learning_rate": 0.005908289241622575, "loss": 0.0028, "step": 5630 }, { "epoch": 40.29, "learning_rate": 0.00582010582010582, "loss": 0.0033, "step": 5640 }, { "epoch": 40.36, "learning_rate": 0.005731922398589065, "loss": 0.003, "step": 5650 }, { "epoch": 40.43, "learning_rate": 0.005643738977072311, "loss": 0.0027, "step": 5660 }, { "epoch": 40.5, "learning_rate": 0.005555555555555556, "loss": 0.0031, "step": 5670 }, { "epoch": 40.57, "learning_rate": 0.005467372134038801, "loss": 0.0027, "step": 5680 }, { "epoch": 40.64, "learning_rate": 0.005379188712522046, "loss": 0.0031, "step": 5690 }, { "epoch": 40.71, "learning_rate": 0.005291005291005291, "loss": 0.0032, "step": 5700 }, { "epoch": 40.79, "learning_rate": 0.0052028218694885366, "loss": 0.0031, "step": 5710 }, { "epoch": 40.86, "learning_rate": 0.0051146384479717815, "loss": 0.0028, "step": 5720 }, { "epoch": 40.93, "learning_rate": 0.0050264550264550265, "loss": 0.0031, "step": 5730 }, { "epoch": 41.0, "learning_rate": 0.0049382716049382715, "loss": 0.0029, "step": 5740 }, { "epoch": 41.07, "learning_rate": 0.0048500881834215165, "loss": 0.0029, "step": 5750 }, { "epoch": 41.14, "learning_rate": 0.004761904761904762, "loss": 0.0027, "step": 5760 }, { "epoch": 41.21, "learning_rate": 0.004673721340388007, "loss": 0.0026, "step": 5770 }, { "epoch": 41.29, "learning_rate": 0.004585537918871252, "loss": 0.0033, "step": 5780 }, { "epoch": 41.36, "learning_rate": 0.004497354497354497, "loss": 0.003, "step": 5790 }, { "epoch": 41.43, "learning_rate": 0.004409171075837742, "loss": 0.0029, "step": 5800 }, { "epoch": 41.5, "learning_rate": 0.004320987654320987, "loss": 0.0029, "step": 5810 }, { "epoch": 41.57, "learning_rate": 0.004232804232804233, "loss": 0.0029, "step": 5820 }, { "epoch": 41.64, "learning_rate": 0.004144620811287478, "loss": 0.003, "step": 5830 }, { "epoch": 41.71, "learning_rate": 0.004056437389770723, "loss": 0.0029, "step": 5840 }, { "epoch": 41.79, "learning_rate": 0.003968253968253968, "loss": 0.003, "step": 5850 }, { "epoch": 41.86, "learning_rate": 0.0038800705467372134, "loss": 0.0028, "step": 5860 }, { "epoch": 41.93, "learning_rate": 0.0037918871252204584, "loss": 0.0032, "step": 5870 }, { "epoch": 42.0, "learning_rate": 0.003703703703703704, "loss": 0.0028, "step": 5880 }, { "epoch": 42.07, "learning_rate": 0.003615520282186949, "loss": 0.0029, "step": 5890 }, { "epoch": 42.14, "learning_rate": 0.003527336860670194, "loss": 0.0028, "step": 5900 }, { "epoch": 42.21, "learning_rate": 0.0034391534391534392, "loss": 0.0028, "step": 5910 }, { "epoch": 42.29, "learning_rate": 0.003350970017636684, "loss": 0.0027, "step": 5920 }, { "epoch": 42.36, "learning_rate": 0.0032627865961199296, "loss": 0.003, "step": 5930 }, { "epoch": 42.43, "learning_rate": 0.0031746031746031746, "loss": 0.003, "step": 5940 }, { "epoch": 42.5, "learning_rate": 0.0030864197530864196, "loss": 0.0029, "step": 5950 }, { "epoch": 42.57, "learning_rate": 0.002998236331569665, "loss": 0.0027, "step": 5960 }, { "epoch": 42.64, "learning_rate": 0.00291005291005291, "loss": 0.0027, "step": 5970 }, { "epoch": 42.71, "learning_rate": 0.0028218694885361554, "loss": 0.0031, "step": 5980 }, { "epoch": 42.79, "learning_rate": 0.0027336860670194004, "loss": 0.0029, "step": 5990 }, { "epoch": 42.86, "learning_rate": 0.0026455026455026454, "loss": 0.0028, "step": 6000 }, { "epoch": 42.93, "learning_rate": 0.0025573192239858908, "loss": 0.0029, "step": 6010 }, { "epoch": 43.0, "learning_rate": 0.0024691358024691358, "loss": 0.0028, "step": 6020 }, { "epoch": 43.07, "learning_rate": 0.002380952380952381, "loss": 0.0026, "step": 6030 }, { "epoch": 43.14, "learning_rate": 0.002292768959435626, "loss": 0.0026, "step": 6040 }, { "epoch": 43.21, "learning_rate": 0.002204585537918871, "loss": 0.0028, "step": 6050 }, { "epoch": 43.29, "learning_rate": 0.0021164021164021165, "loss": 0.003, "step": 6060 }, { "epoch": 43.36, "learning_rate": 0.0020282186948853615, "loss": 0.003, "step": 6070 }, { "epoch": 43.43, "learning_rate": 0.0019400352733686067, "loss": 0.0029, "step": 6080 }, { "epoch": 43.5, "learning_rate": 0.001851851851851852, "loss": 0.0028, "step": 6090 }, { "epoch": 43.57, "learning_rate": 0.001763668430335097, "loss": 0.0028, "step": 6100 }, { "epoch": 43.64, "learning_rate": 0.001675485008818342, "loss": 0.0027, "step": 6110 }, { "epoch": 43.71, "learning_rate": 0.0015873015873015873, "loss": 0.003, "step": 6120 }, { "epoch": 43.79, "learning_rate": 0.0014991181657848325, "loss": 0.0028, "step": 6130 }, { "epoch": 43.86, "learning_rate": 0.0014109347442680777, "loss": 0.003, "step": 6140 }, { "epoch": 43.93, "learning_rate": 0.0013227513227513227, "loss": 0.0028, "step": 6150 }, { "epoch": 44.0, "learning_rate": 0.0012345679012345679, "loss": 0.003, "step": 6160 }, { "epoch": 44.07, "learning_rate": 0.001146384479717813, "loss": 0.0028, "step": 6170 }, { "epoch": 44.14, "learning_rate": 0.0010582010582010583, "loss": 0.0028, "step": 6180 }, { "epoch": 44.21, "learning_rate": 0.0009700176366843034, "loss": 0.0032, "step": 6190 }, { "epoch": 44.29, "learning_rate": 0.0008818342151675485, "loss": 0.0027, "step": 6200 }, { "epoch": 44.36, "learning_rate": 0.0007936507936507937, "loss": 0.003, "step": 6210 }, { "epoch": 44.43, "learning_rate": 0.0007054673721340388, "loss": 0.0026, "step": 6220 }, { "epoch": 44.5, "learning_rate": 0.0006172839506172839, "loss": 0.0031, "step": 6230 }, { "epoch": 44.57, "learning_rate": 0.0005291005291005291, "loss": 0.0028, "step": 6240 }, { "epoch": 44.64, "learning_rate": 0.0004409171075837742, "loss": 0.003, "step": 6250 }, { "epoch": 44.71, "learning_rate": 0.0003527336860670194, "loss": 0.0026, "step": 6260 }, { "epoch": 44.79, "learning_rate": 0.00026455026455026457, "loss": 0.003, "step": 6270 }, { "epoch": 44.86, "learning_rate": 0.0001763668430335097, "loss": 0.0028, "step": 6280 }, { "epoch": 44.93, "learning_rate": 8.818342151675486e-05, "loss": 0.0029, "step": 6290 }, { "epoch": 45.0, "learning_rate": 0.0, "loss": 0.0026, "step": 6300 } ], "max_steps": 6300, "num_train_epochs": 45, "total_flos": 7.685698226686525e+18, "trial_name": null, "trial_params": null }