{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0297482837528604, "eval_steps": 500, "global_step": 3000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4e-05, "loss": 2.6282, "step": 10 }, { "epoch": 0.01, "learning_rate": 8e-05, "loss": 1.9839, "step": 20 }, { "epoch": 0.01, "learning_rate": 0.00012, "loss": 1.5426, "step": 30 }, { "epoch": 0.01, "learning_rate": 0.00016, "loss": 1.4084, "step": 40 }, { "epoch": 0.02, "learning_rate": 0.0002, "loss": 1.387, "step": 50 }, { "epoch": 0.02, "learning_rate": 0.0001993220338983051, "loss": 1.3427, "step": 60 }, { "epoch": 0.02, "learning_rate": 0.00019864406779661017, "loss": 1.3838, "step": 70 }, { "epoch": 0.03, "learning_rate": 0.00019796610169491526, "loss": 1.3672, "step": 80 }, { "epoch": 0.03, "learning_rate": 0.00019728813559322035, "loss": 1.3454, "step": 90 }, { "epoch": 0.03, "learning_rate": 0.00019661016949152545, "loss": 1.2267, "step": 100 }, { "epoch": 0.04, "learning_rate": 0.0001959322033898305, "loss": 1.2541, "step": 110 }, { "epoch": 0.04, "learning_rate": 0.0001952542372881356, "loss": 1.3141, "step": 120 }, { "epoch": 0.04, "learning_rate": 0.0001945762711864407, "loss": 1.3119, "step": 130 }, { "epoch": 0.05, "learning_rate": 0.0001938983050847458, "loss": 1.2873, "step": 140 }, { "epoch": 0.05, "learning_rate": 0.00019322033898305085, "loss": 1.2628, "step": 150 }, { "epoch": 0.05, "learning_rate": 0.00019254237288135595, "loss": 1.3217, "step": 160 }, { "epoch": 0.06, "learning_rate": 0.000191864406779661, "loss": 1.302, "step": 170 }, { "epoch": 0.06, "learning_rate": 0.0001911864406779661, "loss": 1.2469, "step": 180 }, { "epoch": 0.07, "learning_rate": 0.0001905084745762712, "loss": 1.2582, "step": 190 }, { "epoch": 0.07, "learning_rate": 0.0001898305084745763, "loss": 1.2481, "step": 200 }, { "epoch": 0.07, "learning_rate": 0.00018915254237288136, "loss": 1.198, "step": 210 }, { "epoch": 0.08, "learning_rate": 0.00018847457627118645, "loss": 1.2184, "step": 220 }, { "epoch": 0.08, "learning_rate": 0.00018779661016949151, "loss": 1.2307, "step": 230 }, { "epoch": 0.08, "learning_rate": 0.00018711864406779663, "loss": 1.2281, "step": 240 }, { "epoch": 0.09, "learning_rate": 0.0001864406779661017, "loss": 1.2761, "step": 250 }, { "epoch": 0.09, "learning_rate": 0.0001857627118644068, "loss": 1.2587, "step": 260 }, { "epoch": 0.09, "learning_rate": 0.00018508474576271186, "loss": 1.2824, "step": 270 }, { "epoch": 0.1, "learning_rate": 0.00018440677966101695, "loss": 1.2988, "step": 280 }, { "epoch": 0.1, "learning_rate": 0.00018372881355932204, "loss": 1.2746, "step": 290 }, { "epoch": 0.1, "learning_rate": 0.00018305084745762714, "loss": 1.2088, "step": 300 }, { "epoch": 0.11, "learning_rate": 0.0001823728813559322, "loss": 1.1606, "step": 310 }, { "epoch": 0.11, "learning_rate": 0.0001816949152542373, "loss": 1.2578, "step": 320 }, { "epoch": 0.11, "learning_rate": 0.00018101694915254239, "loss": 1.2815, "step": 330 }, { "epoch": 0.12, "learning_rate": 0.00018033898305084748, "loss": 1.2771, "step": 340 }, { "epoch": 0.12, "learning_rate": 0.00017966101694915257, "loss": 1.2231, "step": 350 }, { "epoch": 0.12, "learning_rate": 0.00017898305084745764, "loss": 1.2233, "step": 360 }, { "epoch": 0.13, "learning_rate": 0.00017830508474576273, "loss": 1.2291, "step": 370 }, { "epoch": 0.13, "learning_rate": 0.0001776271186440678, "loss": 1.2341, "step": 380 }, { "epoch": 0.13, "learning_rate": 0.0001769491525423729, "loss": 1.1777, "step": 390 }, { "epoch": 0.14, "learning_rate": 0.00017627118644067798, "loss": 1.2878, "step": 400 }, { "epoch": 0.14, "learning_rate": 0.00017559322033898307, "loss": 1.1953, "step": 410 }, { "epoch": 0.14, "learning_rate": 0.00017491525423728814, "loss": 1.1945, "step": 420 }, { "epoch": 0.15, "learning_rate": 0.00017423728813559323, "loss": 1.1931, "step": 430 }, { "epoch": 0.15, "learning_rate": 0.0001735593220338983, "loss": 1.1969, "step": 440 }, { "epoch": 0.15, "learning_rate": 0.00017288135593220342, "loss": 1.3003, "step": 450 }, { "epoch": 0.16, "learning_rate": 0.00017220338983050848, "loss": 1.1747, "step": 460 }, { "epoch": 0.16, "learning_rate": 0.00017152542372881357, "loss": 1.2155, "step": 470 }, { "epoch": 0.16, "learning_rate": 0.00017084745762711864, "loss": 1.2154, "step": 480 }, { "epoch": 0.17, "learning_rate": 0.00017016949152542373, "loss": 1.2085, "step": 490 }, { "epoch": 0.17, "learning_rate": 0.00016949152542372882, "loss": 1.1937, "step": 500 }, { "epoch": 0.18, "learning_rate": 0.00016881355932203392, "loss": 1.2409, "step": 510 }, { "epoch": 0.18, "learning_rate": 0.00016813559322033898, "loss": 1.2248, "step": 520 }, { "epoch": 0.18, "learning_rate": 0.00016745762711864408, "loss": 1.2207, "step": 530 }, { "epoch": 0.19, "learning_rate": 0.00016677966101694914, "loss": 1.1788, "step": 540 }, { "epoch": 0.19, "learning_rate": 0.00016610169491525423, "loss": 1.169, "step": 550 }, { "epoch": 0.19, "learning_rate": 0.00016542372881355933, "loss": 1.1821, "step": 560 }, { "epoch": 0.2, "learning_rate": 0.00016474576271186442, "loss": 1.2172, "step": 570 }, { "epoch": 0.2, "learning_rate": 0.00016406779661016948, "loss": 1.1673, "step": 580 }, { "epoch": 0.2, "learning_rate": 0.00016338983050847458, "loss": 1.156, "step": 590 }, { "epoch": 0.21, "learning_rate": 0.00016271186440677967, "loss": 1.1926, "step": 600 }, { "epoch": 0.21, "learning_rate": 0.00016203389830508476, "loss": 1.2045, "step": 610 }, { "epoch": 0.21, "learning_rate": 0.00016135593220338985, "loss": 1.2378, "step": 620 }, { "epoch": 0.22, "learning_rate": 0.00016067796610169492, "loss": 1.1616, "step": 630 }, { "epoch": 0.22, "learning_rate": 0.00016, "loss": 1.2035, "step": 640 }, { "epoch": 0.22, "learning_rate": 0.00015932203389830508, "loss": 1.1623, "step": 650 }, { "epoch": 0.23, "learning_rate": 0.0001586440677966102, "loss": 1.2262, "step": 660 }, { "epoch": 0.23, "learning_rate": 0.00015796610169491526, "loss": 1.1931, "step": 670 }, { "epoch": 0.23, "learning_rate": 0.00015728813559322036, "loss": 1.1441, "step": 680 }, { "epoch": 0.24, "learning_rate": 0.00015661016949152542, "loss": 1.16, "step": 690 }, { "epoch": 0.24, "learning_rate": 0.00015593220338983051, "loss": 1.1955, "step": 700 }, { "epoch": 0.24, "learning_rate": 0.0001552542372881356, "loss": 1.1956, "step": 710 }, { "epoch": 0.25, "learning_rate": 0.0001545762711864407, "loss": 1.164, "step": 720 }, { "epoch": 0.25, "learning_rate": 0.00015389830508474577, "loss": 1.2317, "step": 730 }, { "epoch": 0.25, "learning_rate": 0.00015322033898305086, "loss": 1.1596, "step": 740 }, { "epoch": 0.26, "learning_rate": 0.00015254237288135592, "loss": 1.1851, "step": 750 }, { "epoch": 0.26, "learning_rate": 0.00015186440677966102, "loss": 1.167, "step": 760 }, { "epoch": 0.26, "learning_rate": 0.0001511864406779661, "loss": 1.1623, "step": 770 }, { "epoch": 0.27, "learning_rate": 0.0001505084745762712, "loss": 1.1974, "step": 780 }, { "epoch": 0.27, "learning_rate": 0.00014983050847457627, "loss": 1.1948, "step": 790 }, { "epoch": 0.27, "learning_rate": 0.00014915254237288136, "loss": 1.1842, "step": 800 }, { "epoch": 0.28, "learning_rate": 0.00014847457627118645, "loss": 1.1257, "step": 810 }, { "epoch": 0.28, "learning_rate": 0.00014779661016949154, "loss": 1.1557, "step": 820 }, { "epoch": 0.28, "learning_rate": 0.0001471186440677966, "loss": 1.1412, "step": 830 }, { "epoch": 0.29, "learning_rate": 0.0001464406779661017, "loss": 1.2058, "step": 840 }, { "epoch": 0.29, "learning_rate": 0.00014576271186440677, "loss": 1.249, "step": 850 }, { "epoch": 0.3, "learning_rate": 0.00014508474576271186, "loss": 1.1608, "step": 860 }, { "epoch": 0.3, "learning_rate": 0.00014440677966101695, "loss": 1.1679, "step": 870 }, { "epoch": 0.3, "learning_rate": 0.00014372881355932205, "loss": 1.175, "step": 880 }, { "epoch": 0.31, "learning_rate": 0.00014305084745762714, "loss": 1.1369, "step": 890 }, { "epoch": 0.31, "learning_rate": 0.0001423728813559322, "loss": 1.2629, "step": 900 }, { "epoch": 0.31, "learning_rate": 0.0001416949152542373, "loss": 1.1771, "step": 910 }, { "epoch": 0.32, "learning_rate": 0.0001410169491525424, "loss": 1.1791, "step": 920 }, { "epoch": 0.32, "learning_rate": 0.00014033898305084748, "loss": 1.1478, "step": 930 }, { "epoch": 0.32, "learning_rate": 0.00013966101694915255, "loss": 1.1767, "step": 940 }, { "epoch": 0.33, "learning_rate": 0.00013898305084745764, "loss": 1.1621, "step": 950 }, { "epoch": 0.33, "learning_rate": 0.0001383050847457627, "loss": 1.1936, "step": 960 }, { "epoch": 0.33, "learning_rate": 0.0001376271186440678, "loss": 1.1556, "step": 970 }, { "epoch": 0.34, "learning_rate": 0.0001369491525423729, "loss": 1.2168, "step": 980 }, { "epoch": 0.34, "learning_rate": 0.00013627118644067798, "loss": 1.1899, "step": 990 }, { "epoch": 0.34, "learning_rate": 0.00013559322033898305, "loss": 1.0627, "step": 1000 }, { "epoch": 0.35, "learning_rate": 0.00013491525423728814, "loss": 1.1734, "step": 1010 }, { "epoch": 0.35, "learning_rate": 0.0001342372881355932, "loss": 1.1362, "step": 1020 }, { "epoch": 0.35, "learning_rate": 0.00013355932203389833, "loss": 1.1481, "step": 1030 }, { "epoch": 0.36, "learning_rate": 0.0001328813559322034, "loss": 1.1567, "step": 1040 }, { "epoch": 0.36, "learning_rate": 0.00013220338983050849, "loss": 1.1406, "step": 1050 }, { "epoch": 0.36, "learning_rate": 0.00013152542372881355, "loss": 1.1228, "step": 1060 }, { "epoch": 0.37, "learning_rate": 0.00013084745762711864, "loss": 1.1357, "step": 1070 }, { "epoch": 0.37, "learning_rate": 0.00013016949152542374, "loss": 1.1677, "step": 1080 }, { "epoch": 0.37, "learning_rate": 0.00012949152542372883, "loss": 1.1726, "step": 1090 }, { "epoch": 0.38, "learning_rate": 0.0001288135593220339, "loss": 1.1771, "step": 1100 }, { "epoch": 0.38, "learning_rate": 0.000128135593220339, "loss": 1.1527, "step": 1110 }, { "epoch": 0.38, "learning_rate": 0.00012745762711864405, "loss": 1.1748, "step": 1120 }, { "epoch": 0.39, "learning_rate": 0.00012677966101694917, "loss": 1.2424, "step": 1130 }, { "epoch": 0.39, "learning_rate": 0.00012610169491525426, "loss": 1.1711, "step": 1140 }, { "epoch": 0.39, "learning_rate": 0.00012542372881355933, "loss": 1.1538, "step": 1150 }, { "epoch": 0.4, "learning_rate": 0.00012474576271186442, "loss": 1.0941, "step": 1160 }, { "epoch": 0.4, "learning_rate": 0.0001240677966101695, "loss": 1.2163, "step": 1170 }, { "epoch": 0.41, "learning_rate": 0.00012338983050847458, "loss": 1.1581, "step": 1180 }, { "epoch": 0.41, "learning_rate": 0.00012271186440677967, "loss": 1.1345, "step": 1190 }, { "epoch": 0.41, "learning_rate": 0.00012203389830508477, "loss": 1.1448, "step": 1200 }, { "epoch": 0.42, "learning_rate": 0.00012135593220338983, "loss": 1.1968, "step": 1210 }, { "epoch": 0.42, "learning_rate": 0.00012067796610169492, "loss": 1.1306, "step": 1220 }, { "epoch": 0.42, "learning_rate": 0.00012, "loss": 1.1125, "step": 1230 }, { "epoch": 0.43, "learning_rate": 0.0001193220338983051, "loss": 1.1354, "step": 1240 }, { "epoch": 0.43, "learning_rate": 0.00011864406779661017, "loss": 1.1113, "step": 1250 }, { "epoch": 0.43, "learning_rate": 0.00011796610169491527, "loss": 1.1472, "step": 1260 }, { "epoch": 0.44, "learning_rate": 0.00011728813559322033, "loss": 1.1011, "step": 1270 }, { "epoch": 0.44, "learning_rate": 0.00011661016949152544, "loss": 1.1174, "step": 1280 }, { "epoch": 0.44, "learning_rate": 0.0001159322033898305, "loss": 1.1574, "step": 1290 }, { "epoch": 0.45, "learning_rate": 0.0001152542372881356, "loss": 1.1016, "step": 1300 }, { "epoch": 0.45, "learning_rate": 0.00011457627118644068, "loss": 1.1353, "step": 1310 }, { "epoch": 0.45, "learning_rate": 0.00011389830508474577, "loss": 1.0817, "step": 1320 }, { "epoch": 0.46, "learning_rate": 0.00011322033898305085, "loss": 1.1467, "step": 1330 }, { "epoch": 0.46, "learning_rate": 0.00011254237288135594, "loss": 1.1301, "step": 1340 }, { "epoch": 0.46, "learning_rate": 0.00011186440677966102, "loss": 1.1716, "step": 1350 }, { "epoch": 0.47, "learning_rate": 0.00011118644067796611, "loss": 1.1534, "step": 1360 }, { "epoch": 0.47, "learning_rate": 0.00011050847457627118, "loss": 1.1629, "step": 1370 }, { "epoch": 0.47, "learning_rate": 0.00010983050847457627, "loss": 1.1446, "step": 1380 }, { "epoch": 0.48, "learning_rate": 0.00010915254237288135, "loss": 1.1448, "step": 1390 }, { "epoch": 0.48, "learning_rate": 0.00010847457627118644, "loss": 1.1453, "step": 1400 }, { "epoch": 0.48, "learning_rate": 0.00010779661016949153, "loss": 1.1202, "step": 1410 }, { "epoch": 0.49, "learning_rate": 0.00010711864406779661, "loss": 1.1288, "step": 1420 }, { "epoch": 0.49, "learning_rate": 0.0001064406779661017, "loss": 1.1126, "step": 1430 }, { "epoch": 0.49, "learning_rate": 0.00010576271186440679, "loss": 1.1432, "step": 1440 }, { "epoch": 0.5, "learning_rate": 0.00010508474576271188, "loss": 1.133, "step": 1450 }, { "epoch": 0.5, "learning_rate": 0.00010440677966101696, "loss": 1.1793, "step": 1460 }, { "epoch": 0.5, "learning_rate": 0.00010372881355932205, "loss": 1.1938, "step": 1470 }, { "epoch": 0.51, "learning_rate": 0.00010305084745762712, "loss": 1.1444, "step": 1480 }, { "epoch": 0.51, "learning_rate": 0.00010237288135593222, "loss": 1.1301, "step": 1490 }, { "epoch": 0.51, "learning_rate": 0.00010169491525423729, "loss": 1.1332, "step": 1500 }, { "epoch": 0.52, "learning_rate": 0.00010101694915254238, "loss": 1.1368, "step": 1510 }, { "epoch": 0.52, "learning_rate": 0.00010033898305084746, "loss": 1.0965, "step": 1520 }, { "epoch": 0.53, "learning_rate": 9.966101694915255e-05, "loss": 1.168, "step": 1530 }, { "epoch": 0.53, "learning_rate": 9.898305084745763e-05, "loss": 1.097, "step": 1540 }, { "epoch": 0.53, "learning_rate": 9.830508474576272e-05, "loss": 1.1166, "step": 1550 }, { "epoch": 0.54, "learning_rate": 9.76271186440678e-05, "loss": 1.1539, "step": 1560 }, { "epoch": 0.54, "learning_rate": 9.69491525423729e-05, "loss": 1.1388, "step": 1570 }, { "epoch": 0.54, "learning_rate": 9.627118644067797e-05, "loss": 1.1898, "step": 1580 }, { "epoch": 0.55, "learning_rate": 9.559322033898305e-05, "loss": 1.155, "step": 1590 }, { "epoch": 0.55, "learning_rate": 9.491525423728815e-05, "loss": 1.1212, "step": 1600 }, { "epoch": 0.55, "learning_rate": 9.423728813559322e-05, "loss": 1.0912, "step": 1610 }, { "epoch": 0.56, "learning_rate": 9.355932203389832e-05, "loss": 1.1017, "step": 1620 }, { "epoch": 0.56, "learning_rate": 9.28813559322034e-05, "loss": 1.1294, "step": 1630 }, { "epoch": 0.56, "learning_rate": 9.220338983050847e-05, "loss": 1.1105, "step": 1640 }, { "epoch": 0.57, "learning_rate": 9.152542372881357e-05, "loss": 1.1159, "step": 1650 }, { "epoch": 0.57, "learning_rate": 9.084745762711865e-05, "loss": 1.1195, "step": 1660 }, { "epoch": 0.57, "learning_rate": 9.016949152542374e-05, "loss": 1.1514, "step": 1670 }, { "epoch": 0.58, "learning_rate": 8.949152542372882e-05, "loss": 1.1156, "step": 1680 }, { "epoch": 0.58, "learning_rate": 8.88135593220339e-05, "loss": 1.1175, "step": 1690 }, { "epoch": 0.58, "learning_rate": 8.813559322033899e-05, "loss": 1.1928, "step": 1700 }, { "epoch": 0.59, "learning_rate": 8.745762711864407e-05, "loss": 1.0526, "step": 1710 }, { "epoch": 0.59, "learning_rate": 8.677966101694915e-05, "loss": 1.0861, "step": 1720 }, { "epoch": 0.59, "learning_rate": 8.610169491525424e-05, "loss": 1.185, "step": 1730 }, { "epoch": 0.6, "learning_rate": 8.542372881355932e-05, "loss": 1.132, "step": 1740 }, { "epoch": 0.6, "learning_rate": 8.474576271186441e-05, "loss": 1.1164, "step": 1750 }, { "epoch": 0.6, "learning_rate": 8.406779661016949e-05, "loss": 1.1232, "step": 1760 }, { "epoch": 0.61, "learning_rate": 8.338983050847457e-05, "loss": 1.0915, "step": 1770 }, { "epoch": 0.61, "learning_rate": 8.271186440677966e-05, "loss": 1.1382, "step": 1780 }, { "epoch": 0.61, "learning_rate": 8.203389830508474e-05, "loss": 1.0837, "step": 1790 }, { "epoch": 0.62, "learning_rate": 8.135593220338983e-05, "loss": 1.1287, "step": 1800 }, { "epoch": 0.62, "learning_rate": 8.067796610169493e-05, "loss": 1.1251, "step": 1810 }, { "epoch": 0.62, "learning_rate": 8e-05, "loss": 1.1527, "step": 1820 }, { "epoch": 0.63, "learning_rate": 7.93220338983051e-05, "loss": 1.1309, "step": 1830 }, { "epoch": 0.63, "learning_rate": 7.864406779661018e-05, "loss": 1.1185, "step": 1840 }, { "epoch": 0.64, "learning_rate": 7.796610169491526e-05, "loss": 1.1131, "step": 1850 }, { "epoch": 0.64, "learning_rate": 7.728813559322035e-05, "loss": 1.1497, "step": 1860 }, { "epoch": 0.64, "learning_rate": 7.661016949152543e-05, "loss": 1.2128, "step": 1870 }, { "epoch": 0.65, "learning_rate": 7.593220338983051e-05, "loss": 1.0872, "step": 1880 }, { "epoch": 0.65, "learning_rate": 7.52542372881356e-05, "loss": 1.113, "step": 1890 }, { "epoch": 0.65, "learning_rate": 7.457627118644068e-05, "loss": 1.0717, "step": 1900 }, { "epoch": 0.66, "learning_rate": 7.389830508474577e-05, "loss": 1.0703, "step": 1910 }, { "epoch": 0.66, "learning_rate": 7.322033898305085e-05, "loss": 1.1531, "step": 1920 }, { "epoch": 0.66, "learning_rate": 7.254237288135593e-05, "loss": 1.1379, "step": 1930 }, { "epoch": 0.67, "learning_rate": 7.186440677966102e-05, "loss": 1.1474, "step": 1940 }, { "epoch": 0.67, "learning_rate": 7.11864406779661e-05, "loss": 1.0944, "step": 1950 }, { "epoch": 0.67, "learning_rate": 7.05084745762712e-05, "loss": 1.0956, "step": 1960 }, { "epoch": 0.68, "learning_rate": 6.983050847457627e-05, "loss": 1.0674, "step": 1970 }, { "epoch": 0.68, "learning_rate": 6.915254237288135e-05, "loss": 1.0851, "step": 1980 }, { "epoch": 0.68, "learning_rate": 6.847457627118645e-05, "loss": 1.1439, "step": 1990 }, { "epoch": 0.69, "learning_rate": 6.779661016949152e-05, "loss": 1.0869, "step": 2000 }, { "epoch": 0.69, "learning_rate": 6.71186440677966e-05, "loss": 1.1284, "step": 2010 }, { "epoch": 0.69, "learning_rate": 6.64406779661017e-05, "loss": 1.0684, "step": 2020 }, { "epoch": 0.7, "learning_rate": 6.576271186440678e-05, "loss": 1.1464, "step": 2030 }, { "epoch": 0.7, "learning_rate": 6.508474576271187e-05, "loss": 1.0331, "step": 2040 }, { "epoch": 0.7, "learning_rate": 6.440677966101695e-05, "loss": 1.0975, "step": 2050 }, { "epoch": 0.71, "learning_rate": 6.379661016949154e-05, "loss": 1.0438, "step": 2060 }, { "epoch": 0.71, "learning_rate": 6.311864406779661e-05, "loss": 1.146, "step": 2070 }, { "epoch": 0.71, "learning_rate": 6.244067796610171e-05, "loss": 1.135, "step": 2080 }, { "epoch": 0.72, "learning_rate": 6.176271186440679e-05, "loss": 1.0624, "step": 2090 }, { "epoch": 0.72, "learning_rate": 6.108474576271187e-05, "loss": 1.0677, "step": 2100 }, { "epoch": 0.72, "learning_rate": 6.040677966101695e-05, "loss": 1.0814, "step": 2110 }, { "epoch": 0.73, "learning_rate": 5.972881355932204e-05, "loss": 1.1558, "step": 2120 }, { "epoch": 0.73, "learning_rate": 5.905084745762712e-05, "loss": 1.0757, "step": 2130 }, { "epoch": 0.73, "learning_rate": 5.837288135593221e-05, "loss": 1.0827, "step": 2140 }, { "epoch": 0.74, "learning_rate": 5.7694915254237295e-05, "loss": 1.1251, "step": 2150 }, { "epoch": 0.74, "learning_rate": 5.7016949152542374e-05, "loss": 1.0687, "step": 2160 }, { "epoch": 0.74, "learning_rate": 5.633898305084746e-05, "loss": 1.0848, "step": 2170 }, { "epoch": 0.75, "learning_rate": 5.5661016949152545e-05, "loss": 1.1592, "step": 2180 }, { "epoch": 0.75, "learning_rate": 5.498305084745763e-05, "loss": 1.0944, "step": 2190 }, { "epoch": 0.76, "learning_rate": 5.430508474576271e-05, "loss": 1.101, "step": 2200 }, { "epoch": 0.76, "learning_rate": 5.3627118644067796e-05, "loss": 1.0942, "step": 2210 }, { "epoch": 0.76, "learning_rate": 5.294915254237288e-05, "loss": 1.0955, "step": 2220 }, { "epoch": 0.77, "learning_rate": 5.227118644067797e-05, "loss": 1.0917, "step": 2230 }, { "epoch": 0.77, "learning_rate": 5.1593220338983054e-05, "loss": 1.1238, "step": 2240 }, { "epoch": 0.77, "learning_rate": 5.091525423728813e-05, "loss": 1.0737, "step": 2250 }, { "epoch": 0.78, "learning_rate": 5.023728813559322e-05, "loss": 1.0966, "step": 2260 }, { "epoch": 0.78, "learning_rate": 4.955932203389831e-05, "loss": 1.0809, "step": 2270 }, { "epoch": 0.78, "learning_rate": 4.888135593220339e-05, "loss": 1.0894, "step": 2280 }, { "epoch": 0.79, "learning_rate": 4.8203389830508476e-05, "loss": 1.1213, "step": 2290 }, { "epoch": 0.79, "learning_rate": 4.752542372881356e-05, "loss": 1.0784, "step": 2300 }, { "epoch": 0.79, "learning_rate": 4.684745762711865e-05, "loss": 1.123, "step": 2310 }, { "epoch": 0.8, "learning_rate": 4.6169491525423734e-05, "loss": 1.1118, "step": 2320 }, { "epoch": 0.8, "learning_rate": 4.549152542372881e-05, "loss": 1.1142, "step": 2330 }, { "epoch": 0.8, "learning_rate": 4.48135593220339e-05, "loss": 1.1157, "step": 2340 }, { "epoch": 0.81, "learning_rate": 4.4135593220338984e-05, "loss": 1.1249, "step": 2350 }, { "epoch": 0.81, "learning_rate": 4.345762711864407e-05, "loss": 1.0925, "step": 2360 }, { "epoch": 0.81, "learning_rate": 4.277966101694915e-05, "loss": 1.1079, "step": 2370 }, { "epoch": 0.82, "learning_rate": 4.210169491525424e-05, "loss": 1.119, "step": 2380 }, { "epoch": 0.82, "learning_rate": 4.142372881355933e-05, "loss": 1.0334, "step": 2390 }, { "epoch": 0.82, "learning_rate": 4.0745762711864414e-05, "loss": 1.0725, "step": 2400 }, { "epoch": 0.83, "learning_rate": 4.006779661016949e-05, "loss": 1.0998, "step": 2410 }, { "epoch": 0.83, "learning_rate": 3.938983050847458e-05, "loss": 1.107, "step": 2420 }, { "epoch": 0.83, "learning_rate": 3.8711864406779664e-05, "loss": 1.1407, "step": 2430 }, { "epoch": 0.84, "learning_rate": 3.803389830508475e-05, "loss": 1.13, "step": 2440 }, { "epoch": 0.84, "learning_rate": 3.735593220338983e-05, "loss": 1.1231, "step": 2450 }, { "epoch": 0.84, "learning_rate": 3.6677966101694915e-05, "loss": 1.0928, "step": 2460 }, { "epoch": 0.85, "learning_rate": 3.6e-05, "loss": 1.0743, "step": 2470 }, { "epoch": 0.85, "learning_rate": 3.532203389830509e-05, "loss": 1.1144, "step": 2480 }, { "epoch": 0.85, "learning_rate": 3.4644067796610166e-05, "loss": 1.0636, "step": 2490 }, { "epoch": 0.86, "learning_rate": 3.396610169491525e-05, "loss": 1.1148, "step": 2500 }, { "epoch": 0.86, "learning_rate": 3.3288135593220344e-05, "loss": 1.0876, "step": 2510 }, { "epoch": 0.86, "learning_rate": 3.261016949152543e-05, "loss": 1.1073, "step": 2520 }, { "epoch": 0.87, "learning_rate": 3.193220338983051e-05, "loss": 1.0801, "step": 2530 }, { "epoch": 0.87, "learning_rate": 3.1254237288135595e-05, "loss": 1.0995, "step": 2540 }, { "epoch": 0.88, "learning_rate": 3.057627118644068e-05, "loss": 1.1142, "step": 2550 }, { "epoch": 0.88, "learning_rate": 2.9898305084745763e-05, "loss": 1.0569, "step": 2560 }, { "epoch": 0.88, "learning_rate": 2.922033898305085e-05, "loss": 1.0937, "step": 2570 }, { "epoch": 0.89, "learning_rate": 2.854237288135593e-05, "loss": 1.0893, "step": 2580 }, { "epoch": 0.89, "learning_rate": 2.7864406779661017e-05, "loss": 1.0466, "step": 2590 }, { "epoch": 0.89, "learning_rate": 2.7186440677966103e-05, "loss": 1.0828, "step": 2600 }, { "epoch": 0.9, "learning_rate": 2.6508474576271186e-05, "loss": 1.0123, "step": 2610 }, { "epoch": 0.9, "learning_rate": 2.583050847457627e-05, "loss": 1.0969, "step": 2620 }, { "epoch": 0.9, "learning_rate": 2.5152542372881354e-05, "loss": 1.049, "step": 2630 }, { "epoch": 0.91, "learning_rate": 2.4474576271186443e-05, "loss": 1.0377, "step": 2640 }, { "epoch": 0.91, "learning_rate": 2.3796610169491526e-05, "loss": 1.103, "step": 2650 }, { "epoch": 0.91, "learning_rate": 2.311864406779661e-05, "loss": 1.0602, "step": 2660 }, { "epoch": 0.92, "learning_rate": 2.2440677966101694e-05, "loss": 1.112, "step": 2670 }, { "epoch": 0.92, "learning_rate": 2.1762711864406783e-05, "loss": 1.0804, "step": 2680 }, { "epoch": 0.92, "learning_rate": 2.1084745762711866e-05, "loss": 1.1136, "step": 2690 }, { "epoch": 0.93, "learning_rate": 2.040677966101695e-05, "loss": 1.064, "step": 2700 }, { "epoch": 0.93, "learning_rate": 1.9728813559322034e-05, "loss": 1.1352, "step": 2710 }, { "epoch": 0.93, "learning_rate": 1.905084745762712e-05, "loss": 1.0464, "step": 2720 }, { "epoch": 0.94, "learning_rate": 1.8372881355932202e-05, "loss": 1.0936, "step": 2730 }, { "epoch": 0.94, "learning_rate": 1.769491525423729e-05, "loss": 1.0586, "step": 2740 }, { "epoch": 0.94, "learning_rate": 1.7016949152542374e-05, "loss": 1.076, "step": 2750 }, { "epoch": 0.95, "learning_rate": 1.633898305084746e-05, "loss": 1.094, "step": 2760 }, { "epoch": 0.95, "learning_rate": 1.5661016949152542e-05, "loss": 1.1068, "step": 2770 }, { "epoch": 0.95, "learning_rate": 1.4983050847457628e-05, "loss": 1.0789, "step": 2780 }, { "epoch": 0.96, "learning_rate": 1.4305084745762712e-05, "loss": 1.1326, "step": 2790 }, { "epoch": 0.96, "learning_rate": 1.3627118644067796e-05, "loss": 1.0818, "step": 2800 }, { "epoch": 0.96, "learning_rate": 1.2949152542372884e-05, "loss": 1.119, "step": 2810 }, { "epoch": 0.97, "learning_rate": 1.2271186440677966e-05, "loss": 1.1013, "step": 2820 }, { "epoch": 0.97, "learning_rate": 1.1593220338983052e-05, "loss": 1.0815, "step": 2830 }, { "epoch": 0.97, "learning_rate": 1.0915254237288136e-05, "loss": 1.0326, "step": 2840 }, { "epoch": 0.98, "learning_rate": 1.023728813559322e-05, "loss": 1.1054, "step": 2850 }, { "epoch": 0.98, "learning_rate": 9.559322033898306e-06, "loss": 1.0615, "step": 2860 }, { "epoch": 0.99, "learning_rate": 8.88135593220339e-06, "loss": 1.1075, "step": 2870 }, { "epoch": 0.99, "learning_rate": 8.203389830508475e-06, "loss": 1.1043, "step": 2880 }, { "epoch": 0.99, "learning_rate": 7.525423728813559e-06, "loss": 1.1056, "step": 2890 }, { "epoch": 1.0, "learning_rate": 6.8474576271186445e-06, "loss": 1.0234, "step": 2900 }, { "epoch": 1.0, "learning_rate": 6.169491525423729e-06, "loss": 1.0732, "step": 2910 }, { "epoch": 1.0, "learning_rate": 5.491525423728814e-06, "loss": 1.0076, "step": 2920 }, { "epoch": 1.01, "learning_rate": 4.813559322033899e-06, "loss": 0.9715, "step": 2930 }, { "epoch": 1.01, "learning_rate": 4.135593220338984e-06, "loss": 0.9092, "step": 2940 }, { "epoch": 1.01, "learning_rate": 3.4576271186440682e-06, "loss": 0.8787, "step": 2950 }, { "epoch": 1.02, "learning_rate": 2.7796610169491524e-06, "loss": 0.841, "step": 2960 }, { "epoch": 1.02, "learning_rate": 2.1016949152542374e-06, "loss": 0.9436, "step": 2970 }, { "epoch": 1.02, "learning_rate": 1.423728813559322e-06, "loss": 0.9332, "step": 2980 }, { "epoch": 1.03, "learning_rate": 7.457627118644068e-07, "loss": 0.8813, "step": 2990 }, { "epoch": 1.03, "learning_rate": 6.779661016949153e-08, "loss": 0.8643, "step": 3000 }, { "epoch": 1.03, "step": 3000, "total_flos": 3.2240003230162944e+16, "train_loss": 1.1516946287155152, "train_runtime": 29468.0473, "train_samples_per_second": 0.611, "train_steps_per_second": 0.102 } ], "logging_steps": 10, "max_steps": 3000, "num_train_epochs": 2, "save_steps": 500, "total_flos": 3.2240003230162944e+16, "trial_name": null, "trial_params": null }