BizConsultant / trainer_state.json
theoldmandthesea's picture
Updated model
15a1e15
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0297482837528604,
"eval_steps": 500,
"global_step": 3000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 4e-05,
"loss": 2.6282,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 8e-05,
"loss": 1.9839,
"step": 20
},
{
"epoch": 0.01,
"learning_rate": 0.00012,
"loss": 1.5426,
"step": 30
},
{
"epoch": 0.01,
"learning_rate": 0.00016,
"loss": 1.4084,
"step": 40
},
{
"epoch": 0.02,
"learning_rate": 0.0002,
"loss": 1.387,
"step": 50
},
{
"epoch": 0.02,
"learning_rate": 0.0001993220338983051,
"loss": 1.3427,
"step": 60
},
{
"epoch": 0.02,
"learning_rate": 0.00019864406779661017,
"loss": 1.3838,
"step": 70
},
{
"epoch": 0.03,
"learning_rate": 0.00019796610169491526,
"loss": 1.3672,
"step": 80
},
{
"epoch": 0.03,
"learning_rate": 0.00019728813559322035,
"loss": 1.3454,
"step": 90
},
{
"epoch": 0.03,
"learning_rate": 0.00019661016949152545,
"loss": 1.2267,
"step": 100
},
{
"epoch": 0.04,
"learning_rate": 0.0001959322033898305,
"loss": 1.2541,
"step": 110
},
{
"epoch": 0.04,
"learning_rate": 0.0001952542372881356,
"loss": 1.3141,
"step": 120
},
{
"epoch": 0.04,
"learning_rate": 0.0001945762711864407,
"loss": 1.3119,
"step": 130
},
{
"epoch": 0.05,
"learning_rate": 0.0001938983050847458,
"loss": 1.2873,
"step": 140
},
{
"epoch": 0.05,
"learning_rate": 0.00019322033898305085,
"loss": 1.2628,
"step": 150
},
{
"epoch": 0.05,
"learning_rate": 0.00019254237288135595,
"loss": 1.3217,
"step": 160
},
{
"epoch": 0.06,
"learning_rate": 0.000191864406779661,
"loss": 1.302,
"step": 170
},
{
"epoch": 0.06,
"learning_rate": 0.0001911864406779661,
"loss": 1.2469,
"step": 180
},
{
"epoch": 0.07,
"learning_rate": 0.0001905084745762712,
"loss": 1.2582,
"step": 190
},
{
"epoch": 0.07,
"learning_rate": 0.0001898305084745763,
"loss": 1.2481,
"step": 200
},
{
"epoch": 0.07,
"learning_rate": 0.00018915254237288136,
"loss": 1.198,
"step": 210
},
{
"epoch": 0.08,
"learning_rate": 0.00018847457627118645,
"loss": 1.2184,
"step": 220
},
{
"epoch": 0.08,
"learning_rate": 0.00018779661016949151,
"loss": 1.2307,
"step": 230
},
{
"epoch": 0.08,
"learning_rate": 0.00018711864406779663,
"loss": 1.2281,
"step": 240
},
{
"epoch": 0.09,
"learning_rate": 0.0001864406779661017,
"loss": 1.2761,
"step": 250
},
{
"epoch": 0.09,
"learning_rate": 0.0001857627118644068,
"loss": 1.2587,
"step": 260
},
{
"epoch": 0.09,
"learning_rate": 0.00018508474576271186,
"loss": 1.2824,
"step": 270
},
{
"epoch": 0.1,
"learning_rate": 0.00018440677966101695,
"loss": 1.2988,
"step": 280
},
{
"epoch": 0.1,
"learning_rate": 0.00018372881355932204,
"loss": 1.2746,
"step": 290
},
{
"epoch": 0.1,
"learning_rate": 0.00018305084745762714,
"loss": 1.2088,
"step": 300
},
{
"epoch": 0.11,
"learning_rate": 0.0001823728813559322,
"loss": 1.1606,
"step": 310
},
{
"epoch": 0.11,
"learning_rate": 0.0001816949152542373,
"loss": 1.2578,
"step": 320
},
{
"epoch": 0.11,
"learning_rate": 0.00018101694915254239,
"loss": 1.2815,
"step": 330
},
{
"epoch": 0.12,
"learning_rate": 0.00018033898305084748,
"loss": 1.2771,
"step": 340
},
{
"epoch": 0.12,
"learning_rate": 0.00017966101694915257,
"loss": 1.2231,
"step": 350
},
{
"epoch": 0.12,
"learning_rate": 0.00017898305084745764,
"loss": 1.2233,
"step": 360
},
{
"epoch": 0.13,
"learning_rate": 0.00017830508474576273,
"loss": 1.2291,
"step": 370
},
{
"epoch": 0.13,
"learning_rate": 0.0001776271186440678,
"loss": 1.2341,
"step": 380
},
{
"epoch": 0.13,
"learning_rate": 0.0001769491525423729,
"loss": 1.1777,
"step": 390
},
{
"epoch": 0.14,
"learning_rate": 0.00017627118644067798,
"loss": 1.2878,
"step": 400
},
{
"epoch": 0.14,
"learning_rate": 0.00017559322033898307,
"loss": 1.1953,
"step": 410
},
{
"epoch": 0.14,
"learning_rate": 0.00017491525423728814,
"loss": 1.1945,
"step": 420
},
{
"epoch": 0.15,
"learning_rate": 0.00017423728813559323,
"loss": 1.1931,
"step": 430
},
{
"epoch": 0.15,
"learning_rate": 0.0001735593220338983,
"loss": 1.1969,
"step": 440
},
{
"epoch": 0.15,
"learning_rate": 0.00017288135593220342,
"loss": 1.3003,
"step": 450
},
{
"epoch": 0.16,
"learning_rate": 0.00017220338983050848,
"loss": 1.1747,
"step": 460
},
{
"epoch": 0.16,
"learning_rate": 0.00017152542372881357,
"loss": 1.2155,
"step": 470
},
{
"epoch": 0.16,
"learning_rate": 0.00017084745762711864,
"loss": 1.2154,
"step": 480
},
{
"epoch": 0.17,
"learning_rate": 0.00017016949152542373,
"loss": 1.2085,
"step": 490
},
{
"epoch": 0.17,
"learning_rate": 0.00016949152542372882,
"loss": 1.1937,
"step": 500
},
{
"epoch": 0.18,
"learning_rate": 0.00016881355932203392,
"loss": 1.2409,
"step": 510
},
{
"epoch": 0.18,
"learning_rate": 0.00016813559322033898,
"loss": 1.2248,
"step": 520
},
{
"epoch": 0.18,
"learning_rate": 0.00016745762711864408,
"loss": 1.2207,
"step": 530
},
{
"epoch": 0.19,
"learning_rate": 0.00016677966101694914,
"loss": 1.1788,
"step": 540
},
{
"epoch": 0.19,
"learning_rate": 0.00016610169491525423,
"loss": 1.169,
"step": 550
},
{
"epoch": 0.19,
"learning_rate": 0.00016542372881355933,
"loss": 1.1821,
"step": 560
},
{
"epoch": 0.2,
"learning_rate": 0.00016474576271186442,
"loss": 1.2172,
"step": 570
},
{
"epoch": 0.2,
"learning_rate": 0.00016406779661016948,
"loss": 1.1673,
"step": 580
},
{
"epoch": 0.2,
"learning_rate": 0.00016338983050847458,
"loss": 1.156,
"step": 590
},
{
"epoch": 0.21,
"learning_rate": 0.00016271186440677967,
"loss": 1.1926,
"step": 600
},
{
"epoch": 0.21,
"learning_rate": 0.00016203389830508476,
"loss": 1.2045,
"step": 610
},
{
"epoch": 0.21,
"learning_rate": 0.00016135593220338985,
"loss": 1.2378,
"step": 620
},
{
"epoch": 0.22,
"learning_rate": 0.00016067796610169492,
"loss": 1.1616,
"step": 630
},
{
"epoch": 0.22,
"learning_rate": 0.00016,
"loss": 1.2035,
"step": 640
},
{
"epoch": 0.22,
"learning_rate": 0.00015932203389830508,
"loss": 1.1623,
"step": 650
},
{
"epoch": 0.23,
"learning_rate": 0.0001586440677966102,
"loss": 1.2262,
"step": 660
},
{
"epoch": 0.23,
"learning_rate": 0.00015796610169491526,
"loss": 1.1931,
"step": 670
},
{
"epoch": 0.23,
"learning_rate": 0.00015728813559322036,
"loss": 1.1441,
"step": 680
},
{
"epoch": 0.24,
"learning_rate": 0.00015661016949152542,
"loss": 1.16,
"step": 690
},
{
"epoch": 0.24,
"learning_rate": 0.00015593220338983051,
"loss": 1.1955,
"step": 700
},
{
"epoch": 0.24,
"learning_rate": 0.0001552542372881356,
"loss": 1.1956,
"step": 710
},
{
"epoch": 0.25,
"learning_rate": 0.0001545762711864407,
"loss": 1.164,
"step": 720
},
{
"epoch": 0.25,
"learning_rate": 0.00015389830508474577,
"loss": 1.2317,
"step": 730
},
{
"epoch": 0.25,
"learning_rate": 0.00015322033898305086,
"loss": 1.1596,
"step": 740
},
{
"epoch": 0.26,
"learning_rate": 0.00015254237288135592,
"loss": 1.1851,
"step": 750
},
{
"epoch": 0.26,
"learning_rate": 0.00015186440677966102,
"loss": 1.167,
"step": 760
},
{
"epoch": 0.26,
"learning_rate": 0.0001511864406779661,
"loss": 1.1623,
"step": 770
},
{
"epoch": 0.27,
"learning_rate": 0.0001505084745762712,
"loss": 1.1974,
"step": 780
},
{
"epoch": 0.27,
"learning_rate": 0.00014983050847457627,
"loss": 1.1948,
"step": 790
},
{
"epoch": 0.27,
"learning_rate": 0.00014915254237288136,
"loss": 1.1842,
"step": 800
},
{
"epoch": 0.28,
"learning_rate": 0.00014847457627118645,
"loss": 1.1257,
"step": 810
},
{
"epoch": 0.28,
"learning_rate": 0.00014779661016949154,
"loss": 1.1557,
"step": 820
},
{
"epoch": 0.28,
"learning_rate": 0.0001471186440677966,
"loss": 1.1412,
"step": 830
},
{
"epoch": 0.29,
"learning_rate": 0.0001464406779661017,
"loss": 1.2058,
"step": 840
},
{
"epoch": 0.29,
"learning_rate": 0.00014576271186440677,
"loss": 1.249,
"step": 850
},
{
"epoch": 0.3,
"learning_rate": 0.00014508474576271186,
"loss": 1.1608,
"step": 860
},
{
"epoch": 0.3,
"learning_rate": 0.00014440677966101695,
"loss": 1.1679,
"step": 870
},
{
"epoch": 0.3,
"learning_rate": 0.00014372881355932205,
"loss": 1.175,
"step": 880
},
{
"epoch": 0.31,
"learning_rate": 0.00014305084745762714,
"loss": 1.1369,
"step": 890
},
{
"epoch": 0.31,
"learning_rate": 0.0001423728813559322,
"loss": 1.2629,
"step": 900
},
{
"epoch": 0.31,
"learning_rate": 0.0001416949152542373,
"loss": 1.1771,
"step": 910
},
{
"epoch": 0.32,
"learning_rate": 0.0001410169491525424,
"loss": 1.1791,
"step": 920
},
{
"epoch": 0.32,
"learning_rate": 0.00014033898305084748,
"loss": 1.1478,
"step": 930
},
{
"epoch": 0.32,
"learning_rate": 0.00013966101694915255,
"loss": 1.1767,
"step": 940
},
{
"epoch": 0.33,
"learning_rate": 0.00013898305084745764,
"loss": 1.1621,
"step": 950
},
{
"epoch": 0.33,
"learning_rate": 0.0001383050847457627,
"loss": 1.1936,
"step": 960
},
{
"epoch": 0.33,
"learning_rate": 0.0001376271186440678,
"loss": 1.1556,
"step": 970
},
{
"epoch": 0.34,
"learning_rate": 0.0001369491525423729,
"loss": 1.2168,
"step": 980
},
{
"epoch": 0.34,
"learning_rate": 0.00013627118644067798,
"loss": 1.1899,
"step": 990
},
{
"epoch": 0.34,
"learning_rate": 0.00013559322033898305,
"loss": 1.0627,
"step": 1000
},
{
"epoch": 0.35,
"learning_rate": 0.00013491525423728814,
"loss": 1.1734,
"step": 1010
},
{
"epoch": 0.35,
"learning_rate": 0.0001342372881355932,
"loss": 1.1362,
"step": 1020
},
{
"epoch": 0.35,
"learning_rate": 0.00013355932203389833,
"loss": 1.1481,
"step": 1030
},
{
"epoch": 0.36,
"learning_rate": 0.0001328813559322034,
"loss": 1.1567,
"step": 1040
},
{
"epoch": 0.36,
"learning_rate": 0.00013220338983050849,
"loss": 1.1406,
"step": 1050
},
{
"epoch": 0.36,
"learning_rate": 0.00013152542372881355,
"loss": 1.1228,
"step": 1060
},
{
"epoch": 0.37,
"learning_rate": 0.00013084745762711864,
"loss": 1.1357,
"step": 1070
},
{
"epoch": 0.37,
"learning_rate": 0.00013016949152542374,
"loss": 1.1677,
"step": 1080
},
{
"epoch": 0.37,
"learning_rate": 0.00012949152542372883,
"loss": 1.1726,
"step": 1090
},
{
"epoch": 0.38,
"learning_rate": 0.0001288135593220339,
"loss": 1.1771,
"step": 1100
},
{
"epoch": 0.38,
"learning_rate": 0.000128135593220339,
"loss": 1.1527,
"step": 1110
},
{
"epoch": 0.38,
"learning_rate": 0.00012745762711864405,
"loss": 1.1748,
"step": 1120
},
{
"epoch": 0.39,
"learning_rate": 0.00012677966101694917,
"loss": 1.2424,
"step": 1130
},
{
"epoch": 0.39,
"learning_rate": 0.00012610169491525426,
"loss": 1.1711,
"step": 1140
},
{
"epoch": 0.39,
"learning_rate": 0.00012542372881355933,
"loss": 1.1538,
"step": 1150
},
{
"epoch": 0.4,
"learning_rate": 0.00012474576271186442,
"loss": 1.0941,
"step": 1160
},
{
"epoch": 0.4,
"learning_rate": 0.0001240677966101695,
"loss": 1.2163,
"step": 1170
},
{
"epoch": 0.41,
"learning_rate": 0.00012338983050847458,
"loss": 1.1581,
"step": 1180
},
{
"epoch": 0.41,
"learning_rate": 0.00012271186440677967,
"loss": 1.1345,
"step": 1190
},
{
"epoch": 0.41,
"learning_rate": 0.00012203389830508477,
"loss": 1.1448,
"step": 1200
},
{
"epoch": 0.42,
"learning_rate": 0.00012135593220338983,
"loss": 1.1968,
"step": 1210
},
{
"epoch": 0.42,
"learning_rate": 0.00012067796610169492,
"loss": 1.1306,
"step": 1220
},
{
"epoch": 0.42,
"learning_rate": 0.00012,
"loss": 1.1125,
"step": 1230
},
{
"epoch": 0.43,
"learning_rate": 0.0001193220338983051,
"loss": 1.1354,
"step": 1240
},
{
"epoch": 0.43,
"learning_rate": 0.00011864406779661017,
"loss": 1.1113,
"step": 1250
},
{
"epoch": 0.43,
"learning_rate": 0.00011796610169491527,
"loss": 1.1472,
"step": 1260
},
{
"epoch": 0.44,
"learning_rate": 0.00011728813559322033,
"loss": 1.1011,
"step": 1270
},
{
"epoch": 0.44,
"learning_rate": 0.00011661016949152544,
"loss": 1.1174,
"step": 1280
},
{
"epoch": 0.44,
"learning_rate": 0.0001159322033898305,
"loss": 1.1574,
"step": 1290
},
{
"epoch": 0.45,
"learning_rate": 0.0001152542372881356,
"loss": 1.1016,
"step": 1300
},
{
"epoch": 0.45,
"learning_rate": 0.00011457627118644068,
"loss": 1.1353,
"step": 1310
},
{
"epoch": 0.45,
"learning_rate": 0.00011389830508474577,
"loss": 1.0817,
"step": 1320
},
{
"epoch": 0.46,
"learning_rate": 0.00011322033898305085,
"loss": 1.1467,
"step": 1330
},
{
"epoch": 0.46,
"learning_rate": 0.00011254237288135594,
"loss": 1.1301,
"step": 1340
},
{
"epoch": 0.46,
"learning_rate": 0.00011186440677966102,
"loss": 1.1716,
"step": 1350
},
{
"epoch": 0.47,
"learning_rate": 0.00011118644067796611,
"loss": 1.1534,
"step": 1360
},
{
"epoch": 0.47,
"learning_rate": 0.00011050847457627118,
"loss": 1.1629,
"step": 1370
},
{
"epoch": 0.47,
"learning_rate": 0.00010983050847457627,
"loss": 1.1446,
"step": 1380
},
{
"epoch": 0.48,
"learning_rate": 0.00010915254237288135,
"loss": 1.1448,
"step": 1390
},
{
"epoch": 0.48,
"learning_rate": 0.00010847457627118644,
"loss": 1.1453,
"step": 1400
},
{
"epoch": 0.48,
"learning_rate": 0.00010779661016949153,
"loss": 1.1202,
"step": 1410
},
{
"epoch": 0.49,
"learning_rate": 0.00010711864406779661,
"loss": 1.1288,
"step": 1420
},
{
"epoch": 0.49,
"learning_rate": 0.0001064406779661017,
"loss": 1.1126,
"step": 1430
},
{
"epoch": 0.49,
"learning_rate": 0.00010576271186440679,
"loss": 1.1432,
"step": 1440
},
{
"epoch": 0.5,
"learning_rate": 0.00010508474576271188,
"loss": 1.133,
"step": 1450
},
{
"epoch": 0.5,
"learning_rate": 0.00010440677966101696,
"loss": 1.1793,
"step": 1460
},
{
"epoch": 0.5,
"learning_rate": 0.00010372881355932205,
"loss": 1.1938,
"step": 1470
},
{
"epoch": 0.51,
"learning_rate": 0.00010305084745762712,
"loss": 1.1444,
"step": 1480
},
{
"epoch": 0.51,
"learning_rate": 0.00010237288135593222,
"loss": 1.1301,
"step": 1490
},
{
"epoch": 0.51,
"learning_rate": 0.00010169491525423729,
"loss": 1.1332,
"step": 1500
},
{
"epoch": 0.52,
"learning_rate": 0.00010101694915254238,
"loss": 1.1368,
"step": 1510
},
{
"epoch": 0.52,
"learning_rate": 0.00010033898305084746,
"loss": 1.0965,
"step": 1520
},
{
"epoch": 0.53,
"learning_rate": 9.966101694915255e-05,
"loss": 1.168,
"step": 1530
},
{
"epoch": 0.53,
"learning_rate": 9.898305084745763e-05,
"loss": 1.097,
"step": 1540
},
{
"epoch": 0.53,
"learning_rate": 9.830508474576272e-05,
"loss": 1.1166,
"step": 1550
},
{
"epoch": 0.54,
"learning_rate": 9.76271186440678e-05,
"loss": 1.1539,
"step": 1560
},
{
"epoch": 0.54,
"learning_rate": 9.69491525423729e-05,
"loss": 1.1388,
"step": 1570
},
{
"epoch": 0.54,
"learning_rate": 9.627118644067797e-05,
"loss": 1.1898,
"step": 1580
},
{
"epoch": 0.55,
"learning_rate": 9.559322033898305e-05,
"loss": 1.155,
"step": 1590
},
{
"epoch": 0.55,
"learning_rate": 9.491525423728815e-05,
"loss": 1.1212,
"step": 1600
},
{
"epoch": 0.55,
"learning_rate": 9.423728813559322e-05,
"loss": 1.0912,
"step": 1610
},
{
"epoch": 0.56,
"learning_rate": 9.355932203389832e-05,
"loss": 1.1017,
"step": 1620
},
{
"epoch": 0.56,
"learning_rate": 9.28813559322034e-05,
"loss": 1.1294,
"step": 1630
},
{
"epoch": 0.56,
"learning_rate": 9.220338983050847e-05,
"loss": 1.1105,
"step": 1640
},
{
"epoch": 0.57,
"learning_rate": 9.152542372881357e-05,
"loss": 1.1159,
"step": 1650
},
{
"epoch": 0.57,
"learning_rate": 9.084745762711865e-05,
"loss": 1.1195,
"step": 1660
},
{
"epoch": 0.57,
"learning_rate": 9.016949152542374e-05,
"loss": 1.1514,
"step": 1670
},
{
"epoch": 0.58,
"learning_rate": 8.949152542372882e-05,
"loss": 1.1156,
"step": 1680
},
{
"epoch": 0.58,
"learning_rate": 8.88135593220339e-05,
"loss": 1.1175,
"step": 1690
},
{
"epoch": 0.58,
"learning_rate": 8.813559322033899e-05,
"loss": 1.1928,
"step": 1700
},
{
"epoch": 0.59,
"learning_rate": 8.745762711864407e-05,
"loss": 1.0526,
"step": 1710
},
{
"epoch": 0.59,
"learning_rate": 8.677966101694915e-05,
"loss": 1.0861,
"step": 1720
},
{
"epoch": 0.59,
"learning_rate": 8.610169491525424e-05,
"loss": 1.185,
"step": 1730
},
{
"epoch": 0.6,
"learning_rate": 8.542372881355932e-05,
"loss": 1.132,
"step": 1740
},
{
"epoch": 0.6,
"learning_rate": 8.474576271186441e-05,
"loss": 1.1164,
"step": 1750
},
{
"epoch": 0.6,
"learning_rate": 8.406779661016949e-05,
"loss": 1.1232,
"step": 1760
},
{
"epoch": 0.61,
"learning_rate": 8.338983050847457e-05,
"loss": 1.0915,
"step": 1770
},
{
"epoch": 0.61,
"learning_rate": 8.271186440677966e-05,
"loss": 1.1382,
"step": 1780
},
{
"epoch": 0.61,
"learning_rate": 8.203389830508474e-05,
"loss": 1.0837,
"step": 1790
},
{
"epoch": 0.62,
"learning_rate": 8.135593220338983e-05,
"loss": 1.1287,
"step": 1800
},
{
"epoch": 0.62,
"learning_rate": 8.067796610169493e-05,
"loss": 1.1251,
"step": 1810
},
{
"epoch": 0.62,
"learning_rate": 8e-05,
"loss": 1.1527,
"step": 1820
},
{
"epoch": 0.63,
"learning_rate": 7.93220338983051e-05,
"loss": 1.1309,
"step": 1830
},
{
"epoch": 0.63,
"learning_rate": 7.864406779661018e-05,
"loss": 1.1185,
"step": 1840
},
{
"epoch": 0.64,
"learning_rate": 7.796610169491526e-05,
"loss": 1.1131,
"step": 1850
},
{
"epoch": 0.64,
"learning_rate": 7.728813559322035e-05,
"loss": 1.1497,
"step": 1860
},
{
"epoch": 0.64,
"learning_rate": 7.661016949152543e-05,
"loss": 1.2128,
"step": 1870
},
{
"epoch": 0.65,
"learning_rate": 7.593220338983051e-05,
"loss": 1.0872,
"step": 1880
},
{
"epoch": 0.65,
"learning_rate": 7.52542372881356e-05,
"loss": 1.113,
"step": 1890
},
{
"epoch": 0.65,
"learning_rate": 7.457627118644068e-05,
"loss": 1.0717,
"step": 1900
},
{
"epoch": 0.66,
"learning_rate": 7.389830508474577e-05,
"loss": 1.0703,
"step": 1910
},
{
"epoch": 0.66,
"learning_rate": 7.322033898305085e-05,
"loss": 1.1531,
"step": 1920
},
{
"epoch": 0.66,
"learning_rate": 7.254237288135593e-05,
"loss": 1.1379,
"step": 1930
},
{
"epoch": 0.67,
"learning_rate": 7.186440677966102e-05,
"loss": 1.1474,
"step": 1940
},
{
"epoch": 0.67,
"learning_rate": 7.11864406779661e-05,
"loss": 1.0944,
"step": 1950
},
{
"epoch": 0.67,
"learning_rate": 7.05084745762712e-05,
"loss": 1.0956,
"step": 1960
},
{
"epoch": 0.68,
"learning_rate": 6.983050847457627e-05,
"loss": 1.0674,
"step": 1970
},
{
"epoch": 0.68,
"learning_rate": 6.915254237288135e-05,
"loss": 1.0851,
"step": 1980
},
{
"epoch": 0.68,
"learning_rate": 6.847457627118645e-05,
"loss": 1.1439,
"step": 1990
},
{
"epoch": 0.69,
"learning_rate": 6.779661016949152e-05,
"loss": 1.0869,
"step": 2000
},
{
"epoch": 0.69,
"learning_rate": 6.71186440677966e-05,
"loss": 1.1284,
"step": 2010
},
{
"epoch": 0.69,
"learning_rate": 6.64406779661017e-05,
"loss": 1.0684,
"step": 2020
},
{
"epoch": 0.7,
"learning_rate": 6.576271186440678e-05,
"loss": 1.1464,
"step": 2030
},
{
"epoch": 0.7,
"learning_rate": 6.508474576271187e-05,
"loss": 1.0331,
"step": 2040
},
{
"epoch": 0.7,
"learning_rate": 6.440677966101695e-05,
"loss": 1.0975,
"step": 2050
},
{
"epoch": 0.71,
"learning_rate": 6.379661016949154e-05,
"loss": 1.0438,
"step": 2060
},
{
"epoch": 0.71,
"learning_rate": 6.311864406779661e-05,
"loss": 1.146,
"step": 2070
},
{
"epoch": 0.71,
"learning_rate": 6.244067796610171e-05,
"loss": 1.135,
"step": 2080
},
{
"epoch": 0.72,
"learning_rate": 6.176271186440679e-05,
"loss": 1.0624,
"step": 2090
},
{
"epoch": 0.72,
"learning_rate": 6.108474576271187e-05,
"loss": 1.0677,
"step": 2100
},
{
"epoch": 0.72,
"learning_rate": 6.040677966101695e-05,
"loss": 1.0814,
"step": 2110
},
{
"epoch": 0.73,
"learning_rate": 5.972881355932204e-05,
"loss": 1.1558,
"step": 2120
},
{
"epoch": 0.73,
"learning_rate": 5.905084745762712e-05,
"loss": 1.0757,
"step": 2130
},
{
"epoch": 0.73,
"learning_rate": 5.837288135593221e-05,
"loss": 1.0827,
"step": 2140
},
{
"epoch": 0.74,
"learning_rate": 5.7694915254237295e-05,
"loss": 1.1251,
"step": 2150
},
{
"epoch": 0.74,
"learning_rate": 5.7016949152542374e-05,
"loss": 1.0687,
"step": 2160
},
{
"epoch": 0.74,
"learning_rate": 5.633898305084746e-05,
"loss": 1.0848,
"step": 2170
},
{
"epoch": 0.75,
"learning_rate": 5.5661016949152545e-05,
"loss": 1.1592,
"step": 2180
},
{
"epoch": 0.75,
"learning_rate": 5.498305084745763e-05,
"loss": 1.0944,
"step": 2190
},
{
"epoch": 0.76,
"learning_rate": 5.430508474576271e-05,
"loss": 1.101,
"step": 2200
},
{
"epoch": 0.76,
"learning_rate": 5.3627118644067796e-05,
"loss": 1.0942,
"step": 2210
},
{
"epoch": 0.76,
"learning_rate": 5.294915254237288e-05,
"loss": 1.0955,
"step": 2220
},
{
"epoch": 0.77,
"learning_rate": 5.227118644067797e-05,
"loss": 1.0917,
"step": 2230
},
{
"epoch": 0.77,
"learning_rate": 5.1593220338983054e-05,
"loss": 1.1238,
"step": 2240
},
{
"epoch": 0.77,
"learning_rate": 5.091525423728813e-05,
"loss": 1.0737,
"step": 2250
},
{
"epoch": 0.78,
"learning_rate": 5.023728813559322e-05,
"loss": 1.0966,
"step": 2260
},
{
"epoch": 0.78,
"learning_rate": 4.955932203389831e-05,
"loss": 1.0809,
"step": 2270
},
{
"epoch": 0.78,
"learning_rate": 4.888135593220339e-05,
"loss": 1.0894,
"step": 2280
},
{
"epoch": 0.79,
"learning_rate": 4.8203389830508476e-05,
"loss": 1.1213,
"step": 2290
},
{
"epoch": 0.79,
"learning_rate": 4.752542372881356e-05,
"loss": 1.0784,
"step": 2300
},
{
"epoch": 0.79,
"learning_rate": 4.684745762711865e-05,
"loss": 1.123,
"step": 2310
},
{
"epoch": 0.8,
"learning_rate": 4.6169491525423734e-05,
"loss": 1.1118,
"step": 2320
},
{
"epoch": 0.8,
"learning_rate": 4.549152542372881e-05,
"loss": 1.1142,
"step": 2330
},
{
"epoch": 0.8,
"learning_rate": 4.48135593220339e-05,
"loss": 1.1157,
"step": 2340
},
{
"epoch": 0.81,
"learning_rate": 4.4135593220338984e-05,
"loss": 1.1249,
"step": 2350
},
{
"epoch": 0.81,
"learning_rate": 4.345762711864407e-05,
"loss": 1.0925,
"step": 2360
},
{
"epoch": 0.81,
"learning_rate": 4.277966101694915e-05,
"loss": 1.1079,
"step": 2370
},
{
"epoch": 0.82,
"learning_rate": 4.210169491525424e-05,
"loss": 1.119,
"step": 2380
},
{
"epoch": 0.82,
"learning_rate": 4.142372881355933e-05,
"loss": 1.0334,
"step": 2390
},
{
"epoch": 0.82,
"learning_rate": 4.0745762711864414e-05,
"loss": 1.0725,
"step": 2400
},
{
"epoch": 0.83,
"learning_rate": 4.006779661016949e-05,
"loss": 1.0998,
"step": 2410
},
{
"epoch": 0.83,
"learning_rate": 3.938983050847458e-05,
"loss": 1.107,
"step": 2420
},
{
"epoch": 0.83,
"learning_rate": 3.8711864406779664e-05,
"loss": 1.1407,
"step": 2430
},
{
"epoch": 0.84,
"learning_rate": 3.803389830508475e-05,
"loss": 1.13,
"step": 2440
},
{
"epoch": 0.84,
"learning_rate": 3.735593220338983e-05,
"loss": 1.1231,
"step": 2450
},
{
"epoch": 0.84,
"learning_rate": 3.6677966101694915e-05,
"loss": 1.0928,
"step": 2460
},
{
"epoch": 0.85,
"learning_rate": 3.6e-05,
"loss": 1.0743,
"step": 2470
},
{
"epoch": 0.85,
"learning_rate": 3.532203389830509e-05,
"loss": 1.1144,
"step": 2480
},
{
"epoch": 0.85,
"learning_rate": 3.4644067796610166e-05,
"loss": 1.0636,
"step": 2490
},
{
"epoch": 0.86,
"learning_rate": 3.396610169491525e-05,
"loss": 1.1148,
"step": 2500
},
{
"epoch": 0.86,
"learning_rate": 3.3288135593220344e-05,
"loss": 1.0876,
"step": 2510
},
{
"epoch": 0.86,
"learning_rate": 3.261016949152543e-05,
"loss": 1.1073,
"step": 2520
},
{
"epoch": 0.87,
"learning_rate": 3.193220338983051e-05,
"loss": 1.0801,
"step": 2530
},
{
"epoch": 0.87,
"learning_rate": 3.1254237288135595e-05,
"loss": 1.0995,
"step": 2540
},
{
"epoch": 0.88,
"learning_rate": 3.057627118644068e-05,
"loss": 1.1142,
"step": 2550
},
{
"epoch": 0.88,
"learning_rate": 2.9898305084745763e-05,
"loss": 1.0569,
"step": 2560
},
{
"epoch": 0.88,
"learning_rate": 2.922033898305085e-05,
"loss": 1.0937,
"step": 2570
},
{
"epoch": 0.89,
"learning_rate": 2.854237288135593e-05,
"loss": 1.0893,
"step": 2580
},
{
"epoch": 0.89,
"learning_rate": 2.7864406779661017e-05,
"loss": 1.0466,
"step": 2590
},
{
"epoch": 0.89,
"learning_rate": 2.7186440677966103e-05,
"loss": 1.0828,
"step": 2600
},
{
"epoch": 0.9,
"learning_rate": 2.6508474576271186e-05,
"loss": 1.0123,
"step": 2610
},
{
"epoch": 0.9,
"learning_rate": 2.583050847457627e-05,
"loss": 1.0969,
"step": 2620
},
{
"epoch": 0.9,
"learning_rate": 2.5152542372881354e-05,
"loss": 1.049,
"step": 2630
},
{
"epoch": 0.91,
"learning_rate": 2.4474576271186443e-05,
"loss": 1.0377,
"step": 2640
},
{
"epoch": 0.91,
"learning_rate": 2.3796610169491526e-05,
"loss": 1.103,
"step": 2650
},
{
"epoch": 0.91,
"learning_rate": 2.311864406779661e-05,
"loss": 1.0602,
"step": 2660
},
{
"epoch": 0.92,
"learning_rate": 2.2440677966101694e-05,
"loss": 1.112,
"step": 2670
},
{
"epoch": 0.92,
"learning_rate": 2.1762711864406783e-05,
"loss": 1.0804,
"step": 2680
},
{
"epoch": 0.92,
"learning_rate": 2.1084745762711866e-05,
"loss": 1.1136,
"step": 2690
},
{
"epoch": 0.93,
"learning_rate": 2.040677966101695e-05,
"loss": 1.064,
"step": 2700
},
{
"epoch": 0.93,
"learning_rate": 1.9728813559322034e-05,
"loss": 1.1352,
"step": 2710
},
{
"epoch": 0.93,
"learning_rate": 1.905084745762712e-05,
"loss": 1.0464,
"step": 2720
},
{
"epoch": 0.94,
"learning_rate": 1.8372881355932202e-05,
"loss": 1.0936,
"step": 2730
},
{
"epoch": 0.94,
"learning_rate": 1.769491525423729e-05,
"loss": 1.0586,
"step": 2740
},
{
"epoch": 0.94,
"learning_rate": 1.7016949152542374e-05,
"loss": 1.076,
"step": 2750
},
{
"epoch": 0.95,
"learning_rate": 1.633898305084746e-05,
"loss": 1.094,
"step": 2760
},
{
"epoch": 0.95,
"learning_rate": 1.5661016949152542e-05,
"loss": 1.1068,
"step": 2770
},
{
"epoch": 0.95,
"learning_rate": 1.4983050847457628e-05,
"loss": 1.0789,
"step": 2780
},
{
"epoch": 0.96,
"learning_rate": 1.4305084745762712e-05,
"loss": 1.1326,
"step": 2790
},
{
"epoch": 0.96,
"learning_rate": 1.3627118644067796e-05,
"loss": 1.0818,
"step": 2800
},
{
"epoch": 0.96,
"learning_rate": 1.2949152542372884e-05,
"loss": 1.119,
"step": 2810
},
{
"epoch": 0.97,
"learning_rate": 1.2271186440677966e-05,
"loss": 1.1013,
"step": 2820
},
{
"epoch": 0.97,
"learning_rate": 1.1593220338983052e-05,
"loss": 1.0815,
"step": 2830
},
{
"epoch": 0.97,
"learning_rate": 1.0915254237288136e-05,
"loss": 1.0326,
"step": 2840
},
{
"epoch": 0.98,
"learning_rate": 1.023728813559322e-05,
"loss": 1.1054,
"step": 2850
},
{
"epoch": 0.98,
"learning_rate": 9.559322033898306e-06,
"loss": 1.0615,
"step": 2860
},
{
"epoch": 0.99,
"learning_rate": 8.88135593220339e-06,
"loss": 1.1075,
"step": 2870
},
{
"epoch": 0.99,
"learning_rate": 8.203389830508475e-06,
"loss": 1.1043,
"step": 2880
},
{
"epoch": 0.99,
"learning_rate": 7.525423728813559e-06,
"loss": 1.1056,
"step": 2890
},
{
"epoch": 1.0,
"learning_rate": 6.8474576271186445e-06,
"loss": 1.0234,
"step": 2900
},
{
"epoch": 1.0,
"learning_rate": 6.169491525423729e-06,
"loss": 1.0732,
"step": 2910
},
{
"epoch": 1.0,
"learning_rate": 5.491525423728814e-06,
"loss": 1.0076,
"step": 2920
},
{
"epoch": 1.01,
"learning_rate": 4.813559322033899e-06,
"loss": 0.9715,
"step": 2930
},
{
"epoch": 1.01,
"learning_rate": 4.135593220338984e-06,
"loss": 0.9092,
"step": 2940
},
{
"epoch": 1.01,
"learning_rate": 3.4576271186440682e-06,
"loss": 0.8787,
"step": 2950
},
{
"epoch": 1.02,
"learning_rate": 2.7796610169491524e-06,
"loss": 0.841,
"step": 2960
},
{
"epoch": 1.02,
"learning_rate": 2.1016949152542374e-06,
"loss": 0.9436,
"step": 2970
},
{
"epoch": 1.02,
"learning_rate": 1.423728813559322e-06,
"loss": 0.9332,
"step": 2980
},
{
"epoch": 1.03,
"learning_rate": 7.457627118644068e-07,
"loss": 0.8813,
"step": 2990
},
{
"epoch": 1.03,
"learning_rate": 6.779661016949153e-08,
"loss": 0.8643,
"step": 3000
},
{
"epoch": 1.03,
"step": 3000,
"total_flos": 3.2240003230162944e+16,
"train_loss": 1.1516946287155152,
"train_runtime": 29468.0473,
"train_samples_per_second": 0.611,
"train_steps_per_second": 0.102
}
],
"logging_steps": 10,
"max_steps": 3000,
"num_train_epochs": 2,
"save_steps": 500,
"total_flos": 3.2240003230162944e+16,
"trial_name": null,
"trial_params": null
}