llama27bchat-lora-v1 / trainer_state.json
NajiAboo's picture
Upload 13 files
2384d0f
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.99809427010545,
"eval_steps": 500,
"global_step": 9835,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.999987245581571e-05,
"loss": 1.7374,
"step": 10
},
{
"epoch": 0.01,
"learning_rate": 4.9999489824564244e-05,
"loss": 1.5943,
"step": 20
},
{
"epoch": 0.02,
"learning_rate": 4.9998852110149786e-05,
"loss": 1.4529,
"step": 30
},
{
"epoch": 0.02,
"learning_rate": 4.999795931907928e-05,
"loss": 1.4299,
"step": 40
},
{
"epoch": 0.03,
"learning_rate": 4.999681146046236e-05,
"loss": 1.2767,
"step": 50
},
{
"epoch": 0.03,
"learning_rate": 4.9995408546011235e-05,
"loss": 1.2893,
"step": 60
},
{
"epoch": 0.04,
"learning_rate": 4.9993750590040575e-05,
"loss": 1.297,
"step": 70
},
{
"epoch": 0.04,
"learning_rate": 4.9991837609467425e-05,
"loss": 1.1932,
"step": 80
},
{
"epoch": 0.05,
"learning_rate": 4.998966962381092e-05,
"loss": 1.1411,
"step": 90
},
{
"epoch": 0.05,
"learning_rate": 4.998724665519219e-05,
"loss": 1.1558,
"step": 100
},
{
"epoch": 0.06,
"learning_rate": 4.9984568728334075e-05,
"loss": 1.1515,
"step": 110
},
{
"epoch": 0.06,
"learning_rate": 4.998163587056089e-05,
"loss": 1.0365,
"step": 120
},
{
"epoch": 0.07,
"learning_rate": 4.997844811179817e-05,
"loss": 1.0264,
"step": 130
},
{
"epoch": 0.07,
"learning_rate": 4.9975005484572305e-05,
"loss": 1.0283,
"step": 140
},
{
"epoch": 0.08,
"learning_rate": 4.997130802401027e-05,
"loss": 1.0356,
"step": 150
},
{
"epoch": 0.08,
"learning_rate": 4.9967355767839225e-05,
"loss": 1.0245,
"step": 160
},
{
"epoch": 0.09,
"learning_rate": 4.996314875638616e-05,
"loss": 1.0557,
"step": 170
},
{
"epoch": 0.09,
"learning_rate": 4.995868703257745e-05,
"loss": 1.0417,
"step": 180
},
{
"epoch": 0.1,
"learning_rate": 4.995397064193846e-05,
"loss": 1.0008,
"step": 190
},
{
"epoch": 0.1,
"learning_rate": 4.9948999632593055e-05,
"loss": 1.035,
"step": 200
},
{
"epoch": 0.11,
"learning_rate": 4.994377405526308e-05,
"loss": 1.0543,
"step": 210
},
{
"epoch": 0.11,
"learning_rate": 4.9938293963267914e-05,
"loss": 1.0384,
"step": 220
},
{
"epoch": 0.12,
"learning_rate": 4.993255941252385e-05,
"loss": 1.0359,
"step": 230
},
{
"epoch": 0.12,
"learning_rate": 4.9926570461543586e-05,
"loss": 1.0484,
"step": 240
},
{
"epoch": 0.13,
"learning_rate": 4.992032717143559e-05,
"loss": 0.9157,
"step": 250
},
{
"epoch": 0.13,
"learning_rate": 4.9913829605903486e-05,
"loss": 1.0029,
"step": 260
},
{
"epoch": 0.14,
"learning_rate": 4.990707783124541e-05,
"loss": 1.0332,
"step": 270
},
{
"epoch": 0.14,
"learning_rate": 4.990007191635334e-05,
"loss": 0.9525,
"step": 280
},
{
"epoch": 0.15,
"learning_rate": 4.989281193271236e-05,
"loss": 0.9969,
"step": 290
},
{
"epoch": 0.15,
"learning_rate": 4.9885297954399964e-05,
"loss": 0.9313,
"step": 300
},
{
"epoch": 0.16,
"learning_rate": 4.98775300580853e-05,
"loss": 0.9715,
"step": 310
},
{
"epoch": 0.16,
"learning_rate": 4.986950832302836e-05,
"loss": 0.9459,
"step": 320
},
{
"epoch": 0.17,
"learning_rate": 4.9861232831079194e-05,
"loss": 0.9614,
"step": 330
},
{
"epoch": 0.17,
"learning_rate": 4.985270366667708e-05,
"loss": 0.9995,
"step": 340
},
{
"epoch": 0.18,
"learning_rate": 4.9843920916849645e-05,
"loss": 0.9779,
"step": 350
},
{
"epoch": 0.18,
"learning_rate": 4.9834884671211976e-05,
"loss": 0.926,
"step": 360
},
{
"epoch": 0.19,
"learning_rate": 4.982559502196572e-05,
"loss": 1.0427,
"step": 370
},
{
"epoch": 0.19,
"learning_rate": 4.981605206389814e-05,
"loss": 0.9495,
"step": 380
},
{
"epoch": 0.2,
"learning_rate": 4.9806255894381135e-05,
"loss": 0.9446,
"step": 390
},
{
"epoch": 0.2,
"learning_rate": 4.979620661337026e-05,
"loss": 0.9732,
"step": 400
},
{
"epoch": 0.21,
"learning_rate": 4.978590432340371e-05,
"loss": 0.8346,
"step": 410
},
{
"epoch": 0.21,
"learning_rate": 4.9775349129601243e-05,
"loss": 0.9642,
"step": 420
},
{
"epoch": 0.22,
"learning_rate": 4.9764541139663176e-05,
"loss": 0.8718,
"step": 430
},
{
"epoch": 0.22,
"learning_rate": 4.975348046386917e-05,
"loss": 0.9406,
"step": 440
},
{
"epoch": 0.23,
"learning_rate": 4.974216721507725e-05,
"loss": 0.8534,
"step": 450
},
{
"epoch": 0.23,
"learning_rate": 4.973060150872253e-05,
"loss": 0.9735,
"step": 460
},
{
"epoch": 0.24,
"learning_rate": 4.971878346281609e-05,
"loss": 0.9225,
"step": 470
},
{
"epoch": 0.24,
"learning_rate": 4.970671319794378e-05,
"loss": 0.8771,
"step": 480
},
{
"epoch": 0.25,
"learning_rate": 4.969439083726496e-05,
"loss": 0.9068,
"step": 490
},
{
"epoch": 0.25,
"learning_rate": 4.968181650651127e-05,
"loss": 0.9524,
"step": 500
},
{
"epoch": 0.26,
"learning_rate": 4.966899033398533e-05,
"loss": 0.8811,
"step": 510
},
{
"epoch": 0.26,
"learning_rate": 4.965591245055944e-05,
"loss": 0.9009,
"step": 520
},
{
"epoch": 0.27,
"learning_rate": 4.964258298967423e-05,
"loss": 0.8791,
"step": 530
},
{
"epoch": 0.27,
"learning_rate": 4.962900208733734e-05,
"loss": 0.9129,
"step": 540
},
{
"epoch": 0.28,
"learning_rate": 4.9615169882121945e-05,
"loss": 0.9973,
"step": 550
},
{
"epoch": 0.28,
"learning_rate": 4.960108651516545e-05,
"loss": 1.0256,
"step": 560
},
{
"epoch": 0.29,
"learning_rate": 4.958675213016798e-05,
"loss": 0.8638,
"step": 570
},
{
"epoch": 0.29,
"learning_rate": 4.9572166873390925e-05,
"loss": 0.8928,
"step": 580
},
{
"epoch": 0.3,
"learning_rate": 4.955733089365546e-05,
"loss": 0.8579,
"step": 590
},
{
"epoch": 0.3,
"learning_rate": 4.9542244342341026e-05,
"loss": 0.9767,
"step": 600
},
{
"epoch": 0.31,
"learning_rate": 4.9526907373383766e-05,
"loss": 0.8605,
"step": 610
},
{
"epoch": 0.32,
"learning_rate": 4.951132014327498e-05,
"loss": 0.883,
"step": 620
},
{
"epoch": 0.32,
"learning_rate": 4.949548281105951e-05,
"loss": 0.9282,
"step": 630
},
{
"epoch": 0.33,
"learning_rate": 4.947939553833412e-05,
"loss": 0.8909,
"step": 640
},
{
"epoch": 0.33,
"learning_rate": 4.9463058489245874e-05,
"loss": 0.8618,
"step": 650
},
{
"epoch": 0.34,
"learning_rate": 4.9446471830490396e-05,
"loss": 0.8674,
"step": 660
},
{
"epoch": 0.34,
"learning_rate": 4.942963573131025e-05,
"loss": 0.9487,
"step": 670
},
{
"epoch": 0.35,
"learning_rate": 4.941255036349316e-05,
"loss": 0.959,
"step": 680
},
{
"epoch": 0.35,
"learning_rate": 4.9395215901370265e-05,
"loss": 0.9217,
"step": 690
},
{
"epoch": 0.36,
"learning_rate": 4.937763252181434e-05,
"loss": 0.9214,
"step": 700
},
{
"epoch": 0.36,
"learning_rate": 4.935980040423803e-05,
"loss": 0.9023,
"step": 710
},
{
"epoch": 0.37,
"learning_rate": 4.934171973059196e-05,
"loss": 0.863,
"step": 720
},
{
"epoch": 0.37,
"learning_rate": 4.9323390685362915e-05,
"loss": 0.9249,
"step": 730
},
{
"epoch": 0.38,
"learning_rate": 4.930481345557193e-05,
"loss": 0.9269,
"step": 740
},
{
"epoch": 0.38,
"learning_rate": 4.928598823077243e-05,
"loss": 0.8938,
"step": 750
},
{
"epoch": 0.39,
"learning_rate": 4.926691520304824e-05,
"loss": 0.9187,
"step": 760
},
{
"epoch": 0.39,
"learning_rate": 4.924759456701167e-05,
"loss": 0.8909,
"step": 770
},
{
"epoch": 0.4,
"learning_rate": 4.922802651980149e-05,
"loss": 0.8665,
"step": 780
},
{
"epoch": 0.4,
"learning_rate": 4.920821126108096e-05,
"loss": 0.9484,
"step": 790
},
{
"epoch": 0.41,
"learning_rate": 4.9188148993035754e-05,
"loss": 0.9234,
"step": 800
},
{
"epoch": 0.41,
"learning_rate": 4.916783992037193e-05,
"loss": 0.9564,
"step": 810
},
{
"epoch": 0.42,
"learning_rate": 4.914728425031379e-05,
"loss": 0.8569,
"step": 820
},
{
"epoch": 0.42,
"learning_rate": 4.912648219260188e-05,
"loss": 0.972,
"step": 830
},
{
"epoch": 0.43,
"learning_rate": 4.910543395949067e-05,
"loss": 0.7543,
"step": 840
},
{
"epoch": 0.43,
"learning_rate": 4.908413976574655e-05,
"loss": 0.8996,
"step": 850
},
{
"epoch": 0.44,
"learning_rate": 4.9062599828645574e-05,
"loss": 0.7431,
"step": 860
},
{
"epoch": 0.44,
"learning_rate": 4.9040814367971236e-05,
"loss": 0.8302,
"step": 870
},
{
"epoch": 0.45,
"learning_rate": 4.901878360601223e-05,
"loss": 0.9348,
"step": 880
},
{
"epoch": 0.45,
"learning_rate": 4.899650776756023e-05,
"loss": 0.8658,
"step": 890
},
{
"epoch": 0.46,
"learning_rate": 4.897398707990749e-05,
"loss": 0.8688,
"step": 900
},
{
"epoch": 0.46,
"learning_rate": 4.895122177284465e-05,
"loss": 0.8237,
"step": 910
},
{
"epoch": 0.47,
"learning_rate": 4.8928212078658315e-05,
"loss": 0.8714,
"step": 920
},
{
"epoch": 0.47,
"learning_rate": 4.8904958232128687e-05,
"loss": 0.8695,
"step": 930
},
{
"epoch": 0.48,
"learning_rate": 4.888146047052721e-05,
"loss": 0.8811,
"step": 940
},
{
"epoch": 0.48,
"learning_rate": 4.88577190336141e-05,
"loss": 0.954,
"step": 950
},
{
"epoch": 0.49,
"learning_rate": 4.883373416363593e-05,
"loss": 0.9335,
"step": 960
},
{
"epoch": 0.49,
"learning_rate": 4.8809506105323164e-05,
"loss": 0.864,
"step": 970
},
{
"epoch": 0.5,
"learning_rate": 4.878503510588765e-05,
"loss": 0.9355,
"step": 980
},
{
"epoch": 0.5,
"learning_rate": 4.876032141502004e-05,
"loss": 0.885,
"step": 990
},
{
"epoch": 0.51,
"learning_rate": 4.8735365284887374e-05,
"loss": 0.8106,
"step": 1000
},
{
"epoch": 0.51,
"learning_rate": 4.8710166970130376e-05,
"loss": 0.7904,
"step": 1010
},
{
"epoch": 0.52,
"learning_rate": 4.8684726727860944e-05,
"loss": 0.8767,
"step": 1020
},
{
"epoch": 0.52,
"learning_rate": 4.865904481765945e-05,
"loss": 0.9206,
"step": 1030
},
{
"epoch": 0.53,
"learning_rate": 4.863312150157216e-05,
"loss": 0.905,
"step": 1040
},
{
"epoch": 0.53,
"learning_rate": 4.8606957044108556e-05,
"loss": 0.8438,
"step": 1050
},
{
"epoch": 0.54,
"learning_rate": 4.858055171223856e-05,
"loss": 0.8928,
"step": 1060
},
{
"epoch": 0.54,
"learning_rate": 4.855390577538991e-05,
"loss": 0.8943,
"step": 1070
},
{
"epoch": 0.55,
"learning_rate": 4.8527019505445346e-05,
"loss": 0.8802,
"step": 1080
},
{
"epoch": 0.55,
"learning_rate": 4.849989317673984e-05,
"loss": 0.8525,
"step": 1090
},
{
"epoch": 0.56,
"learning_rate": 4.847252706605786e-05,
"loss": 0.8939,
"step": 1100
},
{
"epoch": 0.56,
"learning_rate": 4.844492145263044e-05,
"loss": 0.8652,
"step": 1110
},
{
"epoch": 0.57,
"learning_rate": 4.8417076618132426e-05,
"loss": 0.8308,
"step": 1120
},
{
"epoch": 0.57,
"learning_rate": 4.838899284667956e-05,
"loss": 0.8173,
"step": 1130
},
{
"epoch": 0.58,
"learning_rate": 4.836067042482557e-05,
"loss": 0.9149,
"step": 1140
},
{
"epoch": 0.58,
"learning_rate": 4.833210964155928e-05,
"loss": 0.7346,
"step": 1150
},
{
"epoch": 0.59,
"learning_rate": 4.8303310788301624e-05,
"loss": 1.0617,
"step": 1160
},
{
"epoch": 0.59,
"learning_rate": 4.827427415890271e-05,
"loss": 0.8963,
"step": 1170
},
{
"epoch": 0.6,
"learning_rate": 4.82450000496388e-05,
"loss": 0.9368,
"step": 1180
},
{
"epoch": 0.6,
"learning_rate": 4.821548875920927e-05,
"loss": 0.9157,
"step": 1190
},
{
"epoch": 0.61,
"learning_rate": 4.818574058873361e-05,
"loss": 0.7684,
"step": 1200
},
{
"epoch": 0.61,
"learning_rate": 4.8155755841748296e-05,
"loss": 0.8846,
"step": 1210
},
{
"epoch": 0.62,
"learning_rate": 4.8125534824203754e-05,
"loss": 0.9657,
"step": 1220
},
{
"epoch": 0.63,
"learning_rate": 4.8095077844461176e-05,
"loss": 0.6925,
"step": 1230
},
{
"epoch": 0.63,
"learning_rate": 4.8064385213289414e-05,
"loss": 0.8198,
"step": 1240
},
{
"epoch": 0.64,
"learning_rate": 4.8033457243861804e-05,
"loss": 0.8938,
"step": 1250
},
{
"epoch": 0.64,
"learning_rate": 4.800229425175294e-05,
"loss": 0.7988,
"step": 1260
},
{
"epoch": 0.65,
"learning_rate": 4.7970896554935506e-05,
"loss": 0.933,
"step": 1270
},
{
"epoch": 0.65,
"learning_rate": 4.7939264473776995e-05,
"loss": 0.8274,
"step": 1280
},
{
"epoch": 0.66,
"learning_rate": 4.790739833103644e-05,
"loss": 0.946,
"step": 1290
},
{
"epoch": 0.66,
"learning_rate": 4.787529845186114e-05,
"loss": 0.8698,
"step": 1300
},
{
"epoch": 0.67,
"learning_rate": 4.784296516378333e-05,
"loss": 0.9597,
"step": 1310
},
{
"epoch": 0.67,
"learning_rate": 4.7810398796716825e-05,
"loss": 0.7929,
"step": 1320
},
{
"epoch": 0.68,
"learning_rate": 4.777759968295369e-05,
"loss": 0.9,
"step": 1330
},
{
"epoch": 0.68,
"learning_rate": 4.774456815716083e-05,
"loss": 0.7947,
"step": 1340
},
{
"epoch": 0.69,
"learning_rate": 4.7711304556376555e-05,
"loss": 0.948,
"step": 1350
},
{
"epoch": 0.69,
"learning_rate": 4.767780922000718e-05,
"loss": 0.8153,
"step": 1360
},
{
"epoch": 0.7,
"learning_rate": 4.7644082489823525e-05,
"loss": 0.9613,
"step": 1370
},
{
"epoch": 0.7,
"learning_rate": 4.761012470995746e-05,
"loss": 0.861,
"step": 1380
},
{
"epoch": 0.71,
"learning_rate": 4.7575936226898366e-05,
"loss": 0.8061,
"step": 1390
},
{
"epoch": 0.71,
"learning_rate": 4.754151738948962e-05,
"loss": 0.8082,
"step": 1400
},
{
"epoch": 0.72,
"learning_rate": 4.750686854892503e-05,
"loss": 0.8568,
"step": 1410
},
{
"epoch": 0.72,
"learning_rate": 4.747199005874524e-05,
"loss": 0.8792,
"step": 1420
},
{
"epoch": 0.73,
"learning_rate": 4.7436882274834135e-05,
"loss": 0.9441,
"step": 1430
},
{
"epoch": 0.73,
"learning_rate": 4.7401545555415204e-05,
"loss": 0.8254,
"step": 1440
},
{
"epoch": 0.74,
"learning_rate": 4.73659802610479e-05,
"loss": 0.9127,
"step": 1450
},
{
"epoch": 0.74,
"learning_rate": 4.733018675462394e-05,
"loss": 0.8423,
"step": 1460
},
{
"epoch": 0.75,
"learning_rate": 4.729416540136361e-05,
"loss": 0.8832,
"step": 1470
},
{
"epoch": 0.75,
"learning_rate": 4.725791656881203e-05,
"loss": 0.8718,
"step": 1480
},
{
"epoch": 0.76,
"learning_rate": 4.722144062683543e-05,
"loss": 0.7659,
"step": 1490
},
{
"epoch": 0.76,
"learning_rate": 4.7184737947617354e-05,
"loss": 0.793,
"step": 1500
},
{
"epoch": 0.77,
"learning_rate": 4.714780890565485e-05,
"loss": 0.8891,
"step": 1510
},
{
"epoch": 0.77,
"learning_rate": 4.71106538777547e-05,
"loss": 1.041,
"step": 1520
},
{
"epoch": 0.78,
"learning_rate": 4.707327324302951e-05,
"loss": 0.8006,
"step": 1530
},
{
"epoch": 0.78,
"learning_rate": 4.703566738289389e-05,
"loss": 0.9304,
"step": 1540
},
{
"epoch": 0.79,
"learning_rate": 4.699783668106054e-05,
"loss": 0.9484,
"step": 1550
},
{
"epoch": 0.79,
"learning_rate": 4.695978152353634e-05,
"loss": 0.8737,
"step": 1560
},
{
"epoch": 0.8,
"learning_rate": 4.69215022986184e-05,
"loss": 0.8265,
"step": 1570
},
{
"epoch": 0.8,
"learning_rate": 4.688299939689015e-05,
"loss": 0.9525,
"step": 1580
},
{
"epoch": 0.81,
"learning_rate": 4.684427321121726e-05,
"loss": 0.8764,
"step": 1590
},
{
"epoch": 0.81,
"learning_rate": 4.6805324136743714e-05,
"loss": 0.9115,
"step": 1600
},
{
"epoch": 0.82,
"learning_rate": 4.676615257088776e-05,
"loss": 0.8334,
"step": 1610
},
{
"epoch": 0.82,
"learning_rate": 4.672675891333782e-05,
"loss": 0.8515,
"step": 1620
},
{
"epoch": 0.83,
"learning_rate": 4.668714356604845e-05,
"loss": 0.9023,
"step": 1630
},
{
"epoch": 0.83,
"learning_rate": 4.664730693323622e-05,
"loss": 0.8983,
"step": 1640
},
{
"epoch": 0.84,
"learning_rate": 4.660724942137561e-05,
"loss": 0.8063,
"step": 1650
},
{
"epoch": 0.84,
"learning_rate": 4.656697143919482e-05,
"loss": 0.91,
"step": 1660
},
{
"epoch": 0.85,
"learning_rate": 4.6526473397671644e-05,
"loss": 0.8909,
"step": 1670
},
{
"epoch": 0.85,
"learning_rate": 4.6485755710029256e-05,
"loss": 0.8485,
"step": 1680
},
{
"epoch": 0.86,
"learning_rate": 4.644481879173199e-05,
"loss": 0.793,
"step": 1690
},
{
"epoch": 0.86,
"learning_rate": 4.640366306048113e-05,
"loss": 0.9396,
"step": 1700
},
{
"epoch": 0.87,
"learning_rate": 4.63622889362106e-05,
"loss": 0.8538,
"step": 1710
},
{
"epoch": 0.87,
"learning_rate": 4.63206968410827e-05,
"loss": 0.8986,
"step": 1720
},
{
"epoch": 0.88,
"learning_rate": 4.627888719948385e-05,
"loss": 0.9277,
"step": 1730
},
{
"epoch": 0.88,
"learning_rate": 4.623686043802016e-05,
"loss": 0.8013,
"step": 1740
},
{
"epoch": 0.89,
"learning_rate": 4.619461698551315e-05,
"loss": 0.8565,
"step": 1750
},
{
"epoch": 0.89,
"learning_rate": 4.6152157272995355e-05,
"loss": 0.7764,
"step": 1760
},
{
"epoch": 0.9,
"learning_rate": 4.610948173370594e-05,
"loss": 0.8674,
"step": 1770
},
{
"epoch": 0.9,
"learning_rate": 4.606659080308624e-05,
"loss": 0.7947,
"step": 1780
},
{
"epoch": 0.91,
"learning_rate": 4.6023484918775364e-05,
"loss": 0.8766,
"step": 1790
},
{
"epoch": 0.91,
"learning_rate": 4.598016452060569e-05,
"loss": 0.8197,
"step": 1800
},
{
"epoch": 0.92,
"learning_rate": 4.593663005059841e-05,
"loss": 0.9353,
"step": 1810
},
{
"epoch": 0.92,
"learning_rate": 4.589288195295901e-05,
"loss": 0.8794,
"step": 1820
},
{
"epoch": 0.93,
"learning_rate": 4.584892067407272e-05,
"loss": 0.844,
"step": 1830
},
{
"epoch": 0.94,
"learning_rate": 4.580474666249997e-05,
"loss": 0.8291,
"step": 1840
},
{
"epoch": 0.94,
"learning_rate": 4.576036036897182e-05,
"loss": 0.7954,
"step": 1850
},
{
"epoch": 0.95,
"learning_rate": 4.571576224638536e-05,
"loss": 0.9331,
"step": 1860
},
{
"epoch": 0.95,
"learning_rate": 4.56709527497991e-05,
"loss": 0.8131,
"step": 1870
},
{
"epoch": 0.96,
"learning_rate": 4.562593233642828e-05,
"loss": 0.8134,
"step": 1880
},
{
"epoch": 0.96,
"learning_rate": 4.5580701465640254e-05,
"loss": 0.8629,
"step": 1890
},
{
"epoch": 0.97,
"learning_rate": 4.553526059894978e-05,
"loss": 0.8637,
"step": 1900
},
{
"epoch": 0.97,
"learning_rate": 4.548961020001432e-05,
"loss": 0.8737,
"step": 1910
},
{
"epoch": 0.98,
"learning_rate": 4.544375073462932e-05,
"loss": 0.8404,
"step": 1920
},
{
"epoch": 0.98,
"learning_rate": 4.539768267072341e-05,
"loss": 0.9458,
"step": 1930
},
{
"epoch": 0.99,
"learning_rate": 4.535140647835369e-05,
"loss": 0.8777,
"step": 1940
},
{
"epoch": 0.99,
"learning_rate": 4.5304922629700896e-05,
"loss": 0.8794,
"step": 1950
},
{
"epoch": 1.0,
"learning_rate": 4.525823159906459e-05,
"loss": 0.8848,
"step": 1960
},
{
"epoch": 1.0,
"learning_rate": 4.521133386285833e-05,
"loss": 0.8398,
"step": 1970
},
{
"epoch": 1.01,
"learning_rate": 4.5164229899604796e-05,
"loss": 0.8368,
"step": 1980
},
{
"epoch": 1.01,
"learning_rate": 4.51169201899309e-05,
"loss": 0.7742,
"step": 1990
},
{
"epoch": 1.02,
"learning_rate": 4.506940521656293e-05,
"loss": 0.875,
"step": 2000
},
{
"epoch": 1.02,
"learning_rate": 4.502168546432155e-05,
"loss": 0.8689,
"step": 2010
},
{
"epoch": 1.03,
"learning_rate": 4.497376142011693e-05,
"loss": 0.8329,
"step": 2020
},
{
"epoch": 1.03,
"learning_rate": 4.492563357294369e-05,
"loss": 0.804,
"step": 2030
},
{
"epoch": 1.04,
"learning_rate": 4.487730241387602e-05,
"loss": 0.7985,
"step": 2040
},
{
"epoch": 1.04,
"learning_rate": 4.482876843606257e-05,
"loss": 0.8019,
"step": 2050
},
{
"epoch": 1.05,
"learning_rate": 4.478003213472146e-05,
"loss": 0.7925,
"step": 2060
},
{
"epoch": 1.05,
"learning_rate": 4.473109400713525e-05,
"loss": 0.8566,
"step": 2070
},
{
"epoch": 1.06,
"learning_rate": 4.468195455264581e-05,
"loss": 0.7679,
"step": 2080
},
{
"epoch": 1.06,
"learning_rate": 4.463261427264928e-05,
"loss": 0.7556,
"step": 2090
},
{
"epoch": 1.07,
"learning_rate": 4.458307367059092e-05,
"loss": 0.7138,
"step": 2100
},
{
"epoch": 1.07,
"learning_rate": 4.4533333251959985e-05,
"loss": 0.893,
"step": 2110
},
{
"epoch": 1.08,
"learning_rate": 4.448339352428456e-05,
"loss": 0.9398,
"step": 2120
},
{
"epoch": 1.08,
"learning_rate": 4.4433254997126394e-05,
"loss": 0.9158,
"step": 2130
},
{
"epoch": 1.09,
"learning_rate": 4.438291818207569e-05,
"loss": 0.8109,
"step": 2140
},
{
"epoch": 1.09,
"learning_rate": 4.4332383592745894e-05,
"loss": 0.9455,
"step": 2150
},
{
"epoch": 1.1,
"learning_rate": 4.4281651744768436e-05,
"loss": 0.8938,
"step": 2160
},
{
"epoch": 1.1,
"learning_rate": 4.42307231557875e-05,
"loss": 0.7618,
"step": 2170
},
{
"epoch": 1.11,
"learning_rate": 4.4179598345454704e-05,
"loss": 0.8194,
"step": 2180
},
{
"epoch": 1.11,
"learning_rate": 4.4128277835423825e-05,
"loss": 0.8848,
"step": 2190
},
{
"epoch": 1.12,
"learning_rate": 4.407676214934548e-05,
"loss": 0.7657,
"step": 2200
},
{
"epoch": 1.12,
"learning_rate": 4.402505181286175e-05,
"loss": 0.8897,
"step": 2210
},
{
"epoch": 1.13,
"learning_rate": 4.3973147353600866e-05,
"loss": 0.8431,
"step": 2220
},
{
"epoch": 1.13,
"learning_rate": 4.392104930117177e-05,
"loss": 0.8565,
"step": 2230
},
{
"epoch": 1.14,
"learning_rate": 4.386875818715874e-05,
"loss": 0.8426,
"step": 2240
},
{
"epoch": 1.14,
"learning_rate": 4.3816274545116e-05,
"loss": 0.8096,
"step": 2250
},
{
"epoch": 1.15,
"learning_rate": 4.37635989105622e-05,
"loss": 0.8651,
"step": 2260
},
{
"epoch": 1.15,
"learning_rate": 4.3710731820975e-05,
"loss": 0.8658,
"step": 2270
},
{
"epoch": 1.16,
"learning_rate": 4.36576738157856e-05,
"loss": 0.8642,
"step": 2280
},
{
"epoch": 1.16,
"learning_rate": 4.3604425436373194e-05,
"loss": 0.8334,
"step": 2290
},
{
"epoch": 1.17,
"learning_rate": 4.355098722605946e-05,
"loss": 0.929,
"step": 2300
},
{
"epoch": 1.17,
"learning_rate": 4.349735973010305e-05,
"loss": 0.8318,
"step": 2310
},
{
"epoch": 1.18,
"learning_rate": 4.344354349569398e-05,
"loss": 0.7576,
"step": 2320
},
{
"epoch": 1.18,
"learning_rate": 4.3389539071948065e-05,
"loss": 0.7787,
"step": 2330
},
{
"epoch": 1.19,
"learning_rate": 4.3335347009901314e-05,
"loss": 0.7632,
"step": 2340
},
{
"epoch": 1.19,
"learning_rate": 4.328096786250432e-05,
"loss": 0.7624,
"step": 2350
},
{
"epoch": 1.2,
"learning_rate": 4.32264021846166e-05,
"loss": 0.8955,
"step": 2360
},
{
"epoch": 1.2,
"learning_rate": 4.317165053300095e-05,
"loss": 0.9177,
"step": 2370
},
{
"epoch": 1.21,
"learning_rate": 4.311671346631774e-05,
"loss": 0.9347,
"step": 2380
},
{
"epoch": 1.21,
"learning_rate": 4.306159154511925e-05,
"loss": 0.8386,
"step": 2390
},
{
"epoch": 1.22,
"learning_rate": 4.300628533184391e-05,
"loss": 0.7803,
"step": 2400
},
{
"epoch": 1.22,
"learning_rate": 4.295079539081058e-05,
"loss": 0.8833,
"step": 2410
},
{
"epoch": 1.23,
"learning_rate": 4.289512228821282e-05,
"loss": 0.896,
"step": 2420
},
{
"epoch": 1.23,
"learning_rate": 4.283926659211306e-05,
"loss": 0.8046,
"step": 2430
},
{
"epoch": 1.24,
"learning_rate": 4.278322887243683e-05,
"loss": 0.8341,
"step": 2440
},
{
"epoch": 1.25,
"learning_rate": 4.272700970096696e-05,
"loss": 0.8365,
"step": 2450
},
{
"epoch": 1.25,
"learning_rate": 4.26706096513377e-05,
"loss": 0.8236,
"step": 2460
},
{
"epoch": 1.26,
"learning_rate": 4.2614029299028944e-05,
"loss": 0.9149,
"step": 2470
},
{
"epoch": 1.26,
"learning_rate": 4.2557269221360265e-05,
"loss": 0.8772,
"step": 2480
},
{
"epoch": 1.27,
"learning_rate": 4.250032999748508e-05,
"loss": 0.8428,
"step": 2490
},
{
"epoch": 1.27,
"learning_rate": 4.2443212208384755e-05,
"loss": 0.7791,
"step": 2500
},
{
"epoch": 1.28,
"learning_rate": 4.238591643686263e-05,
"loss": 0.894,
"step": 2510
},
{
"epoch": 1.28,
"learning_rate": 4.23284432675381e-05,
"loss": 0.8412,
"step": 2520
},
{
"epoch": 1.29,
"learning_rate": 4.2270793286840636e-05,
"loss": 0.7827,
"step": 2530
},
{
"epoch": 1.29,
"learning_rate": 4.2212967083003835e-05,
"loss": 0.8868,
"step": 2540
},
{
"epoch": 1.3,
"learning_rate": 4.215496524605936e-05,
"loss": 0.8999,
"step": 2550
},
{
"epoch": 1.3,
"learning_rate": 4.209678836783098e-05,
"loss": 0.8319,
"step": 2560
},
{
"epoch": 1.31,
"learning_rate": 4.2038437041928505e-05,
"loss": 0.8147,
"step": 2570
},
{
"epoch": 1.31,
"learning_rate": 4.1979911863741686e-05,
"loss": 0.8202,
"step": 2580
},
{
"epoch": 1.32,
"learning_rate": 4.192121343043424e-05,
"loss": 0.8346,
"step": 2590
},
{
"epoch": 1.32,
"learning_rate": 4.1862342340937655e-05,
"loss": 0.8413,
"step": 2600
},
{
"epoch": 1.33,
"learning_rate": 4.1803299195945145e-05,
"loss": 0.8867,
"step": 2610
},
{
"epoch": 1.33,
"learning_rate": 4.174408459790549e-05,
"loss": 0.789,
"step": 2620
},
{
"epoch": 1.34,
"learning_rate": 4.1684699151016896e-05,
"loss": 0.7649,
"step": 2630
},
{
"epoch": 1.34,
"learning_rate": 4.162514346122083e-05,
"loss": 0.8685,
"step": 2640
},
{
"epoch": 1.35,
"learning_rate": 4.156541813619585e-05,
"loss": 0.7793,
"step": 2650
},
{
"epoch": 1.35,
"learning_rate": 4.150552378535137e-05,
"loss": 0.871,
"step": 2660
},
{
"epoch": 1.36,
"learning_rate": 4.144546101982151e-05,
"loss": 0.8534,
"step": 2670
},
{
"epoch": 1.36,
"learning_rate": 4.1385230452458756e-05,
"loss": 0.8658,
"step": 2680
},
{
"epoch": 1.37,
"learning_rate": 4.132483269782781e-05,
"loss": 0.7654,
"step": 2690
},
{
"epoch": 1.37,
"learning_rate": 4.126426837219925e-05,
"loss": 0.788,
"step": 2700
},
{
"epoch": 1.38,
"learning_rate": 4.120353809354328e-05,
"loss": 0.7899,
"step": 2710
},
{
"epoch": 1.38,
"learning_rate": 4.114264248152342e-05,
"loss": 0.8984,
"step": 2720
},
{
"epoch": 1.39,
"learning_rate": 4.108158215749014e-05,
"loss": 0.9037,
"step": 2730
},
{
"epoch": 1.39,
"learning_rate": 4.10203577444746e-05,
"loss": 0.8417,
"step": 2740
},
{
"epoch": 1.4,
"learning_rate": 4.095896986718221e-05,
"loss": 0.8844,
"step": 2750
},
{
"epoch": 1.4,
"learning_rate": 4.089741915198632e-05,
"loss": 0.8565,
"step": 2760
},
{
"epoch": 1.41,
"learning_rate": 4.0835706226921776e-05,
"loss": 0.8313,
"step": 2770
},
{
"epoch": 1.41,
"learning_rate": 4.077383172167857e-05,
"loss": 0.8312,
"step": 2780
},
{
"epoch": 1.42,
"learning_rate": 4.0711796267595355e-05,
"loss": 0.8551,
"step": 2790
},
{
"epoch": 1.42,
"learning_rate": 4.064960049765304e-05,
"loss": 0.8134,
"step": 2800
},
{
"epoch": 1.43,
"learning_rate": 4.058724504646834e-05,
"loss": 0.8246,
"step": 2810
},
{
"epoch": 1.43,
"learning_rate": 4.052473055028726e-05,
"loss": 0.7552,
"step": 2820
},
{
"epoch": 1.44,
"learning_rate": 4.046205764697862e-05,
"loss": 0.8374,
"step": 2830
},
{
"epoch": 1.44,
"learning_rate": 4.0399226976027583e-05,
"loss": 0.8721,
"step": 2840
},
{
"epoch": 1.45,
"learning_rate": 4.0336239178529075e-05,
"loss": 0.756,
"step": 2850
},
{
"epoch": 1.45,
"learning_rate": 4.0273094897181285e-05,
"loss": 0.7646,
"step": 2860
},
{
"epoch": 1.46,
"learning_rate": 4.020979477627907e-05,
"loss": 0.8254,
"step": 2870
},
{
"epoch": 1.46,
"learning_rate": 4.014633946170742e-05,
"loss": 0.843,
"step": 2880
},
{
"epoch": 1.47,
"learning_rate": 4.0082729600934844e-05,
"loss": 0.8923,
"step": 2890
},
{
"epoch": 1.47,
"learning_rate": 4.001896584300675e-05,
"loss": 0.8476,
"step": 2900
},
{
"epoch": 1.48,
"learning_rate": 3.995504883853888e-05,
"loss": 0.8202,
"step": 2910
},
{
"epoch": 1.48,
"learning_rate": 3.98909792397106e-05,
"loss": 0.8049,
"step": 2920
},
{
"epoch": 1.49,
"learning_rate": 3.9826757700258284e-05,
"loss": 0.7977,
"step": 2930
},
{
"epoch": 1.49,
"learning_rate": 3.976238487546864e-05,
"loss": 0.8494,
"step": 2940
},
{
"epoch": 1.5,
"learning_rate": 3.9697861422172034e-05,
"loss": 0.871,
"step": 2950
},
{
"epoch": 1.5,
"learning_rate": 3.963318799873575e-05,
"loss": 0.9323,
"step": 2960
},
{
"epoch": 1.51,
"learning_rate": 3.956836526505733e-05,
"loss": 0.912,
"step": 2970
},
{
"epoch": 1.51,
"learning_rate": 3.9503393882557766e-05,
"loss": 0.851,
"step": 2980
},
{
"epoch": 1.52,
"learning_rate": 3.943827451417483e-05,
"loss": 0.78,
"step": 2990
},
{
"epoch": 1.52,
"learning_rate": 3.937300782435625e-05,
"loss": 0.7798,
"step": 3000
},
{
"epoch": 1.53,
"learning_rate": 3.930759447905298e-05,
"loss": 0.8496,
"step": 3010
},
{
"epoch": 1.53,
"learning_rate": 3.9242035145712344e-05,
"loss": 0.8427,
"step": 3020
},
{
"epoch": 1.54,
"learning_rate": 3.9176330493271285e-05,
"loss": 0.8619,
"step": 3030
},
{
"epoch": 1.54,
"learning_rate": 3.9110481192149504e-05,
"loss": 0.7663,
"step": 3040
},
{
"epoch": 1.55,
"learning_rate": 3.9044487914242646e-05,
"loss": 0.7478,
"step": 3050
},
{
"epoch": 1.56,
"learning_rate": 3.897835133291539e-05,
"loss": 0.8048,
"step": 3060
},
{
"epoch": 1.56,
"learning_rate": 3.891207212299467e-05,
"loss": 0.8875,
"step": 3070
},
{
"epoch": 1.57,
"learning_rate": 3.884565096076269e-05,
"loss": 0.8649,
"step": 3080
},
{
"epoch": 1.57,
"learning_rate": 3.877908852395008e-05,
"loss": 0.8313,
"step": 3090
},
{
"epoch": 1.58,
"learning_rate": 3.8712385491729e-05,
"loss": 0.8779,
"step": 3100
},
{
"epoch": 1.58,
"learning_rate": 3.864554254470613e-05,
"loss": 0.7845,
"step": 3110
},
{
"epoch": 1.59,
"learning_rate": 3.857856036491582e-05,
"loss": 0.8581,
"step": 3120
},
{
"epoch": 1.59,
"learning_rate": 3.851143963581306e-05,
"loss": 0.8512,
"step": 3130
},
{
"epoch": 1.6,
"learning_rate": 3.844418104226656e-05,
"loss": 0.7689,
"step": 3140
},
{
"epoch": 1.6,
"learning_rate": 3.837678527055168e-05,
"loss": 0.8677,
"step": 3150
},
{
"epoch": 1.61,
"learning_rate": 3.830925300834356e-05,
"loss": 0.7601,
"step": 3160
},
{
"epoch": 1.61,
"learning_rate": 3.824158494470996e-05,
"loss": 0.8637,
"step": 3170
},
{
"epoch": 1.62,
"learning_rate": 3.817378177010431e-05,
"loss": 0.7152,
"step": 3180
},
{
"epoch": 1.62,
"learning_rate": 3.8105844176358674e-05,
"loss": 0.9339,
"step": 3190
},
{
"epoch": 1.63,
"learning_rate": 3.803777285667665e-05,
"loss": 0.8261,
"step": 3200
},
{
"epoch": 1.63,
"learning_rate": 3.7969568505626305e-05,
"loss": 0.896,
"step": 3210
},
{
"epoch": 1.64,
"learning_rate": 3.7901231819133105e-05,
"loss": 0.9026,
"step": 3220
},
{
"epoch": 1.64,
"learning_rate": 3.783276349447281e-05,
"loss": 0.8242,
"step": 3230
},
{
"epoch": 1.65,
"learning_rate": 3.7764164230264357e-05,
"loss": 0.8292,
"step": 3240
},
{
"epoch": 1.65,
"learning_rate": 3.7695434726462704e-05,
"loss": 0.9249,
"step": 3250
},
{
"epoch": 1.66,
"learning_rate": 3.762657568435174e-05,
"loss": 0.9214,
"step": 3260
},
{
"epoch": 1.66,
"learning_rate": 3.7557587806537094e-05,
"loss": 0.8414,
"step": 3270
},
{
"epoch": 1.67,
"learning_rate": 3.748847179693897e-05,
"loss": 0.7875,
"step": 3280
},
{
"epoch": 1.67,
"learning_rate": 3.741922836078499e-05,
"loss": 0.7981,
"step": 3290
},
{
"epoch": 1.68,
"learning_rate": 3.734985820460293e-05,
"loss": 0.8205,
"step": 3300
},
{
"epoch": 1.68,
"learning_rate": 3.728036203621361e-05,
"loss": 0.8429,
"step": 3310
},
{
"epoch": 1.69,
"learning_rate": 3.72107405647236e-05,
"loss": 0.8432,
"step": 3320
},
{
"epoch": 1.69,
"learning_rate": 3.7140994500517995e-05,
"loss": 0.86,
"step": 3330
},
{
"epoch": 1.7,
"learning_rate": 3.707112455525318e-05,
"loss": 0.7461,
"step": 3340
},
{
"epoch": 1.7,
"learning_rate": 3.7001131441849586e-05,
"loss": 0.8739,
"step": 3350
},
{
"epoch": 1.71,
"learning_rate": 3.693101587448436e-05,
"loss": 0.8064,
"step": 3360
},
{
"epoch": 1.71,
"learning_rate": 3.6860778568584145e-05,
"loss": 0.8171,
"step": 3370
},
{
"epoch": 1.72,
"learning_rate": 3.6790420240817715e-05,
"loss": 0.7549,
"step": 3380
},
{
"epoch": 1.72,
"learning_rate": 3.671994160908872e-05,
"loss": 0.8102,
"step": 3390
},
{
"epoch": 1.73,
"learning_rate": 3.6649343392528335e-05,
"loss": 0.8086,
"step": 3400
},
{
"epoch": 1.73,
"learning_rate": 3.657862631148791e-05,
"loss": 0.9243,
"step": 3410
},
{
"epoch": 1.74,
"learning_rate": 3.650779108753163e-05,
"loss": 0.8874,
"step": 3420
},
{
"epoch": 1.74,
"learning_rate": 3.6436838443429175e-05,
"loss": 0.7962,
"step": 3430
},
{
"epoch": 1.75,
"learning_rate": 3.636576910314831e-05,
"loss": 0.7621,
"step": 3440
},
{
"epoch": 1.75,
"learning_rate": 3.6294583791847514e-05,
"loss": 0.8126,
"step": 3450
},
{
"epoch": 1.76,
"learning_rate": 3.622328323586859e-05,
"loss": 0.8358,
"step": 3460
},
{
"epoch": 1.76,
"learning_rate": 3.615186816272925e-05,
"loss": 0.8677,
"step": 3470
},
{
"epoch": 1.77,
"learning_rate": 3.608033930111564e-05,
"loss": 0.8286,
"step": 3480
},
{
"epoch": 1.77,
"learning_rate": 3.600869738087501e-05,
"loss": 0.8292,
"step": 3490
},
{
"epoch": 1.78,
"learning_rate": 3.5936943133008183e-05,
"loss": 0.8448,
"step": 3500
},
{
"epoch": 1.78,
"learning_rate": 3.5865077289662114e-05,
"loss": 0.7162,
"step": 3510
},
{
"epoch": 1.79,
"learning_rate": 3.5793100584122426e-05,
"loss": 0.7949,
"step": 3520
},
{
"epoch": 1.79,
"learning_rate": 3.572101375080594e-05,
"loss": 0.8263,
"step": 3530
},
{
"epoch": 1.8,
"learning_rate": 3.564881752525317e-05,
"loss": 0.9174,
"step": 3540
},
{
"epoch": 1.8,
"learning_rate": 3.5576512644120804e-05,
"loss": 0.8188,
"step": 3550
},
{
"epoch": 1.81,
"learning_rate": 3.550409984517421e-05,
"loss": 0.81,
"step": 3560
},
{
"epoch": 1.81,
"learning_rate": 3.5431579867279905e-05,
"loss": 0.8592,
"step": 3570
},
{
"epoch": 1.82,
"learning_rate": 3.5358953450397995e-05,
"loss": 0.8419,
"step": 3580
},
{
"epoch": 1.82,
"learning_rate": 3.528622133557465e-05,
"loss": 0.7349,
"step": 3590
},
{
"epoch": 1.83,
"learning_rate": 3.521338426493453e-05,
"loss": 0.8005,
"step": 3600
},
{
"epoch": 1.83,
"learning_rate": 3.514044298167322e-05,
"loss": 0.7567,
"step": 3610
},
{
"epoch": 1.84,
"learning_rate": 3.506739823004963e-05,
"loss": 0.7951,
"step": 3620
},
{
"epoch": 1.84,
"learning_rate": 3.4994250755378434e-05,
"loss": 0.8423,
"step": 3630
},
{
"epoch": 1.85,
"learning_rate": 3.492100130402242e-05,
"loss": 0.844,
"step": 3640
},
{
"epoch": 1.85,
"learning_rate": 3.4847650623384914e-05,
"loss": 0.8515,
"step": 3650
},
{
"epoch": 1.86,
"learning_rate": 3.477419946190213e-05,
"loss": 0.906,
"step": 3660
},
{
"epoch": 1.87,
"learning_rate": 3.470064856903555e-05,
"loss": 0.8309,
"step": 3670
},
{
"epoch": 1.87,
"learning_rate": 3.462699869526427e-05,
"loss": 0.8666,
"step": 3680
},
{
"epoch": 1.88,
"learning_rate": 3.455325059207732e-05,
"loss": 0.7585,
"step": 3690
},
{
"epoch": 1.88,
"learning_rate": 3.4479405011966056e-05,
"loss": 0.8133,
"step": 3700
},
{
"epoch": 1.89,
"learning_rate": 3.440546270841639e-05,
"loss": 0.9355,
"step": 3710
},
{
"epoch": 1.89,
"learning_rate": 3.4331424435901214e-05,
"loss": 0.7332,
"step": 3720
},
{
"epoch": 1.9,
"learning_rate": 3.4257290949872614e-05,
"loss": 0.8603,
"step": 3730
},
{
"epoch": 1.9,
"learning_rate": 3.418306300675416e-05,
"loss": 0.8269,
"step": 3740
},
{
"epoch": 1.91,
"learning_rate": 3.410874136393327e-05,
"loss": 0.799,
"step": 3750
},
{
"epoch": 1.91,
"learning_rate": 3.403432677975341e-05,
"loss": 0.8898,
"step": 3760
},
{
"epoch": 1.92,
"learning_rate": 3.395982001350637e-05,
"loss": 0.7441,
"step": 3770
},
{
"epoch": 1.92,
"learning_rate": 3.3885221825424537e-05,
"loss": 0.8466,
"step": 3780
},
{
"epoch": 1.93,
"learning_rate": 3.381053297667309e-05,
"loss": 0.8273,
"step": 3790
},
{
"epoch": 1.93,
"learning_rate": 3.3735754229342326e-05,
"loss": 0.8397,
"step": 3800
},
{
"epoch": 1.94,
"learning_rate": 3.3660886346439765e-05,
"loss": 0.8455,
"step": 3810
},
{
"epoch": 1.94,
"learning_rate": 3.358593009188247e-05,
"loss": 0.8254,
"step": 3820
},
{
"epoch": 1.95,
"learning_rate": 3.351088623048918e-05,
"loss": 0.8374,
"step": 3830
},
{
"epoch": 1.95,
"learning_rate": 3.3435755527972536e-05,
"loss": 0.781,
"step": 3840
},
{
"epoch": 1.96,
"learning_rate": 3.336053875093128e-05,
"loss": 0.8414,
"step": 3850
},
{
"epoch": 1.96,
"learning_rate": 3.32852366668424e-05,
"loss": 0.7875,
"step": 3860
},
{
"epoch": 1.97,
"learning_rate": 3.320985004405334e-05,
"loss": 0.7889,
"step": 3870
},
{
"epoch": 1.97,
"learning_rate": 3.3134379651774114e-05,
"loss": 0.894,
"step": 3880
},
{
"epoch": 1.98,
"learning_rate": 3.30588262600695e-05,
"loss": 0.8475,
"step": 3890
},
{
"epoch": 1.98,
"learning_rate": 3.298319063985116e-05,
"loss": 0.8024,
"step": 3900
},
{
"epoch": 1.99,
"learning_rate": 3.2907473562869754e-05,
"loss": 0.8467,
"step": 3910
},
{
"epoch": 1.99,
"learning_rate": 3.283167580170712e-05,
"loss": 0.7829,
"step": 3920
},
{
"epoch": 2.0,
"learning_rate": 3.275579812976835e-05,
"loss": 0.8466,
"step": 3930
},
{
"epoch": 2.0,
"learning_rate": 3.2679841321273895e-05,
"loss": 0.7958,
"step": 3940
},
{
"epoch": 2.01,
"learning_rate": 3.260380615125171e-05,
"loss": 0.7956,
"step": 3950
},
{
"epoch": 2.01,
"learning_rate": 3.252769339552927e-05,
"loss": 0.8578,
"step": 3960
},
{
"epoch": 2.02,
"learning_rate": 3.245150383072573e-05,
"loss": 0.8806,
"step": 3970
},
{
"epoch": 2.02,
"learning_rate": 3.2375238234243965e-05,
"loss": 0.8477,
"step": 3980
},
{
"epoch": 2.03,
"learning_rate": 3.229889738426264e-05,
"loss": 0.7173,
"step": 3990
},
{
"epoch": 2.03,
"learning_rate": 3.222248205972827e-05,
"loss": 0.8259,
"step": 4000
},
{
"epoch": 2.04,
"learning_rate": 3.2145993040347264e-05,
"loss": 0.7454,
"step": 4010
},
{
"epoch": 2.04,
"learning_rate": 3.2069431106577995e-05,
"loss": 0.8054,
"step": 4020
},
{
"epoch": 2.05,
"learning_rate": 3.199279703962282e-05,
"loss": 0.7146,
"step": 4030
},
{
"epoch": 2.05,
"learning_rate": 3.1916091621420104e-05,
"loss": 0.8322,
"step": 4040
},
{
"epoch": 2.06,
"learning_rate": 3.183931563463624e-05,
"loss": 0.7718,
"step": 4050
},
{
"epoch": 2.06,
"learning_rate": 3.176246986265767e-05,
"loss": 0.9118,
"step": 4060
},
{
"epoch": 2.07,
"learning_rate": 3.1685555089582906e-05,
"loss": 0.8052,
"step": 4070
},
{
"epoch": 2.07,
"learning_rate": 3.1608572100214526e-05,
"loss": 0.8209,
"step": 4080
},
{
"epoch": 2.08,
"learning_rate": 3.15315216800511e-05,
"loss": 0.7682,
"step": 4090
},
{
"epoch": 2.08,
"learning_rate": 3.145440461527929e-05,
"loss": 0.8159,
"step": 4100
},
{
"epoch": 2.09,
"learning_rate": 3.137722169276574e-05,
"loss": 0.8396,
"step": 4110
},
{
"epoch": 2.09,
"learning_rate": 3.129997370004909e-05,
"loss": 0.7799,
"step": 4120
},
{
"epoch": 2.1,
"learning_rate": 3.122266142533191e-05,
"loss": 0.8488,
"step": 4130
},
{
"epoch": 2.1,
"learning_rate": 3.114528565747268e-05,
"loss": 0.7617,
"step": 4140
},
{
"epoch": 2.11,
"learning_rate": 3.1067847185977735e-05,
"loss": 0.8345,
"step": 4150
},
{
"epoch": 2.11,
"learning_rate": 3.099034680099321e-05,
"loss": 0.7212,
"step": 4160
},
{
"epoch": 2.12,
"learning_rate": 3.091278529329698e-05,
"loss": 0.8031,
"step": 4170
},
{
"epoch": 2.12,
"learning_rate": 3.0835163454290574e-05,
"loss": 0.8333,
"step": 4180
},
{
"epoch": 2.13,
"learning_rate": 3.075748207599114e-05,
"loss": 0.7761,
"step": 4190
},
{
"epoch": 2.13,
"learning_rate": 3.06797419510233e-05,
"loss": 0.7531,
"step": 4200
},
{
"epoch": 2.14,
"learning_rate": 3.060194387261114e-05,
"loss": 0.8292,
"step": 4210
},
{
"epoch": 2.14,
"learning_rate": 3.0524088634570035e-05,
"loss": 0.82,
"step": 4220
},
{
"epoch": 2.15,
"learning_rate": 3.0446177031298627e-05,
"loss": 0.8561,
"step": 4230
},
{
"epoch": 2.15,
"learning_rate": 3.036820985777067e-05,
"loss": 0.9112,
"step": 4240
},
{
"epoch": 2.16,
"learning_rate": 3.0290187909526914e-05,
"loss": 0.8364,
"step": 4250
},
{
"epoch": 2.16,
"learning_rate": 3.0212111982667024e-05,
"loss": 0.7643,
"step": 4260
},
{
"epoch": 2.17,
"learning_rate": 3.013398287384144e-05,
"loss": 0.8117,
"step": 4270
},
{
"epoch": 2.18,
"learning_rate": 3.0055801380243224e-05,
"loss": 0.8721,
"step": 4280
},
{
"epoch": 2.18,
"learning_rate": 2.9977568299599973e-05,
"loss": 0.76,
"step": 4290
},
{
"epoch": 2.19,
"learning_rate": 2.989928443016564e-05,
"loss": 0.7813,
"step": 4300
},
{
"epoch": 2.19,
"learning_rate": 2.9820950570712414e-05,
"loss": 0.8918,
"step": 4310
},
{
"epoch": 2.2,
"learning_rate": 2.9742567520522534e-05,
"loss": 0.8043,
"step": 4320
},
{
"epoch": 2.2,
"learning_rate": 2.966413607938019e-05,
"loss": 0.8443,
"step": 4330
},
{
"epoch": 2.21,
"learning_rate": 2.9585657047563315e-05,
"loss": 0.7935,
"step": 4340
},
{
"epoch": 2.21,
"learning_rate": 2.9507131225835432e-05,
"loss": 0.7864,
"step": 4350
},
{
"epoch": 2.22,
"learning_rate": 2.9428559415437496e-05,
"loss": 0.8375,
"step": 4360
},
{
"epoch": 2.22,
"learning_rate": 2.93499424180797e-05,
"loss": 0.8113,
"step": 4370
},
{
"epoch": 2.23,
"learning_rate": 2.9271281035933313e-05,
"loss": 0.7886,
"step": 4380
},
{
"epoch": 2.23,
"learning_rate": 2.9192576071622473e-05,
"loss": 0.9166,
"step": 4390
},
{
"epoch": 2.24,
"learning_rate": 2.9113828328216027e-05,
"loss": 0.8631,
"step": 4400
},
{
"epoch": 2.24,
"learning_rate": 2.9035038609219306e-05,
"loss": 0.861,
"step": 4410
},
{
"epoch": 2.25,
"learning_rate": 2.8956207718565942e-05,
"loss": 0.8465,
"step": 4420
},
{
"epoch": 2.25,
"learning_rate": 2.8877336460609673e-05,
"loss": 0.7999,
"step": 4430
},
{
"epoch": 2.26,
"learning_rate": 2.879842564011612e-05,
"loss": 0.8585,
"step": 4440
},
{
"epoch": 2.26,
"learning_rate": 2.871947606225458e-05,
"loss": 0.885,
"step": 4450
},
{
"epoch": 2.27,
"learning_rate": 2.8640488532589803e-05,
"loss": 0.736,
"step": 4460
},
{
"epoch": 2.27,
"learning_rate": 2.8561463857073804e-05,
"loss": 0.7454,
"step": 4470
},
{
"epoch": 2.28,
"learning_rate": 2.8482402842037614e-05,
"loss": 0.8043,
"step": 4480
},
{
"epoch": 2.28,
"learning_rate": 2.8403306294183026e-05,
"loss": 0.74,
"step": 4490
},
{
"epoch": 2.29,
"learning_rate": 2.8324175020574424e-05,
"loss": 0.8533,
"step": 4500
},
{
"epoch": 2.29,
"learning_rate": 2.8245009828630502e-05,
"loss": 0.735,
"step": 4510
},
{
"epoch": 2.3,
"learning_rate": 2.816581152611606e-05,
"loss": 0.867,
"step": 4520
},
{
"epoch": 2.3,
"learning_rate": 2.808658092113372e-05,
"loss": 0.8848,
"step": 4530
},
{
"epoch": 2.31,
"learning_rate": 2.8007318822115713e-05,
"loss": 0.7563,
"step": 4540
},
{
"epoch": 2.31,
"learning_rate": 2.792802603781562e-05,
"loss": 0.762,
"step": 4550
},
{
"epoch": 2.32,
"learning_rate": 2.7848703377300118e-05,
"loss": 0.7755,
"step": 4560
},
{
"epoch": 2.32,
"learning_rate": 2.776935164994074e-05,
"loss": 0.8489,
"step": 4570
},
{
"epoch": 2.33,
"learning_rate": 2.7689971665405578e-05,
"loss": 0.7536,
"step": 4580
},
{
"epoch": 2.33,
"learning_rate": 2.761056423365107e-05,
"loss": 0.7741,
"step": 4590
},
{
"epoch": 2.34,
"learning_rate": 2.7531130164913703e-05,
"loss": 0.7624,
"step": 4600
},
{
"epoch": 2.34,
"learning_rate": 2.7451670269701767e-05,
"loss": 0.8003,
"step": 4610
},
{
"epoch": 2.35,
"learning_rate": 2.737218535878705e-05,
"loss": 0.8823,
"step": 4620
},
{
"epoch": 2.35,
"learning_rate": 2.7292676243196608e-05,
"loss": 0.8301,
"step": 4630
},
{
"epoch": 2.36,
"learning_rate": 2.7213143734204462e-05,
"loss": 0.8486,
"step": 4640
},
{
"epoch": 2.36,
"learning_rate": 2.7133588643323334e-05,
"loss": 0.7807,
"step": 4650
},
{
"epoch": 2.37,
"learning_rate": 2.7054011782296356e-05,
"loss": 0.8104,
"step": 4660
},
{
"epoch": 2.37,
"learning_rate": 2.6974413963088797e-05,
"loss": 0.847,
"step": 4670
},
{
"epoch": 2.38,
"learning_rate": 2.6894795997879762e-05,
"loss": 0.8375,
"step": 4680
},
{
"epoch": 2.38,
"learning_rate": 2.6815158699053932e-05,
"loss": 0.834,
"step": 4690
},
{
"epoch": 2.39,
"learning_rate": 2.6735502879193264e-05,
"loss": 0.7997,
"step": 4700
},
{
"epoch": 2.39,
"learning_rate": 2.665582935106866e-05,
"loss": 0.7941,
"step": 4710
},
{
"epoch": 2.4,
"learning_rate": 2.6576138927631742e-05,
"loss": 0.8244,
"step": 4720
},
{
"epoch": 2.4,
"learning_rate": 2.6496432422006522e-05,
"loss": 0.8158,
"step": 4730
},
{
"epoch": 2.41,
"learning_rate": 2.641671064748109e-05,
"loss": 0.8289,
"step": 4740
},
{
"epoch": 2.41,
"learning_rate": 2.633697441749935e-05,
"loss": 0.8029,
"step": 4750
},
{
"epoch": 2.42,
"learning_rate": 2.6257224545652688e-05,
"loss": 0.8135,
"step": 4760
},
{
"epoch": 2.42,
"learning_rate": 2.6177461845671685e-05,
"loss": 0.8097,
"step": 4770
},
{
"epoch": 2.43,
"learning_rate": 2.6097687131417843e-05,
"loss": 0.8128,
"step": 4780
},
{
"epoch": 2.43,
"learning_rate": 2.6017901216875217e-05,
"loss": 0.8145,
"step": 4790
},
{
"epoch": 2.44,
"learning_rate": 2.5938104916142155e-05,
"loss": 0.7725,
"step": 4800
},
{
"epoch": 2.44,
"learning_rate": 2.585829904342299e-05,
"loss": 0.8902,
"step": 4810
},
{
"epoch": 2.45,
"learning_rate": 2.577848441301971e-05,
"loss": 0.8069,
"step": 4820
},
{
"epoch": 2.45,
"learning_rate": 2.569866183932368e-05,
"loss": 0.781,
"step": 4830
},
{
"epoch": 2.46,
"learning_rate": 2.5618832136807297e-05,
"loss": 0.7496,
"step": 4840
},
{
"epoch": 2.46,
"learning_rate": 2.553899612001571e-05,
"loss": 0.8554,
"step": 4850
},
{
"epoch": 2.47,
"learning_rate": 2.5459154603558483e-05,
"loss": 0.8187,
"step": 4860
},
{
"epoch": 2.47,
"learning_rate": 2.5379308402101303e-05,
"loss": 0.7848,
"step": 4870
},
{
"epoch": 2.48,
"learning_rate": 2.529945833035767e-05,
"loss": 0.7408,
"step": 4880
},
{
"epoch": 2.49,
"learning_rate": 2.521960520308056e-05,
"loss": 0.7655,
"step": 4890
},
{
"epoch": 2.49,
"learning_rate": 2.5139749835054123e-05,
"loss": 0.7614,
"step": 4900
},
{
"epoch": 2.5,
"learning_rate": 2.5059893041085392e-05,
"loss": 0.7382,
"step": 4910
},
{
"epoch": 2.5,
"learning_rate": 2.4980035635995943e-05,
"loss": 0.7321,
"step": 4920
},
{
"epoch": 2.51,
"learning_rate": 2.4900178434613566e-05,
"loss": 0.7464,
"step": 4930
},
{
"epoch": 2.51,
"learning_rate": 2.4820322251764e-05,
"loss": 0.7925,
"step": 4940
},
{
"epoch": 2.52,
"learning_rate": 2.4740467902262583e-05,
"loss": 0.8016,
"step": 4950
},
{
"epoch": 2.52,
"learning_rate": 2.466061620090594e-05,
"loss": 0.8147,
"step": 4960
},
{
"epoch": 2.53,
"learning_rate": 2.4580767962463687e-05,
"loss": 0.8129,
"step": 4970
},
{
"epoch": 2.53,
"learning_rate": 2.4500924001670088e-05,
"loss": 0.8099,
"step": 4980
},
{
"epoch": 2.54,
"learning_rate": 2.4421085133215787e-05,
"loss": 0.8304,
"step": 4990
},
{
"epoch": 2.54,
"learning_rate": 2.4341252171739436e-05,
"loss": 0.9241,
"step": 5000
},
{
"epoch": 2.55,
"learning_rate": 2.4261425931819437e-05,
"loss": 0.7888,
"step": 5010
},
{
"epoch": 2.55,
"learning_rate": 2.4181607227965604e-05,
"loss": 0.8431,
"step": 5020
},
{
"epoch": 2.56,
"learning_rate": 2.4101796874610855e-05,
"loss": 0.7654,
"step": 5030
},
{
"epoch": 2.56,
"learning_rate": 2.40219956861029e-05,
"loss": 0.8724,
"step": 5040
},
{
"epoch": 2.57,
"learning_rate": 2.3942204476695943e-05,
"loss": 0.9028,
"step": 5050
},
{
"epoch": 2.57,
"learning_rate": 2.3862424060542357e-05,
"loss": 0.7866,
"step": 5060
},
{
"epoch": 2.58,
"learning_rate": 2.3782655251684394e-05,
"loss": 0.8155,
"step": 5070
},
{
"epoch": 2.58,
"learning_rate": 2.3702898864045876e-05,
"loss": 0.766,
"step": 5080
},
{
"epoch": 2.59,
"learning_rate": 2.362315571142385e-05,
"loss": 0.7741,
"step": 5090
},
{
"epoch": 2.59,
"learning_rate": 2.3543426607480364e-05,
"loss": 0.8394,
"step": 5100
},
{
"epoch": 2.6,
"learning_rate": 2.346371236573409e-05,
"loss": 0.8572,
"step": 5110
},
{
"epoch": 2.6,
"learning_rate": 2.3384013799552072e-05,
"loss": 0.8239,
"step": 5120
},
{
"epoch": 2.61,
"learning_rate": 2.3304331722141393e-05,
"loss": 0.7008,
"step": 5130
},
{
"epoch": 2.61,
"learning_rate": 2.32246669465409e-05,
"loss": 0.7752,
"step": 5140
},
{
"epoch": 2.62,
"learning_rate": 2.3145020285612894e-05,
"loss": 0.7641,
"step": 5150
},
{
"epoch": 2.62,
"learning_rate": 2.3065392552034857e-05,
"loss": 0.8388,
"step": 5160
},
{
"epoch": 2.63,
"learning_rate": 2.298578455829114e-05,
"loss": 0.8176,
"step": 5170
},
{
"epoch": 2.63,
"learning_rate": 2.2906197116664653e-05,
"loss": 0.7676,
"step": 5180
},
{
"epoch": 2.64,
"learning_rate": 2.282663103922863e-05,
"loss": 0.7121,
"step": 5190
},
{
"epoch": 2.64,
"learning_rate": 2.2747087137838307e-05,
"loss": 0.7567,
"step": 5200
},
{
"epoch": 2.65,
"learning_rate": 2.2667566224122648e-05,
"loss": 0.8355,
"step": 5210
},
{
"epoch": 2.65,
"learning_rate": 2.2588069109476057e-05,
"loss": 0.7708,
"step": 5220
},
{
"epoch": 2.66,
"learning_rate": 2.2508596605050107e-05,
"loss": 0.8587,
"step": 5230
},
{
"epoch": 2.66,
"learning_rate": 2.2429149521745254e-05,
"loss": 0.7971,
"step": 5240
},
{
"epoch": 2.67,
"learning_rate": 2.2349728670202582e-05,
"loss": 0.7568,
"step": 5250
},
{
"epoch": 2.67,
"learning_rate": 2.2270334860795497e-05,
"loss": 0.7911,
"step": 5260
},
{
"epoch": 2.68,
"learning_rate": 2.2190968903621498e-05,
"loss": 0.8176,
"step": 5270
},
{
"epoch": 2.68,
"learning_rate": 2.2111631608493885e-05,
"loss": 0.8239,
"step": 5280
},
{
"epoch": 2.69,
"learning_rate": 2.2032323784933505e-05,
"loss": 0.8844,
"step": 5290
},
{
"epoch": 2.69,
"learning_rate": 2.1953046242160493e-05,
"loss": 0.7868,
"step": 5300
},
{
"epoch": 2.7,
"learning_rate": 2.187379978908601e-05,
"loss": 0.8765,
"step": 5310
},
{
"epoch": 2.7,
"learning_rate": 2.1794585234303993e-05,
"loss": 0.7886,
"step": 5320
},
{
"epoch": 2.71,
"learning_rate": 2.1715403386082907e-05,
"loss": 0.861,
"step": 5330
},
{
"epoch": 2.71,
"learning_rate": 2.1636255052357497e-05,
"loss": 0.8678,
"step": 5340
},
{
"epoch": 2.72,
"learning_rate": 2.1557141040720515e-05,
"loss": 0.8169,
"step": 5350
},
{
"epoch": 2.72,
"learning_rate": 2.147806215841454e-05,
"loss": 0.7597,
"step": 5360
},
{
"epoch": 2.73,
"learning_rate": 2.1399019212323697e-05,
"loss": 0.8513,
"step": 5370
},
{
"epoch": 2.73,
"learning_rate": 2.1320013008965432e-05,
"loss": 0.7976,
"step": 5380
},
{
"epoch": 2.74,
"learning_rate": 2.124104435448228e-05,
"loss": 0.7306,
"step": 5390
},
{
"epoch": 2.74,
"learning_rate": 2.1162114054633663e-05,
"loss": 0.8193,
"step": 5400
},
{
"epoch": 2.75,
"learning_rate": 2.1083222914787623e-05,
"loss": 0.8915,
"step": 5410
},
{
"epoch": 2.75,
"learning_rate": 2.1004371739912654e-05,
"loss": 0.7684,
"step": 5420
},
{
"epoch": 2.76,
"learning_rate": 2.0925561334569464e-05,
"loss": 0.7708,
"step": 5430
},
{
"epoch": 2.76,
"learning_rate": 2.0846792502902753e-05,
"loss": 0.7513,
"step": 5440
},
{
"epoch": 2.77,
"learning_rate": 2.0768066048633033e-05,
"loss": 0.7225,
"step": 5450
},
{
"epoch": 2.77,
"learning_rate": 2.0689382775048418e-05,
"loss": 0.7696,
"step": 5460
},
{
"epoch": 2.78,
"learning_rate": 2.061074348499642e-05,
"loss": 0.751,
"step": 5470
},
{
"epoch": 2.78,
"learning_rate": 2.0532148980875768e-05,
"loss": 0.7263,
"step": 5480
},
{
"epoch": 2.79,
"learning_rate": 2.045360006462822e-05,
"loss": 0.8106,
"step": 5490
},
{
"epoch": 2.8,
"learning_rate": 2.037509753773037e-05,
"loss": 0.7924,
"step": 5500
},
{
"epoch": 2.8,
"learning_rate": 2.0296642201185473e-05,
"loss": 0.8711,
"step": 5510
},
{
"epoch": 2.81,
"learning_rate": 2.02182348555153e-05,
"loss": 0.8576,
"step": 5520
},
{
"epoch": 2.81,
"learning_rate": 2.0139876300751904e-05,
"loss": 0.8587,
"step": 5530
},
{
"epoch": 2.82,
"learning_rate": 2.0061567336429527e-05,
"loss": 0.8752,
"step": 5540
},
{
"epoch": 2.82,
"learning_rate": 1.9983308761576407e-05,
"loss": 0.8727,
"step": 5550
},
{
"epoch": 2.83,
"learning_rate": 1.990510137470664e-05,
"loss": 0.7785,
"step": 5560
},
{
"epoch": 2.83,
"learning_rate": 1.9826945973812005e-05,
"loss": 0.7669,
"step": 5570
},
{
"epoch": 2.84,
"learning_rate": 1.9748843356353856e-05,
"loss": 0.8083,
"step": 5580
},
{
"epoch": 2.84,
"learning_rate": 1.9670794319254963e-05,
"loss": 0.813,
"step": 5590
},
{
"epoch": 2.85,
"learning_rate": 1.9592799658891385e-05,
"loss": 0.815,
"step": 5600
},
{
"epoch": 2.85,
"learning_rate": 1.951486017108436e-05,
"loss": 0.9141,
"step": 5610
},
{
"epoch": 2.86,
"learning_rate": 1.9436976651092144e-05,
"loss": 0.7419,
"step": 5620
},
{
"epoch": 2.86,
"learning_rate": 1.9359149893601944e-05,
"loss": 0.8557,
"step": 5630
},
{
"epoch": 2.87,
"learning_rate": 1.9281380692721786e-05,
"loss": 0.871,
"step": 5640
},
{
"epoch": 2.87,
"learning_rate": 1.9203669841972416e-05,
"loss": 0.8396,
"step": 5650
},
{
"epoch": 2.88,
"learning_rate": 1.9126018134279193e-05,
"loss": 0.8437,
"step": 5660
},
{
"epoch": 2.88,
"learning_rate": 1.904842636196402e-05,
"loss": 0.7932,
"step": 5670
},
{
"epoch": 2.89,
"learning_rate": 1.8970895316737238e-05,
"loss": 0.6945,
"step": 5680
},
{
"epoch": 2.89,
"learning_rate": 1.8893425789689575e-05,
"loss": 0.738,
"step": 5690
},
{
"epoch": 2.9,
"learning_rate": 1.8816018571284017e-05,
"loss": 0.7109,
"step": 5700
},
{
"epoch": 2.9,
"learning_rate": 1.8738674451347818e-05,
"loss": 0.8739,
"step": 5710
},
{
"epoch": 2.91,
"learning_rate": 1.866139421906439e-05,
"loss": 0.8342,
"step": 5720
},
{
"epoch": 2.91,
"learning_rate": 1.858417866296528e-05,
"loss": 0.7657,
"step": 5730
},
{
"epoch": 2.92,
"learning_rate": 1.850702857092208e-05,
"loss": 0.8464,
"step": 5740
},
{
"epoch": 2.92,
"learning_rate": 1.8429944730138448e-05,
"loss": 0.7853,
"step": 5750
},
{
"epoch": 2.93,
"learning_rate": 1.8352927927142026e-05,
"loss": 0.7642,
"step": 5760
},
{
"epoch": 2.93,
"learning_rate": 1.8275978947776436e-05,
"loss": 0.8316,
"step": 5770
},
{
"epoch": 2.94,
"learning_rate": 1.819909857719328e-05,
"loss": 0.8784,
"step": 5780
},
{
"epoch": 2.94,
"learning_rate": 1.8122287599844066e-05,
"loss": 0.7567,
"step": 5790
},
{
"epoch": 2.95,
"learning_rate": 1.8045546799472286e-05,
"loss": 0.7579,
"step": 5800
},
{
"epoch": 2.95,
"learning_rate": 1.796887695910535e-05,
"loss": 0.7957,
"step": 5810
},
{
"epoch": 2.96,
"learning_rate": 1.7892278861046648e-05,
"loss": 0.7458,
"step": 5820
},
{
"epoch": 2.96,
"learning_rate": 1.7815753286867533e-05,
"loss": 0.7479,
"step": 5830
},
{
"epoch": 2.97,
"learning_rate": 1.7739301017399355e-05,
"loss": 0.7617,
"step": 5840
},
{
"epoch": 2.97,
"learning_rate": 1.7662922832725514e-05,
"loss": 0.8818,
"step": 5850
},
{
"epoch": 2.98,
"learning_rate": 1.7586619512173458e-05,
"loss": 0.8075,
"step": 5860
},
{
"epoch": 2.98,
"learning_rate": 1.751039183430678e-05,
"loss": 0.786,
"step": 5870
},
{
"epoch": 2.99,
"learning_rate": 1.7434240576917226e-05,
"loss": 0.8369,
"step": 5880
},
{
"epoch": 2.99,
"learning_rate": 1.735816651701681e-05,
"loss": 0.8883,
"step": 5890
},
{
"epoch": 3.0,
"learning_rate": 1.7282170430829837e-05,
"loss": 0.6625,
"step": 5900
},
{
"epoch": 3.0,
"learning_rate": 1.7206253093785012e-05,
"loss": 0.838,
"step": 5910
},
{
"epoch": 3.01,
"learning_rate": 1.713041528050753e-05,
"loss": 0.6941,
"step": 5920
},
{
"epoch": 3.01,
"learning_rate": 1.705465776481114e-05,
"loss": 0.7716,
"step": 5930
},
{
"epoch": 3.02,
"learning_rate": 1.6978981319690298e-05,
"loss": 0.8099,
"step": 5940
},
{
"epoch": 3.02,
"learning_rate": 1.6903386717312236e-05,
"loss": 0.7046,
"step": 5950
},
{
"epoch": 3.03,
"learning_rate": 1.682787472900912e-05,
"loss": 0.8008,
"step": 5960
},
{
"epoch": 3.03,
"learning_rate": 1.6752446125270117e-05,
"loss": 0.8587,
"step": 5970
},
{
"epoch": 3.04,
"learning_rate": 1.6677101675733625e-05,
"loss": 0.8083,
"step": 5980
},
{
"epoch": 3.04,
"learning_rate": 1.6601842149179347e-05,
"loss": 0.7906,
"step": 5990
},
{
"epoch": 3.05,
"learning_rate": 1.6526668313520478e-05,
"loss": 0.7089,
"step": 6000
},
{
"epoch": 3.05,
"learning_rate": 1.6451580935795863e-05,
"loss": 0.8412,
"step": 6010
},
{
"epoch": 3.06,
"learning_rate": 1.637658078216217e-05,
"loss": 0.7232,
"step": 6020
},
{
"epoch": 3.06,
"learning_rate": 1.6301668617886072e-05,
"loss": 0.8772,
"step": 6030
},
{
"epoch": 3.07,
"learning_rate": 1.622684520733644e-05,
"loss": 0.8135,
"step": 6040
},
{
"epoch": 3.07,
"learning_rate": 1.615211131397654e-05,
"loss": 0.7315,
"step": 6050
},
{
"epoch": 3.08,
"learning_rate": 1.6077467700356256e-05,
"loss": 0.8048,
"step": 6060
},
{
"epoch": 3.08,
"learning_rate": 1.6002915128104284e-05,
"loss": 0.684,
"step": 6070
},
{
"epoch": 3.09,
"learning_rate": 1.592845435792039e-05,
"loss": 0.7887,
"step": 6080
},
{
"epoch": 3.09,
"learning_rate": 1.585408614956763e-05,
"loss": 0.7469,
"step": 6090
},
{
"epoch": 3.1,
"learning_rate": 1.5779811261864604e-05,
"loss": 0.7575,
"step": 6100
},
{
"epoch": 3.11,
"learning_rate": 1.5705630452677707e-05,
"loss": 0.7354,
"step": 6110
},
{
"epoch": 3.11,
"learning_rate": 1.56315444789134e-05,
"loss": 0.7734,
"step": 6120
},
{
"epoch": 3.12,
"learning_rate": 1.555755409651049e-05,
"loss": 0.804,
"step": 6130
},
{
"epoch": 3.12,
"learning_rate": 1.5483660060432432e-05,
"loss": 0.7408,
"step": 6140
},
{
"epoch": 3.13,
"learning_rate": 1.5409863124659562e-05,
"loss": 0.819,
"step": 6150
},
{
"epoch": 3.13,
"learning_rate": 1.5336164042181494e-05,
"loss": 0.7686,
"step": 6160
},
{
"epoch": 3.14,
"learning_rate": 1.5262563564989374e-05,
"loss": 0.8118,
"step": 6170
},
{
"epoch": 3.14,
"learning_rate": 1.5189062444068225e-05,
"loss": 0.7973,
"step": 6180
},
{
"epoch": 3.15,
"learning_rate": 1.5115661429389294e-05,
"loss": 0.7622,
"step": 6190
},
{
"epoch": 3.15,
"learning_rate": 1.5042361269902383e-05,
"loss": 0.875,
"step": 6200
},
{
"epoch": 3.16,
"learning_rate": 1.4969162713528212e-05,
"loss": 0.7767,
"step": 6210
},
{
"epoch": 3.16,
"learning_rate": 1.4896066507150804e-05,
"loss": 0.7193,
"step": 6220
},
{
"epoch": 3.17,
"learning_rate": 1.482307339660983e-05,
"loss": 0.7978,
"step": 6230
},
{
"epoch": 3.17,
"learning_rate": 1.4750184126693028e-05,
"loss": 0.7754,
"step": 6240
},
{
"epoch": 3.18,
"learning_rate": 1.4677399441128603e-05,
"loss": 0.8816,
"step": 6250
},
{
"epoch": 3.18,
"learning_rate": 1.4604720082577622e-05,
"loss": 0.8264,
"step": 6260
},
{
"epoch": 3.19,
"learning_rate": 1.4532146792626449e-05,
"loss": 0.7528,
"step": 6270
},
{
"epoch": 3.19,
"learning_rate": 1.4459680311779159e-05,
"loss": 0.7898,
"step": 6280
},
{
"epoch": 3.2,
"learning_rate": 1.438732137945001e-05,
"loss": 0.8136,
"step": 6290
},
{
"epoch": 3.2,
"learning_rate": 1.4315070733955888e-05,
"loss": 0.7874,
"step": 6300
},
{
"epoch": 3.21,
"learning_rate": 1.4242929112508769e-05,
"loss": 0.7637,
"step": 6310
},
{
"epoch": 3.21,
"learning_rate": 1.417089725120817e-05,
"loss": 0.8007,
"step": 6320
},
{
"epoch": 3.22,
"learning_rate": 1.4098975885033713e-05,
"loss": 0.7594,
"step": 6330
},
{
"epoch": 3.22,
"learning_rate": 1.4027165747837548e-05,
"loss": 0.7395,
"step": 6340
},
{
"epoch": 3.23,
"learning_rate": 1.3955467572336905e-05,
"loss": 0.7882,
"step": 6350
},
{
"epoch": 3.23,
"learning_rate": 1.3883882090106611e-05,
"loss": 0.7563,
"step": 6360
},
{
"epoch": 3.24,
"learning_rate": 1.381241003157162e-05,
"loss": 0.7223,
"step": 6370
},
{
"epoch": 3.24,
"learning_rate": 1.3741052125999564e-05,
"loss": 0.7144,
"step": 6380
},
{
"epoch": 3.25,
"learning_rate": 1.3669809101493314e-05,
"loss": 0.7936,
"step": 6390
},
{
"epoch": 3.25,
"learning_rate": 1.3605789203581502e-05,
"loss": 0.7189,
"step": 6400
},
{
"epoch": 3.26,
"learning_rate": 1.3534766454819494e-05,
"loss": 0.7533,
"step": 6410
},
{
"epoch": 3.26,
"learning_rate": 1.3463860691966308e-05,
"loss": 0.7467,
"step": 6420
},
{
"epoch": 3.27,
"learning_rate": 1.3393072638511351e-05,
"loss": 0.7655,
"step": 6430
},
{
"epoch": 3.27,
"learning_rate": 1.332240301674299e-05,
"loss": 0.7911,
"step": 6440
},
{
"epoch": 3.28,
"learning_rate": 1.3251852547741161e-05,
"loss": 0.8208,
"step": 6450
},
{
"epoch": 3.28,
"learning_rate": 1.3181421951370035e-05,
"loss": 0.8012,
"step": 6460
},
{
"epoch": 3.29,
"learning_rate": 1.311111194627064e-05,
"loss": 0.8289,
"step": 6470
},
{
"epoch": 3.29,
"learning_rate": 1.304092324985356e-05,
"loss": 0.7399,
"step": 6480
},
{
"epoch": 3.3,
"learning_rate": 1.2970856578291598e-05,
"loss": 0.7232,
"step": 6490
},
{
"epoch": 3.3,
"learning_rate": 1.290091264651247e-05,
"loss": 0.8301,
"step": 6500
},
{
"epoch": 3.31,
"learning_rate": 1.2831092168191517e-05,
"loss": 0.7985,
"step": 6510
},
{
"epoch": 3.31,
"learning_rate": 1.2761395855744408e-05,
"loss": 0.7452,
"step": 6520
},
{
"epoch": 3.32,
"learning_rate": 1.2691824420319895e-05,
"loss": 0.8126,
"step": 6530
},
{
"epoch": 3.32,
"learning_rate": 1.2622378571792535e-05,
"loss": 0.8346,
"step": 6540
},
{
"epoch": 3.33,
"learning_rate": 1.2553059018755454e-05,
"loss": 0.7807,
"step": 6550
},
{
"epoch": 3.33,
"learning_rate": 1.2483866468513125e-05,
"loss": 0.8407,
"step": 6560
},
{
"epoch": 3.34,
"learning_rate": 1.2414801627074144e-05,
"loss": 0.8276,
"step": 6570
},
{
"epoch": 3.34,
"learning_rate": 1.2345865199144e-05,
"loss": 0.7426,
"step": 6580
},
{
"epoch": 3.35,
"learning_rate": 1.2277057888117944e-05,
"loss": 0.7461,
"step": 6590
},
{
"epoch": 3.35,
"learning_rate": 1.220838039607376e-05,
"loss": 0.7221,
"step": 6600
},
{
"epoch": 3.36,
"learning_rate": 1.2139833423764626e-05,
"loss": 0.8026,
"step": 6610
},
{
"epoch": 3.36,
"learning_rate": 1.2071417670611959e-05,
"loss": 0.7952,
"step": 6620
},
{
"epoch": 3.37,
"learning_rate": 1.2003133834698268e-05,
"loss": 0.7638,
"step": 6630
},
{
"epoch": 3.37,
"learning_rate": 1.1934982612760049e-05,
"loss": 0.7576,
"step": 6640
},
{
"epoch": 3.38,
"learning_rate": 1.186696470018066e-05,
"loss": 0.7676,
"step": 6650
},
{
"epoch": 3.38,
"learning_rate": 1.1799080790983246e-05,
"loss": 0.7482,
"step": 6660
},
{
"epoch": 3.39,
"learning_rate": 1.1731331577823617e-05,
"loss": 0.8294,
"step": 6670
},
{
"epoch": 3.39,
"learning_rate": 1.1663717751983228e-05,
"loss": 0.8052,
"step": 6680
},
{
"epoch": 3.4,
"learning_rate": 1.15962400033621e-05,
"loss": 0.844,
"step": 6690
},
{
"epoch": 3.4,
"learning_rate": 1.152889902047179e-05,
"loss": 0.7958,
"step": 6700
},
{
"epoch": 3.41,
"learning_rate": 1.1461695490428352e-05,
"loss": 0.7427,
"step": 6710
},
{
"epoch": 3.42,
"learning_rate": 1.1394630098945342e-05,
"loss": 0.7311,
"step": 6720
},
{
"epoch": 3.42,
"learning_rate": 1.1327703530326811e-05,
"loss": 0.8388,
"step": 6730
},
{
"epoch": 3.43,
"learning_rate": 1.1260916467460336e-05,
"loss": 0.768,
"step": 6740
},
{
"epoch": 3.43,
"learning_rate": 1.1194269591810018e-05,
"loss": 0.7228,
"step": 6750
},
{
"epoch": 3.44,
"learning_rate": 1.1127763583409576e-05,
"loss": 0.8205,
"step": 6760
},
{
"epoch": 3.44,
"learning_rate": 1.1061399120855375e-05,
"loss": 0.6815,
"step": 6770
},
{
"epoch": 3.45,
"learning_rate": 1.0995176881299515e-05,
"loss": 0.7285,
"step": 6780
},
{
"epoch": 3.45,
"learning_rate": 1.092909754044292e-05,
"loss": 0.7713,
"step": 6790
},
{
"epoch": 3.46,
"learning_rate": 1.086316177252844e-05,
"loss": 0.8213,
"step": 6800
},
{
"epoch": 3.46,
"learning_rate": 1.0797370250333975e-05,
"loss": 0.8498,
"step": 6810
},
{
"epoch": 3.47,
"learning_rate": 1.0731723645165603e-05,
"loss": 0.6787,
"step": 6820
},
{
"epoch": 3.47,
"learning_rate": 1.0666222626850752e-05,
"loss": 0.8175,
"step": 6830
},
{
"epoch": 3.48,
"learning_rate": 1.0600867863731321e-05,
"loss": 0.8446,
"step": 6840
},
{
"epoch": 3.48,
"learning_rate": 1.0535660022656915e-05,
"loss": 0.7999,
"step": 6850
},
{
"epoch": 3.49,
"learning_rate": 1.0470599768978004e-05,
"loss": 0.7724,
"step": 6860
},
{
"epoch": 3.49,
"learning_rate": 1.040568776653915e-05,
"loss": 0.8615,
"step": 6870
},
{
"epoch": 3.5,
"learning_rate": 1.0340924677672223e-05,
"loss": 0.8512,
"step": 6880
},
{
"epoch": 3.5,
"learning_rate": 1.0276311163189647e-05,
"loss": 0.8195,
"step": 6890
},
{
"epoch": 3.51,
"learning_rate": 1.021184788237767e-05,
"loss": 0.7919,
"step": 6900
},
{
"epoch": 3.51,
"learning_rate": 1.0147535492989613e-05,
"loss": 0.736,
"step": 6910
},
{
"epoch": 3.52,
"learning_rate": 1.0083374651239172e-05,
"loss": 0.8283,
"step": 6920
},
{
"epoch": 3.52,
"learning_rate": 1.0019366011793732e-05,
"loss": 0.814,
"step": 6930
},
{
"epoch": 3.53,
"learning_rate": 9.955510227767665e-06,
"loss": 0.8149,
"step": 6940
},
{
"epoch": 3.53,
"learning_rate": 9.891807950715682e-06,
"loss": 0.7876,
"step": 6950
},
{
"epoch": 3.54,
"learning_rate": 9.82825983062618e-06,
"loss": 0.6661,
"step": 6960
},
{
"epoch": 3.54,
"learning_rate": 9.764866515914611e-06,
"loss": 0.7429,
"step": 6970
},
{
"epoch": 3.55,
"learning_rate": 9.701628653416867e-06,
"loss": 0.7564,
"step": 6980
},
{
"epoch": 3.55,
"learning_rate": 9.638546888382672e-06,
"loss": 0.8258,
"step": 6990
},
{
"epoch": 3.56,
"learning_rate": 9.575621864469006e-06,
"loss": 0.848,
"step": 7000
},
{
"epoch": 3.56,
"learning_rate": 9.512854223733547e-06,
"loss": 0.7468,
"step": 7010
},
{
"epoch": 3.57,
"learning_rate": 9.450244606628082e-06,
"loss": 0.7268,
"step": 7020
},
{
"epoch": 3.57,
"learning_rate": 9.38779365199202e-06,
"loss": 0.896,
"step": 7030
},
{
"epoch": 3.58,
"learning_rate": 9.325501997045847e-06,
"loss": 0.8309,
"step": 7040
},
{
"epoch": 3.58,
"learning_rate": 9.263370277384631e-06,
"loss": 0.7935,
"step": 7050
},
{
"epoch": 3.59,
"learning_rate": 9.20139912697153e-06,
"loss": 0.7681,
"step": 7060
},
{
"epoch": 3.59,
"learning_rate": 9.139589178131333e-06,
"loss": 0.7819,
"step": 7070
},
{
"epoch": 3.6,
"learning_rate": 9.077941061543996e-06,
"loss": 0.7642,
"step": 7080
},
{
"epoch": 3.6,
"learning_rate": 9.016455406238222e-06,
"loss": 0.6481,
"step": 7090
},
{
"epoch": 3.61,
"learning_rate": 8.955132839585037e-06,
"loss": 0.7378,
"step": 7100
},
{
"epoch": 3.61,
"learning_rate": 8.893973987291369e-06,
"loss": 0.8364,
"step": 7110
},
{
"epoch": 3.62,
"learning_rate": 8.832979473393693e-06,
"loss": 0.7551,
"step": 7120
},
{
"epoch": 3.62,
"learning_rate": 8.772149920251654e-06,
"loss": 0.8637,
"step": 7130
},
{
"epoch": 3.63,
"learning_rate": 8.711485948541715e-06,
"loss": 0.7805,
"step": 7140
},
{
"epoch": 3.63,
"learning_rate": 8.650988177250812e-06,
"loss": 0.8183,
"step": 7150
},
{
"epoch": 3.64,
"learning_rate": 8.590657223670059e-06,
"loss": 0.8564,
"step": 7160
},
{
"epoch": 3.64,
"learning_rate": 8.53049370338844e-06,
"loss": 0.8067,
"step": 7170
},
{
"epoch": 3.65,
"learning_rate": 8.470498230286523e-06,
"loss": 0.7626,
"step": 7180
},
{
"epoch": 3.65,
"learning_rate": 8.410671416530205e-06,
"loss": 0.8154,
"step": 7190
},
{
"epoch": 3.66,
"learning_rate": 8.351013872564447e-06,
"loss": 0.7763,
"step": 7200
},
{
"epoch": 3.66,
"learning_rate": 8.291526207107084e-06,
"loss": 0.7609,
"step": 7210
},
{
"epoch": 3.67,
"learning_rate": 8.232209027142571e-06,
"loss": 0.7759,
"step": 7220
},
{
"epoch": 3.67,
"learning_rate": 8.173062937915812e-06,
"loss": 0.8389,
"step": 7230
},
{
"epoch": 3.68,
"learning_rate": 8.114088542925993e-06,
"loss": 0.7771,
"step": 7240
},
{
"epoch": 3.68,
"learning_rate": 8.055286443920395e-06,
"loss": 0.7084,
"step": 7250
},
{
"epoch": 3.69,
"learning_rate": 7.99665724088828e-06,
"loss": 0.8226,
"step": 7260
},
{
"epoch": 3.69,
"learning_rate": 7.938201532054768e-06,
"loss": 0.7129,
"step": 7270
},
{
"epoch": 3.7,
"learning_rate": 7.879919913874695e-06,
"loss": 0.7918,
"step": 7280
},
{
"epoch": 3.7,
"learning_rate": 7.821812981026588e-06,
"loss": 0.8319,
"step": 7290
},
{
"epoch": 3.71,
"learning_rate": 7.763881326406552e-06,
"loss": 0.7772,
"step": 7300
},
{
"epoch": 3.71,
"learning_rate": 7.706125541122244e-06,
"loss": 0.8371,
"step": 7310
},
{
"epoch": 3.72,
"learning_rate": 7.64854621448682e-06,
"loss": 0.7229,
"step": 7320
},
{
"epoch": 3.73,
"learning_rate": 7.591143934012942e-06,
"loss": 0.8227,
"step": 7330
},
{
"epoch": 3.73,
"learning_rate": 7.5339192854067736e-06,
"loss": 0.8326,
"step": 7340
},
{
"epoch": 3.74,
"learning_rate": 7.476872852562003e-06,
"loss": 0.7516,
"step": 7350
},
{
"epoch": 3.74,
"learning_rate": 7.420005217553891e-06,
"loss": 0.7943,
"step": 7360
},
{
"epoch": 3.75,
"learning_rate": 7.3633169606333265e-06,
"loss": 0.8194,
"step": 7370
},
{
"epoch": 3.75,
"learning_rate": 7.306808660220909e-06,
"loss": 0.8268,
"step": 7380
},
{
"epoch": 3.76,
"learning_rate": 7.250480892901046e-06,
"loss": 0.7529,
"step": 7390
},
{
"epoch": 3.76,
"learning_rate": 7.194334233416069e-06,
"loss": 0.7817,
"step": 7400
},
{
"epoch": 3.77,
"learning_rate": 7.138369254660365e-06,
"loss": 0.8199,
"step": 7410
},
{
"epoch": 3.77,
"learning_rate": 7.082586527674542e-06,
"loss": 0.8484,
"step": 7420
},
{
"epoch": 3.78,
"learning_rate": 7.0269866216395915e-06,
"loss": 0.7921,
"step": 7430
},
{
"epoch": 3.78,
"learning_rate": 6.971570103871089e-06,
"loss": 0.8236,
"step": 7440
},
{
"epoch": 3.79,
"learning_rate": 6.9163375398134024e-06,
"loss": 0.8259,
"step": 7450
},
{
"epoch": 3.79,
"learning_rate": 6.8612894930339065e-06,
"loss": 0.8038,
"step": 7460
},
{
"epoch": 3.8,
"learning_rate": 6.806426525217266e-06,
"loss": 0.8309,
"step": 7470
},
{
"epoch": 3.8,
"learning_rate": 6.751749196159679e-06,
"loss": 0.8441,
"step": 7480
},
{
"epoch": 3.81,
"learning_rate": 6.69725806376317e-06,
"loss": 0.7264,
"step": 7490
},
{
"epoch": 3.81,
"learning_rate": 6.6429536840299035e-06,
"loss": 0.8848,
"step": 7500
},
{
"epoch": 3.82,
"learning_rate": 6.588836611056507e-06,
"loss": 0.8101,
"step": 7510
},
{
"epoch": 3.82,
"learning_rate": 6.534907397028409e-06,
"loss": 0.724,
"step": 7520
},
{
"epoch": 3.83,
"learning_rate": 6.481166592214225e-06,
"loss": 0.8091,
"step": 7530
},
{
"epoch": 3.83,
"learning_rate": 6.427614744960126e-06,
"loss": 0.7131,
"step": 7540
},
{
"epoch": 3.84,
"learning_rate": 6.374252401684233e-06,
"loss": 0.7833,
"step": 7550
},
{
"epoch": 3.84,
"learning_rate": 6.32108010687108e-06,
"loss": 0.8134,
"step": 7560
},
{
"epoch": 3.85,
"learning_rate": 6.268098403066022e-06,
"loss": 0.7773,
"step": 7570
},
{
"epoch": 3.85,
"learning_rate": 6.2153078308697125e-06,
"loss": 0.7609,
"step": 7580
},
{
"epoch": 3.86,
"learning_rate": 6.162708928932592e-06,
"loss": 0.8762,
"step": 7590
},
{
"epoch": 3.86,
"learning_rate": 6.110302233949383e-06,
"loss": 0.7745,
"step": 7600
},
{
"epoch": 3.87,
"learning_rate": 6.05808828065362e-06,
"loss": 0.7402,
"step": 7610
},
{
"epoch": 3.87,
"learning_rate": 6.006067601812187e-06,
"loss": 0.7638,
"step": 7620
},
{
"epoch": 3.88,
"learning_rate": 5.954240728219898e-06,
"loss": 0.7565,
"step": 7630
},
{
"epoch": 3.88,
"learning_rate": 5.902608188694039e-06,
"loss": 0.7291,
"step": 7640
},
{
"epoch": 3.89,
"learning_rate": 5.8511705100690314e-06,
"loss": 0.7956,
"step": 7650
},
{
"epoch": 3.89,
"learning_rate": 5.79992821719102e-06,
"loss": 0.7756,
"step": 7660
},
{
"epoch": 3.9,
"learning_rate": 5.7488818329125114e-06,
"loss": 0.8543,
"step": 7670
},
{
"epoch": 3.9,
"learning_rate": 5.698031878087071e-06,
"loss": 0.6868,
"step": 7680
},
{
"epoch": 3.91,
"learning_rate": 5.647378871563971e-06,
"loss": 0.7887,
"step": 7690
},
{
"epoch": 3.91,
"learning_rate": 5.59692333018293e-06,
"loss": 0.7896,
"step": 7700
},
{
"epoch": 3.92,
"learning_rate": 5.546665768768814e-06,
"loss": 0.8122,
"step": 7710
},
{
"epoch": 3.92,
"learning_rate": 5.496606700126397e-06,
"loss": 0.8105,
"step": 7720
},
{
"epoch": 3.93,
"learning_rate": 5.4467466350351245e-06,
"loss": 0.8576,
"step": 7730
},
{
"epoch": 3.93,
"learning_rate": 5.3970860822439045e-06,
"loss": 0.8091,
"step": 7740
},
{
"epoch": 3.94,
"learning_rate": 5.347625548465915e-06,
"loss": 0.7318,
"step": 7750
},
{
"epoch": 3.94,
"learning_rate": 5.298365538373426e-06,
"loss": 0.9173,
"step": 7760
},
{
"epoch": 3.95,
"learning_rate": 5.2493065545926644e-06,
"loss": 0.8607,
"step": 7770
},
{
"epoch": 3.95,
"learning_rate": 5.200449097698676e-06,
"loss": 0.8263,
"step": 7780
},
{
"epoch": 3.96,
"learning_rate": 5.151793666210217e-06,
"loss": 0.8193,
"step": 7790
},
{
"epoch": 3.96,
"learning_rate": 5.103340756584685e-06,
"loss": 0.7089,
"step": 7800
},
{
"epoch": 3.97,
"learning_rate": 5.055090863213008e-06,
"loss": 0.7857,
"step": 7810
},
{
"epoch": 3.97,
"learning_rate": 5.007044478414658e-06,
"loss": 0.7785,
"step": 7820
},
{
"epoch": 3.98,
"learning_rate": 4.9592020924325936e-06,
"loss": 0.8344,
"step": 7830
},
{
"epoch": 3.98,
"learning_rate": 4.911564193428259e-06,
"loss": 0.8568,
"step": 7840
},
{
"epoch": 3.99,
"learning_rate": 4.864131267476615e-06,
"loss": 0.8104,
"step": 7850
},
{
"epoch": 3.99,
"learning_rate": 4.816903798561168e-06,
"loss": 0.7698,
"step": 7860
},
{
"epoch": 4.0,
"learning_rate": 4.769882268569037e-06,
"loss": 0.7878,
"step": 7870
},
{
"epoch": 4.0,
"learning_rate": 4.723067157286038e-06,
"loss": 0.8072,
"step": 7880
},
{
"epoch": 4.01,
"learning_rate": 4.6764589423917895e-06,
"loss": 0.7909,
"step": 7890
},
{
"epoch": 4.01,
"learning_rate": 4.630058099454823e-06,
"loss": 0.8484,
"step": 7900
},
{
"epoch": 4.02,
"learning_rate": 4.583865101927756e-06,
"loss": 0.8064,
"step": 7910
},
{
"epoch": 4.02,
"learning_rate": 4.537880421142443e-06,
"loss": 0.7695,
"step": 7920
},
{
"epoch": 4.03,
"learning_rate": 4.492104526305174e-06,
"loss": 0.7681,
"step": 7930
},
{
"epoch": 4.04,
"learning_rate": 4.44653788449188e-06,
"loss": 0.7337,
"step": 7940
},
{
"epoch": 4.04,
"learning_rate": 4.401180960643375e-06,
"loss": 0.7758,
"step": 7950
},
{
"epoch": 4.05,
"learning_rate": 4.3560342175606064e-06,
"loss": 0.7611,
"step": 7960
},
{
"epoch": 4.05,
"learning_rate": 4.311098115899936e-06,
"loss": 0.762,
"step": 7970
},
{
"epoch": 4.06,
"learning_rate": 4.266373114168445e-06,
"loss": 0.7921,
"step": 7980
},
{
"epoch": 4.06,
"learning_rate": 4.22185966871923e-06,
"loss": 0.7286,
"step": 7990
},
{
"epoch": 4.07,
"learning_rate": 4.177558233746787e-06,
"loss": 0.7583,
"step": 8000
},
{
"epoch": 4.07,
"learning_rate": 4.133469261282341e-06,
"loss": 0.8479,
"step": 8010
},
{
"epoch": 4.08,
"learning_rate": 4.089593201189259e-06,
"loss": 0.8339,
"step": 8020
},
{
"epoch": 4.08,
"learning_rate": 4.045930501158443e-06,
"loss": 0.7481,
"step": 8030
},
{
"epoch": 4.09,
"learning_rate": 4.00248160670377e-06,
"loss": 0.8175,
"step": 8040
},
{
"epoch": 4.09,
"learning_rate": 3.959246961157545e-06,
"loss": 0.7694,
"step": 8050
},
{
"epoch": 4.1,
"learning_rate": 3.916227005665976e-06,
"loss": 0.7131,
"step": 8060
},
{
"epoch": 4.1,
"learning_rate": 3.873422179184677e-06,
"loss": 0.7284,
"step": 8070
},
{
"epoch": 4.11,
"learning_rate": 3.83083291847417e-06,
"loss": 0.8848,
"step": 8080
},
{
"epoch": 4.11,
"learning_rate": 3.7884596580954668e-06,
"loss": 0.7927,
"step": 8090
},
{
"epoch": 4.12,
"learning_rate": 3.7463028304055987e-06,
"loss": 0.884,
"step": 8100
},
{
"epoch": 4.12,
"learning_rate": 3.704362865553221e-06,
"loss": 0.7089,
"step": 8110
},
{
"epoch": 4.13,
"learning_rate": 3.662640191474223e-06,
"loss": 0.8192,
"step": 8120
},
{
"epoch": 4.13,
"learning_rate": 3.621135233887363e-06,
"loss": 0.8491,
"step": 8130
},
{
"epoch": 4.14,
"learning_rate": 3.5798484162899105e-06,
"loss": 0.8009,
"step": 8140
},
{
"epoch": 4.14,
"learning_rate": 3.5387801599533475e-06,
"loss": 0.8679,
"step": 8150
},
{
"epoch": 4.15,
"learning_rate": 3.4979308839190565e-06,
"loss": 0.7229,
"step": 8160
},
{
"epoch": 4.15,
"learning_rate": 3.4573010049940403e-06,
"loss": 0.8634,
"step": 8170
},
{
"epoch": 4.16,
"learning_rate": 3.4168909377466836e-06,
"loss": 0.8256,
"step": 8180
},
{
"epoch": 4.16,
"learning_rate": 3.3767010945025075e-06,
"loss": 0.7816,
"step": 8190
},
{
"epoch": 4.17,
"learning_rate": 3.3367318853399775e-06,
"loss": 0.7579,
"step": 8200
},
{
"epoch": 4.17,
"learning_rate": 3.296983718086308e-06,
"loss": 0.7629,
"step": 8210
},
{
"epoch": 4.18,
"learning_rate": 3.257456998313302e-06,
"loss": 0.8061,
"step": 8220
},
{
"epoch": 4.18,
"learning_rate": 3.2181521293332213e-06,
"loss": 0.8339,
"step": 8230
},
{
"epoch": 4.19,
"learning_rate": 3.1790695121946627e-06,
"loss": 0.677,
"step": 8240
},
{
"epoch": 4.19,
"learning_rate": 3.140209545678463e-06,
"loss": 0.7408,
"step": 8250
},
{
"epoch": 4.2,
"learning_rate": 3.101572626293642e-06,
"loss": 0.8345,
"step": 8260
},
{
"epoch": 4.2,
"learning_rate": 3.063159148273351e-06,
"loss": 0.7823,
"step": 8270
},
{
"epoch": 4.21,
"learning_rate": 3.024969503570843e-06,
"loss": 0.8105,
"step": 8280
},
{
"epoch": 4.21,
"learning_rate": 2.9870040818554934e-06,
"loss": 0.7489,
"step": 8290
},
{
"epoch": 4.22,
"learning_rate": 2.9492632705087926e-06,
"loss": 0.7394,
"step": 8300
},
{
"epoch": 4.22,
"learning_rate": 2.9117474546204283e-06,
"loss": 0.8176,
"step": 8310
},
{
"epoch": 4.23,
"learning_rate": 2.874457016984325e-06,
"loss": 0.7422,
"step": 8320
},
{
"epoch": 4.23,
"learning_rate": 2.8373923380947657e-06,
"loss": 0.7723,
"step": 8330
},
{
"epoch": 4.24,
"learning_rate": 2.80055379614248e-06,
"loss": 0.7783,
"step": 8340
},
{
"epoch": 4.24,
"learning_rate": 2.7639417670108165e-06,
"loss": 0.8144,
"step": 8350
},
{
"epoch": 4.25,
"learning_rate": 2.7275566242718846e-06,
"loss": 0.6772,
"step": 8360
},
{
"epoch": 4.25,
"learning_rate": 2.6913987391827545e-06,
"loss": 0.7854,
"step": 8370
},
{
"epoch": 4.26,
"learning_rate": 2.655468480681658e-06,
"loss": 0.8284,
"step": 8380
},
{
"epoch": 4.26,
"learning_rate": 2.6197662153842424e-06,
"loss": 0.8052,
"step": 8390
},
{
"epoch": 4.27,
"learning_rate": 2.584292307579808e-06,
"loss": 0.7989,
"step": 8400
},
{
"epoch": 4.27,
"learning_rate": 2.549047119227607e-06,
"loss": 0.6619,
"step": 8410
},
{
"epoch": 4.28,
"learning_rate": 2.5140310099531494e-06,
"loss": 0.7794,
"step": 8420
},
{
"epoch": 4.28,
"learning_rate": 2.479244337044509e-06,
"loss": 0.6983,
"step": 8430
},
{
"epoch": 4.29,
"learning_rate": 2.4446874554487216e-06,
"loss": 0.8443,
"step": 8440
},
{
"epoch": 4.29,
"learning_rate": 2.4103607177681233e-06,
"loss": 0.7472,
"step": 8450
},
{
"epoch": 4.3,
"learning_rate": 2.3762644742567786e-06,
"loss": 0.7612,
"step": 8460
},
{
"epoch": 4.3,
"learning_rate": 2.342399072816895e-06,
"loss": 0.7782,
"step": 8470
},
{
"epoch": 4.31,
"learning_rate": 2.308764858995266e-06,
"loss": 0.7171,
"step": 8480
},
{
"epoch": 4.31,
"learning_rate": 2.275362175979767e-06,
"loss": 0.777,
"step": 8490
},
{
"epoch": 4.32,
"learning_rate": 2.2421913645958304e-06,
"loss": 0.7401,
"step": 8500
},
{
"epoch": 4.32,
"learning_rate": 2.209252763302988e-06,
"loss": 0.6972,
"step": 8510
},
{
"epoch": 4.33,
"learning_rate": 2.176546708191396e-06,
"loss": 0.7424,
"step": 8520
},
{
"epoch": 4.33,
"learning_rate": 2.1440735329784273e-06,
"loss": 0.7971,
"step": 8530
},
{
"epoch": 4.34,
"learning_rate": 2.1118335690052533e-06,
"loss": 0.8577,
"step": 8540
},
{
"epoch": 4.35,
"learning_rate": 2.079827145233465e-06,
"loss": 0.8388,
"step": 8550
},
{
"epoch": 4.35,
"learning_rate": 2.048054588241721e-06,
"loss": 0.7897,
"step": 8560
},
{
"epoch": 4.36,
"learning_rate": 2.0165162222224087e-06,
"loss": 0.7454,
"step": 8570
},
{
"epoch": 4.36,
"learning_rate": 1.985212368978345e-06,
"loss": 0.6974,
"step": 8580
},
{
"epoch": 4.37,
"learning_rate": 1.954143347919482e-06,
"loss": 0.8017,
"step": 8590
},
{
"epoch": 4.37,
"learning_rate": 1.923309476059654e-06,
"loss": 0.6967,
"step": 8600
},
{
"epoch": 4.38,
"learning_rate": 1.8927110680133448e-06,
"loss": 0.7471,
"step": 8610
},
{
"epoch": 4.38,
"learning_rate": 1.8623484359924753e-06,
"loss": 0.7466,
"step": 8620
},
{
"epoch": 4.39,
"learning_rate": 1.832221889803215e-06,
"loss": 0.7727,
"step": 8630
},
{
"epoch": 4.39,
"learning_rate": 1.8023317368428272e-06,
"loss": 0.6746,
"step": 8640
},
{
"epoch": 4.4,
"learning_rate": 1.772678282096521e-06,
"loss": 0.7834,
"step": 8650
},
{
"epoch": 4.4,
"learning_rate": 1.7432618281343571e-06,
"loss": 0.854,
"step": 8660
},
{
"epoch": 4.41,
"learning_rate": 1.7140826751081417e-06,
"loss": 0.7994,
"step": 8670
},
{
"epoch": 4.41,
"learning_rate": 1.685141120748379e-06,
"loss": 0.8229,
"step": 8680
},
{
"epoch": 4.42,
"learning_rate": 1.6564374603612293e-06,
"loss": 0.7353,
"step": 8690
},
{
"epoch": 4.42,
"learning_rate": 1.6279719868254772e-06,
"loss": 0.7952,
"step": 8700
},
{
"epoch": 4.43,
"learning_rate": 1.5997449905895773e-06,
"loss": 0.866,
"step": 8710
},
{
"epoch": 4.43,
"learning_rate": 1.5717567596686661e-06,
"loss": 0.8125,
"step": 8720
},
{
"epoch": 4.44,
"learning_rate": 1.5440075796416292e-06,
"loss": 0.8302,
"step": 8730
},
{
"epoch": 4.44,
"learning_rate": 1.5164977336481896e-06,
"loss": 0.7714,
"step": 8740
},
{
"epoch": 4.45,
"learning_rate": 1.4892275023860176e-06,
"loss": 0.841,
"step": 8750
},
{
"epoch": 4.45,
"learning_rate": 1.4621971641078646e-06,
"loss": 0.8062,
"step": 8760
},
{
"epoch": 4.46,
"learning_rate": 1.4354069946187292e-06,
"loss": 0.839,
"step": 8770
},
{
"epoch": 4.46,
"learning_rate": 1.408857267273031e-06,
"loss": 0.7927,
"step": 8780
},
{
"epoch": 4.47,
"learning_rate": 1.3825482529718382e-06,
"loss": 0.7557,
"step": 8790
},
{
"epoch": 4.47,
"learning_rate": 1.3564802201600919e-06,
"loss": 0.7451,
"step": 8800
},
{
"epoch": 4.48,
"learning_rate": 1.3306534348238697e-06,
"loss": 0.7812,
"step": 8810
},
{
"epoch": 4.48,
"learning_rate": 1.305068160487674e-06,
"loss": 0.7787,
"step": 8820
},
{
"epoch": 4.49,
"learning_rate": 1.2797246582117422e-06,
"loss": 0.712,
"step": 8830
},
{
"epoch": 4.49,
"learning_rate": 1.2546231865893794e-06,
"loss": 0.8208,
"step": 8840
},
{
"epoch": 4.5,
"learning_rate": 1.2297640017443213e-06,
"loss": 0.7492,
"step": 8850
},
{
"epoch": 4.5,
"learning_rate": 1.2051473573281292e-06,
"loss": 0.8182,
"step": 8860
},
{
"epoch": 4.51,
"learning_rate": 1.180773504517585e-06,
"loss": 0.7812,
"step": 8870
},
{
"epoch": 4.51,
"learning_rate": 1.1566426920121415e-06,
"loss": 0.7459,
"step": 8880
},
{
"epoch": 4.52,
"learning_rate": 1.132755166031385e-06,
"loss": 0.8221,
"step": 8890
},
{
"epoch": 4.52,
"learning_rate": 1.1091111703125157e-06,
"loss": 0.6909,
"step": 8900
},
{
"epoch": 4.53,
"learning_rate": 1.0857109461078679e-06,
"loss": 0.8285,
"step": 8910
},
{
"epoch": 4.53,
"learning_rate": 1.0625547321824385e-06,
"loss": 0.8397,
"step": 8920
},
{
"epoch": 4.54,
"learning_rate": 1.0396427648114632e-06,
"loss": 0.765,
"step": 8930
},
{
"epoch": 4.54,
"learning_rate": 1.0169752777779984e-06,
"loss": 0.795,
"step": 8940
},
{
"epoch": 4.55,
"learning_rate": 9.945525023705327e-07,
"loss": 0.6929,
"step": 8950
},
{
"epoch": 4.55,
"learning_rate": 9.723746673806377e-07,
"loss": 0.8618,
"step": 8960
},
{
"epoch": 4.56,
"learning_rate": 9.50441999100618e-07,
"loss": 0.7577,
"step": 8970
},
{
"epoch": 4.56,
"learning_rate": 9.287547213212206e-07,
"loss": 0.8186,
"step": 8980
},
{
"epoch": 4.57,
"learning_rate": 9.07313055329334e-07,
"loss": 0.8268,
"step": 8990
},
{
"epoch": 4.57,
"learning_rate": 8.861172199057466e-07,
"loss": 0.8767,
"step": 9000
},
{
"epoch": 4.58,
"learning_rate": 8.651674313228997e-07,
"loss": 0.716,
"step": 9010
},
{
"epoch": 4.58,
"learning_rate": 8.444639033426904e-07,
"loss": 0.8054,
"step": 9020
},
{
"epoch": 4.59,
"learning_rate": 8.240068472142815e-07,
"loss": 0.7585,
"step": 9030
},
{
"epoch": 4.59,
"learning_rate": 8.037964716719609e-07,
"loss": 0.8554,
"step": 9040
},
{
"epoch": 4.6,
"learning_rate": 7.838329829329943e-07,
"loss": 0.7113,
"step": 9050
},
{
"epoch": 4.6,
"learning_rate": 7.641165846955345e-07,
"loss": 0.8091,
"step": 9060
},
{
"epoch": 4.61,
"learning_rate": 7.446474781365314e-07,
"loss": 0.7636,
"step": 9070
},
{
"epoch": 4.61,
"learning_rate": 7.254258619096982e-07,
"loss": 0.7905,
"step": 9080
},
{
"epoch": 4.62,
"learning_rate": 7.083381736919054e-07,
"loss": 0.796,
"step": 9090
},
{
"epoch": 4.62,
"learning_rate": 6.895873273412512e-07,
"loss": 0.7762,
"step": 9100
},
{
"epoch": 4.63,
"learning_rate": 6.710845331309279e-07,
"loss": 0.7687,
"step": 9110
},
{
"epoch": 4.63,
"learning_rate": 6.528299798548327e-07,
"loss": 0.769,
"step": 9120
},
{
"epoch": 4.64,
"learning_rate": 6.34823853773936e-07,
"loss": 0.8484,
"step": 9130
},
{
"epoch": 4.64,
"learning_rate": 6.170663386143721e-07,
"loss": 0.7223,
"step": 9140
},
{
"epoch": 4.65,
"learning_rate": 5.995576155655657e-07,
"loss": 0.8462,
"step": 9150
},
{
"epoch": 4.66,
"learning_rate": 5.822978632783748e-07,
"loss": 0.7518,
"step": 9160
},
{
"epoch": 4.66,
"learning_rate": 5.652872578632867e-07,
"loss": 0.7555,
"step": 9170
},
{
"epoch": 4.67,
"learning_rate": 5.485259728886055e-07,
"loss": 0.7159,
"step": 9180
},
{
"epoch": 4.67,
"learning_rate": 5.320141793786815e-07,
"loss": 0.8041,
"step": 9190
},
{
"epoch": 4.68,
"learning_rate": 5.157520458121734e-07,
"loss": 0.7807,
"step": 9200
},
{
"epoch": 4.68,
"learning_rate": 4.997397381203278e-07,
"loss": 0.7471,
"step": 9210
},
{
"epoch": 4.69,
"learning_rate": 4.839774196852831e-07,
"loss": 0.6955,
"step": 9220
},
{
"epoch": 4.69,
"learning_rate": 4.6846525133840135e-07,
"loss": 0.8771,
"step": 9230
},
{
"epoch": 4.7,
"learning_rate": 4.532033913586281e-07,
"loss": 0.7871,
"step": 9240
},
{
"epoch": 4.7,
"learning_rate": 4.3819199547089073e-07,
"loss": 0.794,
"step": 9250
},
{
"epoch": 4.71,
"learning_rate": 4.234312168444804e-07,
"loss": 0.7949,
"step": 9260
},
{
"epoch": 4.71,
"learning_rate": 4.0892120609151706e-07,
"loss": 0.8159,
"step": 9270
},
{
"epoch": 4.72,
"learning_rate": 3.946621112654009e-07,
"loss": 0.7887,
"step": 9280
},
{
"epoch": 4.72,
"learning_rate": 3.806540778593021e-07,
"loss": 0.7609,
"step": 9290
},
{
"epoch": 4.73,
"learning_rate": 3.668972488046762e-07,
"loss": 0.7927,
"step": 9300
},
{
"epoch": 4.73,
"learning_rate": 3.5339176446980424e-07,
"loss": 0.7027,
"step": 9310
},
{
"epoch": 4.74,
"learning_rate": 3.4013776265836293e-07,
"loss": 0.7591,
"step": 9320
},
{
"epoch": 4.74,
"learning_rate": 3.271353786080261e-07,
"loss": 0.8672,
"step": 9330
},
{
"epoch": 4.75,
"learning_rate": 3.143847449890658e-07,
"loss": 0.7764,
"step": 9340
},
{
"epoch": 4.75,
"learning_rate": 3.018859919030198e-07,
"loss": 0.7795,
"step": 9350
},
{
"epoch": 4.76,
"learning_rate": 2.896392468813458e-07,
"loss": 0.8484,
"step": 9360
},
{
"epoch": 4.76,
"learning_rate": 2.7764463488413327e-07,
"loss": 0.7502,
"step": 9370
},
{
"epoch": 4.77,
"learning_rate": 2.659022782988241e-07,
"loss": 0.7886,
"step": 9380
},
{
"epoch": 4.77,
"learning_rate": 2.5441229693895786e-07,
"loss": 0.7578,
"step": 9390
},
{
"epoch": 4.78,
"learning_rate": 2.431748080429619e-07,
"loss": 0.7947,
"step": 9400
},
{
"epoch": 4.78,
"learning_rate": 2.32189926272941e-07,
"loss": 0.7695,
"step": 9410
},
{
"epoch": 4.79,
"learning_rate": 2.2145776371352288e-07,
"loss": 0.8766,
"step": 9420
},
{
"epoch": 4.79,
"learning_rate": 2.109784298707007e-07,
"loss": 0.9572,
"step": 9430
},
{
"epoch": 4.8,
"learning_rate": 2.0075203167071733e-07,
"loss": 0.8072,
"step": 9440
},
{
"epoch": 4.8,
"learning_rate": 1.9077867345898282e-07,
"loss": 0.7861,
"step": 9450
},
{
"epoch": 4.81,
"learning_rate": 1.8105845699900592e-07,
"loss": 0.813,
"step": 9460
},
{
"epoch": 4.81,
"learning_rate": 1.7159148147135596e-07,
"loss": 0.714,
"step": 9470
},
{
"epoch": 4.82,
"learning_rate": 1.623778434726414e-07,
"loss": 0.8831,
"step": 9480
},
{
"epoch": 4.82,
"learning_rate": 1.5341763701453848e-07,
"loss": 0.7104,
"step": 9490
},
{
"epoch": 4.83,
"learning_rate": 1.4471095352282804e-07,
"loss": 0.7877,
"step": 9500
},
{
"epoch": 4.83,
"learning_rate": 1.362578818364546e-07,
"loss": 0.7484,
"step": 9510
},
{
"epoch": 4.84,
"learning_rate": 1.280585082066299e-07,
"loss": 0.6747,
"step": 9520
},
{
"epoch": 4.84,
"learning_rate": 1.2011291629594746e-07,
"loss": 0.7271,
"step": 9530
},
{
"epoch": 4.85,
"learning_rate": 1.1242118717753047e-07,
"loss": 0.8372,
"step": 9540
},
{
"epoch": 4.85,
"learning_rate": 1.0498339933420476e-07,
"loss": 0.8522,
"step": 9550
},
{
"epoch": 4.86,
"learning_rate": 9.779962865769654e-08,
"loss": 0.8283,
"step": 9560
},
{
"epoch": 4.86,
"learning_rate": 9.086994844786089e-08,
"loss": 0.7474,
"step": 9570
},
{
"epoch": 4.87,
"learning_rate": 8.419442941192679e-08,
"loss": 0.7889,
"step": 9580
},
{
"epoch": 4.87,
"learning_rate": 7.77731396637893e-08,
"loss": 0.7646,
"step": 9590
},
{
"epoch": 4.88,
"learning_rate": 7.160614472329907e-08,
"loss": 0.8362,
"step": 9600
},
{
"epoch": 4.88,
"learning_rate": 6.569350751560177e-08,
"loss": 0.7019,
"step": 9610
},
{
"epoch": 4.89,
"learning_rate": 6.003528837049966e-08,
"loss": 0.7858,
"step": 9620
},
{
"epoch": 4.89,
"learning_rate": 5.46315450218271e-08,
"loss": 0.7506,
"step": 9630
},
{
"epoch": 4.9,
"learning_rate": 4.9482332606867746e-08,
"loss": 0.7883,
"step": 9640
},
{
"epoch": 4.9,
"learning_rate": 4.458770366578824e-08,
"loss": 0.7803,
"step": 9650
},
{
"epoch": 4.91,
"learning_rate": 3.994770814110538e-08,
"loss": 0.772,
"step": 9660
},
{
"epoch": 4.91,
"learning_rate": 3.5562393377172595e-08,
"loss": 0.8655,
"step": 9670
},
{
"epoch": 4.92,
"learning_rate": 3.1431804119705366e-08,
"loss": 0.7657,
"step": 9680
},
{
"epoch": 4.92,
"learning_rate": 2.7555982515312107e-08,
"loss": 0.8572,
"step": 9690
},
{
"epoch": 4.93,
"learning_rate": 2.3934968111075095e-08,
"loss": 0.8006,
"step": 9700
},
{
"epoch": 4.93,
"learning_rate": 2.0568797854139678e-08,
"loss": 0.749,
"step": 9710
},
{
"epoch": 4.94,
"learning_rate": 1.745750609133956e-08,
"loss": 0.7909,
"step": 9720
},
{
"epoch": 4.94,
"learning_rate": 1.4601124568849878e-08,
"loss": 0.8854,
"step": 9730
},
{
"epoch": 4.95,
"learning_rate": 1.1999682431859672e-08,
"loss": 0.6958,
"step": 9740
},
{
"epoch": 4.95,
"learning_rate": 9.653206224272126e-09,
"loss": 0.7082,
"step": 9750
},
{
"epoch": 4.96,
"learning_rate": 7.561719888440899e-09,
"loss": 0.8519,
"step": 9760
},
{
"epoch": 4.97,
"learning_rate": 5.725244764917537e-09,
"loss": 0.7919,
"step": 9770
},
{
"epoch": 4.97,
"learning_rate": 4.143799592240538e-09,
"loss": 0.7442,
"step": 9780
},
{
"epoch": 4.98,
"learning_rate": 2.8174005067410637e-09,
"loss": 0.7768,
"step": 9790
},
{
"epoch": 4.98,
"learning_rate": 1.7460610423764011e-09,
"loss": 0.803,
"step": 9800
},
{
"epoch": 4.99,
"learning_rate": 9.297921305967405e-10,
"loss": 0.738,
"step": 9810
},
{
"epoch": 4.99,
"learning_rate": 3.686021002313744e-10,
"loss": 0.7374,
"step": 9820
},
{
"epoch": 5.0,
"learning_rate": 6.249667740265696e-11,
"loss": 0.7616,
"step": 9830
},
{
"epoch": 5.0,
"step": 9835,
"total_flos": 3.008290083981312e+18,
"train_loss": 0.8266432806266161,
"train_runtime": 62063.7747,
"train_samples_per_second": 2.536,
"train_steps_per_second": 0.158
}
],
"logging_steps": 10,
"max_steps": 9835,
"num_train_epochs": 5,
"save_steps": 1000,
"total_flos": 3.008290083981312e+18,
"trial_name": null,
"trial_params": null
}