EthioLLM-l-250K / trainer_state.json
Atnafu's picture
adding models
6b054e6
raw
history blame
144 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 584800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"learning_rate": 4.99577633378933e-05,
"loss": 8.4265,
"step": 500
},
{
"epoch": 0.02,
"learning_rate": 4.9915013679890566e-05,
"loss": 7.7803,
"step": 1000
},
{
"epoch": 0.03,
"learning_rate": 4.987226402188783e-05,
"loss": 7.4779,
"step": 1500
},
{
"epoch": 0.03,
"learning_rate": 4.9829514363885096e-05,
"loss": 7.2649,
"step": 2000
},
{
"epoch": 0.04,
"learning_rate": 4.978676470588236e-05,
"loss": 7.0404,
"step": 2500
},
{
"epoch": 0.05,
"learning_rate": 4.974401504787962e-05,
"loss": 6.9479,
"step": 3000
},
{
"epoch": 0.06,
"learning_rate": 4.970126538987688e-05,
"loss": 6.8169,
"step": 3500
},
{
"epoch": 0.07,
"learning_rate": 4.965851573187415e-05,
"loss": 6.6874,
"step": 4000
},
{
"epoch": 0.08,
"learning_rate": 4.961576607387141e-05,
"loss": 6.6356,
"step": 4500
},
{
"epoch": 0.09,
"learning_rate": 4.957301641586867e-05,
"loss": 6.578,
"step": 5000
},
{
"epoch": 0.09,
"learning_rate": 4.953026675786594e-05,
"loss": 6.4661,
"step": 5500
},
{
"epoch": 0.1,
"learning_rate": 4.94875170998632e-05,
"loss": 6.3675,
"step": 6000
},
{
"epoch": 0.11,
"learning_rate": 4.944485294117647e-05,
"loss": 6.3715,
"step": 6500
},
{
"epoch": 0.12,
"learning_rate": 4.940218878248974e-05,
"loss": 6.3238,
"step": 7000
},
{
"epoch": 0.13,
"learning_rate": 4.9359439124487e-05,
"loss": 6.2349,
"step": 7500
},
{
"epoch": 0.14,
"learning_rate": 4.931668946648427e-05,
"loss": 6.1865,
"step": 8000
},
{
"epoch": 0.15,
"learning_rate": 4.927393980848153e-05,
"loss": 6.1535,
"step": 8500
},
{
"epoch": 0.15,
"learning_rate": 4.92311901504788e-05,
"loss": 6.0428,
"step": 9000
},
{
"epoch": 0.16,
"learning_rate": 4.9188525991792064e-05,
"loss": 6.0053,
"step": 9500
},
{
"epoch": 0.17,
"learning_rate": 4.914586183310534e-05,
"loss": 5.9903,
"step": 10000
},
{
"epoch": 0.18,
"learning_rate": 4.91031121751026e-05,
"loss": 6.0044,
"step": 10500
},
{
"epoch": 0.19,
"learning_rate": 4.906036251709987e-05,
"loss": 5.9116,
"step": 11000
},
{
"epoch": 0.2,
"learning_rate": 4.901761285909713e-05,
"loss": 5.8578,
"step": 11500
},
{
"epoch": 0.21,
"learning_rate": 4.897486320109439e-05,
"loss": 5.8959,
"step": 12000
},
{
"epoch": 0.21,
"learning_rate": 4.8932113543091655e-05,
"loss": 5.8711,
"step": 12500
},
{
"epoch": 0.22,
"learning_rate": 4.888936388508892e-05,
"loss": 5.809,
"step": 13000
},
{
"epoch": 0.23,
"learning_rate": 4.8846614227086185e-05,
"loss": 5.7566,
"step": 13500
},
{
"epoch": 0.24,
"learning_rate": 4.880386456908345e-05,
"loss": 5.6893,
"step": 14000
},
{
"epoch": 0.25,
"learning_rate": 4.8761200410396716e-05,
"loss": 5.6636,
"step": 14500
},
{
"epoch": 0.26,
"learning_rate": 4.8718450752393984e-05,
"loss": 5.6148,
"step": 15000
},
{
"epoch": 0.27,
"learning_rate": 4.8675701094391246e-05,
"loss": 5.6467,
"step": 15500
},
{
"epoch": 0.27,
"learning_rate": 4.8632951436388514e-05,
"loss": 5.5868,
"step": 16000
},
{
"epoch": 0.28,
"learning_rate": 4.8590201778385776e-05,
"loss": 5.5777,
"step": 16500
},
{
"epoch": 0.29,
"learning_rate": 4.8547452120383044e-05,
"loss": 5.5658,
"step": 17000
},
{
"epoch": 0.3,
"learning_rate": 4.8504702462380306e-05,
"loss": 5.5476,
"step": 17500
},
{
"epoch": 0.31,
"learning_rate": 4.846195280437757e-05,
"loss": 5.4681,
"step": 18000
},
{
"epoch": 0.32,
"learning_rate": 4.841928864569084e-05,
"loss": 5.4999,
"step": 18500
},
{
"epoch": 0.32,
"learning_rate": 4.8376538987688105e-05,
"loss": 5.4484,
"step": 19000
},
{
"epoch": 0.33,
"learning_rate": 4.833378932968537e-05,
"loss": 5.43,
"step": 19500
},
{
"epoch": 0.34,
"learning_rate": 4.829103967168263e-05,
"loss": 5.3776,
"step": 20000
},
{
"epoch": 0.35,
"learning_rate": 4.824829001367989e-05,
"loss": 5.3485,
"step": 20500
},
{
"epoch": 0.36,
"learning_rate": 4.8205625854993166e-05,
"loss": 5.3793,
"step": 21000
},
{
"epoch": 0.37,
"learning_rate": 4.816287619699043e-05,
"loss": 5.3071,
"step": 21500
},
{
"epoch": 0.38,
"learning_rate": 4.81202120383037e-05,
"loss": 5.3121,
"step": 22000
},
{
"epoch": 0.38,
"learning_rate": 4.807746238030096e-05,
"loss": 5.2851,
"step": 22500
},
{
"epoch": 0.39,
"learning_rate": 4.803471272229823e-05,
"loss": 5.296,
"step": 23000
},
{
"epoch": 0.4,
"learning_rate": 4.799196306429549e-05,
"loss": 5.2854,
"step": 23500
},
{
"epoch": 0.41,
"learning_rate": 4.794921340629275e-05,
"loss": 5.2706,
"step": 24000
},
{
"epoch": 0.42,
"learning_rate": 4.790646374829001e-05,
"loss": 5.278,
"step": 24500
},
{
"epoch": 0.43,
"learning_rate": 4.786371409028728e-05,
"loss": 5.2171,
"step": 25000
},
{
"epoch": 0.44,
"learning_rate": 4.782096443228454e-05,
"loss": 5.2034,
"step": 25500
},
{
"epoch": 0.44,
"learning_rate": 4.777830027359781e-05,
"loss": 5.196,
"step": 26000
},
{
"epoch": 0.45,
"learning_rate": 4.773555061559507e-05,
"loss": 5.1717,
"step": 26500
},
{
"epoch": 0.46,
"learning_rate": 4.769288645690835e-05,
"loss": 5.1113,
"step": 27000
},
{
"epoch": 0.47,
"learning_rate": 4.765013679890561e-05,
"loss": 5.1481,
"step": 27500
},
{
"epoch": 0.48,
"learning_rate": 4.760738714090288e-05,
"loss": 5.0527,
"step": 28000
},
{
"epoch": 0.49,
"learning_rate": 4.7564637482900134e-05,
"loss": 5.1138,
"step": 28500
},
{
"epoch": 0.5,
"learning_rate": 4.75218878248974e-05,
"loss": 5.1183,
"step": 29000
},
{
"epoch": 0.5,
"learning_rate": 4.7479138166894664e-05,
"loss": 5.098,
"step": 29500
},
{
"epoch": 0.51,
"learning_rate": 4.743647400820794e-05,
"loss": 5.0963,
"step": 30000
},
{
"epoch": 0.52,
"learning_rate": 4.73938098495212e-05,
"loss": 5.0569,
"step": 30500
},
{
"epoch": 0.53,
"learning_rate": 4.735106019151847e-05,
"loss": 5.0201,
"step": 31000
},
{
"epoch": 0.54,
"learning_rate": 4.730831053351573e-05,
"loss": 5.0116,
"step": 31500
},
{
"epoch": 0.55,
"learning_rate": 4.7265560875513e-05,
"loss": 5.0433,
"step": 32000
},
{
"epoch": 0.56,
"learning_rate": 4.722281121751026e-05,
"loss": 5.0234,
"step": 32500
},
{
"epoch": 0.56,
"learning_rate": 4.7180061559507524e-05,
"loss": 4.9659,
"step": 33000
},
{
"epoch": 0.57,
"learning_rate": 4.7137311901504786e-05,
"loss": 4.9612,
"step": 33500
},
{
"epoch": 0.58,
"learning_rate": 4.7094562243502054e-05,
"loss": 4.9943,
"step": 34000
},
{
"epoch": 0.59,
"learning_rate": 4.7051898084815324e-05,
"loss": 4.9673,
"step": 34500
},
{
"epoch": 0.6,
"learning_rate": 4.7009148426812585e-05,
"loss": 4.8833,
"step": 35000
},
{
"epoch": 0.61,
"learning_rate": 4.696639876880985e-05,
"loss": 4.9569,
"step": 35500
},
{
"epoch": 0.62,
"learning_rate": 4.6923649110807115e-05,
"loss": 4.933,
"step": 36000
},
{
"epoch": 0.62,
"learning_rate": 4.688089945280438e-05,
"loss": 4.8907,
"step": 36500
},
{
"epoch": 0.63,
"learning_rate": 4.6838149794801645e-05,
"loss": 4.9273,
"step": 37000
},
{
"epoch": 0.64,
"learning_rate": 4.679540013679891e-05,
"loss": 4.8889,
"step": 37500
},
{
"epoch": 0.65,
"learning_rate": 4.6752650478796175e-05,
"loss": 4.9151,
"step": 38000
},
{
"epoch": 0.66,
"learning_rate": 4.670990082079344e-05,
"loss": 4.9004,
"step": 38500
},
{
"epoch": 0.67,
"learning_rate": 4.66671511627907e-05,
"loss": 4.8357,
"step": 39000
},
{
"epoch": 0.68,
"learning_rate": 4.662440150478796e-05,
"loss": 4.8494,
"step": 39500
},
{
"epoch": 0.68,
"learning_rate": 4.658165184678523e-05,
"loss": 4.8046,
"step": 40000
},
{
"epoch": 0.69,
"learning_rate": 4.653890218878249e-05,
"loss": 4.8581,
"step": 40500
},
{
"epoch": 0.7,
"learning_rate": 4.649615253077976e-05,
"loss": 4.814,
"step": 41000
},
{
"epoch": 0.71,
"learning_rate": 4.645340287277702e-05,
"loss": 4.7839,
"step": 41500
},
{
"epoch": 0.72,
"learning_rate": 4.641073871409029e-05,
"loss": 4.8091,
"step": 42000
},
{
"epoch": 0.73,
"learning_rate": 4.636798905608755e-05,
"loss": 4.8198,
"step": 42500
},
{
"epoch": 0.74,
"learning_rate": 4.632523939808482e-05,
"loss": 4.7321,
"step": 43000
},
{
"epoch": 0.74,
"learning_rate": 4.628248974008208e-05,
"loss": 4.7552,
"step": 43500
},
{
"epoch": 0.75,
"learning_rate": 4.623974008207935e-05,
"loss": 4.7621,
"step": 44000
},
{
"epoch": 0.76,
"learning_rate": 4.619699042407661e-05,
"loss": 4.7516,
"step": 44500
},
{
"epoch": 0.77,
"learning_rate": 4.615424076607387e-05,
"loss": 4.7157,
"step": 45000
},
{
"epoch": 0.78,
"learning_rate": 4.6111491108071134e-05,
"loss": 4.6958,
"step": 45500
},
{
"epoch": 0.79,
"learning_rate": 4.60687414500684e-05,
"loss": 4.7013,
"step": 46000
},
{
"epoch": 0.8,
"learning_rate": 4.6025991792065664e-05,
"loss": 4.6959,
"step": 46500
},
{
"epoch": 0.8,
"learning_rate": 4.598324213406293e-05,
"loss": 4.7148,
"step": 47000
},
{
"epoch": 0.81,
"learning_rate": 4.5940492476060194e-05,
"loss": 4.7094,
"step": 47500
},
{
"epoch": 0.82,
"learning_rate": 4.589774281805746e-05,
"loss": 4.6568,
"step": 48000
},
{
"epoch": 0.83,
"learning_rate": 4.5854993160054724e-05,
"loss": 4.671,
"step": 48500
},
{
"epoch": 0.84,
"learning_rate": 4.5812243502051985e-05,
"loss": 4.6499,
"step": 49000
},
{
"epoch": 0.85,
"learning_rate": 4.576949384404925e-05,
"loss": 4.6552,
"step": 49500
},
{
"epoch": 0.85,
"learning_rate": 4.5726915184678524e-05,
"loss": 4.6251,
"step": 50000
},
{
"epoch": 0.86,
"learning_rate": 4.5684165526675786e-05,
"loss": 4.6063,
"step": 50500
},
{
"epoch": 0.87,
"learning_rate": 4.564150136798906e-05,
"loss": 4.6095,
"step": 51000
},
{
"epoch": 0.88,
"learning_rate": 4.5598751709986324e-05,
"loss": 4.6453,
"step": 51500
},
{
"epoch": 0.89,
"learning_rate": 4.5556002051983585e-05,
"loss": 4.5956,
"step": 52000
},
{
"epoch": 0.9,
"learning_rate": 4.551325239398085e-05,
"loss": 4.5697,
"step": 52500
},
{
"epoch": 0.91,
"learning_rate": 4.5470502735978115e-05,
"loss": 4.6187,
"step": 53000
},
{
"epoch": 0.91,
"learning_rate": 4.542775307797538e-05,
"loss": 4.6015,
"step": 53500
},
{
"epoch": 0.92,
"learning_rate": 4.5385003419972645e-05,
"loss": 4.5628,
"step": 54000
},
{
"epoch": 0.93,
"learning_rate": 4.534225376196991e-05,
"loss": 4.5632,
"step": 54500
},
{
"epoch": 0.94,
"learning_rate": 4.5299504103967175e-05,
"loss": 4.5841,
"step": 55000
},
{
"epoch": 0.95,
"learning_rate": 4.525675444596443e-05,
"loss": 4.5934,
"step": 55500
},
{
"epoch": 0.96,
"learning_rate": 4.52140047879617e-05,
"loss": 4.5902,
"step": 56000
},
{
"epoch": 0.97,
"learning_rate": 4.517125512995896e-05,
"loss": 4.56,
"step": 56500
},
{
"epoch": 0.97,
"learning_rate": 4.512850547195623e-05,
"loss": 4.6091,
"step": 57000
},
{
"epoch": 0.98,
"learning_rate": 4.508575581395349e-05,
"loss": 4.5518,
"step": 57500
},
{
"epoch": 0.99,
"learning_rate": 4.504300615595076e-05,
"loss": 4.5324,
"step": 58000
},
{
"epoch": 1.0,
"learning_rate": 4.500025649794802e-05,
"loss": 4.5271,
"step": 58500
},
{
"epoch": 1.01,
"learning_rate": 4.495750683994529e-05,
"loss": 4.5211,
"step": 59000
},
{
"epoch": 1.02,
"learning_rate": 4.491475718194254e-05,
"loss": 4.5261,
"step": 59500
},
{
"epoch": 1.03,
"learning_rate": 4.487209302325582e-05,
"loss": 4.5172,
"step": 60000
},
{
"epoch": 1.03,
"learning_rate": 4.482942886456908e-05,
"loss": 4.5021,
"step": 60500
},
{
"epoch": 1.04,
"learning_rate": 4.478667920656635e-05,
"loss": 4.5054,
"step": 61000
},
{
"epoch": 1.05,
"learning_rate": 4.474392954856361e-05,
"loss": 4.4886,
"step": 61500
},
{
"epoch": 1.06,
"learning_rate": 4.470117989056088e-05,
"loss": 4.487,
"step": 62000
},
{
"epoch": 1.07,
"learning_rate": 4.465860123119015e-05,
"loss": 4.5001,
"step": 62500
},
{
"epoch": 1.08,
"learning_rate": 4.461585157318742e-05,
"loss": 4.5064,
"step": 63000
},
{
"epoch": 1.09,
"learning_rate": 4.457310191518468e-05,
"loss": 4.4453,
"step": 63500
},
{
"epoch": 1.09,
"learning_rate": 4.453035225718194e-05,
"loss": 4.4815,
"step": 64000
},
{
"epoch": 1.1,
"learning_rate": 4.4487602599179204e-05,
"loss": 4.4853,
"step": 64500
},
{
"epoch": 1.11,
"learning_rate": 4.444485294117647e-05,
"loss": 4.4796,
"step": 65000
},
{
"epoch": 1.12,
"learning_rate": 4.4402103283173734e-05,
"loss": 4.4877,
"step": 65500
},
{
"epoch": 1.13,
"learning_rate": 4.4359353625171e-05,
"loss": 4.4423,
"step": 66000
},
{
"epoch": 1.14,
"learning_rate": 4.4316603967168264e-05,
"loss": 4.4397,
"step": 66500
},
{
"epoch": 1.15,
"learning_rate": 4.427385430916553e-05,
"loss": 4.4684,
"step": 67000
},
{
"epoch": 1.15,
"learning_rate": 4.4231104651162794e-05,
"loss": 4.4439,
"step": 67500
},
{
"epoch": 1.16,
"learning_rate": 4.4188354993160055e-05,
"loss": 4.4587,
"step": 68000
},
{
"epoch": 1.17,
"learning_rate": 4.414560533515732e-05,
"loss": 4.397,
"step": 68500
},
{
"epoch": 1.18,
"learning_rate": 4.410294117647059e-05,
"loss": 4.398,
"step": 69000
},
{
"epoch": 1.19,
"learning_rate": 4.4060191518467855e-05,
"loss": 4.3962,
"step": 69500
},
{
"epoch": 1.2,
"learning_rate": 4.4017441860465116e-05,
"loss": 4.4318,
"step": 70000
},
{
"epoch": 1.21,
"learning_rate": 4.397469220246238e-05,
"loss": 4.4008,
"step": 70500
},
{
"epoch": 1.21,
"learning_rate": 4.3931942544459646e-05,
"loss": 4.3997,
"step": 71000
},
{
"epoch": 1.22,
"learning_rate": 4.388919288645691e-05,
"loss": 4.3974,
"step": 71500
},
{
"epoch": 1.23,
"learning_rate": 4.3846443228454176e-05,
"loss": 4.4154,
"step": 72000
},
{
"epoch": 1.24,
"learning_rate": 4.380369357045144e-05,
"loss": 4.3971,
"step": 72500
},
{
"epoch": 1.25,
"learning_rate": 4.3760943912448706e-05,
"loss": 4.3549,
"step": 73000
},
{
"epoch": 1.26,
"learning_rate": 4.371819425444597e-05,
"loss": 4.3565,
"step": 73500
},
{
"epoch": 1.27,
"learning_rate": 4.3675615595075245e-05,
"loss": 4.3663,
"step": 74000
},
{
"epoch": 1.27,
"learning_rate": 4.363286593707251e-05,
"loss": 4.3174,
"step": 74500
},
{
"epoch": 1.28,
"learning_rate": 4.359011627906977e-05,
"loss": 4.3624,
"step": 75000
},
{
"epoch": 1.29,
"learning_rate": 4.354736662106703e-05,
"loss": 4.3694,
"step": 75500
},
{
"epoch": 1.3,
"learning_rate": 4.35046169630643e-05,
"loss": 4.3345,
"step": 76000
},
{
"epoch": 1.31,
"learning_rate": 4.346195280437757e-05,
"loss": 4.3558,
"step": 76500
},
{
"epoch": 1.32,
"learning_rate": 4.341928864569084e-05,
"loss": 4.3529,
"step": 77000
},
{
"epoch": 1.33,
"learning_rate": 4.33765389876881e-05,
"loss": 4.3293,
"step": 77500
},
{
"epoch": 1.33,
"learning_rate": 4.333378932968537e-05,
"loss": 4.3166,
"step": 78000
},
{
"epoch": 1.34,
"learning_rate": 4.329103967168263e-05,
"loss": 4.3186,
"step": 78500
},
{
"epoch": 1.35,
"learning_rate": 4.32483755129959e-05,
"loss": 4.372,
"step": 79000
},
{
"epoch": 1.36,
"learning_rate": 4.3205625854993167e-05,
"loss": 4.2791,
"step": 79500
},
{
"epoch": 1.37,
"learning_rate": 4.316296169630643e-05,
"loss": 4.3088,
"step": 80000
},
{
"epoch": 1.38,
"learning_rate": 4.31202120383037e-05,
"loss": 4.3065,
"step": 80500
},
{
"epoch": 1.39,
"learning_rate": 4.307746238030096e-05,
"loss": 4.3327,
"step": 81000
},
{
"epoch": 1.39,
"learning_rate": 4.303471272229823e-05,
"loss": 4.2718,
"step": 81500
},
{
"epoch": 1.4,
"learning_rate": 4.299196306429549e-05,
"loss": 4.2324,
"step": 82000
},
{
"epoch": 1.41,
"learning_rate": 4.294921340629276e-05,
"loss": 4.2851,
"step": 82500
},
{
"epoch": 1.42,
"learning_rate": 4.290654924760602e-05,
"loss": 4.2606,
"step": 83000
},
{
"epoch": 1.43,
"learning_rate": 4.286379958960329e-05,
"loss": 4.2524,
"step": 83500
},
{
"epoch": 1.44,
"learning_rate": 4.282104993160055e-05,
"loss": 4.2853,
"step": 84000
},
{
"epoch": 1.44,
"learning_rate": 4.277830027359782e-05,
"loss": 4.274,
"step": 84500
},
{
"epoch": 1.45,
"learning_rate": 4.273555061559508e-05,
"loss": 4.2519,
"step": 85000
},
{
"epoch": 1.46,
"learning_rate": 4.269280095759234e-05,
"loss": 4.2295,
"step": 85500
},
{
"epoch": 1.47,
"learning_rate": 4.26500512995896e-05,
"loss": 4.27,
"step": 86000
},
{
"epoch": 1.48,
"learning_rate": 4.260730164158687e-05,
"loss": 4.2542,
"step": 86500
},
{
"epoch": 1.49,
"learning_rate": 4.256455198358413e-05,
"loss": 4.1816,
"step": 87000
},
{
"epoch": 1.5,
"learning_rate": 4.2521802325581395e-05,
"loss": 4.2759,
"step": 87500
},
{
"epoch": 1.5,
"learning_rate": 4.247905266757866e-05,
"loss": 4.2383,
"step": 88000
},
{
"epoch": 1.51,
"learning_rate": 4.2436303009575925e-05,
"loss": 4.2417,
"step": 88500
},
{
"epoch": 1.52,
"learning_rate": 4.2393553351573186e-05,
"loss": 4.2522,
"step": 89000
},
{
"epoch": 1.53,
"learning_rate": 4.235080369357045e-05,
"loss": 4.2286,
"step": 89500
},
{
"epoch": 1.54,
"learning_rate": 4.2308054035567716e-05,
"loss": 4.213,
"step": 90000
},
{
"epoch": 1.55,
"learning_rate": 4.226530437756498e-05,
"loss": 4.2361,
"step": 90500
},
{
"epoch": 1.56,
"learning_rate": 4.2222554719562246e-05,
"loss": 4.2226,
"step": 91000
},
{
"epoch": 1.56,
"learning_rate": 4.2179976060191524e-05,
"loss": 4.2126,
"step": 91500
},
{
"epoch": 1.57,
"learning_rate": 4.2137226402188785e-05,
"loss": 4.2679,
"step": 92000
},
{
"epoch": 1.58,
"learning_rate": 4.2094476744186054e-05,
"loss": 4.2176,
"step": 92500
},
{
"epoch": 1.59,
"learning_rate": 4.2051727086183315e-05,
"loss": 4.2451,
"step": 93000
},
{
"epoch": 1.6,
"learning_rate": 4.200897742818058e-05,
"loss": 4.2348,
"step": 93500
},
{
"epoch": 1.61,
"learning_rate": 4.196622777017784e-05,
"loss": 4.1622,
"step": 94000
},
{
"epoch": 1.62,
"learning_rate": 4.19234781121751e-05,
"loss": 4.2212,
"step": 94500
},
{
"epoch": 1.62,
"learning_rate": 4.188089945280438e-05,
"loss": 4.1774,
"step": 95000
},
{
"epoch": 1.63,
"learning_rate": 4.1838149794801646e-05,
"loss": 4.1855,
"step": 95500
},
{
"epoch": 1.64,
"learning_rate": 4.179540013679891e-05,
"loss": 4.1997,
"step": 96000
},
{
"epoch": 1.65,
"learning_rate": 4.1752650478796176e-05,
"loss": 4.1479,
"step": 96500
},
{
"epoch": 1.66,
"learning_rate": 4.170990082079344e-05,
"loss": 4.1943,
"step": 97000
},
{
"epoch": 1.67,
"learning_rate": 4.16671511627907e-05,
"loss": 4.1743,
"step": 97500
},
{
"epoch": 1.68,
"learning_rate": 4.162440150478796e-05,
"loss": 4.1654,
"step": 98000
},
{
"epoch": 1.68,
"learning_rate": 4.158165184678523e-05,
"loss": 4.1624,
"step": 98500
},
{
"epoch": 1.69,
"learning_rate": 4.153890218878249e-05,
"loss": 4.204,
"step": 99000
},
{
"epoch": 1.7,
"learning_rate": 4.149615253077976e-05,
"loss": 4.0859,
"step": 99500
},
{
"epoch": 1.71,
"learning_rate": 4.145340287277702e-05,
"loss": 4.1537,
"step": 100000
},
{
"epoch": 1.72,
"learning_rate": 4.141065321477429e-05,
"loss": 4.172,
"step": 100500
},
{
"epoch": 1.73,
"learning_rate": 4.136790355677155e-05,
"loss": 4.1576,
"step": 101000
},
{
"epoch": 1.74,
"learning_rate": 4.132523939808482e-05,
"loss": 4.1124,
"step": 101500
},
{
"epoch": 1.74,
"learning_rate": 4.128248974008208e-05,
"loss": 4.1375,
"step": 102000
},
{
"epoch": 1.75,
"learning_rate": 4.123982558139535e-05,
"loss": 4.1307,
"step": 102500
},
{
"epoch": 1.76,
"learning_rate": 4.119707592339261e-05,
"loss": 4.1423,
"step": 103000
},
{
"epoch": 1.77,
"learning_rate": 4.115432626538988e-05,
"loss": 4.1378,
"step": 103500
},
{
"epoch": 1.78,
"learning_rate": 4.111157660738714e-05,
"loss": 4.0877,
"step": 104000
},
{
"epoch": 1.79,
"learning_rate": 4.106882694938441e-05,
"loss": 4.1337,
"step": 104500
},
{
"epoch": 1.8,
"learning_rate": 4.102607729138167e-05,
"loss": 4.1034,
"step": 105000
},
{
"epoch": 1.8,
"learning_rate": 4.098332763337894e-05,
"loss": 4.1387,
"step": 105500
},
{
"epoch": 1.81,
"learning_rate": 4.0940577975376195e-05,
"loss": 4.1397,
"step": 106000
},
{
"epoch": 1.82,
"learning_rate": 4.0897828317373464e-05,
"loss": 4.142,
"step": 106500
},
{
"epoch": 1.83,
"learning_rate": 4.0855078659370725e-05,
"loss": 4.1181,
"step": 107000
},
{
"epoch": 1.84,
"learning_rate": 4.0812414500684e-05,
"loss": 4.1161,
"step": 107500
},
{
"epoch": 1.85,
"learning_rate": 4.0769750341997264e-05,
"loss": 4.0865,
"step": 108000
},
{
"epoch": 1.86,
"learning_rate": 4.0727086183310534e-05,
"loss": 4.0692,
"step": 108500
},
{
"epoch": 1.86,
"learning_rate": 4.0684336525307795e-05,
"loss": 4.0814,
"step": 109000
},
{
"epoch": 1.87,
"learning_rate": 4.0641586867305064e-05,
"loss": 4.0752,
"step": 109500
},
{
"epoch": 1.88,
"learning_rate": 4.0598837209302325e-05,
"loss": 4.0893,
"step": 110000
},
{
"epoch": 1.89,
"learning_rate": 4.0556087551299594e-05,
"loss": 4.0941,
"step": 110500
},
{
"epoch": 1.9,
"learning_rate": 4.0513337893296855e-05,
"loss": 4.1135,
"step": 111000
},
{
"epoch": 1.91,
"learning_rate": 4.0470588235294124e-05,
"loss": 4.0911,
"step": 111500
},
{
"epoch": 1.92,
"learning_rate": 4.0427924076607386e-05,
"loss": 4.0741,
"step": 112000
},
{
"epoch": 1.92,
"learning_rate": 4.0385174418604655e-05,
"loss": 4.1022,
"step": 112500
},
{
"epoch": 1.93,
"learning_rate": 4.0342424760601916e-05,
"loss": 4.0507,
"step": 113000
},
{
"epoch": 1.94,
"learning_rate": 4.0299675102599185e-05,
"loss": 4.0823,
"step": 113500
},
{
"epoch": 1.95,
"learning_rate": 4.0256925444596446e-05,
"loss": 4.0645,
"step": 114000
},
{
"epoch": 1.96,
"learning_rate": 4.021417578659371e-05,
"loss": 4.0487,
"step": 114500
},
{
"epoch": 1.97,
"learning_rate": 4.017142612859097e-05,
"loss": 4.0594,
"step": 115000
},
{
"epoch": 1.98,
"learning_rate": 4.012867647058824e-05,
"loss": 4.0333,
"step": 115500
},
{
"epoch": 1.98,
"learning_rate": 4.00859268125855e-05,
"loss": 4.0575,
"step": 116000
},
{
"epoch": 1.99,
"learning_rate": 4.004317715458277e-05,
"loss": 4.0441,
"step": 116500
},
{
"epoch": 2.0,
"learning_rate": 4.000042749658003e-05,
"loss": 4.0541,
"step": 117000
},
{
"epoch": 2.01,
"learning_rate": 3.99576778385773e-05,
"loss": 4.0371,
"step": 117500
},
{
"epoch": 2.02,
"learning_rate": 3.991492818057456e-05,
"loss": 4.0368,
"step": 118000
},
{
"epoch": 2.03,
"learning_rate": 3.987226402188783e-05,
"loss": 4.0379,
"step": 118500
},
{
"epoch": 2.03,
"learning_rate": 3.982951436388509e-05,
"loss": 4.0413,
"step": 119000
},
{
"epoch": 2.04,
"learning_rate": 3.978676470588236e-05,
"loss": 3.9662,
"step": 119500
},
{
"epoch": 2.05,
"learning_rate": 3.974401504787962e-05,
"loss": 4.0108,
"step": 120000
},
{
"epoch": 2.06,
"learning_rate": 3.970126538987688e-05,
"loss": 4.0292,
"step": 120500
},
{
"epoch": 2.07,
"learning_rate": 3.965851573187414e-05,
"loss": 3.9768,
"step": 121000
},
{
"epoch": 2.08,
"learning_rate": 3.961593707250342e-05,
"loss": 4.01,
"step": 121500
},
{
"epoch": 2.09,
"learning_rate": 3.957318741450068e-05,
"loss": 4.0071,
"step": 122000
},
{
"epoch": 2.09,
"learning_rate": 3.953043775649795e-05,
"loss": 3.9862,
"step": 122500
},
{
"epoch": 2.1,
"learning_rate": 3.948768809849521e-05,
"loss": 4.0002,
"step": 123000
},
{
"epoch": 2.11,
"learning_rate": 3.944493844049248e-05,
"loss": 3.986,
"step": 123500
},
{
"epoch": 2.12,
"learning_rate": 3.940218878248974e-05,
"loss": 4.0122,
"step": 124000
},
{
"epoch": 2.13,
"learning_rate": 3.935952462380301e-05,
"loss": 4.0036,
"step": 124500
},
{
"epoch": 2.14,
"learning_rate": 3.931677496580027e-05,
"loss": 4.0221,
"step": 125000
},
{
"epoch": 2.15,
"learning_rate": 3.927402530779754e-05,
"loss": 3.9919,
"step": 125500
},
{
"epoch": 2.15,
"learning_rate": 3.9231361149110804e-05,
"loss": 3.9685,
"step": 126000
},
{
"epoch": 2.16,
"learning_rate": 3.918861149110807e-05,
"loss": 3.9836,
"step": 126500
},
{
"epoch": 2.17,
"learning_rate": 3.914594733242134e-05,
"loss": 3.9964,
"step": 127000
},
{
"epoch": 2.18,
"learning_rate": 3.9103197674418604e-05,
"loss": 3.9632,
"step": 127500
},
{
"epoch": 2.19,
"learning_rate": 3.9060448016415865e-05,
"loss": 4.0239,
"step": 128000
},
{
"epoch": 2.2,
"learning_rate": 3.9017698358413134e-05,
"loss": 3.9661,
"step": 128500
},
{
"epoch": 2.21,
"learning_rate": 3.8974948700410395e-05,
"loss": 4.0074,
"step": 129000
},
{
"epoch": 2.21,
"learning_rate": 3.8932199042407664e-05,
"loss": 3.9572,
"step": 129500
},
{
"epoch": 2.22,
"learning_rate": 3.8889449384404925e-05,
"loss": 4.0149,
"step": 130000
},
{
"epoch": 2.23,
"learning_rate": 3.8846699726402194e-05,
"loss": 3.9621,
"step": 130500
},
{
"epoch": 2.24,
"learning_rate": 3.8803950068399455e-05,
"loss": 3.9519,
"step": 131000
},
{
"epoch": 2.25,
"learning_rate": 3.876120041039672e-05,
"loss": 3.9366,
"step": 131500
},
{
"epoch": 2.26,
"learning_rate": 3.871845075239398e-05,
"loss": 3.9689,
"step": 132000
},
{
"epoch": 2.27,
"learning_rate": 3.867570109439125e-05,
"loss": 3.9984,
"step": 132500
},
{
"epoch": 2.27,
"learning_rate": 3.863295143638851e-05,
"loss": 3.9291,
"step": 133000
},
{
"epoch": 2.28,
"learning_rate": 3.859020177838578e-05,
"loss": 3.9921,
"step": 133500
},
{
"epoch": 2.29,
"learning_rate": 3.854745212038304e-05,
"loss": 3.9747,
"step": 134000
},
{
"epoch": 2.3,
"learning_rate": 3.8504702462380307e-05,
"loss": 3.9321,
"step": 134500
},
{
"epoch": 2.31,
"learning_rate": 3.846195280437757e-05,
"loss": 3.9596,
"step": 135000
},
{
"epoch": 2.32,
"learning_rate": 3.841920314637483e-05,
"loss": 3.9574,
"step": 135500
},
{
"epoch": 2.33,
"learning_rate": 3.83765389876881e-05,
"loss": 3.9476,
"step": 136000
},
{
"epoch": 2.33,
"learning_rate": 3.833378932968537e-05,
"loss": 3.937,
"step": 136500
},
{
"epoch": 2.34,
"learning_rate": 3.829103967168263e-05,
"loss": 3.9935,
"step": 137000
},
{
"epoch": 2.35,
"learning_rate": 3.82483755129959e-05,
"loss": 3.9308,
"step": 137500
},
{
"epoch": 2.36,
"learning_rate": 3.820562585499316e-05,
"loss": 3.9201,
"step": 138000
},
{
"epoch": 2.37,
"learning_rate": 3.816287619699043e-05,
"loss": 3.952,
"step": 138500
},
{
"epoch": 2.38,
"learning_rate": 3.812021203830369e-05,
"loss": 3.9014,
"step": 139000
},
{
"epoch": 2.39,
"learning_rate": 3.807746238030096e-05,
"loss": 3.9021,
"step": 139500
},
{
"epoch": 2.39,
"learning_rate": 3.803471272229822e-05,
"loss": 3.9358,
"step": 140000
},
{
"epoch": 2.4,
"learning_rate": 3.799196306429549e-05,
"loss": 3.9108,
"step": 140500
},
{
"epoch": 2.41,
"learning_rate": 3.794921340629275e-05,
"loss": 3.9362,
"step": 141000
},
{
"epoch": 2.42,
"learning_rate": 3.790646374829002e-05,
"loss": 3.9195,
"step": 141500
},
{
"epoch": 2.43,
"learning_rate": 3.786371409028728e-05,
"loss": 3.9143,
"step": 142000
},
{
"epoch": 2.44,
"learning_rate": 3.782096443228454e-05,
"loss": 3.8993,
"step": 142500
},
{
"epoch": 2.45,
"learning_rate": 3.7778214774281804e-05,
"loss": 3.8982,
"step": 143000
},
{
"epoch": 2.45,
"learning_rate": 3.773546511627907e-05,
"loss": 3.9135,
"step": 143500
},
{
"epoch": 2.46,
"learning_rate": 3.7692715458276334e-05,
"loss": 3.9076,
"step": 144000
},
{
"epoch": 2.47,
"learning_rate": 3.76499658002736e-05,
"loss": 3.9286,
"step": 144500
},
{
"epoch": 2.48,
"learning_rate": 3.7607301641586865e-05,
"loss": 3.8641,
"step": 145000
},
{
"epoch": 2.49,
"learning_rate": 3.7564551983584134e-05,
"loss": 3.9229,
"step": 145500
},
{
"epoch": 2.5,
"learning_rate": 3.7521802325581395e-05,
"loss": 3.9007,
"step": 146000
},
{
"epoch": 2.51,
"learning_rate": 3.7479052667578664e-05,
"loss": 3.8653,
"step": 146500
},
{
"epoch": 2.51,
"learning_rate": 3.7436303009575925e-05,
"loss": 3.9433,
"step": 147000
},
{
"epoch": 2.52,
"learning_rate": 3.7393553351573194e-05,
"loss": 3.934,
"step": 147500
},
{
"epoch": 2.53,
"learning_rate": 3.7350803693570455e-05,
"loss": 3.9171,
"step": 148000
},
{
"epoch": 2.54,
"learning_rate": 3.730805403556772e-05,
"loss": 3.8635,
"step": 148500
},
{
"epoch": 2.55,
"learning_rate": 3.726530437756498e-05,
"loss": 3.8977,
"step": 149000
},
{
"epoch": 2.56,
"learning_rate": 3.722255471956225e-05,
"loss": 3.931,
"step": 149500
},
{
"epoch": 2.56,
"learning_rate": 3.7179890560875516e-05,
"loss": 3.8653,
"step": 150000
},
{
"epoch": 2.57,
"learning_rate": 3.713714090287278e-05,
"loss": 3.875,
"step": 150500
},
{
"epoch": 2.58,
"learning_rate": 3.709439124487004e-05,
"loss": 3.8962,
"step": 151000
},
{
"epoch": 2.59,
"learning_rate": 3.705164158686731e-05,
"loss": 3.8875,
"step": 151500
},
{
"epoch": 2.6,
"learning_rate": 3.700897742818058e-05,
"loss": 3.8829,
"step": 152000
},
{
"epoch": 2.61,
"learning_rate": 3.696622777017784e-05,
"loss": 3.8708,
"step": 152500
},
{
"epoch": 2.62,
"learning_rate": 3.69234781121751e-05,
"loss": 3.8609,
"step": 153000
},
{
"epoch": 2.62,
"learning_rate": 3.688072845417237e-05,
"loss": 3.8731,
"step": 153500
},
{
"epoch": 2.63,
"learning_rate": 3.683797879616963e-05,
"loss": 3.8877,
"step": 154000
},
{
"epoch": 2.64,
"learning_rate": 3.67952291381669e-05,
"loss": 3.8592,
"step": 154500
},
{
"epoch": 2.65,
"learning_rate": 3.675256497948016e-05,
"loss": 3.8315,
"step": 155000
},
{
"epoch": 2.66,
"learning_rate": 3.670981532147743e-05,
"loss": 3.8739,
"step": 155500
},
{
"epoch": 2.67,
"learning_rate": 3.666706566347469e-05,
"loss": 3.8786,
"step": 156000
},
{
"epoch": 2.68,
"learning_rate": 3.662440150478796e-05,
"loss": 3.8754,
"step": 156500
},
{
"epoch": 2.68,
"learning_rate": 3.658165184678522e-05,
"loss": 3.8909,
"step": 157000
},
{
"epoch": 2.69,
"learning_rate": 3.653890218878249e-05,
"loss": 3.8487,
"step": 157500
},
{
"epoch": 2.7,
"learning_rate": 3.649615253077975e-05,
"loss": 3.8285,
"step": 158000
},
{
"epoch": 2.71,
"learning_rate": 3.645340287277702e-05,
"loss": 3.8426,
"step": 158500
},
{
"epoch": 2.72,
"learning_rate": 3.641065321477428e-05,
"loss": 3.8324,
"step": 159000
},
{
"epoch": 2.73,
"learning_rate": 3.636790355677155e-05,
"loss": 3.8314,
"step": 159500
},
{
"epoch": 2.74,
"learning_rate": 3.632515389876881e-05,
"loss": 3.8827,
"step": 160000
},
{
"epoch": 2.74,
"learning_rate": 3.6282404240766074e-05,
"loss": 3.8093,
"step": 160500
},
{
"epoch": 2.75,
"learning_rate": 3.6239654582763335e-05,
"loss": 3.8147,
"step": 161000
},
{
"epoch": 2.76,
"learning_rate": 3.619699042407661e-05,
"loss": 3.8406,
"step": 161500
},
{
"epoch": 2.77,
"learning_rate": 3.615424076607387e-05,
"loss": 3.8308,
"step": 162000
},
{
"epoch": 2.78,
"learning_rate": 3.611157660738714e-05,
"loss": 3.8018,
"step": 162500
},
{
"epoch": 2.79,
"learning_rate": 3.6068826949384404e-05,
"loss": 3.845,
"step": 163000
},
{
"epoch": 2.8,
"learning_rate": 3.602607729138167e-05,
"loss": 3.8614,
"step": 163500
},
{
"epoch": 2.8,
"learning_rate": 3.5983327633378934e-05,
"loss": 3.8299,
"step": 164000
},
{
"epoch": 2.81,
"learning_rate": 3.59405779753762e-05,
"loss": 3.857,
"step": 164500
},
{
"epoch": 2.82,
"learning_rate": 3.5897828317373464e-05,
"loss": 3.8043,
"step": 165000
},
{
"epoch": 2.83,
"learning_rate": 3.5855078659370726e-05,
"loss": 3.8535,
"step": 165500
},
{
"epoch": 2.84,
"learning_rate": 3.581232900136799e-05,
"loss": 3.806,
"step": 166000
},
{
"epoch": 2.85,
"learning_rate": 3.576983584131327e-05,
"loss": 3.8039,
"step": 166500
},
{
"epoch": 2.86,
"learning_rate": 3.5727086183310534e-05,
"loss": 3.7829,
"step": 167000
},
{
"epoch": 2.86,
"learning_rate": 3.5684336525307796e-05,
"loss": 3.7712,
"step": 167500
},
{
"epoch": 2.87,
"learning_rate": 3.5641586867305064e-05,
"loss": 3.8275,
"step": 168000
},
{
"epoch": 2.88,
"learning_rate": 3.5598922708618334e-05,
"loss": 3.84,
"step": 168500
},
{
"epoch": 2.89,
"learning_rate": 3.55561730506156e-05,
"loss": 3.8221,
"step": 169000
},
{
"epoch": 2.9,
"learning_rate": 3.551342339261286e-05,
"loss": 3.817,
"step": 169500
},
{
"epoch": 2.91,
"learning_rate": 3.5470673734610125e-05,
"loss": 3.775,
"step": 170000
},
{
"epoch": 2.92,
"learning_rate": 3.542792407660739e-05,
"loss": 3.799,
"step": 170500
},
{
"epoch": 2.92,
"learning_rate": 3.5385174418604655e-05,
"loss": 3.7927,
"step": 171000
},
{
"epoch": 2.93,
"learning_rate": 3.534242476060192e-05,
"loss": 3.7902,
"step": 171500
},
{
"epoch": 2.94,
"learning_rate": 3.5299675102599185e-05,
"loss": 3.8066,
"step": 172000
},
{
"epoch": 2.95,
"learning_rate": 3.525692544459645e-05,
"loss": 3.7513,
"step": 172500
},
{
"epoch": 2.96,
"learning_rate": 3.5214175786593715e-05,
"loss": 3.7826,
"step": 173000
},
{
"epoch": 2.97,
"learning_rate": 3.517142612859097e-05,
"loss": 3.8108,
"step": 173500
},
{
"epoch": 2.98,
"learning_rate": 3.512867647058823e-05,
"loss": 3.837,
"step": 174000
},
{
"epoch": 2.98,
"learning_rate": 3.508601231190151e-05,
"loss": 3.779,
"step": 174500
},
{
"epoch": 2.99,
"learning_rate": 3.504334815321478e-05,
"loss": 3.7988,
"step": 175000
},
{
"epoch": 3.0,
"learning_rate": 3.500059849521204e-05,
"loss": 3.7856,
"step": 175500
},
{
"epoch": 3.01,
"learning_rate": 3.495784883720931e-05,
"loss": 3.78,
"step": 176000
},
{
"epoch": 3.02,
"learning_rate": 3.491509917920657e-05,
"loss": 3.7456,
"step": 176500
},
{
"epoch": 3.03,
"learning_rate": 3.487234952120384e-05,
"loss": 3.7645,
"step": 177000
},
{
"epoch": 3.04,
"learning_rate": 3.48296853625171e-05,
"loss": 3.7486,
"step": 177500
},
{
"epoch": 3.04,
"learning_rate": 3.478693570451437e-05,
"loss": 3.7541,
"step": 178000
},
{
"epoch": 3.05,
"learning_rate": 3.474418604651163e-05,
"loss": 3.7828,
"step": 178500
},
{
"epoch": 3.06,
"learning_rate": 3.47014363885089e-05,
"loss": 3.7786,
"step": 179000
},
{
"epoch": 3.07,
"learning_rate": 3.465868673050616e-05,
"loss": 3.7776,
"step": 179500
},
{
"epoch": 3.08,
"learning_rate": 3.461593707250342e-05,
"loss": 3.783,
"step": 180000
},
{
"epoch": 3.09,
"learning_rate": 3.457318741450068e-05,
"loss": 3.7366,
"step": 180500
},
{
"epoch": 3.1,
"learning_rate": 3.453043775649795e-05,
"loss": 3.7755,
"step": 181000
},
{
"epoch": 3.1,
"learning_rate": 3.448768809849521e-05,
"loss": 3.7732,
"step": 181500
},
{
"epoch": 3.11,
"learning_rate": 3.444493844049248e-05,
"loss": 3.7646,
"step": 182000
},
{
"epoch": 3.12,
"learning_rate": 3.4402274281805744e-05,
"loss": 3.7425,
"step": 182500
},
{
"epoch": 3.13,
"learning_rate": 3.435952462380301e-05,
"loss": 3.7511,
"step": 183000
},
{
"epoch": 3.14,
"learning_rate": 3.4316774965800274e-05,
"loss": 3.7502,
"step": 183500
},
{
"epoch": 3.15,
"learning_rate": 3.427411080711354e-05,
"loss": 3.7457,
"step": 184000
},
{
"epoch": 3.15,
"learning_rate": 3.4231361149110805e-05,
"loss": 3.7421,
"step": 184500
},
{
"epoch": 3.16,
"learning_rate": 3.418861149110807e-05,
"loss": 3.7374,
"step": 185000
},
{
"epoch": 3.17,
"learning_rate": 3.4145861833105335e-05,
"loss": 3.7469,
"step": 185500
},
{
"epoch": 3.18,
"learning_rate": 3.41031121751026e-05,
"loss": 3.7475,
"step": 186000
},
{
"epoch": 3.19,
"learning_rate": 3.4060362517099865e-05,
"loss": 3.76,
"step": 186500
},
{
"epoch": 3.2,
"learning_rate": 3.401761285909713e-05,
"loss": 3.7236,
"step": 187000
},
{
"epoch": 3.21,
"learning_rate": 3.3974863201094395e-05,
"loss": 3.7304,
"step": 187500
},
{
"epoch": 3.21,
"learning_rate": 3.3932113543091656e-05,
"loss": 3.7536,
"step": 188000
},
{
"epoch": 3.22,
"learning_rate": 3.388936388508892e-05,
"loss": 3.7591,
"step": 188500
},
{
"epoch": 3.23,
"learning_rate": 3.3846699726402194e-05,
"loss": 3.7331,
"step": 189000
},
{
"epoch": 3.24,
"learning_rate": 3.3803950068399456e-05,
"loss": 3.7429,
"step": 189500
},
{
"epoch": 3.25,
"learning_rate": 3.376120041039672e-05,
"loss": 3.7781,
"step": 190000
},
{
"epoch": 3.26,
"learning_rate": 3.371845075239398e-05,
"loss": 3.6779,
"step": 190500
},
{
"epoch": 3.27,
"learning_rate": 3.367570109439125e-05,
"loss": 3.7198,
"step": 191000
},
{
"epoch": 3.27,
"learning_rate": 3.363295143638851e-05,
"loss": 3.7508,
"step": 191500
},
{
"epoch": 3.28,
"learning_rate": 3.3590287277701785e-05,
"loss": 3.7314,
"step": 192000
},
{
"epoch": 3.29,
"learning_rate": 3.354753761969904e-05,
"loss": 3.7358,
"step": 192500
},
{
"epoch": 3.3,
"learning_rate": 3.350478796169631e-05,
"loss": 3.7569,
"step": 193000
},
{
"epoch": 3.31,
"learning_rate": 3.346203830369357e-05,
"loss": 3.7113,
"step": 193500
},
{
"epoch": 3.32,
"learning_rate": 3.341928864569084e-05,
"loss": 3.7133,
"step": 194000
},
{
"epoch": 3.33,
"learning_rate": 3.33765389876881e-05,
"loss": 3.703,
"step": 194500
},
{
"epoch": 3.33,
"learning_rate": 3.333387482900137e-05,
"loss": 3.7268,
"step": 195000
},
{
"epoch": 3.34,
"learning_rate": 3.329112517099863e-05,
"loss": 3.7187,
"step": 195500
},
{
"epoch": 3.35,
"learning_rate": 3.32483755129959e-05,
"loss": 3.7187,
"step": 196000
},
{
"epoch": 3.36,
"learning_rate": 3.320562585499316e-05,
"loss": 3.7223,
"step": 196500
},
{
"epoch": 3.37,
"learning_rate": 3.316287619699043e-05,
"loss": 3.698,
"step": 197000
},
{
"epoch": 3.38,
"learning_rate": 3.312012653898769e-05,
"loss": 3.6941,
"step": 197500
},
{
"epoch": 3.39,
"learning_rate": 3.307746238030096e-05,
"loss": 3.6822,
"step": 198000
},
{
"epoch": 3.39,
"learning_rate": 3.303471272229822e-05,
"loss": 3.7184,
"step": 198500
},
{
"epoch": 3.4,
"learning_rate": 3.299196306429549e-05,
"loss": 3.6901,
"step": 199000
},
{
"epoch": 3.41,
"learning_rate": 3.294921340629275e-05,
"loss": 3.669,
"step": 199500
},
{
"epoch": 3.42,
"learning_rate": 3.290646374829002e-05,
"loss": 3.7188,
"step": 200000
},
{
"epoch": 3.43,
"learning_rate": 3.286371409028728e-05,
"loss": 3.7266,
"step": 200500
},
{
"epoch": 3.44,
"learning_rate": 3.282096443228454e-05,
"loss": 3.6421,
"step": 201000
},
{
"epoch": 3.45,
"learning_rate": 3.2778214774281805e-05,
"loss": 3.7126,
"step": 201500
},
{
"epoch": 3.45,
"learning_rate": 3.273555061559508e-05,
"loss": 3.7324,
"step": 202000
},
{
"epoch": 3.46,
"learning_rate": 3.269280095759234e-05,
"loss": 3.7318,
"step": 202500
},
{
"epoch": 3.47,
"learning_rate": 3.2650051299589604e-05,
"loss": 3.7125,
"step": 203000
},
{
"epoch": 3.48,
"learning_rate": 3.2607301641586866e-05,
"loss": 3.7445,
"step": 203500
},
{
"epoch": 3.49,
"learning_rate": 3.256463748290014e-05,
"loss": 3.6906,
"step": 204000
},
{
"epoch": 3.5,
"learning_rate": 3.2521973324213405e-05,
"loss": 3.7217,
"step": 204500
},
{
"epoch": 3.51,
"learning_rate": 3.247922366621067e-05,
"loss": 3.7128,
"step": 205000
},
{
"epoch": 3.51,
"learning_rate": 3.2436474008207935e-05,
"loss": 3.66,
"step": 205500
},
{
"epoch": 3.52,
"learning_rate": 3.23937243502052e-05,
"loss": 3.6891,
"step": 206000
},
{
"epoch": 3.53,
"learning_rate": 3.2351060191518466e-05,
"loss": 3.6576,
"step": 206500
},
{
"epoch": 3.54,
"learning_rate": 3.2308310533515734e-05,
"loss": 3.6565,
"step": 207000
},
{
"epoch": 3.55,
"learning_rate": 3.2265560875512996e-05,
"loss": 3.6723,
"step": 207500
},
{
"epoch": 3.56,
"learning_rate": 3.2222811217510264e-05,
"loss": 3.6781,
"step": 208000
},
{
"epoch": 3.57,
"learning_rate": 3.2180061559507526e-05,
"loss": 3.6858,
"step": 208500
},
{
"epoch": 3.57,
"learning_rate": 3.2137397400820795e-05,
"loss": 3.6536,
"step": 209000
},
{
"epoch": 3.58,
"learning_rate": 3.2094733242134065e-05,
"loss": 3.6654,
"step": 209500
},
{
"epoch": 3.59,
"learning_rate": 3.2051983584131326e-05,
"loss": 3.6439,
"step": 210000
},
{
"epoch": 3.6,
"learning_rate": 3.200923392612859e-05,
"loss": 3.6517,
"step": 210500
},
{
"epoch": 3.61,
"learning_rate": 3.1966484268125856e-05,
"loss": 3.6792,
"step": 211000
},
{
"epoch": 3.62,
"learning_rate": 3.192373461012312e-05,
"loss": 3.6649,
"step": 211500
},
{
"epoch": 3.63,
"learning_rate": 3.1880984952120386e-05,
"loss": 3.6359,
"step": 212000
},
{
"epoch": 3.63,
"learning_rate": 3.183823529411765e-05,
"loss": 3.6567,
"step": 212500
},
{
"epoch": 3.64,
"learning_rate": 3.1795485636114916e-05,
"loss": 3.6573,
"step": 213000
},
{
"epoch": 3.65,
"learning_rate": 3.175273597811218e-05,
"loss": 3.6925,
"step": 213500
},
{
"epoch": 3.66,
"learning_rate": 3.170998632010944e-05,
"loss": 3.6791,
"step": 214000
},
{
"epoch": 3.67,
"learning_rate": 3.16672366621067e-05,
"loss": 3.6442,
"step": 214500
},
{
"epoch": 3.68,
"learning_rate": 3.162465800273598e-05,
"loss": 3.7027,
"step": 215000
},
{
"epoch": 3.69,
"learning_rate": 3.158190834473324e-05,
"loss": 3.6328,
"step": 215500
},
{
"epoch": 3.69,
"learning_rate": 3.153915868673051e-05,
"loss": 3.6454,
"step": 216000
},
{
"epoch": 3.7,
"learning_rate": 3.149649452804378e-05,
"loss": 3.652,
"step": 216500
},
{
"epoch": 3.71,
"learning_rate": 3.145374487004104e-05,
"loss": 3.65,
"step": 217000
},
{
"epoch": 3.72,
"learning_rate": 3.141099521203831e-05,
"loss": 3.6305,
"step": 217500
},
{
"epoch": 3.73,
"learning_rate": 3.136824555403557e-05,
"loss": 3.6276,
"step": 218000
},
{
"epoch": 3.74,
"learning_rate": 3.132549589603284e-05,
"loss": 3.6349,
"step": 218500
},
{
"epoch": 3.74,
"learning_rate": 3.12827462380301e-05,
"loss": 3.6308,
"step": 219000
},
{
"epoch": 3.75,
"learning_rate": 3.123999658002736e-05,
"loss": 3.6472,
"step": 219500
},
{
"epoch": 3.76,
"learning_rate": 3.119724692202462e-05,
"loss": 3.6328,
"step": 220000
},
{
"epoch": 3.77,
"learning_rate": 3.1154497264021884e-05,
"loss": 3.639,
"step": 220500
},
{
"epoch": 3.78,
"learning_rate": 3.111174760601915e-05,
"loss": 3.6443,
"step": 221000
},
{
"epoch": 3.79,
"learning_rate": 3.1068997948016414e-05,
"loss": 3.6169,
"step": 221500
},
{
"epoch": 3.8,
"learning_rate": 3.102624829001368e-05,
"loss": 3.5953,
"step": 222000
},
{
"epoch": 3.8,
"learning_rate": 3.0983498632010944e-05,
"loss": 3.6557,
"step": 222500
},
{
"epoch": 3.81,
"learning_rate": 3.094074897400821e-05,
"loss": 3.6413,
"step": 223000
},
{
"epoch": 3.82,
"learning_rate": 3.0897999316005474e-05,
"loss": 3.6051,
"step": 223500
},
{
"epoch": 3.83,
"learning_rate": 3.0855249658002735e-05,
"loss": 3.6098,
"step": 224000
},
{
"epoch": 3.84,
"learning_rate": 3.08125e-05,
"loss": 3.6039,
"step": 224500
},
{
"epoch": 3.85,
"learning_rate": 3.076983584131327e-05,
"loss": 3.6379,
"step": 225000
},
{
"epoch": 3.86,
"learning_rate": 3.072717168262654e-05,
"loss": 3.6024,
"step": 225500
},
{
"epoch": 3.86,
"learning_rate": 3.0684422024623804e-05,
"loss": 3.6483,
"step": 226000
},
{
"epoch": 3.87,
"learning_rate": 3.0641672366621066e-05,
"loss": 3.6347,
"step": 226500
},
{
"epoch": 3.88,
"learning_rate": 3.0598922708618334e-05,
"loss": 3.6107,
"step": 227000
},
{
"epoch": 3.89,
"learning_rate": 3.0556173050615596e-05,
"loss": 3.584,
"step": 227500
},
{
"epoch": 3.9,
"learning_rate": 3.051342339261286e-05,
"loss": 3.6444,
"step": 228000
},
{
"epoch": 3.91,
"learning_rate": 3.0470844733242138e-05,
"loss": 3.5928,
"step": 228500
},
{
"epoch": 3.92,
"learning_rate": 3.04280950752394e-05,
"loss": 3.6176,
"step": 229000
},
{
"epoch": 3.92,
"learning_rate": 3.0385345417236665e-05,
"loss": 3.6444,
"step": 229500
},
{
"epoch": 3.93,
"learning_rate": 3.0342595759233926e-05,
"loss": 3.6084,
"step": 230000
},
{
"epoch": 3.94,
"learning_rate": 3.0299846101231195e-05,
"loss": 3.6357,
"step": 230500
},
{
"epoch": 3.95,
"learning_rate": 3.0257096443228456e-05,
"loss": 3.6135,
"step": 231000
},
{
"epoch": 3.96,
"learning_rate": 3.0214432284541726e-05,
"loss": 3.6289,
"step": 231500
},
{
"epoch": 3.97,
"learning_rate": 3.0171682626538987e-05,
"loss": 3.5722,
"step": 232000
},
{
"epoch": 3.98,
"learning_rate": 3.0128932968536256e-05,
"loss": 3.5999,
"step": 232500
},
{
"epoch": 3.98,
"learning_rate": 3.0086183310533517e-05,
"loss": 3.6273,
"step": 233000
},
{
"epoch": 3.99,
"learning_rate": 3.0043433652530782e-05,
"loss": 3.6238,
"step": 233500
},
{
"epoch": 4.0,
"learning_rate": 3.0000683994528044e-05,
"loss": 3.6198,
"step": 234000
},
{
"epoch": 4.01,
"learning_rate": 2.9957934336525312e-05,
"loss": 3.5947,
"step": 234500
},
{
"epoch": 4.02,
"learning_rate": 2.9915184678522574e-05,
"loss": 3.5988,
"step": 235000
},
{
"epoch": 4.03,
"learning_rate": 2.9872520519835846e-05,
"loss": 3.6401,
"step": 235500
},
{
"epoch": 4.04,
"learning_rate": 2.9829770861833105e-05,
"loss": 3.5982,
"step": 236000
},
{
"epoch": 4.04,
"learning_rate": 2.9787021203830373e-05,
"loss": 3.6065,
"step": 236500
},
{
"epoch": 4.05,
"learning_rate": 2.9744271545827635e-05,
"loss": 3.5934,
"step": 237000
},
{
"epoch": 4.06,
"learning_rate": 2.9701521887824903e-05,
"loss": 3.6165,
"step": 237500
},
{
"epoch": 4.07,
"learning_rate": 2.965877222982216e-05,
"loss": 3.6021,
"step": 238000
},
{
"epoch": 4.08,
"learning_rate": 2.9616108071135434e-05,
"loss": 3.5716,
"step": 238500
},
{
"epoch": 4.09,
"learning_rate": 2.9573358413132696e-05,
"loss": 3.586,
"step": 239000
},
{
"epoch": 4.1,
"learning_rate": 2.9530608755129964e-05,
"loss": 3.5593,
"step": 239500
},
{
"epoch": 4.1,
"learning_rate": 2.9487859097127226e-05,
"loss": 3.5961,
"step": 240000
},
{
"epoch": 4.11,
"learning_rate": 2.944510943912449e-05,
"loss": 3.6181,
"step": 240500
},
{
"epoch": 4.12,
"learning_rate": 2.9402359781121752e-05,
"loss": 3.5566,
"step": 241000
},
{
"epoch": 4.13,
"learning_rate": 2.935961012311902e-05,
"loss": 3.6061,
"step": 241500
},
{
"epoch": 4.14,
"learning_rate": 2.9316860465116282e-05,
"loss": 3.5411,
"step": 242000
},
{
"epoch": 4.15,
"learning_rate": 2.9274281805745556e-05,
"loss": 3.611,
"step": 242500
},
{
"epoch": 4.16,
"learning_rate": 2.9231532147742818e-05,
"loss": 3.5573,
"step": 243000
},
{
"epoch": 4.16,
"learning_rate": 2.9188782489740086e-05,
"loss": 3.5306,
"step": 243500
},
{
"epoch": 4.17,
"learning_rate": 2.9146032831737348e-05,
"loss": 3.5944,
"step": 244000
},
{
"epoch": 4.18,
"learning_rate": 2.9103283173734613e-05,
"loss": 3.5923,
"step": 244500
},
{
"epoch": 4.19,
"learning_rate": 2.9060533515731874e-05,
"loss": 3.5753,
"step": 245000
},
{
"epoch": 4.2,
"learning_rate": 2.9017783857729142e-05,
"loss": 3.5432,
"step": 245500
},
{
"epoch": 4.21,
"learning_rate": 2.8975034199726404e-05,
"loss": 3.5776,
"step": 246000
},
{
"epoch": 4.22,
"learning_rate": 2.8932370041039674e-05,
"loss": 3.5678,
"step": 246500
},
{
"epoch": 4.22,
"learning_rate": 2.8889620383036935e-05,
"loss": 3.6039,
"step": 247000
},
{
"epoch": 4.23,
"learning_rate": 2.8846870725034204e-05,
"loss": 3.5598,
"step": 247500
},
{
"epoch": 4.24,
"learning_rate": 2.8804121067031465e-05,
"loss": 3.5603,
"step": 248000
},
{
"epoch": 4.25,
"learning_rate": 2.876137140902873e-05,
"loss": 3.5818,
"step": 248500
},
{
"epoch": 4.26,
"learning_rate": 2.871862175102599e-05,
"loss": 3.5812,
"step": 249000
},
{
"epoch": 4.27,
"learning_rate": 2.867587209302326e-05,
"loss": 3.5672,
"step": 249500
},
{
"epoch": 4.27,
"learning_rate": 2.863312243502052e-05,
"loss": 3.5282,
"step": 250000
},
{
"epoch": 4.28,
"learning_rate": 2.8590372777017787e-05,
"loss": 3.5677,
"step": 250500
},
{
"epoch": 4.29,
"learning_rate": 2.8547623119015048e-05,
"loss": 3.5776,
"step": 251000
},
{
"epoch": 4.3,
"learning_rate": 2.8504873461012316e-05,
"loss": 3.5628,
"step": 251500
},
{
"epoch": 4.31,
"learning_rate": 2.8462123803009578e-05,
"loss": 3.5765,
"step": 252000
},
{
"epoch": 4.32,
"learning_rate": 2.8419374145006843e-05,
"loss": 3.5115,
"step": 252500
},
{
"epoch": 4.33,
"learning_rate": 2.8376624487004105e-05,
"loss": 3.5378,
"step": 253000
},
{
"epoch": 4.33,
"learning_rate": 2.8333960328317378e-05,
"loss": 3.5737,
"step": 253500
},
{
"epoch": 4.34,
"learning_rate": 2.8291296169630644e-05,
"loss": 3.5697,
"step": 254000
},
{
"epoch": 4.35,
"learning_rate": 2.824854651162791e-05,
"loss": 3.5255,
"step": 254500
},
{
"epoch": 4.36,
"learning_rate": 2.820579685362517e-05,
"loss": 3.5434,
"step": 255000
},
{
"epoch": 4.37,
"learning_rate": 2.816304719562244e-05,
"loss": 3.5507,
"step": 255500
},
{
"epoch": 4.38,
"learning_rate": 2.8120383036935705e-05,
"loss": 3.5848,
"step": 256000
},
{
"epoch": 4.39,
"learning_rate": 2.8077633378932973e-05,
"loss": 3.5669,
"step": 256500
},
{
"epoch": 4.39,
"learning_rate": 2.803488372093023e-05,
"loss": 3.5171,
"step": 257000
},
{
"epoch": 4.4,
"learning_rate": 2.79921340629275e-05,
"loss": 3.5614,
"step": 257500
},
{
"epoch": 4.41,
"learning_rate": 2.794938440492476e-05,
"loss": 3.5958,
"step": 258000
},
{
"epoch": 4.42,
"learning_rate": 2.790663474692203e-05,
"loss": 3.5079,
"step": 258500
},
{
"epoch": 4.43,
"learning_rate": 2.7863885088919288e-05,
"loss": 3.5361,
"step": 259000
},
{
"epoch": 4.44,
"learning_rate": 2.7821135430916556e-05,
"loss": 3.4883,
"step": 259500
},
{
"epoch": 4.45,
"learning_rate": 2.7778385772913818e-05,
"loss": 3.5404,
"step": 260000
},
{
"epoch": 4.45,
"learning_rate": 2.7735636114911086e-05,
"loss": 3.5473,
"step": 260500
},
{
"epoch": 4.46,
"learning_rate": 2.7692886456908344e-05,
"loss": 3.5328,
"step": 261000
},
{
"epoch": 4.47,
"learning_rate": 2.765030779753762e-05,
"loss": 3.5019,
"step": 261500
},
{
"epoch": 4.48,
"learning_rate": 2.7607558139534883e-05,
"loss": 3.5022,
"step": 262000
},
{
"epoch": 4.49,
"learning_rate": 2.756480848153215e-05,
"loss": 3.5488,
"step": 262500
},
{
"epoch": 4.5,
"learning_rate": 2.7522058823529413e-05,
"loss": 3.5467,
"step": 263000
},
{
"epoch": 4.51,
"learning_rate": 2.7479309165526678e-05,
"loss": 3.5505,
"step": 263500
},
{
"epoch": 4.51,
"learning_rate": 2.743655950752394e-05,
"loss": 3.5297,
"step": 264000
},
{
"epoch": 4.52,
"learning_rate": 2.7393809849521208e-05,
"loss": 3.5056,
"step": 264500
},
{
"epoch": 4.53,
"learning_rate": 2.735106019151847e-05,
"loss": 3.5086,
"step": 265000
},
{
"epoch": 4.54,
"learning_rate": 2.7308310533515735e-05,
"loss": 3.5174,
"step": 265500
},
{
"epoch": 4.55,
"learning_rate": 2.7265646374829e-05,
"loss": 3.5264,
"step": 266000
},
{
"epoch": 4.56,
"learning_rate": 2.7223067715458278e-05,
"loss": 3.5034,
"step": 266500
},
{
"epoch": 4.57,
"learning_rate": 2.718031805745554e-05,
"loss": 3.4838,
"step": 267000
},
{
"epoch": 4.57,
"learning_rate": 2.7137568399452805e-05,
"loss": 3.4903,
"step": 267500
},
{
"epoch": 4.58,
"learning_rate": 2.7094818741450066e-05,
"loss": 3.5118,
"step": 268000
},
{
"epoch": 4.59,
"learning_rate": 2.7052069083447335e-05,
"loss": 3.5055,
"step": 268500
},
{
"epoch": 4.6,
"learning_rate": 2.7009319425444596e-05,
"loss": 3.5211,
"step": 269000
},
{
"epoch": 4.61,
"learning_rate": 2.696656976744186e-05,
"loss": 3.5344,
"step": 269500
},
{
"epoch": 4.62,
"learning_rate": 2.6923820109439123e-05,
"loss": 3.514,
"step": 270000
},
{
"epoch": 4.63,
"learning_rate": 2.688107045143639e-05,
"loss": 3.473,
"step": 270500
},
{
"epoch": 4.63,
"learning_rate": 2.6838320793433653e-05,
"loss": 3.4917,
"step": 271000
},
{
"epoch": 4.64,
"learning_rate": 2.6795571135430918e-05,
"loss": 3.5179,
"step": 271500
},
{
"epoch": 4.65,
"learning_rate": 2.675282147742818e-05,
"loss": 3.5437,
"step": 272000
},
{
"epoch": 4.66,
"learning_rate": 2.6710157318741452e-05,
"loss": 3.5061,
"step": 272500
},
{
"epoch": 4.67,
"learning_rate": 2.6667407660738714e-05,
"loss": 3.4961,
"step": 273000
},
{
"epoch": 4.68,
"learning_rate": 2.6624743502051986e-05,
"loss": 3.5111,
"step": 273500
},
{
"epoch": 4.69,
"learning_rate": 2.6582079343365256e-05,
"loss": 3.4927,
"step": 274000
},
{
"epoch": 4.69,
"learning_rate": 2.6539329685362518e-05,
"loss": 3.4917,
"step": 274500
},
{
"epoch": 4.7,
"learning_rate": 2.649658002735978e-05,
"loss": 3.5134,
"step": 275000
},
{
"epoch": 4.71,
"learning_rate": 2.6453830369357048e-05,
"loss": 3.5344,
"step": 275500
},
{
"epoch": 4.72,
"learning_rate": 2.641108071135431e-05,
"loss": 3.5493,
"step": 276000
},
{
"epoch": 4.73,
"learning_rate": 2.6368331053351574e-05,
"loss": 3.4894,
"step": 276500
},
{
"epoch": 4.74,
"learning_rate": 2.6325581395348836e-05,
"loss": 3.4784,
"step": 277000
},
{
"epoch": 4.75,
"learning_rate": 2.6282831737346104e-05,
"loss": 3.4885,
"step": 277500
},
{
"epoch": 4.75,
"learning_rate": 2.6240082079343366e-05,
"loss": 3.5024,
"step": 278000
},
{
"epoch": 4.76,
"learning_rate": 2.6197417920656635e-05,
"loss": 3.5195,
"step": 278500
},
{
"epoch": 4.77,
"learning_rate": 2.6154668262653897e-05,
"loss": 3.4709,
"step": 279000
},
{
"epoch": 4.78,
"learning_rate": 2.6111918604651165e-05,
"loss": 3.4902,
"step": 279500
},
{
"epoch": 4.79,
"learning_rate": 2.6069168946648427e-05,
"loss": 3.4905,
"step": 280000
},
{
"epoch": 4.8,
"learning_rate": 2.602641928864569e-05,
"loss": 3.5166,
"step": 280500
},
{
"epoch": 4.81,
"learning_rate": 2.5983669630642953e-05,
"loss": 3.5104,
"step": 281000
},
{
"epoch": 4.81,
"learning_rate": 2.594091997264022e-05,
"loss": 3.495,
"step": 281500
},
{
"epoch": 4.82,
"learning_rate": 2.5898170314637483e-05,
"loss": 3.4946,
"step": 282000
},
{
"epoch": 4.83,
"learning_rate": 2.5855506155950753e-05,
"loss": 3.4997,
"step": 282500
},
{
"epoch": 4.84,
"learning_rate": 2.5812756497948014e-05,
"loss": 3.5034,
"step": 283000
},
{
"epoch": 4.85,
"learning_rate": 2.5770006839945283e-05,
"loss": 3.5261,
"step": 283500
},
{
"epoch": 4.86,
"learning_rate": 2.5727342681258555e-05,
"loss": 3.4806,
"step": 284000
},
{
"epoch": 4.86,
"learning_rate": 2.568467852257182e-05,
"loss": 3.4998,
"step": 284500
},
{
"epoch": 4.87,
"learning_rate": 2.5641928864569086e-05,
"loss": 3.5056,
"step": 285000
},
{
"epoch": 4.88,
"learning_rate": 2.5599179206566348e-05,
"loss": 3.4693,
"step": 285500
},
{
"epoch": 4.89,
"learning_rate": 2.5556429548563616e-05,
"loss": 3.4825,
"step": 286000
},
{
"epoch": 4.9,
"learning_rate": 2.5513679890560878e-05,
"loss": 3.4755,
"step": 286500
},
{
"epoch": 4.91,
"learning_rate": 2.5470930232558143e-05,
"loss": 3.4791,
"step": 287000
},
{
"epoch": 4.92,
"learning_rate": 2.5428180574555405e-05,
"loss": 3.4761,
"step": 287500
},
{
"epoch": 4.92,
"learning_rate": 2.5385430916552666e-05,
"loss": 3.4529,
"step": 288000
},
{
"epoch": 4.93,
"learning_rate": 2.5342681258549934e-05,
"loss": 3.5147,
"step": 288500
},
{
"epoch": 4.94,
"learning_rate": 2.5299931600547193e-05,
"loss": 3.4524,
"step": 289000
},
{
"epoch": 4.95,
"learning_rate": 2.525718194254446e-05,
"loss": 3.4681,
"step": 289500
},
{
"epoch": 4.96,
"learning_rate": 2.5214432284541723e-05,
"loss": 3.4865,
"step": 290000
},
{
"epoch": 4.97,
"learning_rate": 2.517168262653899e-05,
"loss": 3.5144,
"step": 290500
},
{
"epoch": 4.98,
"learning_rate": 2.512893296853625e-05,
"loss": 3.4747,
"step": 291000
},
{
"epoch": 4.98,
"learning_rate": 2.5086183310533518e-05,
"loss": 3.4518,
"step": 291500
},
{
"epoch": 4.99,
"learning_rate": 2.504343365253078e-05,
"loss": 3.5175,
"step": 292000
},
{
"epoch": 5.0,
"learning_rate": 2.5000683994528044e-05,
"loss": 3.4788,
"step": 292500
},
{
"epoch": 5.01,
"learning_rate": 2.495793433652531e-05,
"loss": 3.4503,
"step": 293000
},
{
"epoch": 5.02,
"learning_rate": 2.491527017783858e-05,
"loss": 3.4625,
"step": 293500
},
{
"epoch": 5.03,
"learning_rate": 2.4872520519835844e-05,
"loss": 3.4698,
"step": 294000
},
{
"epoch": 5.04,
"learning_rate": 2.482977086183311e-05,
"loss": 3.4249,
"step": 294500
},
{
"epoch": 5.04,
"learning_rate": 2.478702120383037e-05,
"loss": 3.4456,
"step": 295000
},
{
"epoch": 5.05,
"learning_rate": 2.4744271545827635e-05,
"loss": 3.4807,
"step": 295500
},
{
"epoch": 5.06,
"learning_rate": 2.4701607387140905e-05,
"loss": 3.4461,
"step": 296000
},
{
"epoch": 5.07,
"learning_rate": 2.465885772913817e-05,
"loss": 3.4426,
"step": 296500
},
{
"epoch": 5.08,
"learning_rate": 2.461619357045144e-05,
"loss": 3.4572,
"step": 297000
},
{
"epoch": 5.09,
"learning_rate": 2.4573529411764705e-05,
"loss": 3.4423,
"step": 297500
},
{
"epoch": 5.1,
"learning_rate": 2.453077975376197e-05,
"loss": 3.4409,
"step": 298000
},
{
"epoch": 5.1,
"learning_rate": 2.4488030095759235e-05,
"loss": 3.4681,
"step": 298500
},
{
"epoch": 5.11,
"learning_rate": 2.44452804377565e-05,
"loss": 3.4573,
"step": 299000
},
{
"epoch": 5.12,
"learning_rate": 2.4402616279069766e-05,
"loss": 3.4584,
"step": 299500
},
{
"epoch": 5.13,
"learning_rate": 2.435986662106703e-05,
"loss": 3.4655,
"step": 300000
},
{
"epoch": 5.14,
"learning_rate": 2.4317116963064296e-05,
"loss": 3.4626,
"step": 300500
},
{
"epoch": 5.15,
"learning_rate": 2.427436730506156e-05,
"loss": 3.475,
"step": 301000
},
{
"epoch": 5.16,
"learning_rate": 2.4231617647058823e-05,
"loss": 3.4343,
"step": 301500
},
{
"epoch": 5.16,
"learning_rate": 2.4188867989056088e-05,
"loss": 3.4596,
"step": 302000
},
{
"epoch": 5.17,
"learning_rate": 2.4146118331053353e-05,
"loss": 3.419,
"step": 302500
},
{
"epoch": 5.18,
"learning_rate": 2.4103368673050618e-05,
"loss": 3.4769,
"step": 303000
},
{
"epoch": 5.19,
"learning_rate": 2.4060704514363887e-05,
"loss": 3.4209,
"step": 303500
},
{
"epoch": 5.2,
"learning_rate": 2.401795485636115e-05,
"loss": 3.4429,
"step": 304000
},
{
"epoch": 5.21,
"learning_rate": 2.3975205198358414e-05,
"loss": 3.4464,
"step": 304500
},
{
"epoch": 5.22,
"learning_rate": 2.393245554035568e-05,
"loss": 3.4602,
"step": 305000
},
{
"epoch": 5.22,
"learning_rate": 2.3889705882352943e-05,
"loss": 3.465,
"step": 305500
},
{
"epoch": 5.23,
"learning_rate": 2.3846956224350205e-05,
"loss": 3.4138,
"step": 306000
},
{
"epoch": 5.24,
"learning_rate": 2.380420656634747e-05,
"loss": 3.4305,
"step": 306500
},
{
"epoch": 5.25,
"learning_rate": 2.3761456908344735e-05,
"loss": 3.4451,
"step": 307000
},
{
"epoch": 5.26,
"learning_rate": 2.3718707250341997e-05,
"loss": 3.4466,
"step": 307500
},
{
"epoch": 5.27,
"learning_rate": 2.367595759233926e-05,
"loss": 3.471,
"step": 308000
},
{
"epoch": 5.28,
"learning_rate": 2.363329343365253e-05,
"loss": 3.4632,
"step": 308500
},
{
"epoch": 5.28,
"learning_rate": 2.3590543775649796e-05,
"loss": 3.4416,
"step": 309000
},
{
"epoch": 5.29,
"learning_rate": 2.354779411764706e-05,
"loss": 3.4338,
"step": 309500
},
{
"epoch": 5.3,
"learning_rate": 2.3505129958960327e-05,
"loss": 3.391,
"step": 310000
},
{
"epoch": 5.31,
"learning_rate": 2.3462380300957592e-05,
"loss": 3.4299,
"step": 310500
},
{
"epoch": 5.32,
"learning_rate": 2.3419630642954857e-05,
"loss": 3.4384,
"step": 311000
},
{
"epoch": 5.33,
"learning_rate": 2.3376880984952122e-05,
"loss": 3.414,
"step": 311500
},
{
"epoch": 5.34,
"learning_rate": 2.3334131326949384e-05,
"loss": 3.378,
"step": 312000
},
{
"epoch": 5.34,
"learning_rate": 2.329138166894665e-05,
"loss": 3.4163,
"step": 312500
},
{
"epoch": 5.35,
"learning_rate": 2.3248632010943914e-05,
"loss": 3.4059,
"step": 313000
},
{
"epoch": 5.36,
"learning_rate": 2.3205967852257183e-05,
"loss": 3.4266,
"step": 313500
},
{
"epoch": 5.37,
"learning_rate": 2.3163218194254445e-05,
"loss": 3.4346,
"step": 314000
},
{
"epoch": 5.38,
"learning_rate": 2.312046853625171e-05,
"loss": 3.4367,
"step": 314500
},
{
"epoch": 5.39,
"learning_rate": 2.3077718878248975e-05,
"loss": 3.4176,
"step": 315000
},
{
"epoch": 5.4,
"learning_rate": 2.303496922024624e-05,
"loss": 3.4402,
"step": 315500
},
{
"epoch": 5.4,
"learning_rate": 2.29922195622435e-05,
"loss": 3.4373,
"step": 316000
},
{
"epoch": 5.41,
"learning_rate": 2.2949469904240766e-05,
"loss": 3.3886,
"step": 316500
},
{
"epoch": 5.42,
"learning_rate": 2.290672024623803e-05,
"loss": 3.3978,
"step": 317000
},
{
"epoch": 5.43,
"learning_rate": 2.2863970588235296e-05,
"loss": 3.407,
"step": 317500
},
{
"epoch": 5.44,
"learning_rate": 2.2821220930232558e-05,
"loss": 3.4255,
"step": 318000
},
{
"epoch": 5.45,
"learning_rate": 2.2778471272229823e-05,
"loss": 3.4385,
"step": 318500
},
{
"epoch": 5.45,
"learning_rate": 2.2735807113543092e-05,
"loss": 3.403,
"step": 319000
},
{
"epoch": 5.46,
"learning_rate": 2.269314295485636e-05,
"loss": 3.4244,
"step": 319500
},
{
"epoch": 5.47,
"learning_rate": 2.2650393296853627e-05,
"loss": 3.4243,
"step": 320000
},
{
"epoch": 5.48,
"learning_rate": 2.2607643638850888e-05,
"loss": 3.427,
"step": 320500
},
{
"epoch": 5.49,
"learning_rate": 2.2564893980848153e-05,
"loss": 3.4355,
"step": 321000
},
{
"epoch": 5.5,
"learning_rate": 2.2522144322845418e-05,
"loss": 3.4011,
"step": 321500
},
{
"epoch": 5.51,
"learning_rate": 2.2479394664842683e-05,
"loss": 3.3819,
"step": 322000
},
{
"epoch": 5.51,
"learning_rate": 2.2436645006839945e-05,
"loss": 3.4081,
"step": 322500
},
{
"epoch": 5.52,
"learning_rate": 2.239389534883721e-05,
"loss": 3.3752,
"step": 323000
},
{
"epoch": 5.53,
"learning_rate": 2.2351145690834475e-05,
"loss": 3.4235,
"step": 323500
},
{
"epoch": 5.54,
"learning_rate": 2.2308481532147744e-05,
"loss": 3.4504,
"step": 324000
},
{
"epoch": 5.55,
"learning_rate": 2.2265731874145006e-05,
"loss": 3.4153,
"step": 324500
},
{
"epoch": 5.56,
"learning_rate": 2.2223067715458275e-05,
"loss": 3.4146,
"step": 325000
},
{
"epoch": 5.57,
"learning_rate": 2.218031805745554e-05,
"loss": 3.3772,
"step": 325500
},
{
"epoch": 5.57,
"learning_rate": 2.2137568399452805e-05,
"loss": 3.4264,
"step": 326000
},
{
"epoch": 5.58,
"learning_rate": 2.209481874145007e-05,
"loss": 3.3695,
"step": 326500
},
{
"epoch": 5.59,
"learning_rate": 2.205206908344733e-05,
"loss": 3.3935,
"step": 327000
},
{
"epoch": 5.6,
"learning_rate": 2.2009319425444597e-05,
"loss": 3.4186,
"step": 327500
},
{
"epoch": 5.61,
"learning_rate": 2.196656976744186e-05,
"loss": 3.4184,
"step": 328000
},
{
"epoch": 5.62,
"learning_rate": 2.1923820109439126e-05,
"loss": 3.4167,
"step": 328500
},
{
"epoch": 5.63,
"learning_rate": 2.1881070451436388e-05,
"loss": 3.397,
"step": 329000
},
{
"epoch": 5.63,
"learning_rate": 2.1838320793433653e-05,
"loss": 3.3996,
"step": 329500
},
{
"epoch": 5.64,
"learning_rate": 2.1795571135430918e-05,
"loss": 3.3878,
"step": 330000
},
{
"epoch": 5.65,
"learning_rate": 2.1752821477428183e-05,
"loss": 3.43,
"step": 330500
},
{
"epoch": 5.66,
"learning_rate": 2.1710071819425445e-05,
"loss": 3.4278,
"step": 331000
},
{
"epoch": 5.67,
"learning_rate": 2.1667407660738714e-05,
"loss": 3.3853,
"step": 331500
},
{
"epoch": 5.68,
"learning_rate": 2.162465800273598e-05,
"loss": 3.4041,
"step": 332000
},
{
"epoch": 5.69,
"learning_rate": 2.1581908344733244e-05,
"loss": 3.4038,
"step": 332500
},
{
"epoch": 5.69,
"learning_rate": 2.153924418604651e-05,
"loss": 3.3846,
"step": 333000
},
{
"epoch": 5.7,
"learning_rate": 2.1496494528043775e-05,
"loss": 3.3813,
"step": 333500
},
{
"epoch": 5.71,
"learning_rate": 2.145374487004104e-05,
"loss": 3.3934,
"step": 334000
},
{
"epoch": 5.72,
"learning_rate": 2.1410995212038305e-05,
"loss": 3.3566,
"step": 334500
},
{
"epoch": 5.73,
"learning_rate": 2.1368331053351574e-05,
"loss": 3.3997,
"step": 335000
},
{
"epoch": 5.74,
"learning_rate": 2.1325581395348836e-05,
"loss": 3.4148,
"step": 335500
},
{
"epoch": 5.75,
"learning_rate": 2.12828317373461e-05,
"loss": 3.3976,
"step": 336000
},
{
"epoch": 5.75,
"learning_rate": 2.1240082079343366e-05,
"loss": 3.3818,
"step": 336500
},
{
"epoch": 5.76,
"learning_rate": 2.119733242134063e-05,
"loss": 3.3871,
"step": 337000
},
{
"epoch": 5.77,
"learning_rate": 2.1154582763337893e-05,
"loss": 3.3902,
"step": 337500
},
{
"epoch": 5.78,
"learning_rate": 2.1111833105335158e-05,
"loss": 3.4124,
"step": 338000
},
{
"epoch": 5.79,
"learning_rate": 2.1069083447332423e-05,
"loss": 3.3734,
"step": 338500
},
{
"epoch": 5.8,
"learning_rate": 2.1026333789329684e-05,
"loss": 3.3685,
"step": 339000
},
{
"epoch": 5.81,
"learning_rate": 2.0983669630642954e-05,
"loss": 3.3705,
"step": 339500
},
{
"epoch": 5.81,
"learning_rate": 2.094091997264022e-05,
"loss": 3.4106,
"step": 340000
},
{
"epoch": 5.82,
"learning_rate": 2.0898170314637484e-05,
"loss": 3.353,
"step": 340500
},
{
"epoch": 5.83,
"learning_rate": 2.085542065663475e-05,
"loss": 3.3443,
"step": 341000
},
{
"epoch": 5.84,
"learning_rate": 2.081267099863201e-05,
"loss": 3.3696,
"step": 341500
},
{
"epoch": 5.85,
"learning_rate": 2.0769921340629275e-05,
"loss": 3.3473,
"step": 342000
},
{
"epoch": 5.86,
"learning_rate": 2.0727257181942545e-05,
"loss": 3.3947,
"step": 342500
},
{
"epoch": 5.87,
"learning_rate": 2.068450752393981e-05,
"loss": 3.3491,
"step": 343000
},
{
"epoch": 5.87,
"learning_rate": 2.064175786593707e-05,
"loss": 3.3899,
"step": 343500
},
{
"epoch": 5.88,
"learning_rate": 2.0599093707250344e-05,
"loss": 3.3883,
"step": 344000
},
{
"epoch": 5.89,
"learning_rate": 2.055634404924761e-05,
"loss": 3.3623,
"step": 344500
},
{
"epoch": 5.9,
"learning_rate": 2.051359439124487e-05,
"loss": 3.3721,
"step": 345000
},
{
"epoch": 5.91,
"learning_rate": 2.0470844733242135e-05,
"loss": 3.3933,
"step": 345500
},
{
"epoch": 5.92,
"learning_rate": 2.0428095075239397e-05,
"loss": 3.3948,
"step": 346000
},
{
"epoch": 5.93,
"learning_rate": 2.0385345417236662e-05,
"loss": 3.3827,
"step": 346500
},
{
"epoch": 5.93,
"learning_rate": 2.0342681258549935e-05,
"loss": 3.367,
"step": 347000
},
{
"epoch": 5.94,
"learning_rate": 2.0299931600547197e-05,
"loss": 3.3329,
"step": 347500
},
{
"epoch": 5.95,
"learning_rate": 2.025718194254446e-05,
"loss": 3.3715,
"step": 348000
},
{
"epoch": 5.96,
"learning_rate": 2.0214432284541723e-05,
"loss": 3.356,
"step": 348500
},
{
"epoch": 5.97,
"learning_rate": 2.0171682626538988e-05,
"loss": 3.3729,
"step": 349000
},
{
"epoch": 5.98,
"learning_rate": 2.0128932968536253e-05,
"loss": 3.4025,
"step": 349500
},
{
"epoch": 5.98,
"learning_rate": 2.0086268809849522e-05,
"loss": 3.3937,
"step": 350000
},
{
"epoch": 5.99,
"learning_rate": 2.0043519151846787e-05,
"loss": 3.3799,
"step": 350500
},
{
"epoch": 6.0,
"learning_rate": 2.0000854993160057e-05,
"loss": 3.3702,
"step": 351000
},
{
"epoch": 6.01,
"learning_rate": 1.9958105335157322e-05,
"loss": 3.3405,
"step": 351500
},
{
"epoch": 6.02,
"learning_rate": 1.9915441176470588e-05,
"loss": 3.3531,
"step": 352000
},
{
"epoch": 6.03,
"learning_rate": 1.9872691518467853e-05,
"loss": 3.3501,
"step": 352500
},
{
"epoch": 6.04,
"learning_rate": 1.9829941860465118e-05,
"loss": 3.3631,
"step": 353000
},
{
"epoch": 6.04,
"learning_rate": 1.9787192202462383e-05,
"loss": 3.3329,
"step": 353500
},
{
"epoch": 6.05,
"learning_rate": 1.9744442544459645e-05,
"loss": 3.3577,
"step": 354000
},
{
"epoch": 6.06,
"learning_rate": 1.970169288645691e-05,
"loss": 3.3682,
"step": 354500
},
{
"epoch": 6.07,
"learning_rate": 1.9658943228454174e-05,
"loss": 3.3475,
"step": 355000
},
{
"epoch": 6.08,
"learning_rate": 1.961619357045144e-05,
"loss": 3.3654,
"step": 355500
},
{
"epoch": 6.09,
"learning_rate": 1.95734439124487e-05,
"loss": 3.3688,
"step": 356000
},
{
"epoch": 6.1,
"learning_rate": 1.9530694254445966e-05,
"loss": 3.3577,
"step": 356500
},
{
"epoch": 6.1,
"learning_rate": 1.948794459644323e-05,
"loss": 3.3702,
"step": 357000
},
{
"epoch": 6.11,
"learning_rate": 1.9445194938440496e-05,
"loss": 3.3792,
"step": 357500
},
{
"epoch": 6.12,
"learning_rate": 1.9402445280437758e-05,
"loss": 3.3318,
"step": 358000
},
{
"epoch": 6.13,
"learning_rate": 1.935969562243502e-05,
"loss": 3.3837,
"step": 358500
},
{
"epoch": 6.14,
"learning_rate": 1.9316945964432284e-05,
"loss": 3.3358,
"step": 359000
},
{
"epoch": 6.15,
"learning_rate": 1.927419630642955e-05,
"loss": 3.3344,
"step": 359500
},
{
"epoch": 6.16,
"learning_rate": 1.9231446648426814e-05,
"loss": 3.3393,
"step": 360000
},
{
"epoch": 6.16,
"learning_rate": 1.9188696990424076e-05,
"loss": 3.3408,
"step": 360500
},
{
"epoch": 6.17,
"learning_rate": 1.914603283173735e-05,
"loss": 3.3467,
"step": 361000
},
{
"epoch": 6.18,
"learning_rate": 1.9103368673050618e-05,
"loss": 3.3644,
"step": 361500
},
{
"epoch": 6.19,
"learning_rate": 1.9060619015047883e-05,
"loss": 3.3719,
"step": 362000
},
{
"epoch": 6.2,
"learning_rate": 1.9017869357045144e-05,
"loss": 3.3658,
"step": 362500
},
{
"epoch": 6.21,
"learning_rate": 1.897511969904241e-05,
"loss": 3.313,
"step": 363000
},
{
"epoch": 6.22,
"learning_rate": 1.893245554035568e-05,
"loss": 3.3477,
"step": 363500
},
{
"epoch": 6.22,
"learning_rate": 1.8889705882352944e-05,
"loss": 3.3276,
"step": 364000
},
{
"epoch": 6.23,
"learning_rate": 1.884704172366621e-05,
"loss": 3.32,
"step": 364500
},
{
"epoch": 6.24,
"learning_rate": 1.8804292065663475e-05,
"loss": 3.3462,
"step": 365000
},
{
"epoch": 6.25,
"learning_rate": 1.876154240766074e-05,
"loss": 3.3172,
"step": 365500
},
{
"epoch": 6.26,
"learning_rate": 1.8718792749658005e-05,
"loss": 3.3422,
"step": 366000
},
{
"epoch": 6.27,
"learning_rate": 1.8676043091655267e-05,
"loss": 3.3399,
"step": 366500
},
{
"epoch": 6.28,
"learning_rate": 1.863329343365253e-05,
"loss": 3.3611,
"step": 367000
},
{
"epoch": 6.28,
"learning_rate": 1.8590543775649796e-05,
"loss": 3.3353,
"step": 367500
},
{
"epoch": 6.29,
"learning_rate": 1.8547879616963066e-05,
"loss": 3.3103,
"step": 368000
},
{
"epoch": 6.3,
"learning_rate": 1.850512995896033e-05,
"loss": 3.3291,
"step": 368500
},
{
"epoch": 6.31,
"learning_rate": 1.8462380300957593e-05,
"loss": 3.3194,
"step": 369000
},
{
"epoch": 6.32,
"learning_rate": 1.8419630642954857e-05,
"loss": 3.3426,
"step": 369500
},
{
"epoch": 6.33,
"learning_rate": 1.8376880984952122e-05,
"loss": 3.3213,
"step": 370000
},
{
"epoch": 6.34,
"learning_rate": 1.8334131326949387e-05,
"loss": 3.3214,
"step": 370500
},
{
"epoch": 6.34,
"learning_rate": 1.829138166894665e-05,
"loss": 3.3211,
"step": 371000
},
{
"epoch": 6.35,
"learning_rate": 1.8248632010943914e-05,
"loss": 3.3533,
"step": 371500
},
{
"epoch": 6.36,
"learning_rate": 1.820588235294118e-05,
"loss": 3.3376,
"step": 372000
},
{
"epoch": 6.37,
"learning_rate": 1.8163132694938444e-05,
"loss": 3.3151,
"step": 372500
},
{
"epoch": 6.38,
"learning_rate": 1.812046853625171e-05,
"loss": 3.3452,
"step": 373000
},
{
"epoch": 6.39,
"learning_rate": 1.8077718878248975e-05,
"loss": 3.2928,
"step": 373500
},
{
"epoch": 6.4,
"learning_rate": 1.803496922024624e-05,
"loss": 3.3321,
"step": 374000
},
{
"epoch": 6.4,
"learning_rate": 1.7992219562243505e-05,
"loss": 3.3356,
"step": 374500
},
{
"epoch": 6.41,
"learning_rate": 1.7949469904240767e-05,
"loss": 3.3358,
"step": 375000
},
{
"epoch": 6.42,
"learning_rate": 1.790672024623803e-05,
"loss": 3.3385,
"step": 375500
},
{
"epoch": 6.43,
"learning_rate": 1.7863970588235296e-05,
"loss": 3.3443,
"step": 376000
},
{
"epoch": 6.44,
"learning_rate": 1.7821306429548566e-05,
"loss": 3.3189,
"step": 376500
},
{
"epoch": 6.45,
"learning_rate": 1.7778556771545828e-05,
"loss": 3.3193,
"step": 377000
},
{
"epoch": 6.46,
"learning_rate": 1.7735807113543092e-05,
"loss": 3.3541,
"step": 377500
},
{
"epoch": 6.46,
"learning_rate": 1.7693057455540357e-05,
"loss": 3.3244,
"step": 378000
},
{
"epoch": 6.47,
"learning_rate": 1.7650307797537622e-05,
"loss": 3.3357,
"step": 378500
},
{
"epoch": 6.48,
"learning_rate": 1.7607643638850892e-05,
"loss": 3.3028,
"step": 379000
},
{
"epoch": 6.49,
"learning_rate": 1.7564893980848154e-05,
"loss": 3.2885,
"step": 379500
},
{
"epoch": 6.5,
"learning_rate": 1.752214432284542e-05,
"loss": 3.3094,
"step": 380000
},
{
"epoch": 6.51,
"learning_rate": 1.7479394664842683e-05,
"loss": 3.3171,
"step": 380500
},
{
"epoch": 6.52,
"learning_rate": 1.743664500683995e-05,
"loss": 3.3147,
"step": 381000
},
{
"epoch": 6.52,
"learning_rate": 1.739389534883721e-05,
"loss": 3.313,
"step": 381500
},
{
"epoch": 6.53,
"learning_rate": 1.7351145690834475e-05,
"loss": 3.3385,
"step": 382000
},
{
"epoch": 6.54,
"learning_rate": 1.730839603283174e-05,
"loss": 3.3173,
"step": 382500
},
{
"epoch": 6.55,
"learning_rate": 1.726573187414501e-05,
"loss": 3.3167,
"step": 383000
},
{
"epoch": 6.56,
"learning_rate": 1.7223067715458276e-05,
"loss": 3.2947,
"step": 383500
},
{
"epoch": 6.57,
"learning_rate": 1.718031805745554e-05,
"loss": 3.3185,
"step": 384000
},
{
"epoch": 6.57,
"learning_rate": 1.7137568399452805e-05,
"loss": 3.3438,
"step": 384500
},
{
"epoch": 6.58,
"learning_rate": 1.709481874145007e-05,
"loss": 3.3167,
"step": 385000
},
{
"epoch": 6.59,
"learning_rate": 1.7052069083447332e-05,
"loss": 3.3086,
"step": 385500
},
{
"epoch": 6.6,
"learning_rate": 1.7009319425444597e-05,
"loss": 3.3146,
"step": 386000
},
{
"epoch": 6.61,
"learning_rate": 1.6966569767441862e-05,
"loss": 3.3034,
"step": 386500
},
{
"epoch": 6.62,
"learning_rate": 1.692390560875513e-05,
"loss": 3.3163,
"step": 387000
},
{
"epoch": 6.63,
"learning_rate": 1.6881155950752396e-05,
"loss": 3.3306,
"step": 387500
},
{
"epoch": 6.63,
"learning_rate": 1.6838406292749658e-05,
"loss": 3.2904,
"step": 388000
},
{
"epoch": 6.64,
"learning_rate": 1.6795656634746923e-05,
"loss": 3.33,
"step": 388500
},
{
"epoch": 6.65,
"learning_rate": 1.6752906976744188e-05,
"loss": 3.2945,
"step": 389000
},
{
"epoch": 6.66,
"learning_rate": 1.671015731874145e-05,
"loss": 3.3257,
"step": 389500
},
{
"epoch": 6.67,
"learning_rate": 1.6667407660738714e-05,
"loss": 3.3196,
"step": 390000
},
{
"epoch": 6.68,
"learning_rate": 1.662465800273598e-05,
"loss": 3.3317,
"step": 390500
},
{
"epoch": 6.69,
"learning_rate": 1.6581908344733244e-05,
"loss": 3.2978,
"step": 391000
},
{
"epoch": 6.69,
"learning_rate": 1.6539158686730506e-05,
"loss": 3.3037,
"step": 391500
},
{
"epoch": 6.7,
"learning_rate": 1.6496494528043776e-05,
"loss": 3.2703,
"step": 392000
},
{
"epoch": 6.71,
"learning_rate": 1.645374487004104e-05,
"loss": 3.2918,
"step": 392500
},
{
"epoch": 6.72,
"learning_rate": 1.6410995212038305e-05,
"loss": 3.3257,
"step": 393000
},
{
"epoch": 6.73,
"learning_rate": 1.636824555403557e-05,
"loss": 3.2756,
"step": 393500
},
{
"epoch": 6.74,
"learning_rate": 1.6325581395348837e-05,
"loss": 3.3043,
"step": 394000
},
{
"epoch": 6.75,
"learning_rate": 1.62828317373461e-05,
"loss": 3.3169,
"step": 394500
},
{
"epoch": 6.75,
"learning_rate": 1.6240082079343366e-05,
"loss": 3.2976,
"step": 395000
},
{
"epoch": 6.76,
"learning_rate": 1.619733242134063e-05,
"loss": 3.2912,
"step": 395500
},
{
"epoch": 6.77,
"learning_rate": 1.6154582763337893e-05,
"loss": 3.2524,
"step": 396000
},
{
"epoch": 6.78,
"learning_rate": 1.6111918604651163e-05,
"loss": 3.293,
"step": 396500
},
{
"epoch": 6.79,
"learning_rate": 1.6069168946648427e-05,
"loss": 3.2664,
"step": 397000
},
{
"epoch": 6.8,
"learning_rate": 1.6026419288645692e-05,
"loss": 3.2705,
"step": 397500
},
{
"epoch": 6.81,
"learning_rate": 1.5983669630642954e-05,
"loss": 3.3202,
"step": 398000
},
{
"epoch": 6.81,
"learning_rate": 1.594091997264022e-05,
"loss": 3.3042,
"step": 398500
},
{
"epoch": 6.82,
"learning_rate": 1.589825581395349e-05,
"loss": 3.2975,
"step": 399000
},
{
"epoch": 6.83,
"learning_rate": 1.5855506155950753e-05,
"loss": 3.305,
"step": 399500
},
{
"epoch": 6.84,
"learning_rate": 1.581275649794802e-05,
"loss": 3.2572,
"step": 400000
},
{
"epoch": 6.85,
"learning_rate": 1.5770092339261285e-05,
"loss": 3.3046,
"step": 400500
},
{
"epoch": 6.86,
"learning_rate": 1.572734268125855e-05,
"loss": 3.2563,
"step": 401000
},
{
"epoch": 6.87,
"learning_rate": 1.5684593023255814e-05,
"loss": 3.2936,
"step": 401500
},
{
"epoch": 6.87,
"learning_rate": 1.564184336525308e-05,
"loss": 3.3103,
"step": 402000
},
{
"epoch": 6.88,
"learning_rate": 1.559909370725034e-05,
"loss": 3.2714,
"step": 402500
},
{
"epoch": 6.89,
"learning_rate": 1.5556344049247606e-05,
"loss": 3.3399,
"step": 403000
},
{
"epoch": 6.9,
"learning_rate": 1.551359439124487e-05,
"loss": 3.2546,
"step": 403500
},
{
"epoch": 6.91,
"learning_rate": 1.5470844733242136e-05,
"loss": 3.282,
"step": 404000
},
{
"epoch": 6.92,
"learning_rate": 1.5428180574555402e-05,
"loss": 3.2968,
"step": 404500
},
{
"epoch": 6.93,
"learning_rate": 1.5385430916552667e-05,
"loss": 3.2788,
"step": 405000
},
{
"epoch": 6.93,
"learning_rate": 1.5342681258549932e-05,
"loss": 3.2715,
"step": 405500
},
{
"epoch": 6.94,
"learning_rate": 1.5299931600547197e-05,
"loss": 3.261,
"step": 406000
},
{
"epoch": 6.95,
"learning_rate": 1.525718194254446e-05,
"loss": 3.2855,
"step": 406500
},
{
"epoch": 6.96,
"learning_rate": 1.521451778385773e-05,
"loss": 3.277,
"step": 407000
},
{
"epoch": 6.97,
"learning_rate": 1.5171853625170997e-05,
"loss": 3.3068,
"step": 407500
},
{
"epoch": 6.98,
"learning_rate": 1.5129103967168262e-05,
"loss": 3.2982,
"step": 408000
},
{
"epoch": 6.99,
"learning_rate": 1.5086354309165526e-05,
"loss": 3.2626,
"step": 408500
},
{
"epoch": 6.99,
"learning_rate": 1.504360465116279e-05,
"loss": 3.2685,
"step": 409000
},
{
"epoch": 7.0,
"learning_rate": 1.5000854993160054e-05,
"loss": 3.2705,
"step": 409500
},
{
"epoch": 7.01,
"learning_rate": 1.4958105335157319e-05,
"loss": 3.3044,
"step": 410000
},
{
"epoch": 7.02,
"learning_rate": 1.4915355677154582e-05,
"loss": 3.2707,
"step": 410500
},
{
"epoch": 7.03,
"learning_rate": 1.4872606019151847e-05,
"loss": 3.2798,
"step": 411000
},
{
"epoch": 7.04,
"learning_rate": 1.482985636114911e-05,
"loss": 3.2493,
"step": 411500
},
{
"epoch": 7.05,
"learning_rate": 1.4787106703146375e-05,
"loss": 3.2597,
"step": 412000
},
{
"epoch": 7.05,
"learning_rate": 1.4744357045143639e-05,
"loss": 3.275,
"step": 412500
},
{
"epoch": 7.06,
"learning_rate": 1.4701692886456908e-05,
"loss": 3.2734,
"step": 413000
},
{
"epoch": 7.07,
"learning_rate": 1.465902872777018e-05,
"loss": 3.2437,
"step": 413500
},
{
"epoch": 7.08,
"learning_rate": 1.4616279069767441e-05,
"loss": 3.2447,
"step": 414000
},
{
"epoch": 7.09,
"learning_rate": 1.4573529411764706e-05,
"loss": 3.293,
"step": 414500
},
{
"epoch": 7.1,
"learning_rate": 1.453077975376197e-05,
"loss": 3.2808,
"step": 415000
},
{
"epoch": 7.1,
"learning_rate": 1.4488030095759234e-05,
"loss": 3.2617,
"step": 415500
},
{
"epoch": 7.11,
"learning_rate": 1.4445280437756497e-05,
"loss": 3.2534,
"step": 416000
},
{
"epoch": 7.12,
"learning_rate": 1.4402530779753762e-05,
"loss": 3.2641,
"step": 416500
},
{
"epoch": 7.13,
"learning_rate": 1.4359781121751026e-05,
"loss": 3.2449,
"step": 417000
},
{
"epoch": 7.14,
"learning_rate": 1.431703146374829e-05,
"loss": 3.2399,
"step": 417500
},
{
"epoch": 7.15,
"learning_rate": 1.4274281805745554e-05,
"loss": 3.2933,
"step": 418000
},
{
"epoch": 7.16,
"learning_rate": 1.4231532147742819e-05,
"loss": 3.2522,
"step": 418500
},
{
"epoch": 7.16,
"learning_rate": 1.4188782489740082e-05,
"loss": 3.2584,
"step": 419000
},
{
"epoch": 7.17,
"learning_rate": 1.4146032831737347e-05,
"loss": 3.2633,
"step": 419500
},
{
"epoch": 7.18,
"learning_rate": 1.410328317373461e-05,
"loss": 3.2595,
"step": 420000
},
{
"epoch": 7.19,
"learning_rate": 1.4060533515731875e-05,
"loss": 3.233,
"step": 420500
},
{
"epoch": 7.2,
"learning_rate": 1.4017869357045143e-05,
"loss": 3.2439,
"step": 421000
},
{
"epoch": 7.21,
"learning_rate": 1.3975205198358413e-05,
"loss": 3.257,
"step": 421500
},
{
"epoch": 7.22,
"learning_rate": 1.3932455540355676e-05,
"loss": 3.2857,
"step": 422000
},
{
"epoch": 7.22,
"learning_rate": 1.3889705882352941e-05,
"loss": 3.25,
"step": 422500
},
{
"epoch": 7.23,
"learning_rate": 1.3846956224350204e-05,
"loss": 3.2634,
"step": 423000
},
{
"epoch": 7.24,
"learning_rate": 1.380420656634747e-05,
"loss": 3.2566,
"step": 423500
},
{
"epoch": 7.25,
"learning_rate": 1.3761456908344733e-05,
"loss": 3.2589,
"step": 424000
},
{
"epoch": 7.26,
"learning_rate": 1.3718707250341997e-05,
"loss": 3.284,
"step": 424500
},
{
"epoch": 7.27,
"learning_rate": 1.367595759233926e-05,
"loss": 3.2565,
"step": 425000
},
{
"epoch": 7.28,
"learning_rate": 1.3633207934336526e-05,
"loss": 3.2414,
"step": 425500
},
{
"epoch": 7.28,
"learning_rate": 1.3590543775649795e-05,
"loss": 3.2518,
"step": 426000
},
{
"epoch": 7.29,
"learning_rate": 1.3547794117647058e-05,
"loss": 3.2658,
"step": 426500
},
{
"epoch": 7.3,
"learning_rate": 1.3505044459644323e-05,
"loss": 3.262,
"step": 427000
},
{
"epoch": 7.31,
"learning_rate": 1.3462294801641587e-05,
"loss": 3.2322,
"step": 427500
},
{
"epoch": 7.32,
"learning_rate": 1.3419630642954856e-05,
"loss": 3.2504,
"step": 428000
},
{
"epoch": 7.33,
"learning_rate": 1.337688098495212e-05,
"loss": 3.2753,
"step": 428500
},
{
"epoch": 7.34,
"learning_rate": 1.3334131326949384e-05,
"loss": 3.2577,
"step": 429000
},
{
"epoch": 7.34,
"learning_rate": 1.3291381668946648e-05,
"loss": 3.2698,
"step": 429500
},
{
"epoch": 7.35,
"learning_rate": 1.3248632010943913e-05,
"loss": 3.2636,
"step": 430000
},
{
"epoch": 7.36,
"learning_rate": 1.3205882352941176e-05,
"loss": 3.249,
"step": 430500
},
{
"epoch": 7.37,
"learning_rate": 1.3163132694938441e-05,
"loss": 3.2515,
"step": 431000
},
{
"epoch": 7.38,
"learning_rate": 1.3120468536251709e-05,
"loss": 3.2606,
"step": 431500
},
{
"epoch": 7.39,
"learning_rate": 1.3077718878248974e-05,
"loss": 3.258,
"step": 432000
},
{
"epoch": 7.4,
"learning_rate": 1.3034969220246237e-05,
"loss": 3.2687,
"step": 432500
},
{
"epoch": 7.4,
"learning_rate": 1.2992219562243502e-05,
"loss": 3.2241,
"step": 433000
},
{
"epoch": 7.41,
"learning_rate": 1.2949469904240765e-05,
"loss": 3.2589,
"step": 433500
},
{
"epoch": 7.42,
"learning_rate": 1.2906805745554038e-05,
"loss": 3.2287,
"step": 434000
},
{
"epoch": 7.43,
"learning_rate": 1.28640560875513e-05,
"loss": 3.234,
"step": 434500
},
{
"epoch": 7.44,
"learning_rate": 1.2821306429548563e-05,
"loss": 3.2273,
"step": 435000
},
{
"epoch": 7.45,
"learning_rate": 1.2778642270861834e-05,
"loss": 3.2778,
"step": 435500
},
{
"epoch": 7.46,
"learning_rate": 1.2735892612859099e-05,
"loss": 3.2463,
"step": 436000
},
{
"epoch": 7.46,
"learning_rate": 1.2693142954856362e-05,
"loss": 3.2382,
"step": 436500
},
{
"epoch": 7.47,
"learning_rate": 1.2650393296853627e-05,
"loss": 3.2488,
"step": 437000
},
{
"epoch": 7.48,
"learning_rate": 1.2607729138166897e-05,
"loss": 3.254,
"step": 437500
},
{
"epoch": 7.49,
"learning_rate": 1.256497948016416e-05,
"loss": 3.1991,
"step": 438000
},
{
"epoch": 7.5,
"learning_rate": 1.2522229822161423e-05,
"loss": 3.2751,
"step": 438500
},
{
"epoch": 7.51,
"learning_rate": 1.2479480164158687e-05,
"loss": 3.2693,
"step": 439000
},
{
"epoch": 7.52,
"learning_rate": 1.2436730506155952e-05,
"loss": 3.2495,
"step": 439500
},
{
"epoch": 7.52,
"learning_rate": 1.2393980848153215e-05,
"loss": 3.2524,
"step": 440000
},
{
"epoch": 7.53,
"learning_rate": 1.235123119015048e-05,
"loss": 3.2337,
"step": 440500
},
{
"epoch": 7.54,
"learning_rate": 1.2308481532147743e-05,
"loss": 3.2655,
"step": 441000
},
{
"epoch": 7.55,
"learning_rate": 1.2265731874145008e-05,
"loss": 3.263,
"step": 441500
},
{
"epoch": 7.56,
"learning_rate": 1.2222982216142271e-05,
"loss": 3.2338,
"step": 442000
},
{
"epoch": 7.57,
"learning_rate": 1.2180232558139536e-05,
"loss": 3.2083,
"step": 442500
},
{
"epoch": 7.58,
"learning_rate": 1.21374829001368e-05,
"loss": 3.2351,
"step": 443000
},
{
"epoch": 7.58,
"learning_rate": 1.2094733242134065e-05,
"loss": 3.2481,
"step": 443500
},
{
"epoch": 7.59,
"learning_rate": 1.2051983584131328e-05,
"loss": 3.2739,
"step": 444000
},
{
"epoch": 7.6,
"learning_rate": 1.2009233926128593e-05,
"loss": 3.2697,
"step": 444500
},
{
"epoch": 7.61,
"learning_rate": 1.1966484268125854e-05,
"loss": 3.2032,
"step": 445000
},
{
"epoch": 7.62,
"learning_rate": 1.1923820109439126e-05,
"loss": 3.2565,
"step": 445500
},
{
"epoch": 7.63,
"learning_rate": 1.1881070451436389e-05,
"loss": 3.237,
"step": 446000
},
{
"epoch": 7.64,
"learning_rate": 1.1838406292749658e-05,
"loss": 3.2526,
"step": 446500
},
{
"epoch": 7.64,
"learning_rate": 1.1795656634746922e-05,
"loss": 3.2335,
"step": 447000
},
{
"epoch": 7.65,
"learning_rate": 1.1752906976744187e-05,
"loss": 3.2464,
"step": 447500
},
{
"epoch": 7.66,
"learning_rate": 1.171015731874145e-05,
"loss": 3.2647,
"step": 448000
},
{
"epoch": 7.67,
"learning_rate": 1.166749316005472e-05,
"loss": 3.2644,
"step": 448500
},
{
"epoch": 7.68,
"learning_rate": 1.162482900136799e-05,
"loss": 3.2278,
"step": 449000
},
{
"epoch": 7.69,
"learning_rate": 1.1582164842681258e-05,
"loss": 3.2506,
"step": 449500
},
{
"epoch": 7.69,
"learning_rate": 1.1539415184678523e-05,
"loss": 3.2518,
"step": 450000
},
{
"epoch": 7.7,
"learning_rate": 1.1496665526675787e-05,
"loss": 3.235,
"step": 450500
},
{
"epoch": 7.71,
"learning_rate": 1.1453915868673052e-05,
"loss": 3.2097,
"step": 451000
},
{
"epoch": 7.72,
"learning_rate": 1.1411166210670315e-05,
"loss": 3.2222,
"step": 451500
},
{
"epoch": 7.73,
"learning_rate": 1.136841655266758e-05,
"loss": 3.2005,
"step": 452000
},
{
"epoch": 7.74,
"learning_rate": 1.1325666894664843e-05,
"loss": 3.1938,
"step": 452500
},
{
"epoch": 7.75,
"learning_rate": 1.1282917236662108e-05,
"loss": 3.2184,
"step": 453000
},
{
"epoch": 7.75,
"learning_rate": 1.1240167578659371e-05,
"loss": 3.2335,
"step": 453500
},
{
"epoch": 7.76,
"learning_rate": 1.1197503419972641e-05,
"loss": 3.2127,
"step": 454000
},
{
"epoch": 7.77,
"learning_rate": 1.1154753761969904e-05,
"loss": 3.2088,
"step": 454500
},
{
"epoch": 7.78,
"learning_rate": 1.1112089603283174e-05,
"loss": 3.2448,
"step": 455000
},
{
"epoch": 7.79,
"learning_rate": 1.1069339945280439e-05,
"loss": 3.2104,
"step": 455500
},
{
"epoch": 7.8,
"learning_rate": 1.1026590287277702e-05,
"loss": 3.2223,
"step": 456000
},
{
"epoch": 7.81,
"learning_rate": 1.0983840629274967e-05,
"loss": 3.2369,
"step": 456500
},
{
"epoch": 7.81,
"learning_rate": 1.094109097127223e-05,
"loss": 3.2023,
"step": 457000
},
{
"epoch": 7.82,
"learning_rate": 1.0898341313269495e-05,
"loss": 3.2394,
"step": 457500
},
{
"epoch": 7.83,
"learning_rate": 1.0855591655266758e-05,
"loss": 3.2306,
"step": 458000
},
{
"epoch": 7.84,
"learning_rate": 1.0812841997264023e-05,
"loss": 3.2302,
"step": 458500
},
{
"epoch": 7.85,
"learning_rate": 1.0770092339261287e-05,
"loss": 3.2492,
"step": 459000
},
{
"epoch": 7.86,
"learning_rate": 1.0727342681258552e-05,
"loss": 3.1999,
"step": 459500
},
{
"epoch": 7.87,
"learning_rate": 1.0684593023255815e-05,
"loss": 3.2289,
"step": 460000
},
{
"epoch": 7.87,
"learning_rate": 1.064184336525308e-05,
"loss": 3.1947,
"step": 460500
},
{
"epoch": 7.88,
"learning_rate": 1.0599093707250341e-05,
"loss": 3.1999,
"step": 461000
},
{
"epoch": 7.89,
"learning_rate": 1.0556344049247606e-05,
"loss": 3.2143,
"step": 461500
},
{
"epoch": 7.9,
"learning_rate": 1.0513679890560876e-05,
"loss": 3.2127,
"step": 462000
},
{
"epoch": 7.91,
"learning_rate": 1.0470930232558141e-05,
"loss": 3.2414,
"step": 462500
},
{
"epoch": 7.92,
"learning_rate": 1.0428180574555404e-05,
"loss": 3.2481,
"step": 463000
},
{
"epoch": 7.93,
"learning_rate": 1.0385430916552669e-05,
"loss": 3.1932,
"step": 463500
},
{
"epoch": 7.93,
"learning_rate": 1.0342681258549932e-05,
"loss": 3.1899,
"step": 464000
},
{
"epoch": 7.94,
"learning_rate": 1.0299931600547196e-05,
"loss": 3.2225,
"step": 464500
},
{
"epoch": 7.95,
"learning_rate": 1.0257181942544459e-05,
"loss": 3.2112,
"step": 465000
},
{
"epoch": 7.96,
"learning_rate": 1.0214432284541724e-05,
"loss": 3.2088,
"step": 465500
},
{
"epoch": 7.97,
"learning_rate": 1.0171768125854993e-05,
"loss": 3.2226,
"step": 466000
},
{
"epoch": 7.98,
"learning_rate": 1.0129103967168263e-05,
"loss": 3.19,
"step": 466500
},
{
"epoch": 7.99,
"learning_rate": 1.0086354309165528e-05,
"loss": 3.2036,
"step": 467000
},
{
"epoch": 7.99,
"learning_rate": 1.0043604651162791e-05,
"loss": 3.221,
"step": 467500
},
{
"epoch": 8.0,
"learning_rate": 1.0000854993160056e-05,
"loss": 3.2074,
"step": 468000
},
{
"epoch": 8.01,
"learning_rate": 9.95810533515732e-06,
"loss": 3.1987,
"step": 468500
},
{
"epoch": 8.02,
"learning_rate": 9.915355677154584e-06,
"loss": 3.1829,
"step": 469000
},
{
"epoch": 8.03,
"learning_rate": 9.872606019151848e-06,
"loss": 3.1928,
"step": 469500
},
{
"epoch": 8.04,
"learning_rate": 9.829856361149113e-06,
"loss": 3.2033,
"step": 470000
},
{
"epoch": 8.05,
"learning_rate": 9.78719220246238e-06,
"loss": 3.2044,
"step": 470500
},
{
"epoch": 8.05,
"learning_rate": 9.744442544459645e-06,
"loss": 3.1988,
"step": 471000
},
{
"epoch": 8.06,
"learning_rate": 9.701692886456909e-06,
"loss": 3.2401,
"step": 471500
},
{
"epoch": 8.07,
"learning_rate": 9.658943228454174e-06,
"loss": 3.1829,
"step": 472000
},
{
"epoch": 8.08,
"learning_rate": 9.616193570451437e-06,
"loss": 3.1798,
"step": 472500
},
{
"epoch": 8.09,
"learning_rate": 9.573443912448702e-06,
"loss": 3.1866,
"step": 473000
},
{
"epoch": 8.1,
"learning_rate": 9.53077975376197e-06,
"loss": 3.178,
"step": 473500
},
{
"epoch": 8.11,
"learning_rate": 9.488030095759235e-06,
"loss": 3.203,
"step": 474000
},
{
"epoch": 8.11,
"learning_rate": 9.445280437756498e-06,
"loss": 3.1573,
"step": 474500
},
{
"epoch": 8.12,
"learning_rate": 9.402530779753763e-06,
"loss": 3.229,
"step": 475000
},
{
"epoch": 8.13,
"learning_rate": 9.359866621067032e-06,
"loss": 3.2164,
"step": 475500
},
{
"epoch": 8.14,
"learning_rate": 9.317202462380302e-06,
"loss": 3.2015,
"step": 476000
},
{
"epoch": 8.15,
"learning_rate": 9.274452804377567e-06,
"loss": 3.1918,
"step": 476500
},
{
"epoch": 8.16,
"learning_rate": 9.231703146374828e-06,
"loss": 3.1751,
"step": 477000
},
{
"epoch": 8.17,
"learning_rate": 9.188953488372093e-06,
"loss": 3.204,
"step": 477500
},
{
"epoch": 8.17,
"learning_rate": 9.146203830369357e-06,
"loss": 3.2142,
"step": 478000
},
{
"epoch": 8.18,
"learning_rate": 9.103454172366622e-06,
"loss": 3.1818,
"step": 478500
},
{
"epoch": 8.19,
"learning_rate": 9.060704514363885e-06,
"loss": 3.1766,
"step": 479000
},
{
"epoch": 8.2,
"learning_rate": 9.01795485636115e-06,
"loss": 3.2069,
"step": 479500
},
{
"epoch": 8.21,
"learning_rate": 8.975205198358413e-06,
"loss": 3.2092,
"step": 480000
},
{
"epoch": 8.22,
"learning_rate": 8.932455540355678e-06,
"loss": 3.1638,
"step": 480500
},
{
"epoch": 8.23,
"learning_rate": 8.889705882352941e-06,
"loss": 3.1489,
"step": 481000
},
{
"epoch": 8.23,
"learning_rate": 8.846956224350206e-06,
"loss": 3.2052,
"step": 481500
},
{
"epoch": 8.24,
"learning_rate": 8.80420656634747e-06,
"loss": 3.2058,
"step": 482000
},
{
"epoch": 8.25,
"learning_rate": 8.761542407660739e-06,
"loss": 3.1677,
"step": 482500
},
{
"epoch": 8.26,
"learning_rate": 8.718792749658002e-06,
"loss": 3.1631,
"step": 483000
},
{
"epoch": 8.27,
"learning_rate": 8.676043091655267e-06,
"loss": 3.1883,
"step": 483500
},
{
"epoch": 8.28,
"learning_rate": 8.633378932968537e-06,
"loss": 3.2155,
"step": 484000
},
{
"epoch": 8.28,
"learning_rate": 8.5906292749658e-06,
"loss": 3.1711,
"step": 484500
},
{
"epoch": 8.29,
"learning_rate": 8.547965116279071e-06,
"loss": 3.1825,
"step": 485000
},
{
"epoch": 8.3,
"learning_rate": 8.505215458276335e-06,
"loss": 3.2052,
"step": 485500
},
{
"epoch": 8.31,
"learning_rate": 8.4624658002736e-06,
"loss": 3.2135,
"step": 486000
},
{
"epoch": 8.32,
"learning_rate": 8.419716142270863e-06,
"loss": 3.181,
"step": 486500
},
{
"epoch": 8.33,
"learning_rate": 8.376966484268126e-06,
"loss": 3.1724,
"step": 487000
},
{
"epoch": 8.34,
"learning_rate": 8.3343878248974e-06,
"loss": 3.1972,
"step": 487500
},
{
"epoch": 8.34,
"learning_rate": 8.291638166894665e-06,
"loss": 3.2148,
"step": 488000
},
{
"epoch": 8.35,
"learning_rate": 8.248888508891928e-06,
"loss": 3.2156,
"step": 488500
},
{
"epoch": 8.36,
"learning_rate": 8.206138850889193e-06,
"loss": 3.1812,
"step": 489000
},
{
"epoch": 8.37,
"learning_rate": 8.163389192886457e-06,
"loss": 3.2076,
"step": 489500
},
{
"epoch": 8.38,
"learning_rate": 8.120639534883722e-06,
"loss": 3.2092,
"step": 490000
},
{
"epoch": 8.39,
"learning_rate": 8.077889876880985e-06,
"loss": 3.173,
"step": 490500
},
{
"epoch": 8.4,
"learning_rate": 8.03514021887825e-06,
"loss": 3.1829,
"step": 491000
},
{
"epoch": 8.4,
"learning_rate": 7.992390560875513e-06,
"loss": 3.1571,
"step": 491500
},
{
"epoch": 8.41,
"learning_rate": 7.949726402188783e-06,
"loss": 3.188,
"step": 492000
},
{
"epoch": 8.42,
"learning_rate": 7.906976744186048e-06,
"loss": 3.1643,
"step": 492500
},
{
"epoch": 8.43,
"learning_rate": 7.864227086183311e-06,
"loss": 3.1916,
"step": 493000
},
{
"epoch": 8.44,
"learning_rate": 7.821477428180576e-06,
"loss": 3.1885,
"step": 493500
},
{
"epoch": 8.45,
"learning_rate": 7.778727770177839e-06,
"loss": 3.1953,
"step": 494000
},
{
"epoch": 8.46,
"learning_rate": 7.735978112175102e-06,
"loss": 3.2119,
"step": 494500
},
{
"epoch": 8.46,
"learning_rate": 7.693228454172367e-06,
"loss": 3.18,
"step": 495000
},
{
"epoch": 8.47,
"learning_rate": 7.65047879616963e-06,
"loss": 3.1932,
"step": 495500
},
{
"epoch": 8.48,
"learning_rate": 7.607729138166896e-06,
"loss": 3.163,
"step": 496000
},
{
"epoch": 8.49,
"learning_rate": 7.565064979480164e-06,
"loss": 3.1755,
"step": 496500
},
{
"epoch": 8.5,
"learning_rate": 7.522315321477428e-06,
"loss": 3.1608,
"step": 497000
},
{
"epoch": 8.51,
"learning_rate": 7.479651162790698e-06,
"loss": 3.2063,
"step": 497500
},
{
"epoch": 8.52,
"learning_rate": 7.436901504787962e-06,
"loss": 3.1997,
"step": 498000
},
{
"epoch": 8.52,
"learning_rate": 7.394151846785226e-06,
"loss": 3.2201,
"step": 498500
},
{
"epoch": 8.53,
"learning_rate": 7.35140218878249e-06,
"loss": 3.2128,
"step": 499000
},
{
"epoch": 8.54,
"learning_rate": 7.308652530779754e-06,
"loss": 3.174,
"step": 499500
},
{
"epoch": 8.55,
"learning_rate": 7.2659028727770185e-06,
"loss": 3.1539,
"step": 500000
},
{
"epoch": 8.56,
"learning_rate": 7.223153214774283e-06,
"loss": 3.1559,
"step": 500500
},
{
"epoch": 8.57,
"learning_rate": 7.180403556771547e-06,
"loss": 3.18,
"step": 501000
},
{
"epoch": 8.58,
"learning_rate": 7.137739398084815e-06,
"loss": 3.1679,
"step": 501500
},
{
"epoch": 8.58,
"learning_rate": 7.0949897400820795e-06,
"loss": 3.1785,
"step": 502000
},
{
"epoch": 8.59,
"learning_rate": 7.05232558139535e-06,
"loss": 3.1777,
"step": 502500
},
{
"epoch": 8.6,
"learning_rate": 7.009575923392612e-06,
"loss": 3.18,
"step": 503000
},
{
"epoch": 8.61,
"learning_rate": 6.966826265389876e-06,
"loss": 3.1958,
"step": 503500
},
{
"epoch": 8.62,
"learning_rate": 6.9240766073871405e-06,
"loss": 3.2005,
"step": 504000
},
{
"epoch": 8.63,
"learning_rate": 6.881412448700411e-06,
"loss": 3.1982,
"step": 504500
},
{
"epoch": 8.64,
"learning_rate": 6.838662790697675e-06,
"loss": 3.1924,
"step": 505000
},
{
"epoch": 8.64,
"learning_rate": 6.795913132694939e-06,
"loss": 3.1817,
"step": 505500
},
{
"epoch": 8.65,
"learning_rate": 6.753163474692203e-06,
"loss": 3.2,
"step": 506000
},
{
"epoch": 8.66,
"learning_rate": 6.7104138166894665e-06,
"loss": 3.1722,
"step": 506500
},
{
"epoch": 8.67,
"learning_rate": 6.667664158686731e-06,
"loss": 3.1811,
"step": 507000
},
{
"epoch": 8.68,
"learning_rate": 6.624914500683995e-06,
"loss": 3.1592,
"step": 507500
},
{
"epoch": 8.69,
"learning_rate": 6.582164842681259e-06,
"loss": 3.2065,
"step": 508000
},
{
"epoch": 8.7,
"learning_rate": 6.539415184678523e-06,
"loss": 3.1703,
"step": 508500
},
{
"epoch": 8.7,
"learning_rate": 6.496665526675787e-06,
"loss": 3.1724,
"step": 509000
},
{
"epoch": 8.71,
"learning_rate": 6.453915868673051e-06,
"loss": 3.1255,
"step": 509500
},
{
"epoch": 8.72,
"learning_rate": 6.4111662106703145e-06,
"loss": 3.1615,
"step": 510000
},
{
"epoch": 8.73,
"learning_rate": 6.368502051983584e-06,
"loss": 3.1938,
"step": 510500
},
{
"epoch": 8.74,
"learning_rate": 6.325752393980848e-06,
"loss": 3.142,
"step": 511000
},
{
"epoch": 8.75,
"learning_rate": 6.283002735978112e-06,
"loss": 3.1953,
"step": 511500
},
{
"epoch": 8.76,
"learning_rate": 6.240253077975376e-06,
"loss": 3.1544,
"step": 512000
},
{
"epoch": 8.76,
"learning_rate": 6.1975034199726405e-06,
"loss": 3.1806,
"step": 512500
},
{
"epoch": 8.77,
"learning_rate": 6.154753761969905e-06,
"loss": 3.1752,
"step": 513000
},
{
"epoch": 8.78,
"learning_rate": 6.1121751025991795e-06,
"loss": 3.1478,
"step": 513500
},
{
"epoch": 8.79,
"learning_rate": 6.069425444596444e-06,
"loss": 3.1946,
"step": 514000
},
{
"epoch": 8.8,
"learning_rate": 6.026675786593707e-06,
"loss": 3.1854,
"step": 514500
},
{
"epoch": 8.81,
"learning_rate": 5.983926128590971e-06,
"loss": 3.1483,
"step": 515000
},
{
"epoch": 8.81,
"learning_rate": 5.941176470588235e-06,
"loss": 3.1759,
"step": 515500
},
{
"epoch": 8.82,
"learning_rate": 5.8985123119015054e-06,
"loss": 3.1728,
"step": 516000
},
{
"epoch": 8.83,
"learning_rate": 5.8557626538987696e-06,
"loss": 3.1681,
"step": 516500
},
{
"epoch": 8.84,
"learning_rate": 5.813012995896034e-06,
"loss": 3.1593,
"step": 517000
},
{
"epoch": 8.85,
"learning_rate": 5.770263337893298e-06,
"loss": 3.1827,
"step": 517500
},
{
"epoch": 8.86,
"learning_rate": 5.727513679890561e-06,
"loss": 3.2048,
"step": 518000
},
{
"epoch": 8.87,
"learning_rate": 5.684764021887825e-06,
"loss": 3.1656,
"step": 518500
},
{
"epoch": 8.87,
"learning_rate": 5.642014363885089e-06,
"loss": 3.1457,
"step": 519000
},
{
"epoch": 8.88,
"learning_rate": 5.5992647058823535e-06,
"loss": 3.1451,
"step": 519500
},
{
"epoch": 8.89,
"learning_rate": 5.556515047879618e-06,
"loss": 3.1706,
"step": 520000
},
{
"epoch": 8.9,
"learning_rate": 5.513765389876882e-06,
"loss": 3.1531,
"step": 520500
},
{
"epoch": 8.91,
"learning_rate": 5.471101231190151e-06,
"loss": 3.1532,
"step": 521000
},
{
"epoch": 8.92,
"learning_rate": 5.4283515731874145e-06,
"loss": 3.1407,
"step": 521500
},
{
"epoch": 8.93,
"learning_rate": 5.385601915184679e-06,
"loss": 3.1875,
"step": 522000
},
{
"epoch": 8.93,
"learning_rate": 5.342852257181943e-06,
"loss": 3.1943,
"step": 522500
},
{
"epoch": 8.94,
"learning_rate": 5.300102599179207e-06,
"loss": 3.1857,
"step": 523000
},
{
"epoch": 8.95,
"learning_rate": 5.257352941176471e-06,
"loss": 3.1714,
"step": 523500
},
{
"epoch": 8.96,
"learning_rate": 5.214603283173735e-06,
"loss": 3.1704,
"step": 524000
},
{
"epoch": 8.97,
"learning_rate": 5.171853625170999e-06,
"loss": 3.134,
"step": 524500
},
{
"epoch": 8.98,
"learning_rate": 5.129103967168263e-06,
"loss": 3.1762,
"step": 525000
},
{
"epoch": 8.99,
"learning_rate": 5.086439808481532e-06,
"loss": 3.1784,
"step": 525500
},
{
"epoch": 8.99,
"learning_rate": 5.043690150478796e-06,
"loss": 3.1301,
"step": 526000
},
{
"epoch": 9.0,
"learning_rate": 5.00094049247606e-06,
"loss": 3.1415,
"step": 526500
},
{
"epoch": 9.01,
"learning_rate": 4.95827633378933e-06,
"loss": 3.1466,
"step": 527000
},
{
"epoch": 9.02,
"learning_rate": 4.915526675786594e-06,
"loss": 3.1724,
"step": 527500
},
{
"epoch": 9.03,
"learning_rate": 4.872777017783858e-06,
"loss": 3.1706,
"step": 528000
},
{
"epoch": 9.04,
"learning_rate": 4.8301128590971275e-06,
"loss": 3.1396,
"step": 528500
},
{
"epoch": 9.05,
"learning_rate": 4.787363201094392e-06,
"loss": 3.1442,
"step": 529000
},
{
"epoch": 9.05,
"learning_rate": 4.744613543091656e-06,
"loss": 3.16,
"step": 529500
},
{
"epoch": 9.06,
"learning_rate": 4.70186388508892e-06,
"loss": 3.1694,
"step": 530000
},
{
"epoch": 9.07,
"learning_rate": 4.659114227086184e-06,
"loss": 3.1395,
"step": 530500
},
{
"epoch": 9.08,
"learning_rate": 4.616364569083448e-06,
"loss": 3.1536,
"step": 531000
},
{
"epoch": 9.09,
"learning_rate": 4.573614911080711e-06,
"loss": 3.1339,
"step": 531500
},
{
"epoch": 9.1,
"learning_rate": 4.5308652530779755e-06,
"loss": 3.1808,
"step": 532000
},
{
"epoch": 9.11,
"learning_rate": 4.48811559507524e-06,
"loss": 3.1568,
"step": 532500
},
{
"epoch": 9.11,
"learning_rate": 4.445365937072504e-06,
"loss": 3.1689,
"step": 533000
},
{
"epoch": 9.12,
"learning_rate": 4.402616279069768e-06,
"loss": 3.1291,
"step": 533500
},
{
"epoch": 9.13,
"learning_rate": 4.359866621067032e-06,
"loss": 3.134,
"step": 534000
},
{
"epoch": 9.14,
"learning_rate": 4.317116963064296e-06,
"loss": 3.1325,
"step": 534500
},
{
"epoch": 9.15,
"learning_rate": 4.27453830369357e-06,
"loss": 3.1295,
"step": 535000
},
{
"epoch": 9.16,
"learning_rate": 4.2318741450068404e-06,
"loss": 3.1305,
"step": 535500
},
{
"epoch": 9.17,
"learning_rate": 4.1891244870041046e-06,
"loss": 3.1508,
"step": 536000
},
{
"epoch": 9.17,
"learning_rate": 4.146374829001369e-06,
"loss": 3.1902,
"step": 536500
},
{
"epoch": 9.18,
"learning_rate": 4.103625170998633e-06,
"loss": 3.1244,
"step": 537000
},
{
"epoch": 9.19,
"learning_rate": 4.060875512995897e-06,
"loss": 3.157,
"step": 537500
},
{
"epoch": 9.2,
"learning_rate": 4.01812585499316e-06,
"loss": 3.1585,
"step": 538000
},
{
"epoch": 9.21,
"learning_rate": 3.975376196990424e-06,
"loss": 3.1007,
"step": 538500
},
{
"epoch": 9.22,
"learning_rate": 3.9326265389876885e-06,
"loss": 3.1622,
"step": 539000
},
{
"epoch": 9.23,
"learning_rate": 3.8898768809849526e-06,
"loss": 3.1682,
"step": 539500
},
{
"epoch": 9.23,
"learning_rate": 3.847127222982217e-06,
"loss": 3.1487,
"step": 540000
},
{
"epoch": 9.24,
"learning_rate": 3.804548563611491e-06,
"loss": 3.1649,
"step": 540500
},
{
"epoch": 9.25,
"learning_rate": 3.7617989056087552e-06,
"loss": 3.1929,
"step": 541000
},
{
"epoch": 9.26,
"learning_rate": 3.7190492476060194e-06,
"loss": 3.1339,
"step": 541500
},
{
"epoch": 9.27,
"learning_rate": 3.676299589603283e-06,
"loss": 3.143,
"step": 542000
},
{
"epoch": 9.28,
"learning_rate": 3.633549931600547e-06,
"loss": 3.1158,
"step": 542500
},
{
"epoch": 9.29,
"learning_rate": 3.5908002735978113e-06,
"loss": 3.1746,
"step": 543000
},
{
"epoch": 9.29,
"learning_rate": 3.5480506155950754e-06,
"loss": 3.1143,
"step": 543500
},
{
"epoch": 9.3,
"learning_rate": 3.5053009575923396e-06,
"loss": 3.156,
"step": 544000
},
{
"epoch": 9.31,
"learning_rate": 3.4625512995896033e-06,
"loss": 3.1217,
"step": 544500
},
{
"epoch": 9.32,
"learning_rate": 3.4198871409028728e-06,
"loss": 3.1251,
"step": 545000
},
{
"epoch": 9.33,
"learning_rate": 3.377137482900137e-06,
"loss": 3.1339,
"step": 545500
},
{
"epoch": 9.34,
"learning_rate": 3.334387824897401e-06,
"loss": 3.176,
"step": 546000
},
{
"epoch": 9.35,
"learning_rate": 3.291638166894665e-06,
"loss": 3.1378,
"step": 546500
},
{
"epoch": 9.35,
"learning_rate": 3.2489740082079346e-06,
"loss": 3.1107,
"step": 547000
},
{
"epoch": 9.36,
"learning_rate": 3.2062243502051983e-06,
"loss": 3.1422,
"step": 547500
},
{
"epoch": 9.37,
"learning_rate": 3.1634746922024624e-06,
"loss": 3.1579,
"step": 548000
},
{
"epoch": 9.38,
"learning_rate": 3.120810533515732e-06,
"loss": 3.1277,
"step": 548500
},
{
"epoch": 9.39,
"learning_rate": 3.078060875512996e-06,
"loss": 3.1489,
"step": 549000
},
{
"epoch": 9.4,
"learning_rate": 3.03531121751026e-06,
"loss": 3.1494,
"step": 549500
},
{
"epoch": 9.4,
"learning_rate": 2.992561559507524e-06,
"loss": 3.1361,
"step": 550000
},
{
"epoch": 9.41,
"learning_rate": 2.949811901504788e-06,
"loss": 3.1434,
"step": 550500
},
{
"epoch": 9.42,
"learning_rate": 2.907062243502052e-06,
"loss": 3.1542,
"step": 551000
},
{
"epoch": 9.43,
"learning_rate": 2.8643125854993162e-06,
"loss": 3.1341,
"step": 551500
},
{
"epoch": 9.44,
"learning_rate": 2.82156292749658e-06,
"loss": 3.1375,
"step": 552000
},
{
"epoch": 9.45,
"learning_rate": 2.778813269493844e-06,
"loss": 3.1461,
"step": 552500
},
{
"epoch": 9.46,
"learning_rate": 2.736063611491108e-06,
"loss": 3.1571,
"step": 553000
},
{
"epoch": 9.46,
"learning_rate": 2.6933139534883723e-06,
"loss": 3.1532,
"step": 553500
},
{
"epoch": 9.47,
"learning_rate": 2.650649794801642e-06,
"loss": 3.1184,
"step": 554000
},
{
"epoch": 9.48,
"learning_rate": 2.607985636114911e-06,
"loss": 3.1441,
"step": 554500
},
{
"epoch": 9.49,
"learning_rate": 2.565235978112175e-06,
"loss": 3.137,
"step": 555000
},
{
"epoch": 9.5,
"learning_rate": 2.522486320109439e-06,
"loss": 3.1563,
"step": 555500
},
{
"epoch": 9.51,
"learning_rate": 2.4797366621067032e-06,
"loss": 3.1726,
"step": 556000
},
{
"epoch": 9.52,
"learning_rate": 2.436987004103967e-06,
"loss": 3.14,
"step": 556500
},
{
"epoch": 9.52,
"learning_rate": 2.394237346101231e-06,
"loss": 3.1285,
"step": 557000
},
{
"epoch": 9.53,
"learning_rate": 2.3516586867305063e-06,
"loss": 3.1474,
"step": 557500
},
{
"epoch": 9.54,
"learning_rate": 2.30890902872777e-06,
"loss": 3.1652,
"step": 558000
},
{
"epoch": 9.55,
"learning_rate": 2.266159370725034e-06,
"loss": 3.1567,
"step": 558500
},
{
"epoch": 9.56,
"learning_rate": 2.2234097127222983e-06,
"loss": 3.1305,
"step": 559000
},
{
"epoch": 9.57,
"learning_rate": 2.1806600547195624e-06,
"loss": 3.1402,
"step": 559500
},
{
"epoch": 9.58,
"learning_rate": 2.137910396716826e-06,
"loss": 3.159,
"step": 560000
},
{
"epoch": 9.58,
"learning_rate": 2.0951607387140902e-06,
"loss": 3.1684,
"step": 560500
},
{
"epoch": 9.59,
"learning_rate": 2.0524110807113544e-06,
"loss": 3.1483,
"step": 561000
},
{
"epoch": 9.6,
"learning_rate": 2.0096614227086185e-06,
"loss": 3.1277,
"step": 561500
},
{
"epoch": 9.61,
"learning_rate": 1.9669117647058826e-06,
"loss": 3.1447,
"step": 562000
},
{
"epoch": 9.62,
"learning_rate": 1.9241621067031463e-06,
"loss": 3.1503,
"step": 562500
},
{
"epoch": 9.63,
"learning_rate": 1.8814124487004106e-06,
"loss": 3.1172,
"step": 563000
},
{
"epoch": 9.64,
"learning_rate": 1.8386627906976743e-06,
"loss": 3.1413,
"step": 563500
},
{
"epoch": 9.64,
"learning_rate": 1.795998632010944e-06,
"loss": 3.1049,
"step": 564000
},
{
"epoch": 9.65,
"learning_rate": 1.7533344733242133e-06,
"loss": 3.1588,
"step": 564500
},
{
"epoch": 9.66,
"learning_rate": 1.7105848153214774e-06,
"loss": 3.1232,
"step": 565000
},
{
"epoch": 9.67,
"learning_rate": 1.6678351573187416e-06,
"loss": 3.1404,
"step": 565500
},
{
"epoch": 9.68,
"learning_rate": 1.6250854993160057e-06,
"loss": 3.1532,
"step": 566000
},
{
"epoch": 9.69,
"learning_rate": 1.5823358413132694e-06,
"loss": 3.111,
"step": 566500
},
{
"epoch": 9.7,
"learning_rate": 1.5395861833105335e-06,
"loss": 3.1191,
"step": 567000
},
{
"epoch": 9.7,
"learning_rate": 1.4968365253077976e-06,
"loss": 3.1338,
"step": 567500
},
{
"epoch": 9.71,
"learning_rate": 1.4540868673050615e-06,
"loss": 3.116,
"step": 568000
},
{
"epoch": 9.72,
"learning_rate": 1.4113372093023257e-06,
"loss": 3.138,
"step": 568500
},
{
"epoch": 9.73,
"learning_rate": 1.3685875512995896e-06,
"loss": 3.1462,
"step": 569000
},
{
"epoch": 9.74,
"learning_rate": 1.3258378932968537e-06,
"loss": 3.1099,
"step": 569500
},
{
"epoch": 9.75,
"learning_rate": 1.2830882352941176e-06,
"loss": 3.1449,
"step": 570000
},
{
"epoch": 9.76,
"learning_rate": 1.2403385772913817e-06,
"loss": 3.1476,
"step": 570500
},
{
"epoch": 9.76,
"learning_rate": 1.1977599179206568e-06,
"loss": 3.091,
"step": 571000
},
{
"epoch": 9.77,
"learning_rate": 1.1550102599179207e-06,
"loss": 3.1714,
"step": 571500
},
{
"epoch": 9.78,
"learning_rate": 1.1122606019151848e-06,
"loss": 3.1462,
"step": 572000
},
{
"epoch": 9.79,
"learning_rate": 1.0695109439124488e-06,
"loss": 3.0987,
"step": 572500
},
{
"epoch": 9.8,
"learning_rate": 1.0267612859097129e-06,
"loss": 3.1309,
"step": 573000
},
{
"epoch": 9.81,
"learning_rate": 9.840116279069768e-07,
"loss": 3.1129,
"step": 573500
},
{
"epoch": 9.82,
"learning_rate": 9.412619699042408e-07,
"loss": 3.124,
"step": 574000
},
{
"epoch": 9.82,
"learning_rate": 8.985978112175103e-07,
"loss": 3.1447,
"step": 574500
},
{
"epoch": 9.83,
"learning_rate": 8.559336525307799e-07,
"loss": 3.1516,
"step": 575000
},
{
"epoch": 9.84,
"learning_rate": 8.131839945280438e-07,
"loss": 3.1407,
"step": 575500
},
{
"epoch": 9.85,
"learning_rate": 7.705198358413133e-07,
"loss": 3.1609,
"step": 576000
},
{
"epoch": 9.86,
"learning_rate": 7.277701778385773e-07,
"loss": 3.1213,
"step": 576500
},
{
"epoch": 9.87,
"learning_rate": 6.850205198358413e-07,
"loss": 3.1234,
"step": 577000
},
{
"epoch": 9.88,
"learning_rate": 6.422708618331053e-07,
"loss": 3.1042,
"step": 577500
},
{
"epoch": 9.88,
"learning_rate": 5.995212038303694e-07,
"loss": 3.1261,
"step": 578000
},
{
"epoch": 9.89,
"learning_rate": 5.567715458276334e-07,
"loss": 3.1196,
"step": 578500
},
{
"epoch": 9.9,
"learning_rate": 5.140218878248974e-07,
"loss": 3.1622,
"step": 579000
},
{
"epoch": 9.91,
"learning_rate": 4.712722298221614e-07,
"loss": 3.1254,
"step": 579500
},
{
"epoch": 9.92,
"learning_rate": 4.2852257181942543e-07,
"loss": 3.1291,
"step": 580000
},
{
"epoch": 9.93,
"learning_rate": 3.857729138166895e-07,
"loss": 3.0939,
"step": 580500
},
{
"epoch": 9.94,
"learning_rate": 3.430232558139535e-07,
"loss": 3.1597,
"step": 581000
},
{
"epoch": 9.94,
"learning_rate": 3.002735978112175e-07,
"loss": 3.1454,
"step": 581500
},
{
"epoch": 9.95,
"learning_rate": 2.575239398084815e-07,
"loss": 3.1569,
"step": 582000
},
{
"epoch": 9.96,
"learning_rate": 2.1485978112175104e-07,
"loss": 3.1016,
"step": 582500
},
{
"epoch": 9.97,
"learning_rate": 1.7219562243502053e-07,
"loss": 3.1268,
"step": 583000
},
{
"epoch": 9.98,
"learning_rate": 1.2944596443228454e-07,
"loss": 3.1422,
"step": 583500
},
{
"epoch": 9.99,
"learning_rate": 8.669630642954856e-08,
"loss": 3.1132,
"step": 584000
},
{
"epoch": 9.99,
"learning_rate": 4.394664842681259e-08,
"loss": 3.1218,
"step": 584500
},
{
"epoch": 10.0,
"step": 584800,
"total_flos": 5.454351260793078e+18,
"train_loss": 3.7000725246486845,
"train_runtime": 189673.1431,
"train_samples_per_second": 30.832,
"train_steps_per_second": 3.083
}
],
"logging_steps": 500,
"max_steps": 584800,
"num_train_epochs": 10,
"save_steps": 100000,
"total_flos": 5.454351260793078e+18,
"trial_name": null,
"trial_params": null
}